Read the instruction and think carefully about how to develop R code to answer each questions using the following database.
Task 00: Preparation by
rm(list=ls(all.names = T))
curPath <- dirname(rstudioapi::getSourceEditorContext()$path)
setwd(curPath)
Task 01: import skus.txt using ‘Import Dataset’ (Upper Right Pannel) and convert it into data.table class, named sku.DT.
require(data.table)
sku.DT <- as.data.table(skus)
Typically, R user prefers load data using text command:
skus <- read.csv("resources/skus.txt",sep="\t",stringsAsFactors = F)
sku.DT <- as.data.table(skus)
Task 02: summarize sku.DT using ‘str()’,‘summary()’, ‘head()’, ‘tail()’
str(sku.DT)
## Classes 'data.table' and 'data.frame': 21097 obs. of 12 variables:
## $ SkuID : chr "AAG47294" "AAG47342" "AAG47344" "AAG47346" ...
## $ VendorID : chr "AAG" "AAG" "AAG" "AAG" ...
## $ Description : chr "PLNR,\"FAT LITTLE\",BK" "ORGNZR,LTHR,SNAP,8.5X11,BK" "ORGNZR,VYL,ZIP,3.75X6.75,BK" "ORGNZR,SMLTH,ZIP,5.5X8.5,BK" ...
## $ UnitLength : num 6.2 11.9 8.6 10.3 13.7 14 6.7 8.5 11 6.7 ...
## $ UnitWidth : num 6.1 12 5.9 8.1 12 11.7 3.7 5.5 8.5 3.7 ...
## $ UnitHeight : num 1.7 2.4 1.5 1.9 2.5 3.2 0.1 0.1 0.2 0.1 ...
## $ UnitWeight : num 0.9 2.91 1.15 1.48 3.48 3.27 0.1 0.2 0.4 0.1 ...
## $ CaseQuantity: int 3 6 6 6 3 6 24 24 24 24 ...
## $ CaseLength : num 6.4 11.7 13.8 11.3 14.8 18 7.6 9.3 11.8 7.6 ...
## $ CaseWidth : num 5.5 11.5 8.1 8.8 11.8 ...
## $ CaseHeight : num 6.8 13 9.9 11.4 11.2 ...
## $ CaseWeight : num 3.3 16.7 8.1 9.76 17.91 ...
## - attr(*, ".internal.selfref")=<externalptr>
summary(sku.DT)
## SkuID VendorID Description UnitLength
## Length:21097 Length:21097 Length:21097 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.: 6.20
## Mode :character Mode :character Mode :character Median : 11.10
## Mean : 12.34
## 3rd Qu.: 13.90
## Max. :105.50
## UnitWidth UnitHeight UnitWeight CaseQuantity
## Min. : 0.000 Min. : 0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 3.700 1st Qu.: 0.700 1st Qu.: 0.250 1st Qu.: 3.00
## Median : 7.800 Median : 1.940 Median : 1.000 Median : 6.00
## Mean : 8.135 Mean : 3.441 Mean : 4.179 Mean : 37.63
## 3rd Qu.:10.200 3rd Qu.: 4.000 3rd Qu.: 3.550 3rd Qu.: 20.00
## Max. :74.000 Max. :100.500 Max. :202.000 Max. :1728.00
## CaseLength CaseWidth CaseHeight CaseWeight
## Min. : 0.00 Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.: 10.70 1st Qu.: 7.80 1st Qu.: 4.500 1st Qu.: 4.20
## Median : 13.60 Median : 10.40 Median : 8.100 Median : 10.30
## Mean : 15.32 Mean : 10.80 Mean : 8.575 Mean : 14.95
## 3rd Qu.: 18.25 3rd Qu.: 12.75 3rd Qu.: 12.000 3rd Qu.: 21.60
## Max. :123.00 Max. :112.50 Max. :147.000 Max. :202.00
## for data.table class only
sku.DT
## SkuID VendorID Description UnitLength UnitWidth
## 1: AAG47294 AAG PLNR,"FAT LITTLE",BK 6.2 6.1
## 2: AAG47342 AAG ORGNZR,LTHR,SNAP,8.5X11,BK 11.9 12.0
## 3: AAG47344 AAG ORGNZR,VYL,ZIP,3.75X6.75,BK 8.6 5.9
## 4: AAG47346 AAG ORGNZR,SMLTH,ZIP,5.5X8.5,BK 10.3 8.1
## 5: AAG47348 AAG ORGNZR,VNYL,ZIP,8.5X11,BK 13.7 12.0
## ---
## 21093: ZPC85412 ZPC REFILL,PEN,F,1.0MM,BK 7.5 2.2
## 21094: ZPC85422 ZPC REFILL,PEN,F,1.0MM,BE 7.5 2.2
## 21095: ZPC85512 ZPC REFILL,F301,BK,2 rfls/EA 7.6 2.3
## 21096: ZPC85522 ZPC REFILL,PEN,F,.7MM,BE 7.5 2.3
## 21097: ZPC87012 ZPC REFILL,JF,BK, 2rfls per EA 7.5 2.2
## UnitHeight UnitWeight CaseQuantity CaseLength CaseWidth CaseHeight
## 1: 1.7 0.90 3 6.40 5.50 6.80
## 2: 2.4 2.91 6 11.70 11.50 13.00
## 3: 1.5 1.15 6 13.80 8.10 9.90
## 4: 1.9 1.48 6 11.30 8.80 11.40
## 5: 2.5 3.48 3 14.80 11.80 11.20
## ---
## 21093: 0.2 0.02 144 9.88 6.25 8.19
## 21094: 0.3 0.10 144 9.88 6.25 8.19
## 21095: 0.5 0.10 144 9.88 6.25 8.19
## 21096: 0.1 0.01 0 0.00 0.00 0.00
## 21097: 0.1 0.10 144 10.40 6.70 9.00
## CaseWeight
## 1: 3.30
## 2: 16.70
## 3: 8.10
## 4: 9.76
## 5: 17.91
## ---
## 21093: 4.48
## 21094: 4.48
## 21095: 4.48
## 21096: 0.00
## 21097: 5.10
head(sku.DT)
## SkuID VendorID Description UnitLength UnitWidth
## 1: AAG47294 AAG PLNR,"FAT LITTLE",BK 6.2 6.1
## 2: AAG47342 AAG ORGNZR,LTHR,SNAP,8.5X11,BK 11.9 12.0
## 3: AAG47344 AAG ORGNZR,VYL,ZIP,3.75X6.75,BK 8.6 5.9
## 4: AAG47346 AAG ORGNZR,SMLTH,ZIP,5.5X8.5,BK 10.3 8.1
## 5: AAG47348 AAG ORGNZR,VNYL,ZIP,8.5X11,BK 13.7 12.0
## 6: AAG47358 AAG ORGNZR,LTHR,ZIP,8.5X11,BK 14.0 11.7
## UnitHeight UnitWeight CaseQuantity CaseLength CaseWidth CaseHeight
## 1: 1.7 0.90 3 6.4 5.50 6.80
## 2: 2.4 2.91 6 11.7 11.50 13.00
## 3: 1.5 1.15 6 13.8 8.10 9.90
## 4: 1.9 1.48 6 11.3 8.80 11.40
## 5: 2.5 3.48 3 14.8 11.80 11.20
## 6: 3.2 3.27 6 18.0 13.13 15.25
## CaseWeight
## 1: 3.30
## 2: 16.70
## 3: 8.10
## 4: 9.76
## 5: 17.91
## 6: 28.25
tail(sku.DT)
## SkuID VendorID Description UnitLength UnitWidth UnitHeight
## 1: ZPC84222 ZPC REFILL,PEN,K,1.0MM,BE 7.5 2.3 0.1
## 2: ZPC85412 ZPC REFILL,PEN,F,1.0MM,BK 7.5 2.2 0.2
## 3: ZPC85422 ZPC REFILL,PEN,F,1.0MM,BE 7.5 2.2 0.3
## 4: ZPC85512 ZPC REFILL,F301,BK,2 rfls/EA 7.6 2.3 0.5
## 5: ZPC85522 ZPC REFILL,PEN,F,.7MM,BE 7.5 2.3 0.1
## 6: ZPC87012 ZPC REFILL,JF,BK, 2rfls per EA 7.5 2.2 0.1
## UnitWeight CaseQuantity CaseLength CaseWidth CaseHeight CaseWeight
## 1: 0.06 12 7.60 2.50 2.00 0.25
## 2: 0.02 144 9.88 6.25 8.19 4.48
## 3: 0.10 144 9.88 6.25 8.19 4.48
## 4: 0.10 144 9.88 6.25 8.19 4.48
## 5: 0.01 0 0.00 0.00 0.00 0.00
## 6: 0.10 144 10.40 6.70 9.00 5.10
print(summarytools::freq(sku.DT$VendorID),method="browser")
print(summarytools::dfSummary(sku.DT),method="browser")
Task 03: accessing unconditional data as matrix and list
sku.DT[3,1]
## SkuID
## 1: AAG47344
sku.DT$SkuID[3]
## [1] "AAG47344"
sku.DT[[1]][3]
## [1] "AAG47344"
which(is.na(sku.DT),arr.ind = T)
## row col
View(unique.data.frame(sku.DT))
which(duplicated(sku.DT))
## integer(0)
Task 04: accessing conditional data / select query
head(sku.DT[sku.DT$UnitLength > 5.0])[1:5,1:3]
## SkuID VendorID Description
## 1: AAG47294 AAG PLNR,"FAT LITTLE",BK
## 2: AAG47342 AAG ORGNZR,LTHR,SNAP,8.5X11,BK
## 3: AAG47344 AAG ORGNZR,VYL,ZIP,3.75X6.75,BK
## 4: AAG47346 AAG ORGNZR,SMLTH,ZIP,5.5X8.5,BK
## 5: AAG47348 AAG ORGNZR,VNYL,ZIP,8.5X11,BK
## for data.table
sku.DT[UnitLength > 5.0][1:5,1:3]
## SkuID VendorID Description
## 1: AAG47294 AAG PLNR,"FAT LITTLE",BK
## 2: AAG47342 AAG ORGNZR,LTHR,SNAP,8.5X11,BK
## 3: AAG47344 AAG ORGNZR,VYL,ZIP,3.75X6.75,BK
## 4: AAG47346 AAG ORGNZR,SMLTH,ZIP,5.5X8.5,BK
## 5: AAG47348 AAG ORGNZR,VNYL,ZIP,8.5X11,BK
sku.DT[UnitLength > 5.0 & CaseLength < 20.0][1:5,1:3]
## SkuID VendorID Description
## 1: AAG47294 AAG PLNR,"FAT LITTLE",BK
## 2: AAG47342 AAG ORGNZR,LTHR,SNAP,8.5X11,BK
## 3: AAG47344 AAG ORGNZR,VYL,ZIP,3.75X6.75,BK
## 4: AAG47346 AAG ORGNZR,SMLTH,ZIP,5.5X8.5,BK
## 5: AAG47348 AAG ORGNZR,VNYL,ZIP,8.5X11,BK
Task 05: calculate unit volumne and case volumne
findVol <- function(l,w,h){ return(l*w*h)}
sku.DT[,UnitVol:= findVol(UnitLength,UnitWidth,UnitHeight)][1:5,c(1,11:12)]
## SkuID CaseHeight CaseWeight
## 1: AAG47294 6.8 3.30
## 2: AAG47342 13.0 16.70
## 3: AAG47344 9.9 8.10
## 4: AAG47346 11.4 9.76
## 5: AAG47348 11.2 17.91
sku.DT[,CaseVol:= findVol(CaseLength,CaseWidth,CaseHeight)][1:5,c(1,11:13)]
## SkuID CaseHeight CaseWeight UnitVol
## 1: AAG47294 6.8 3.30 64.294
## 2: AAG47342 13.0 16.70 342.720
## 3: AAG47344 9.9 8.10 76.110
## 4: AAG47346 11.4 9.76 158.517
## 5: AAG47348 11.2 17.91 411.000
sku.DT[order(-UnitVol)]
## SkuID VendorID Description UnitLength
## 1: RUB9T18-00 RUC TRUCK,SERVICE,1CU.YD. 69.0
## 2: RUB101100GRAY RUC TRUCK,TILT,STRUCTURAL 66.5
## 3: FEL38485 FEL SHREDDER,OFFC,480,GY 41.1
## 4: CRA1041R-19 CRA LADDER,SAFETY,4-STEP,BG 49.0
## 5: CRA1041L-19 CRA LADDER,SFTY,4 STEP,LFT HND, 49.5
## ---
## 21093: SPZMOUSEPAD SPZ MOUSE PAD 0.0
## 21094: SPZPW2003 SPZ CATALOG,2003,PENNYWISE 0.0
## 21095: SPZR1536TG SPZ File Pockets, Molzwell 0.0
## 21096: VIIOPT200G VII LENSES,READING,OPTX 200 0.0
## 21097: WTB32-0141TM01B WTB DISPENSER,TIMEMISTPLUS,LGY 0.0
## UnitWidth UnitHeight UnitWeight CaseQuantity CaseLength CaseWidth
## 1: 33.0 42.0 50 1 69.0 33.0
## 2: 28.0 38.0 90 1 66.5 28.0
## 3: 26.0 41.3 150 1 41.1 26.0
## 4: 26.5 31.5 37 1 49.0 26.5
## 5: 24.6 31.5 15 1 49.5 24.6
## ---
## 21093: 0.0 0.0 0 0 0.0 0.0
## 21094: 0.0 0.0 0 10 0.0 0.0
## 21095: 0.0 0.0 0 25 0.0 0.0
## 21096: 0.0 0.0 0 144 0.0 0.0
## 21097: 0.0 0.0 0 0 0.0 0.0
## CaseHeight CaseWeight UnitVol CaseVol
## 1: 42.0 50 95634.00 95634.00
## 2: 38.0 90 70756.00 70756.00
## 3: 41.3 150 44133.18 44133.18
## 4: 31.5 37 40902.75 40902.75
## 5: 31.5 15 38357.55 38357.55
## ---
## 21093: 0.0 0 0.00 0.00
## 21094: 0.0 0 0.00 0.00
## 21095: 0.0 0 0.00 0.00
## 21096: 0.0 0 0.00 0.00
## 21097: 0.0 0 0.00 0.00
dim(sku.DT[UnitVol==0])
## [1] 123 14
require(stringr)
sku.DT[,digit3:= str_sub(SkuID,start=1,end = 3)][1:5,c(1,12:14)]
## SkuID CaseWeight UnitVol CaseVol
## 1: AAG47294 3.30 64.294 239.360
## 2: AAG47342 16.70 342.720 1749.150
## 3: AAG47344 8.10 76.110 1106.622
## 4: AAG47346 9.76 158.517 1133.616
## 5: AAG47348 17.91 411.000 1955.968
nrow(sku.DT[digit3==VendorID])
## [1] 13724
table(str_count(sku.DT$Description,pattern = ","))
##
## 0 1 2 3 4 5 6
## 286 1079 5791 8733 4478 713 17
sku.DT[str_detect(Description,pattern = "BK")][1:5,c(1,12:15)]
## SkuID CaseWeight UnitVol CaseVol digit3
## 1: AAG47294 3.30 64.294 239.360 AAG
## 2: AAG47342 16.70 342.720 1749.150 AAG
## 3: AAG47344 8.10 76.110 1106.622 AAG
## 4: AAG47346 9.76 158.517 1133.616 AAG
## 5: AAG47348 17.91 411.000 1955.968 AAG
Task 06: save as weight and volume of ‘sku.DT’ as ‘dim.DT’ for future used
dim.DT <- sku.DT[UnitVol>0 & CaseVol>0 & UnitWeight>0 & CaseWeight > 0,
.(SkuID,UnitWeight,UnitVol,CaseWeight,CaseVol)]
##-- for piping
library(dplyr)
sku.DT[UnitVol>0 & CaseVol>0 & UnitWeight>0 & CaseWeight > 0,
.(SkuID,UnitWeight,UnitVol,CaseWeight,CaseVol)] -> dim.DT
Task 01: import locations.txt using ‘read.csv()’ and convert it into data.table class, named loc.DT.
loc.DT <- as.data.table(read.csv("resources/locations.txt",sep="\t",stringsAsFactors = F))
loc.DT[1:5,1:3]
## SkuID Zone Aisle
## 1: ESS4153-1/5BLU A 101
## 2: ESS4152-1/5ASST A 101
## 3: ESS4152 A 101
## 4: ESSH10U13 A 101
## 5: ESS42591 A 101
Task 02: check whether location and skuID unique.
loc.SkuID <- table(loc.DT$SkuID)
loc.SkuID[loc.SkuID>1]
## named integer(0)
loc.DT[,locID:=str_c(Zone,Aisle,Bay,Level,Position)]
loc.locID <- table(loc.DT$locID)
loc.locID[loc.locID>1]
## named integer(0)
table(loc.DT$Units)
##
## Cases
## 20804
Task 03: calculate weight and volume of each location
joint.DT <- loc.DT[dim.DT,on="SkuID"]
dim(loc.DT)
## [1] 20804 9
dim(dim.DT)
## [1] 19474 5
dim(joint.DT)
## [1] 19474 13
joint.DT[, totWt :=Qty*CaseWeight]
joint.DT[, totVol:=Qty*CaseVol]
Task 04A: summary weight and volume by ‘Zone’ and ‘Aisle’
joint.DT[, .(sumWt=sum(totWt),sumVol=sum(totVol)), by=.(Zone)][order(Zone)]
## Zone sumWt sumVol
## 1: A 930678.01 101018052
## 2: B 659721.47 58296413
## 3: C 951735.70 106041575
## 4: F 130525.44 34059266
## 5: L 27612.64 2250032
## 6: <NA> NA NA
joint.DT[, .(sumWt=sum(totWt),sumVol=sum(totVol)), by=.(Zone,Aisle)][order(Zone,Aisle)] %>%
dcast.data.table(Zone~Aisle,value.var="sumWt") -> result.DT
result.DT[1:6,c(2:5,60:64,130:134)]
## NA 101 102 103 210 211 212 213 214
## 1: NA NA NA NA NA NA NA NA NA
## 2: NA 15393.14 26127.28 10483.16 NA NA NA NA NA
## 3: NA NA NA NA 10669.28 12410.9 7528.96 7944.08 4759.1
## 4: NA NA NA NA NA NA NA NA NA
## 5: NA NA NA NA NA NA NA NA NA
## 6: NA NA NA NA NA NA NA NA NA
## 333 334 335 336 337
## 1: NA NA NA NA NA
## 2: NA NA NA NA NA
## 3: NA NA NA NA NA
## 4: 14872.88 11270.7 10806.6 9551.97 19921.42
## 5: NA NA NA NA NA
## 6: NA NA NA NA NA
Task 04B: Given ‘Aisle’=152summary weight and volume by ‘Level’ and ‘Bay’
temp.DT <- joint.DT[Aisle==152, .(sumWt=sum(totWt),sumVol=sum(totVol)), by=.(Level,Bay)]
require(data.table)
dcast(temp.DT,Level~Bay,value.var="sumWt")[order(-Level)]
## Level A B C D E F G H I J
## 1: 5 NA NA NA 383.35 384.70 515.4 NA NA NA 353.25
## 2: 4 240.00 NA 180.60 445.50 15.30 859.9 148.2 64.35 313.85 62.25
## 3: 3 121.25 8.1 76.05 170.00 66.15 325.5 108.9 456.75 333.25 84.00
## 4: 2 410.40 96.0 649.35 146.00 NA 185.5 242.5 201.75 30.70 82.80
## 5: 1 173.00 213.5 43.20 310.10 39.00 243.2 190.0 324.90 375.00 69.25
## K L M N O P
## 1: NA NA NA NA NA NA
## 2: 709.2 386.00 NA 594.85 144.5 209.00
## 3: 164.1 563.00 398.35 489.85 163.0 397.75
## 4: 243.5 363.55 203.80 248.10 183.0 296.50
## 5: 229.5 224.75 270.00 136.55 NA 53.25
require(ggplot2)
ggplot(temp.DT) +
geom_tile(aes(y=Level,x=Bay,fill=sumWt)) +
scale_fill_gradient(low="light yellow", high="red") +
ylim(c(1,6))+
theme_minimal()
Task 01: import sales.txt using ‘read.csv()’ and convert it into data.table class, named sales.DT, and clean data.
sales.DT <- as.data.table(read.csv("resources/sales.txt",sep="\t",stringsAsFactors = F))
sales.DT[1:5,1:3]
## SkuID OrderNumber CustomerNumber
## 1: CRD19050 24000001 444255
## 2: DYM30376 24000001 444255
## 3: GBC1757700 24000001 444255
## 4: MNK925047 24000001 444255
## 5: QRT20703 24000001 444255
##-- duplicated data --#
sales.DT[duplicated(sales.DT)][order(OrderNumber)][1:5,1:3]
## SkuID OrderNumber CustomerNumber
## 1: GBC3745100 23020920 552321
## 2: GLWB3143DT 23023599 55560
## 3: PENK230-MV 23028741 919203
## 4: ESS1526ET2/5OX 23297507 552321
## 5: GEPTM20 23297851 146807
##-- removed duplicated data--#
sales.DT <- unique(sales.DT)
##-- missing data --#
which(is.na(sales.DT), arr.ind = T)
## row col
## [1,] 98818 2
sales.DT[98818,]
## SkuID OrderNumber CustomerNumber Date
## 1: NSH752002247238 NA 552321 2004/12/1 0:00:00
## Time OrderQuantity
## 1: 1899/12/30 8:00:00 10
##-- removed missing data--#
sales.DT <- sales.DT[-98818,]
Task 02: convert ‘Date’ and ‘Time’ of sales.DT into proper format, named ‘aDate’ and ‘aTime’ respectively
sales.DT[,aDate:=lubridate::ymd(str_split_fixed(Date,patter=" ",n=2)[,1])][1:5,c(1,7)]
## SkuID aDate
## 1: CRD19050 2004-01-01
## 2: DYM30376 2004-01-01
## 3: GBC1757700 2004-01-01
## 4: MNK925047 2004-01-01
## 5: QRT20703 2004-01-01
sales.DT[,aTime:=hms::as_hms(str_split_fixed(Time,patter=" ",n=2)[,2])][1:5,c(1,7:8)]
## SkuID aDate aTime
## 1: CRD19050 2004-01-01 00:00:00
## 2: DYM30376 2004-01-01 00:00:00
## 3: GBC1757700 2004-01-01 00:00:00
## 4: MNK925047 2004-01-01 00:00:00
## 5: QRT20703 2004-01-01 00:00:00
sales.DT[,aMth:= lubridate::month(aDate,label = T)][1:5,c(1,7:9)]
## SkuID aDate aTime aMth
## 1: CRD19050 2004-01-01 00:00:00 Jan
## 2: DYM30376 2004-01-01 00:00:00 Jan
## 3: GBC1757700 2004-01-01 00:00:00 Jan
## 4: MNK925047 2004-01-01 00:00:00 Jan
## 5: QRT20703 2004-01-01 00:00:00 Jan
sales.DT[,aWek:= factor(weekdays(aDate,abbreviate = T),
levels=c("Mon","Tue","Wed","Thu","Fri","Sat","Sun"))][1:5,c(1,7:10)]
## SkuID aDate aTime aMth aWek
## 1: CRD19050 2004-01-01 00:00:00 Jan Thu
## 2: DYM30376 2004-01-01 00:00:00 Jan Thu
## 3: GBC1757700 2004-01-01 00:00:00 Jan Thu
## 4: MNK925047 2004-01-01 00:00:00 Jan Thu
## 5: QRT20703 2004-01-01 00:00:00 Jan Thu
Task 03: plot total shipped weight on each weekday
sales.joint <- dim.DT[sales.DT,nomatch=NULL,on="SkuID"]
sales.joint[,.(sumWt=sum(OrderQuantity*CaseWeight,na.rm = T),.N),by=.(aDate,aWek)] %>%
ggplot(aes(y=sumWt,x=aWek)) + theme_bw()+
geom_boxplot(outlier.shape = NA) + geom_jitter(width = 0.2) -> weekdayDistPlot
weekdayDistPlot
Task 04: identify locations what are visit more than 16 times (hint group less than 16-time locations as ‘other’ )
loc.joint <- loc.DT[sales.DT,nomatch=NULL,on="SkuID"]
pickByloc.DT <- loc.joint[,.N,by="locID"][order(-N)]
table(pickByloc.DT$N)
##
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 1230 2078 2944 3149 2894 2220 1522 943 523 302 146 87 45 35 13 9
## 17 18 19 20 21 22 23 26 27 29 30
## 9 5 5 4 2 2 3 3 1 2 2
limitN <- 16
topVist16Time.DT <- pickByloc.DT[N>=limitN]
topVist16Time.DT <- rbind(topVist16Time.DT,pickByloc.DT[N<limitN,.(locID="other",N=sum(N))])
topVist16Time.DT
## locID N
## 1: B246J7E 30
## 2: B208C5A 30
## 3: B218A4B 29
## 4: B230F3C 29
## 5: B247D5C 27
## 6: B247C5C 26
## 7: B249G5C 26
## 8: B218D2B 26
## 9: B220D3C 23
## 10: B241J3C 23
## 11: B209C4C 23
## 12: B220B2B 22
## 13: B215A3B 22
## 14: B209H2C 21
## 15: B209E2B 21
## 16: B249I5C 20
## 17: B233O3C 20
## 18: B201E1C 20
## 19: B213C3B 20
## 20: A141B1C 19
## 21: B206J3C 19
## 22: B216B4B 19
## 23: B220F6B 19
## 24: B204E3A 19
## 25: B220E2B 18
## 26: B242B3E 18
## 27: B231S2E 18
## 28: B242D7C 18
## 29: C347E1A 18
## 30: B242B5A 17
## 31: A153D3C 17
## 32: B245E4C 17
## 33: B223G5A 17
## 34: A122E2B 17
## 35: B205A2A 17
## 36: C312C6C 17
## 37: B211G4B 17
## 38: B208D4C 17
## 39: C321K3C 16
## 40: B247D4C 16
## 41: B242E4A 16
## 42: B215C4C 16
## 43: C313L1C 16
## 44: B203B5A 16
## 45: B214A5B 16
## 46: B232Q4C 16
## 47: B211B4B 16
## 48: other 84449
## locID N
Task 05: plot line per order as bar chart (hint group more than 12-SKU as ‘other’ )
lineOrder <- sales.DT[,.(numSku=.N),by="OrderNumber"][,.N,by="numSku"][order(numSku)]
lineOrder
## numSku N
## 1: 1 5589
## 2: 2 821
## 3: 3 222
## 4: 4 110
## 5: 5 66
## 6: 6 223
## 7: 7 836
## 8: 8 1539
## 9: 9 2012
## 10: 10 1841
## 11: 11 1392
## 12: 12 798
## 13: 13 402
## 14: 14 182
## 15: 15 85
## 16: 16 45
## 17: 17 6
## 18: 18 6
## 19: 20 2
## 20: 37 1
## 21: 81 1
## 22: 114 1
## numSku N
topLineOrder <- lineOrder[numSku<=12]
topLineOrder <- rbind(topLineOrder,lineOrder[numSku>12,.(numSku="other",N=sum(N))])
topLineOrder$numSku <- factor(topLineOrder$numSku,levels = c(1:12,"other"))
ggplot(topLineOrder,aes(y=N,x=numSku ) ) + geom_col() +theme_bw()
Copyright 2019 Oran Kittithreerapronchai. All Rights Reserved. Last modified: 2022-41-19,