Instruction

Read the instruction and think carefully about how to develop R code to answer each questions using the following database.

Article Master

Prepare

Task 00: Preparation by

  • removed existing data and package
  • find location of this file
  • set location or ‘work directory’
  rm(list=ls(all.names = T))
  
  curPath <- dirname(rstudioapi::getSourceEditorContext()$path)
  
  setwd(curPath)

import

Task 01: import skus.txt using ‘Import Dataset’ (Upper Right Pannel) and convert it into data.table class, named sku.DT.

  require(data.table)
  sku.DT <- as.data.table(skus)
Note

Typically, R user prefers load data using text command:

  skus   <- read.csv("resources/skus.txt",sep="\t",stringsAsFactors = F)  

  sku.DT <- as.data.table(skus)
  • stringsAsFactors = F = do not convert/normalize String into Factor

summary

Task 02: summarize sku.DT using ‘str()’,‘summary()’, ‘head()’, ‘tail()’

  str(sku.DT)
## Classes 'data.table' and 'data.frame':   21097 obs. of  12 variables:
##  $ SkuID       : chr  "AAG47294" "AAG47342" "AAG47344" "AAG47346" ...
##  $ VendorID    : chr  "AAG" "AAG" "AAG" "AAG" ...
##  $ Description : chr  "PLNR,\"FAT LITTLE\",BK" "ORGNZR,LTHR,SNAP,8.5X11,BK" "ORGNZR,VYL,ZIP,3.75X6.75,BK" "ORGNZR,SMLTH,ZIP,5.5X8.5,BK" ...
##  $ UnitLength  : num  6.2 11.9 8.6 10.3 13.7 14 6.7 8.5 11 6.7 ...
##  $ UnitWidth   : num  6.1 12 5.9 8.1 12 11.7 3.7 5.5 8.5 3.7 ...
##  $ UnitHeight  : num  1.7 2.4 1.5 1.9 2.5 3.2 0.1 0.1 0.2 0.1 ...
##  $ UnitWeight  : num  0.9 2.91 1.15 1.48 3.48 3.27 0.1 0.2 0.4 0.1 ...
##  $ CaseQuantity: int  3 6 6 6 3 6 24 24 24 24 ...
##  $ CaseLength  : num  6.4 11.7 13.8 11.3 14.8 18 7.6 9.3 11.8 7.6 ...
##  $ CaseWidth   : num  5.5 11.5 8.1 8.8 11.8 ...
##  $ CaseHeight  : num  6.8 13 9.9 11.4 11.2 ...
##  $ CaseWeight  : num  3.3 16.7 8.1 9.76 17.91 ...
##  - attr(*, ".internal.selfref")=<externalptr>
  summary(sku.DT)
##     SkuID             VendorID         Description          UnitLength    
##  Length:21097       Length:21097       Length:21097       Min.   :  0.00  
##  Class :character   Class :character   Class :character   1st Qu.:  6.20  
##  Mode  :character   Mode  :character   Mode  :character   Median : 11.10  
##                                                           Mean   : 12.34  
##                                                           3rd Qu.: 13.90  
##                                                           Max.   :105.50  
##    UnitWidth        UnitHeight        UnitWeight       CaseQuantity    
##  Min.   : 0.000   Min.   :  0.000   Min.   :  0.000   Min.   :   0.00  
##  1st Qu.: 3.700   1st Qu.:  0.700   1st Qu.:  0.250   1st Qu.:   3.00  
##  Median : 7.800   Median :  1.940   Median :  1.000   Median :   6.00  
##  Mean   : 8.135   Mean   :  3.441   Mean   :  4.179   Mean   :  37.63  
##  3rd Qu.:10.200   3rd Qu.:  4.000   3rd Qu.:  3.550   3rd Qu.:  20.00  
##  Max.   :74.000   Max.   :100.500   Max.   :202.000   Max.   :1728.00  
##    CaseLength       CaseWidth        CaseHeight        CaseWeight    
##  Min.   :  0.00   Min.   :  0.00   Min.   :  0.000   Min.   :  0.00  
##  1st Qu.: 10.70   1st Qu.:  7.80   1st Qu.:  4.500   1st Qu.:  4.20  
##  Median : 13.60   Median : 10.40   Median :  8.100   Median : 10.30  
##  Mean   : 15.32   Mean   : 10.80   Mean   :  8.575   Mean   : 14.95  
##  3rd Qu.: 18.25   3rd Qu.: 12.75   3rd Qu.: 12.000   3rd Qu.: 21.60  
##  Max.   :123.00   Max.   :112.50   Max.   :147.000   Max.   :202.00
  ## for data.table class only
  sku.DT
##           SkuID VendorID                 Description UnitLength UnitWidth
##     1: AAG47294      AAG        PLNR,"FAT LITTLE",BK        6.2       6.1
##     2: AAG47342      AAG  ORGNZR,LTHR,SNAP,8.5X11,BK       11.9      12.0
##     3: AAG47344      AAG ORGNZR,VYL,ZIP,3.75X6.75,BK        8.6       5.9
##     4: AAG47346      AAG ORGNZR,SMLTH,ZIP,5.5X8.5,BK       10.3       8.1
##     5: AAG47348      AAG   ORGNZR,VNYL,ZIP,8.5X11,BK       13.7      12.0
##    ---                                                                   
## 21093: ZPC85412      ZPC       REFILL,PEN,F,1.0MM,BK        7.5       2.2
## 21094: ZPC85422      ZPC       REFILL,PEN,F,1.0MM,BE        7.5       2.2
## 21095: ZPC85512      ZPC    REFILL,F301,BK,2 rfls/EA        7.6       2.3
## 21096: ZPC85522      ZPC        REFILL,PEN,F,.7MM,BE        7.5       2.3
## 21097: ZPC87012      ZPC  REFILL,JF,BK, 2rfls per EA        7.5       2.2
##        UnitHeight UnitWeight CaseQuantity CaseLength CaseWidth CaseHeight
##     1:        1.7       0.90            3       6.40      5.50       6.80
##     2:        2.4       2.91            6      11.70     11.50      13.00
##     3:        1.5       1.15            6      13.80      8.10       9.90
##     4:        1.9       1.48            6      11.30      8.80      11.40
##     5:        2.5       3.48            3      14.80     11.80      11.20
##    ---                                                                   
## 21093:        0.2       0.02          144       9.88      6.25       8.19
## 21094:        0.3       0.10          144       9.88      6.25       8.19
## 21095:        0.5       0.10          144       9.88      6.25       8.19
## 21096:        0.1       0.01            0       0.00      0.00       0.00
## 21097:        0.1       0.10          144      10.40      6.70       9.00
##        CaseWeight
##     1:       3.30
##     2:      16.70
##     3:       8.10
##     4:       9.76
##     5:      17.91
##    ---           
## 21093:       4.48
## 21094:       4.48
## 21095:       4.48
## 21096:       0.00
## 21097:       5.10
  head(sku.DT)
##       SkuID VendorID                 Description UnitLength UnitWidth
## 1: AAG47294      AAG        PLNR,"FAT LITTLE",BK        6.2       6.1
## 2: AAG47342      AAG  ORGNZR,LTHR,SNAP,8.5X11,BK       11.9      12.0
## 3: AAG47344      AAG ORGNZR,VYL,ZIP,3.75X6.75,BK        8.6       5.9
## 4: AAG47346      AAG ORGNZR,SMLTH,ZIP,5.5X8.5,BK       10.3       8.1
## 5: AAG47348      AAG   ORGNZR,VNYL,ZIP,8.5X11,BK       13.7      12.0
## 6: AAG47358      AAG   ORGNZR,LTHR,ZIP,8.5X11,BK       14.0      11.7
##    UnitHeight UnitWeight CaseQuantity CaseLength CaseWidth CaseHeight
## 1:        1.7       0.90            3        6.4      5.50       6.80
## 2:        2.4       2.91            6       11.7     11.50      13.00
## 3:        1.5       1.15            6       13.8      8.10       9.90
## 4:        1.9       1.48            6       11.3      8.80      11.40
## 5:        2.5       3.48            3       14.8     11.80      11.20
## 6:        3.2       3.27            6       18.0     13.13      15.25
##    CaseWeight
## 1:       3.30
## 2:      16.70
## 3:       8.10
## 4:       9.76
## 5:      17.91
## 6:      28.25
  tail(sku.DT)
##       SkuID VendorID                Description UnitLength UnitWidth UnitHeight
## 1: ZPC84222      ZPC      REFILL,PEN,K,1.0MM,BE        7.5       2.3        0.1
## 2: ZPC85412      ZPC      REFILL,PEN,F,1.0MM,BK        7.5       2.2        0.2
## 3: ZPC85422      ZPC      REFILL,PEN,F,1.0MM,BE        7.5       2.2        0.3
## 4: ZPC85512      ZPC   REFILL,F301,BK,2 rfls/EA        7.6       2.3        0.5
## 5: ZPC85522      ZPC       REFILL,PEN,F,.7MM,BE        7.5       2.3        0.1
## 6: ZPC87012      ZPC REFILL,JF,BK, 2rfls per EA        7.5       2.2        0.1
##    UnitWeight CaseQuantity CaseLength CaseWidth CaseHeight CaseWeight
## 1:       0.06           12       7.60      2.50       2.00       0.25
## 2:       0.02          144       9.88      6.25       8.19       4.48
## 3:       0.10          144       9.88      6.25       8.19       4.48
## 4:       0.10          144       9.88      6.25       8.19       4.48
## 5:       0.01            0       0.00      0.00       0.00       0.00
## 6:       0.10          144      10.40      6.70       9.00       5.10
Note
  • str(): provide class and example of data
  • summary(): provide basic desp statisitic
  • sku.DT: show/list all variable; will show top 5 and bottom 5 for data.table
Advance Summary with summarytools
  print(summarytools::freq(sku.DT$VendorID),method="browser")
  
  print(summarytools::dfSummary(sku.DT),method="browser")

access

Task 03: accessing unconditional data as matrix and list

  • do the data has duplicate?
  • do the data has NA?
  sku.DT[3,1] 
##       SkuID
## 1: AAG47344
  sku.DT$SkuID[3]
## [1] "AAG47344"
  sku.DT[[1]][3]
## [1] "AAG47344"
  which(is.na(sku.DT),arr.ind = T)
##      row col
  View(unique.data.frame(sku.DT))
  
  which(duplicated(sku.DT))
## integer(0)
Note
  • sku.DT[3,1] accessing row 3 and col 1 of sku.DT
  • unique(): list only unique dataset (no duplicate)
  • which(): return index that satisfies condition
  • duplicate(): list all duplicated dataset

query I

Task 04: accessing conditional data / select query

  • how many SKU that have ‘Unitlength’ more than 5.0
  • how many SKU that have ‘Unitlength’ more than 5.0 and ‘CaseLength < 20.0’
  head(sku.DT[sku.DT$UnitLength > 5.0])[1:5,1:3]
##       SkuID VendorID                 Description
## 1: AAG47294      AAG        PLNR,"FAT LITTLE",BK
## 2: AAG47342      AAG  ORGNZR,LTHR,SNAP,8.5X11,BK
## 3: AAG47344      AAG ORGNZR,VYL,ZIP,3.75X6.75,BK
## 4: AAG47346      AAG ORGNZR,SMLTH,ZIP,5.5X8.5,BK
## 5: AAG47348      AAG   ORGNZR,VNYL,ZIP,8.5X11,BK
  ## for data.table 
  sku.DT[UnitLength > 5.0][1:5,1:3]
##       SkuID VendorID                 Description
## 1: AAG47294      AAG        PLNR,"FAT LITTLE",BK
## 2: AAG47342      AAG  ORGNZR,LTHR,SNAP,8.5X11,BK
## 3: AAG47344      AAG ORGNZR,VYL,ZIP,3.75X6.75,BK
## 4: AAG47346      AAG ORGNZR,SMLTH,ZIP,5.5X8.5,BK
## 5: AAG47348      AAG   ORGNZR,VNYL,ZIP,8.5X11,BK
  sku.DT[UnitLength > 5.0 & CaseLength < 20.0][1:5,1:3]
##       SkuID VendorID                 Description
## 1: AAG47294      AAG        PLNR,"FAT LITTLE",BK
## 2: AAG47342      AAG  ORGNZR,LTHR,SNAP,8.5X11,BK
## 3: AAG47344      AAG ORGNZR,VYL,ZIP,3.75X6.75,BK
## 4: AAG47346      AAG ORGNZR,SMLTH,ZIP,5.5X8.5,BK
## 5: AAG47348      AAG   ORGNZR,VNYL,ZIP,8.5X11,BK
Note
  • View(): show data in another tab

query II

Task 05: calculate unit volumne and case volumne

  findVol <- function(l,w,h){ return(l*w*h)}
  
  sku.DT[,UnitVol:= findVol(UnitLength,UnitWidth,UnitHeight)][1:5,c(1,11:12)]
##       SkuID CaseHeight CaseWeight
## 1: AAG47294        6.8       3.30
## 2: AAG47342       13.0      16.70
## 3: AAG47344        9.9       8.10
## 4: AAG47346       11.4       9.76
## 5: AAG47348       11.2      17.91
  sku.DT[,CaseVol:= findVol(CaseLength,CaseWidth,CaseHeight)][1:5,c(1,11:13)]
##       SkuID CaseHeight CaseWeight UnitVol
## 1: AAG47294        6.8       3.30  64.294
## 2: AAG47342       13.0      16.70 342.720
## 3: AAG47344        9.9       8.10  76.110
## 4: AAG47346       11.4       9.76 158.517
## 5: AAG47348       11.2      17.91 411.000
  sku.DT[order(-UnitVol)]
##                  SkuID VendorID                 Description UnitLength
##     1:      RUB9T18-00      RUC       TRUCK,SERVICE,1CU.YD.       69.0
##     2:   RUB101100GRAY      RUC       TRUCK,TILT,STRUCTURAL       66.5
##     3:        FEL38485      FEL        SHREDDER,OFFC,480,GY       41.1
##     4:     CRA1041R-19      CRA     LADDER,SAFETY,4-STEP,BG       49.0
##     5:     CRA1041L-19      CRA LADDER,SFTY,4 STEP,LFT HND,       49.5
##    ---                                                                
## 21093:     SPZMOUSEPAD      SPZ                   MOUSE PAD        0.0
## 21094:       SPZPW2003      SPZ      CATALOG,2003,PENNYWISE        0.0
## 21095:      SPZR1536TG      SPZ      File Pockets, Molzwell        0.0
## 21096:      VIIOPT200G      VII     LENSES,READING,OPTX 200        0.0
## 21097: WTB32-0141TM01B      WTB  DISPENSER,TIMEMISTPLUS,LGY        0.0
##        UnitWidth UnitHeight UnitWeight CaseQuantity CaseLength CaseWidth
##     1:      33.0       42.0         50            1       69.0      33.0
##     2:      28.0       38.0         90            1       66.5      28.0
##     3:      26.0       41.3        150            1       41.1      26.0
##     4:      26.5       31.5         37            1       49.0      26.5
##     5:      24.6       31.5         15            1       49.5      24.6
##    ---                                                                  
## 21093:       0.0        0.0          0            0        0.0       0.0
## 21094:       0.0        0.0          0           10        0.0       0.0
## 21095:       0.0        0.0          0           25        0.0       0.0
## 21096:       0.0        0.0          0          144        0.0       0.0
## 21097:       0.0        0.0          0            0        0.0       0.0
##        CaseHeight CaseWeight  UnitVol  CaseVol
##     1:       42.0         50 95634.00 95634.00
##     2:       38.0         90 70756.00 70756.00
##     3:       41.3        150 44133.18 44133.18
##     4:       31.5         37 40902.75 40902.75
##     5:       31.5         15 38357.55 38357.55
##    ---                                        
## 21093:        0.0          0     0.00     0.00
## 21094:        0.0          0     0.00     0.00
## 21095:        0.0          0     0.00     0.00
## 21096:        0.0          0     0.00     0.00
## 21097:        0.0          0     0.00     0.00
  dim(sku.DT[UnitVol==0])
## [1] 123  14
Note
  • := create new column from expression
  • order() sort data by specific column
  • dim() return number of columns and rows, ‘ncol()’ and ‘nrow()’
  • length() return length of variable

string

  • find relationship between first three digit of ‘SkuID’ and ‘VendorID’
  require(stringr)
  
  sku.DT[,digit3:= str_sub(SkuID,start=1,end = 3)][1:5,c(1,12:14)]
##       SkuID CaseWeight UnitVol  CaseVol
## 1: AAG47294       3.30  64.294  239.360
## 2: AAG47342      16.70 342.720 1749.150
## 3: AAG47344       8.10  76.110 1106.622
## 4: AAG47346       9.76 158.517 1133.616
## 5: AAG47348      17.91 411.000 1955.968
  nrow(sku.DT[digit3==VendorID])
## [1] 13724
  table(str_count(sku.DT$Description,pattern = ","))
## 
##    0    1    2    3    4    5    6 
##  286 1079 5791 8733 4478  713   17
  sku.DT[str_detect(Description,pattern = "BK")][1:5,c(1,12:15)]
##       SkuID CaseWeight UnitVol  CaseVol digit3
## 1: AAG47294       3.30  64.294  239.360    AAG
## 2: AAG47342      16.70 342.720 1749.150    AAG
## 3: AAG47344       8.10  76.110 1106.622    AAG
## 4: AAG47346       9.76 158.517 1133.616    AAG
## 5: AAG47348      17.91 411.000 1955.968    AAG
Note
  • stringr::str_sub() retrived sub string (must continue)
  • stringr::str_count() return number of <>
  • stringr::str_detect() check of <>
  • order() sort data by specific column
  • dim() return number of columns and rows, ‘ncol()’ and ‘nrow()’
  • length() return length of variable

copy

Task 06: save as weight and volume of ‘sku.DT’ as ‘dim.DT’ for future used

  dim.DT <- sku.DT[UnitVol>0 & CaseVol>0 & UnitWeight>0 & CaseWeight > 0,
                   .(SkuID,UnitWeight,UnitVol,CaseWeight,CaseVol)]
  
  ##-- for piping 
  library(dplyr)
  sku.DT[UnitVol>0 & CaseVol>0 & UnitWeight>0 & CaseWeight > 0,
                   .(SkuID,UnitWeight,UnitVol,CaseWeight,CaseVol)] -> dim.DT

Location Master

prepare

Task 01: import locations.txt using ‘read.csv()’ and convert it into data.table class, named loc.DT.

  loc.DT   <- as.data.table(read.csv("resources/locations.txt",sep="\t",stringsAsFactors = F))  
  
  loc.DT[1:5,1:3]
##              SkuID Zone Aisle
## 1:  ESS4153-1/5BLU    A   101
## 2: ESS4152-1/5ASST    A   101
## 3:         ESS4152    A   101
## 4:       ESSH10U13    A   101
## 5:        ESS42591    A   101

check

Task 02: check whether location and skuID unique.

  loc.SkuID <- table(loc.DT$SkuID)
  loc.SkuID[loc.SkuID>1]
## named integer(0)
  loc.DT[,locID:=str_c(Zone,Aisle,Bay,Level,Position)]
  loc.locID <- table(loc.DT$locID)
  loc.locID[loc.locID>1]
## named integer(0)
  table(loc.DT$Units)
## 
## Cases 
## 20804
Finding
  • no any sku stored in more than one location
  • no any location hold more than one sku
  • all unit is cases

joint

Task 03: calculate weight and volume of each location

  joint.DT <- loc.DT[dim.DT,on="SkuID"]
  
  dim(loc.DT)
## [1] 20804     9
  dim(dim.DT)
## [1] 19474     5
  dim(joint.DT)
## [1] 19474    13
  joint.DT[, totWt :=Qty*CaseWeight]
  joint.DT[, totVol:=Qty*CaseVol]
Note
  • what is the joint type (left/right/inner)?

group_by

Task 04A: summary weight and volume by ‘Zone’ and ‘Aisle’

  joint.DT[, .(sumWt=sum(totWt),sumVol=sum(totVol)), by=.(Zone)][order(Zone)]
##    Zone     sumWt    sumVol
## 1:    A 930678.01 101018052
## 2:    B 659721.47  58296413
## 3:    C 951735.70 106041575
## 4:    F 130525.44  34059266
## 5:    L  27612.64   2250032
## 6: <NA>        NA        NA
  joint.DT[, .(sumWt=sum(totWt),sumVol=sum(totVol)), by=.(Zone,Aisle)][order(Zone,Aisle)] %>%
    dcast.data.table(Zone~Aisle,value.var="sumWt") -> result.DT
  
  result.DT[1:6,c(2:5,60:64,130:134)]
##    NA      101      102      103      210     211     212     213    214
## 1: NA       NA       NA       NA       NA      NA      NA      NA     NA
## 2: NA 15393.14 26127.28 10483.16       NA      NA      NA      NA     NA
## 3: NA       NA       NA       NA 10669.28 12410.9 7528.96 7944.08 4759.1
## 4: NA       NA       NA       NA       NA      NA      NA      NA     NA
## 5: NA       NA       NA       NA       NA      NA      NA      NA     NA
## 6: NA       NA       NA       NA       NA      NA      NA      NA     NA
##         333     334     335     336      337
## 1:       NA      NA      NA      NA       NA
## 2:       NA      NA      NA      NA       NA
## 3:       NA      NA      NA      NA       NA
## 4: 14872.88 11270.7 10806.6 9551.97 19921.42
## 5:       NA      NA      NA      NA       NA
## 6:       NA      NA      NA      NA       NA

visual

Task 04B: Given ‘Aisle’=152summary weight and volume by ‘Level’ and ‘Bay’

  temp.DT <- joint.DT[Aisle==152, .(sumWt=sum(totWt),sumVol=sum(totVol)), by=.(Level,Bay)] 
  
  require(data.table)
  dcast(temp.DT,Level~Bay,value.var="sumWt")[order(-Level)] 
##    Level      A     B      C      D      E     F     G      H      I      J
## 1:     5     NA    NA     NA 383.35 384.70 515.4    NA     NA     NA 353.25
## 2:     4 240.00    NA 180.60 445.50  15.30 859.9 148.2  64.35 313.85  62.25
## 3:     3 121.25   8.1  76.05 170.00  66.15 325.5 108.9 456.75 333.25  84.00
## 4:     2 410.40  96.0 649.35 146.00     NA 185.5 242.5 201.75  30.70  82.80
## 5:     1 173.00 213.5  43.20 310.10  39.00 243.2 190.0 324.90 375.00  69.25
##        K      L      M      N     O      P
## 1:    NA     NA     NA     NA    NA     NA
## 2: 709.2 386.00     NA 594.85 144.5 209.00
## 3: 164.1 563.00 398.35 489.85 163.0 397.75
## 4: 243.5 363.55 203.80 248.10 183.0 296.50
## 5: 229.5 224.75 270.00 136.55    NA  53.25
  require(ggplot2)
  ggplot(temp.DT) +
    geom_tile(aes(y=Level,x=Bay,fill=sumWt)) + 
    scale_fill_gradient(low="light yellow", high="red") +
    ylim(c(1,6))+
    theme_minimal() 

Order Master

prepare

Task 01: import sales.txt using ‘read.csv()’ and convert it into data.table class, named sales.DT, and clean data.

  sales.DT   <- as.data.table(read.csv("resources/sales.txt",sep="\t",stringsAsFactors = F))  
  
  sales.DT[1:5,1:3]
##         SkuID OrderNumber CustomerNumber
## 1:   CRD19050    24000001         444255
## 2:   DYM30376    24000001         444255
## 3: GBC1757700    24000001         444255
## 4:  MNK925047    24000001         444255
## 5:   QRT20703    24000001         444255
  ##-- duplicated data --#
  sales.DT[duplicated(sales.DT)][order(OrderNumber)][1:5,1:3]
##             SkuID OrderNumber CustomerNumber
## 1:     GBC3745100    23020920         552321
## 2:     GLWB3143DT    23023599          55560
## 3:     PENK230-MV    23028741         919203
## 4: ESS1526ET2/5OX    23297507         552321
## 5:        GEPTM20    23297851         146807
  ##-- removed duplicated data--#
  sales.DT <- unique(sales.DT)
  
  ##-- missing data --#
  which(is.na(sales.DT), arr.ind = T)
##        row col
## [1,] 98818   2
  sales.DT[98818,]
##              SkuID OrderNumber CustomerNumber              Date
## 1: NSH752002247238          NA         552321 2004/12/1 0:00:00
##                  Time OrderQuantity
## 1: 1899/12/30 8:00:00            10
  ##-- removed missing data--#
  sales.DT <- sales.DT[-98818,]
Note
  • there are 38 dataset that has duplication
  • there is one dataset that contains NA (missing data)
  • we treat this by removing both cases

data/time

Task 02: convert ‘Date’ and ‘Time’ of sales.DT into proper format, named ‘aDate’ and ‘aTime’ respectively

  sales.DT[,aDate:=lubridate::ymd(str_split_fixed(Date,patter=" ",n=2)[,1])][1:5,c(1,7)]
##         SkuID      aDate
## 1:   CRD19050 2004-01-01
## 2:   DYM30376 2004-01-01
## 3: GBC1757700 2004-01-01
## 4:  MNK925047 2004-01-01
## 5:   QRT20703 2004-01-01
  sales.DT[,aTime:=hms::as_hms(str_split_fixed(Time,patter=" ",n=2)[,2])][1:5,c(1,7:8)]
##         SkuID      aDate    aTime
## 1:   CRD19050 2004-01-01 00:00:00
## 2:   DYM30376 2004-01-01 00:00:00
## 3: GBC1757700 2004-01-01 00:00:00
## 4:  MNK925047 2004-01-01 00:00:00
## 5:   QRT20703 2004-01-01 00:00:00
  sales.DT[,aMth:= lubridate::month(aDate,label = T)][1:5,c(1,7:9)]
##         SkuID      aDate    aTime aMth
## 1:   CRD19050 2004-01-01 00:00:00  Jan
## 2:   DYM30376 2004-01-01 00:00:00  Jan
## 3: GBC1757700 2004-01-01 00:00:00  Jan
## 4:  MNK925047 2004-01-01 00:00:00  Jan
## 5:   QRT20703 2004-01-01 00:00:00  Jan
  sales.DT[,aWek:= factor(weekdays(aDate,abbreviate = T),
                          levels=c("Mon","Tue","Wed","Thu","Fri","Sat","Sun"))][1:5,c(1,7:10)]
##         SkuID      aDate    aTime aMth aWek
## 1:   CRD19050 2004-01-01 00:00:00  Jan  Thu
## 2:   DYM30376 2004-01-01 00:00:00  Jan  Thu
## 3: GBC1757700 2004-01-01 00:00:00  Jan  Thu
## 4:  MNK925047 2004-01-01 00:00:00  Jan  Thu
## 5:   QRT20703 2004-01-01 00:00:00  Jan  Thu
Note
  • lubridate::ymd() convert year/month/day text format into date (The function is from lubridate package)
  • lubridate::ymd_hms() convert year/month/day hour:minute:second text format into date (The function is from lubridate package)
  • hms::as_hms() convert hh/mm/ss text format into time (The function is from hms package)
  • lubridate::month() convert date into abrr Month
  • weekday() convert date into day of week
  • factor() convert into factor with spec order, e.g., levels=c(“Mon”,“Tue”,“Wed”,“Thu”,“Fri”,“Sat”,“Sun”)

weight out

Task 03: plot total shipped weight on each weekday

  sales.joint <- dim.DT[sales.DT,nomatch=NULL,on="SkuID"]
  
  sales.joint[,.(sumWt=sum(OrderQuantity*CaseWeight,na.rm = T),.N),by=.(aDate,aWek)] %>%
    ggplot(aes(y=sumWt,x=aWek)) + theme_bw()+
    geom_boxplot(outlier.shape = NA) + geom_jitter(width = 0.2)  -> weekdayDistPlot
  
  weekdayDistPlot

hot spot

Task 04: identify locations what are visit more than 16 times (hint group less than 16-time locations as ‘other’ )

  loc.joint <- loc.DT[sales.DT,nomatch=NULL,on="SkuID"]
  
  pickByloc.DT <- loc.joint[,.N,by="locID"][order(-N)]
  
  
  table(pickByloc.DT$N)
## 
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 1230 2078 2944 3149 2894 2220 1522  943  523  302  146   87   45   35   13    9 
##   17   18   19   20   21   22   23   26   27   29   30 
##    9    5    5    4    2    2    3    3    1    2    2
  limitN <- 16
  
  topVist16Time.DT <- pickByloc.DT[N>=limitN] 
  topVist16Time.DT <- rbind(topVist16Time.DT,pickByloc.DT[N<limitN,.(locID="other",N=sum(N))])
  
  topVist16Time.DT
##       locID     N
##  1: B246J7E    30
##  2: B208C5A    30
##  3: B218A4B    29
##  4: B230F3C    29
##  5: B247D5C    27
##  6: B247C5C    26
##  7: B249G5C    26
##  8: B218D2B    26
##  9: B220D3C    23
## 10: B241J3C    23
## 11: B209C4C    23
## 12: B220B2B    22
## 13: B215A3B    22
## 14: B209H2C    21
## 15: B209E2B    21
## 16: B249I5C    20
## 17: B233O3C    20
## 18: B201E1C    20
## 19: B213C3B    20
## 20: A141B1C    19
## 21: B206J3C    19
## 22: B216B4B    19
## 23: B220F6B    19
## 24: B204E3A    19
## 25: B220E2B    18
## 26: B242B3E    18
## 27: B231S2E    18
## 28: B242D7C    18
## 29: C347E1A    18
## 30: B242B5A    17
## 31: A153D3C    17
## 32: B245E4C    17
## 33: B223G5A    17
## 34: A122E2B    17
## 35: B205A2A    17
## 36: C312C6C    17
## 37: B211G4B    17
## 38: B208D4C    17
## 39: C321K3C    16
## 40: B247D4C    16
## 41: B242E4A    16
## 42: B215C4C    16
## 43: C313L1C    16
## 44: B203B5A    16
## 45: B214A5B    16
## 46: B232Q4C    16
## 47: B211B4B    16
## 48:   other 84449
##       locID     N

line/order

Task 05: plot line per order as bar chart (hint group more than 12-SKU as ‘other’ )

  lineOrder <- sales.DT[,.(numSku=.N),by="OrderNumber"][,.N,by="numSku"][order(numSku)]
  
  lineOrder
##     numSku    N
##  1:      1 5589
##  2:      2  821
##  3:      3  222
##  4:      4  110
##  5:      5   66
##  6:      6  223
##  7:      7  836
##  8:      8 1539
##  9:      9 2012
## 10:     10 1841
## 11:     11 1392
## 12:     12  798
## 13:     13  402
## 14:     14  182
## 15:     15   85
## 16:     16   45
## 17:     17    6
## 18:     18    6
## 19:     20    2
## 20:     37    1
## 21:     81    1
## 22:    114    1
##     numSku    N
  topLineOrder <- lineOrder[numSku<=12]
  topLineOrder <- rbind(topLineOrder,lineOrder[numSku>12,.(numSku="other",N=sum(N))])
  topLineOrder$numSku <- factor(topLineOrder$numSku,levels = c(1:12,"other"))
  
  ggplot(topLineOrder,aes(y=N,x=numSku ) ) + geom_col() +theme_bw()


Copyright 2019   Oran Kittithreerapronchai.   All Rights Reserved.   Last modified: 2022-41-19,