From 7bab1ebeac5eace118d312d774629e4d9f897b0e Mon Sep 17 00:00:00 2001
From: avimallu <66530011+avimallu@users.noreply.github.com>
Date: Thu, 16 Jul 2020 19:18:28 +0530
Subject: [PATCH] Add files via upload

---
 agg_by_period.R | 79 +++++++++++++++++++++++++++++++++++++++++++++++++
 decile.R        | 27 +++++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 agg_by_period.R
 create mode 100644 decile.R

diff --git a/agg_by_period.R b/agg_by_period.R
new file mode 100644
index 0000000..4e15feb
--- /dev/null
+++ b/agg_by_period.R
@@ -0,0 +1,79 @@
+
+# Required libraries ------------------------------------------------------
+
+library(data.table)
+library(purrr) # to be removed once mergelist is released in data.table
+
+# Sample large data table -------------------------------------------------
+
+DT = data.table(
+  dates = seq.Date(as.Date("2019-01-01"), by = "1 week", length.out = 200),
+  grp_1 = c("India", "Brazil", "China", "Russia", "South Africa"),
+  grp_2 = c("Buttons", "Salt"),
+  sales = runif(1e8, 0, 100)
+)
+
+# Aggregation function ----------------------------------------------------
+
+agg_by_period <- function(i, DT, j, by, index) {
+  
+  # sum is optimized internally in data.table
+  # between is parallelized in data.table
+  # key the data.table by the index to have even faster operation times
+  temp <- 
+    setnames(DT[
+      between(get(index), date_periods[[i]][[1]], date_periods[[i]][[2]]),
+      lapply(.SD, sum, na.rm = TRUE), keyby = by, .SDcols = j],
+      j, paste0(j, "_", names(date_periods[i])))
+  
+  # scale the latter period by any amount if required
+  if(date_periods[[i]][[3]] != 1) {
+    
+    temp[, lapply(.SD, `*`, date_periods[[i]][[3]]),
+         keyby = by]
+    
+  } else {
+    
+    temp
+  }
+  
+}
+
+# Aggregate By Period = agp, for data.tables ala DT
+agp_DT <- function(DT, j, by, index) { 
+  
+  
+  # to be replaced by mergelist when it is released in data.table
+  reduce(
+    lapply(seq_along(date_periods), agg_by_period, DT, j, by, index),
+    merge.data.table, allow.cartesian = TRUE,
+    by = by, all.x = TRUE,
+    .init = unique(DT[, ..by]))
+
+}
+
+# Create fixed tables and functions to ease date period creation ----------
+
+# Only to ease recalculation of min and max value over and over again
+max_date = max(DT[, dates])
+min_date = min(DT[, dates])
+
+# Creating an alias to avoid using as.Date
+d <- function(x) {as.Date(x)}
+
+# This list can be configured to calcaulte different aggregate
+# time periods of interest
+date_periods <- list(
+  # name      = start_date, end_date, scaling
+  r3m         = list(max_date - 12 * 7, max_date, 1),
+  p3m         = list(max_date - 25 * 7, max_date - 13 * 7, 1),
+  r6m         = list(max_date - 25 * 7, max_date, 1) ,
+  p6m         = list(max_date - 51 * 7, max_date - 26 * 7, 1),
+  ytd         = list(d("2020-01-01"), max_date, 0.7)
+)
+
+# Benchmark ---------------------------------------------------------------
+
+system.time(agp_DT(DT, "sales", "grp_1", "dates"))
+system.time(agp_DT(DT, "sales", c("grp_1", "grp_2"), "dates"))•
+
diff --git a/decile.R b/decile.R
new file mode 100644
index 0000000..ed1a057
--- /dev/null
+++ b/decile.R
@@ -0,0 +1,27 @@
+
+# Required libraries ------------------------------------------------------
+
+library(data.table)
+
+# Add deciles -------------------------------------------------------------
+
+decile_dt <-  function(x, decreasing = TRUE) {
+  
+  # browser()
+  
+  NA_positions = (is.na(x) | (x == 0))
+  
+  x = as.data.table(x = x)
+  
+  x[is.na(x), x := 0][
+    , num := .I][
+      order(-x)][
+        , cumsums := cumsum(x)][
+          , tot := cumsums/max(cumsums)][
+            , dec := (if(decreasing) 11 - ceiling(tot * 10)
+                      else ceiling(tot * 10))][
+                        order(num)][
+                          , dec := fifelse(NA_positions, NA_real_, dec)][
+                            , dec]
+  
+}
\ No newline at end of file