forked from MatCyt/Price_Elasticity
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcross_price_elasticity.R
112 lines (76 loc) · 4.05 KB
/
cross_price_elasticity.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Calculating Cross Price Elasticity for any given SKU
### LOAD LIBRARIES ----------------------------------------------
pacman::p_load(tidyverse, data.table)
### LOAD DATA ----------------------------------------------
# For a simple case scenario only two variables are important: price and sales (in value, units and volume depending on goal)
# In a real world we should include all possible variables impacting sales (distribution, advertising, competitors actions etc.)
df_base = read_delim('./data/dummy_sales.csv', delim = ',') %>%
mutate(X1 = NULL)
# Dataset represents sales (in value and units) and average price for 8 SKUs across 156 weeks
# while it is not a real dataset it is very closely based on one
str(df_base)
length(unique(df_base$SalesDate))
length(unique(df_base$SKU))
### CAST DATA ----------------------------------------------
# we need to dcast/pivot data to change it into model appropriate format
df_casted = data.table::dcast(setDT(df_base), SalesDate ~ SKU, value.var = c('sum_value', 'sum_units', 'average_price'))
sku_list = unique(df_base$SKU)
# RUN MODELS FOR EACH COMBINATION OF SKUs ---------------------------------
# Store the results in a list and add a new table to gather final metrics
# Create new table to be used later for price elasticity calculation
additional_metrics = df_base %>%
group_by(SKU) %>%
summarise(mean_sales = mean(sum_units),
mean_price = mean(average_price))
models = list()
model_names = list()
cross_results = data.frame()
# Run a model for each SKU
for (sku in sku_list) {
target_variable = paste('sum_units', sku, sep = '_') # select target - here we model on units
input = df_casted %>% # keep sales of sku as target and price of all other skus as other variables
select(str_subset(names(df_casted), 'average_price'), target_variable)
model_name = paste(sku, 'model' ,sep = '_')
model_iterated = lm( # run models on all skus
as.formula(paste(target_variable,
"~ ."))
, data = input)
### Build end output and add line by line to final data frame with results
# Extract model metrics and transform into desired format
model_output = as.data.table(summary(model_iterated)$coefficients) %>%
mutate(variables = rownames(summary(model_iterated)$coefficients)) %>%
select(c(1,4,5)) %>%
rename(p_value = 2) %>%
filter(variables != '(Intercept)')
# Calculate price elasticities - use price and sales from additional metrics by joining the tables
elasticity_input = model_output %>%
mutate(model = model_name,
SKU = sub("^[^_]*_[^_]*_", "", variables)) %>%
left_join(additional_metrics) %>%
mutate(elasticity = Estimate * (mean_price / mean_sales),
significant = ifelse(p_value < 0.2, 'significant', 'not significant'),
own_price = ifelse(SKU == sku, 'yes', 'no')) %>%
arrange(desc(variables)) %>%
# Add information about significance for each SKU (separately for cross and own) - see Readme for this point discussion
# Change the elasticity value to 0 for each not significant variable
mutate(elasticity = replace(elasticity,
own_price == 'no' & (significant == 'not significant' | Estimate < 0), 0),
elasticity = replace(elasticity,
own_price == 'yes' & (significant == 'not significant' | Estimate > 0), 0))
elasticity_output = dcast(elasticity_input, model ~ variables, value.var = 'elasticity')
# Add the results from the model to the final table
cross_results = rbind(cross_results, elasticity_output)
}
# SET SAME ORDER OF COLUMNS AND ROWS --------------------------------------
# to read the output easier
# change the naming order to facilitate arrangement of columns
colnames(cross_results) = colnames(cross_results) %>%
str_remove(., 'average_price_') %>%
paste(., 'price', sep = '_')
# transform into final format
cross_results = cross_results %>%
arrange(model_price)
# final output
cross_results
# Save the output
write.csv(cross_results, './data/cross_price_elasticity.csv')