Professional Documents
Culture Documents
final_ca
final_ca
library(dplyr)
library(lubridate)
library(Hmisc)
library(ggplot2)
library(plotly)
# Load the world-cities lookup table; the path is relative to the current
# working directory.
world_cities <- read.csv("Cohort-6/worldcities.csv")
# Add new columns and save the data frame into a new variable, df_eu_storeData.
# NOTE(review): the pipeline that should follow `eu_storeData |>` is missing
# from this copy of the script. The two statements after it look like the body
# of a helper (presumably the `na.zero()` called further down) whose
# `function(x)` header is also missing -- TODO restore both from the original.
eu_storeData |>
# Replace every NA in x with 0, then hand x back.
x[is.na(x)] <- 0
return(x)
# Spending/profit summary of df_eu_storeData: total Sales, total Profit and the
# earliest and latest `converted_date`.
# NOTE(review): no group_by() appears before this summarise() in this copy, so
# it cannot be confirmed here whether the totals are per customer -- TODO check.
df_eu_storeData |>
summarise(
total_spent = sum(Sales),
profit_generated =sum(Profit),
date_first_transaction = min(converted_date),
date_last_transaction = max(converted_date),
# NOTE(review): the summarise( above is never closed and the mutate( below has
# no arguments; lines are missing from this copy of the script.
mutate(
# NOTE(review): the next three pipeline heads have no bodies, and
# `df_clv_data` is not created anywhere in the visible lines.
df_eu_storeData %>%
df_clv_data |>
mutate(
# Builds `df_discounted_customers` from df_eu_storeData using summarise() and
# a left_join().
# NOTE(review): every call below has lost its arguments in this copy (no
# grouping, no summary columns, no join keys are visible) -- TODO restore them
# from the original script before running.
df_eu_storeData |>
summarise(
left_join(
df_eu_storeData |>
summarise(
) -> df_discounted_customers
# Pass `n_discounted_items_purchased` through na.zero() and overwrite
# df_merge1 with the result.
# NOTE(review): na.zero() is defined elsewhere in the script; presumably it
# replaces NA with 0 -- confirm against its definition.
df_merge1 <- mutate(
  df_merge1,
  n_discounted_items_purchased = na.zero(n_discounted_items_purchased)
)
# Customer's favourite category in terms of most spending, quantity and number
# of times bought.
# NOTE(review): the grouping step and the closing parentheses of summarise( /
# mutate( are missing from this copy; only the surviving lines are shown.
df_merge1 |>
summarise(
spending= sum(Sales),
quantity = sum(Quantity),
mutate(
# Each row's spending divided by the sum of spending, rounded to 2 decimals.
p_spending = round(spending/sum(spending),2),
ungroup() |>
df_eu_storeData |>
# Takes the first Category value; the remaining arguments of first( are cut
# off in this copy.
summarise(most_category_purchased = first(Category,
# NA occurs
# NOTE(review): which step produces the NA is not visible here.
df_eu_storeData |>
spending= sum(Sales),
quantity = sum(Quantity),
mutate(
# Per-country totals: Sales (spending) and Profit (profit_generated) summed
# within each Country group.
# NOTE(review): summarise( is never closed and mutate( has no arguments in
# this copy; lines are missing.
df_eu_storeData |>
group_by(Country) |>
summarise(
spending= sum(Sales),
profit_generated = sum(Profit),
mutate(
ungroup() |>
# Second pass: total Sales as `country_spending`, then the first Country value
# is kept as `most_purchased_from`.
# NOTE(review): the grouping for this pass and the remaining arguments of
# first( are cut off -- TODO restore.
df_eu_storeData |>
summarise(
country_spending =sum(Sales)
) |>
summarise(most_purchased_from = first(Country,
# Per-city totals: Sales (spending) and Profit (profit_generated) summed within
# each City group.
# NOTE(review): summarise( is never closed and mutate( has no arguments in
# this copy; lines are missing.
df_eu_storeData |>
group_by(City) |>
summarise(
spending= sum(Sales),
profit_generated = sum(Profit),
mutate(
ungroup() |>
# NOTE(review): the statement that builds `shipmode_data` has lost its body;
# only the empty summarise( / mutate( heads survive.
df_eu_storeData |>
summarise(
mutate(
# Ship-mode usage: stores the result in `overal_max_shiping_used`.
# NOTE(review): `counts` is not defined in the visible lines; presumably it is
# created in the truncated summarise( below -- TODO confirm.
shipmode_data |>
ungroup() |>
summarise(
mutate(
# Name of the largest element of `counts`.
max_shipmode_used =names(counts)[which.max(counts)],
# Each count as a share of the total, rounded to 2 decimals.
p_shipmode_used = round(counts/sum(counts),2)
) -> overal_max_shiping_used
# NOTE(review): the first pipeline head below has no body in this copy.
df_eu_storeData |>
# Per-segment totals stored in `segment_sale`: Sales summed within each Segment
# group, followed by each segment's share of the overall totals.
df_eu_storeData |>
group_by(Segment) |>
summarise(
total_segment_sale = sum(Sales),
# NOTE(review): `total_segment_profit` is used below but its definition is not
# visible here, and summarise( is never closed -- lines are missing.
mutate(
# Shares of total sales / total profit, rounded to 2 decimals.
p_total_segment_sale = round(total_segment_sale/sum(total_segment_sale),2),
p_total_segment_profit = round(total_segment_profit/sum(total_segment_profit),2),
) ->segment_sale
# Median of customer transactions.
# Keeps the first `converted_date` as `date_order`.
# NOTE(review): the grouping before summarise( and the arguments of the
# trailing mutate( are missing from this copy, so the median computation
# itself is not visible -- TODO restore.
df_eu_storeData |>
summarise(
date_order = first(converted_date))|>
mutate(
# Divide customers into intervals and score them with the RFM model.
#
# Reads `df_rfm_customer` (built earlier in the script), converts the three
# `interval_*` columns to integer scores, labels each customer from `RFMScore`
# with case_when(), and stores the result in `rfm_matrix`.
#
# NOTE(review): this statement is incomplete in this copy of the script.
#   * `(score_spending),` is the tail of a line whose beginning is missing;
#     presumably that line built `RFMScore` from the three scores, because
#     `RFMScore` is used below but never created here -- TODO restore it.
#   * The comment "Below average recency ..." has no case_when() clause after
#     it; the matching clause appears to be missing -- TODO restore it.
#   * fct_rev() comes from forcats, which is not among the library() calls
#     visible at the top of the script -- confirm it is attached.
#   * As written there is no default clause, so any score not listed below
#     yields NA in `rfm_segment`.
df_rfm_customer |>
  ungroup() |>
  mutate(
    # Recency levels are reversed before conversion (presumably so that more
    # recent customers receive the higher score -- confirm against the cut).
    score_recency = as.integer(fct_rev(interval_recency)),
    score_frequency = as.integer(interval_frequency),
    score_spending = as.integer(interval_spending),
    (score_spending),
    rfm_segment = case_when(
      RFMScore %in% c(555, 554, 544, 545, 454, 455, 445) ~ "Champions",
      RFMScore %in% c(543, 444, 435, 355, 354, 345, 344, 335) ~ "Loyal Customers",
      # Recent customers but spent a good amount and bought more than once
      RFMScore %in% c(553, 551, 552, 541, 542, 533, 532, 531, 452, 451, 442, 441, 431, 453, 433, 432, 423,
                      353, 352, 351, 342, 341, 333, 323) ~ "Potential Loyalists",
      RFMScore %in% c(512, 511, 422, 421, 412, 411, 311) ~ "Recent Customers",
      RFMScore %in% c(525, 524, 523, 522, 521, 515, 514, 513, 425, 424, 413, 414, 415, 315, 314, 313) ~
        "Promising",
      # Above-average recency, frequency and monetary values. They may not have
      # bought very recently though
      RFMScore %in% c(535, 534, 443, 434, 343, 334, 325, 324) ~ "Needs Attention",
      # Below average recency, frequency, and monetary values. Will lose them if
      # not reactivated
      # They spent big money and purchased often. But the last purchase was a
      # long time ago
      RFMScore %in% c(255, 254, 245, 244, 253, 252, 243, 242, 235, 234, 225, 224, 153, 152, 145, 143, 142,
                      135, 134, 133, 125, 124) ~ "At Risk",
      # Often made the biggest purchases but they haven’t returned for a long time
      RFMScore %in% c(155, 154, 144, 214, 215, 115, 114, 113) ~ "Can’t lose",
      # The last purchase was long ago. Low spenders with a low number of orders
      RFMScore %in% c(332, 322, 231, 241, 251, 233, 232, 223, 222, 132, 123, 122, 212, 211) ~
        "Hibernating",
    )) -> rfm_matrix
# Profit-based customer typing: sums Profit into `c_profit_generated`, derives
# an integer `p_score` from `profit_intervals`, and stores the result in
# `customer_type_profit_based`.
# NOTE(review): this copy is missing the grouping before summarise(, the
# mutate( call that should wrap `p_score` / `customer_Type`, the step that
# creates `profit_intervals`, and every clause of case_when( -- TODO restore
# them from the original script.
df_eu_storeData |>
summarise(
c_profit_generated = sum(Profit)) |>
ungroup() |>
p_score = as.integer(profit_intervals),
customer_Type = case_when(
)) -> customer_type_profit_based