Book Sales Data
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Read the sales CSV; the path is resolved against the working directory.
data <- read.csv(file = "sales2019.csv")
# Structure checks, left disabled:
# colnames(data)
# glimpse(data)

# Keep only the rows whose user_submitted_review is not NA.
rm_review <- filter(data, !is.na(user_submitted_review))
# Disabled: count of the rows that the filter above drops.
# na_review <- data %>%
#   filter(is.na(user_submitted_review))
# num_na <- length(na_review[["user_submitted_review"]])
# Rows of the full data set that have a recorded total_purchased.
rm_purchase <- filter(data, !is.na(total_purchased))

# Fill-in value: mean of the recorded quantities. Note it is taken over all
# rows of `data` (not just the review-filtered ones), and as.integer() drops
# the fractional part rather than rounding.
avg_purchase <- as.integer(mean(rm_purchase$total_purchased))

# On the review-filtered rows, add new_total_purchased: the recorded quantity
# where there is one, otherwise avg_purchase. The original column is kept.
fixed_purchase <- mutate(
  rm_review,
  new_total_purchased = if_else(
    is.na(total_purchased),
    avg_purchase,
    total_purchased
  )
)
# Substrings that decide the sentiment label, grouped by the label they map
# to. Matching is case-sensitive, which is why both "okay" and "OK" appear.
praise_terms <- c("Awesome", "okay", "better book", "OK")
complaint_terms <- c("not", "Hated", "other")

# Add a new_review column with the label for each review. Precedence is the
# order of the case_when() arms: any praise term wins over any complaint
# term, a complaint term wins over "learned", and anything left unmatched is
# labelled "no review".
new_review <- mutate(
  fixed_purchase,
  new_review = case_when(
    str_detect(
      user_submitted_review,
      paste(praise_terms, collapse = "|")
    ) ~ "positive",
    str_detect(
      user_submitted_review,
      paste(complaint_terms, collapse = "|")
    ) ~ "negative",
    str_detect(user_submitted_review, "learned") ~ "positive",
    TRUE ~ "no review"
  )
)
# dateformat <- function(str){
# date <- mdy(str)
# return (date)
# }
library(lubridate)
##
## Attaching package: 'lubridate'
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Parse the month/day/year strings in the date column into Date values.
standard_date <- mdy(new_review[["date"]])

# Attach the parsed dates as new_date; the original date column is kept.
new_date <- mutate(new_review, new_date = standard_date)
# Abandoned alternatives, kept for reference:
# mutate(new_date = unlist(map(date, mdy)))
#   -> unlist() drops the Date class, so the column shows the underlying
#      day counts instead of dates.
# mutate(new_date = map(date, dateformat))
# Flag each row as falling "before" or "after" the cutoff date: rows dated on
# or after the cutoff are "after", everything earlier is "before". The cutoff
# string is converted with as.Date() explicitly, which is the conversion the
# Date-vs-character comparison would otherwise perform implicitly.
# For reference, 2019-07-01 is day number 18078 when a Date is viewed as a
# number.
groupby2 <- mutate(
  new_date,
  bigday = if_else(new_date >= as.Date("2019-7-1"), "after", "before")
)
# Sum new_total_purchased within each bigday value ("before"/"after") for the
# given set of rows; returns one row per period with the total in `sales`.
sales_by_period <- function(orders) {
  summarise(group_by(orders, bigday), sales = sum(new_total_purchased))
}

# Totals across every customer.
program_effect <- sales_by_period(groupby2)

# Totals for individual customers only.
individual <- filter(groupby2, customer_type == "Individual")
individual_program <- sales_by_period(individual)

# Totals for business customers only.
business_program <- sales_by_period(
  filter(groupby2, customer_type == "Business")
)
# Number of reviews labelled "positive" in each period ("before"/"after").
positive_program <- groupby2 %>%
  filter(new_review == "positive") %>%
  group_by(bigday) %>%
  summarise(positive_num = n())

# Number of reviews labelled "negative" in each period. The count column is
# named negative_num so the table describes what it actually holds (it used
# to carry the positive_num label copied from the block above).
negative_program <- groupby2 %>%
  filter(new_review == "negative") %>%
  group_by(bigday) %>%
  summarise(negative_num = n())

# Print summary statistics for the positive-review counts.
summary(positive_program)
## bigday positive_num
## Length:2 Min. :1128
## Class :character 1st Qu.:1130
## Mode :character Median :1131
## Mean :1131
## 3rd Qu.:1132
## Max. :1134
Including Plots