First data analysis project in R.
Which countries have had the highest number of positive cases against the number of tests?
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Read the raw data and keep only the rows whose Province_State is
# "All States" (presumably the country-level aggregates -- confirm against
# the data set's documentation).
covid19 <- read.csv("covid19.csv")
covid19_all <- covid19 %>%
  filter(Province_State == "All States")

# One row per country: total positives, total tests, and positives per test.
highest_case_positive_rate <- covid19_all %>%
  group_by(Country_Region) %>%
  summarize(
    sum_positive = sum(daily_positive),
    sum_test = sum(daily_tested),
    rate = sum_positive / sum_test
  )

# Rank countries from highest to lowest rate and keep the first three.
sorted <- highest_case_positive_rate %>%
  arrange(desc(rate))
top3 <- sorted %>%
  head(3)

question <- "Which countries have had the highest number of positive cases against the number of tests?"
answer <- pull(top3, Country_Region)

# Bundle the question, the answer and the full per-country table together.
covid_analysis_list <- list(question, answer, highest_case_positive_rate)

# Single-bracket subsetting returns a one-element list holding the answer,
# which is why the printed result starts with a `[[1]]` header.
covid_analysis_list[2]
## [[1]]
## [1] "Ecuador" "Mexico" "Sweden"