-
Notifications
You must be signed in to change notification settings - Fork 9
Open
Labels
Description
#install.packages("devtools")
#devtools::install_github("ROpenStats/COVID19analytics")
library(COVID19analytics)
#> Warning: replacing previous import 'ggplot2::Layout' by 'lgr::Layout' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::intersect' by 'lubridate::intersect'
#> when loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::union' by 'lubridate::union' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::setdiff' by 'lubridate::setdiff' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'readr::col_factor' by 'scales::col_factor'
#> when loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::equals' by 'testthat::equals' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::not' by 'testthat::not' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::is_less_than' by
#> 'testthat::is_less_than' when loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::matches' by 'testthat::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'testthat::matches' by 'tidyr::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::extract' by 'tidyr::extract' when
#> loading 'COVID19analytics'
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggplot2)
# Generate daily plots
processor <- COVID19DataProcessor$new(provider.id = "JohnsHopkingsUniversity", missing.values.model.id = "imputation")
dummy <- processor$setupData()
#> INFO [17:21:29.236] {stage: processor-setup}
#> INFO [17:21:29.263] Checking required downloaded {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE}
#> INFO [17:21:29.353] Checking required downloaded {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE}
#> INFO [17:21:29.374] Checking required downloaded {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE}
#> INFO [17:21:29.417] {stage: data loaded}
#> INFO [17:21:29.419] {stage: data-setup}
dummy <- processor$transform()
#> INFO [17:21:29.421] Executing transform
#> INFO [17:21:29.421] Executing consolidate
#> INFO [17:21:30.758] {stage: consolidated}
#> INFO [17:21:30.759] Executing standarize
#> INFO [17:21:30.814] gathering DataModel
#> INFO [17:21:30.815] {stage: datamodel-setup}
dummy <- processor$curate()
#> INFO [17:21:30.818] {stage: loading-aggregated-data-model}
#> Warning in countrycode(x, origin = "country.name", destination = "continent"): Some values were not matched unambiguously: MS Zaandam
#> INFO [17:21:32.584] {stage: calculating-rates}
#> INFO [17:21:32.727] {stage: making-data-comparison}
#> INFO [17:21:33.832] {stage: applying-missing-values-method}
#> INFO [17:21:33.833] {stage: Starting first imputation}
#> INFO [17:21:33.837] {stage: calculating-rates}
#> INFO [17:21:34.066] {stage: making-data-comparison-2}
#> INFO [17:21:35.068] {stage: calculating-top-countries}
#> INFO [17:21:35.084] {stage: processed}
data.significative <- processor$data.agg %>% filter(confirmed >= 1000)
data.country.avg <- data.significative %>%
group_by(country) %>%
summarize(confirmed = max(confirmed),
fatality.rate.min.mean = mean(fatality.rate.min),
fatality.rate.min.cv = sd(fatality.rate.min)/fatality.rate.min.mean,
fatality.rate.max.mean =mean(fatality.rate.max),
fatality.rate.max.cv = sd(fatality.rate.max)/fatality.rate.max.mean) %>%
arrange(fatality.rate.min.mean)
data.country.avg
#> # A tibble: 108 x 6
#> country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m…
#> <chr> <int> <dbl> <dbl> <dbl>
#> 1 Qatar 48947 0.00118 0.646 0.00223
#> 2 Singap… 32876 0.00170 0.750 0.00304
#> 3 Bahrain 9692 0.00261 0.439 0.00403
#> 4 Djibou… 2697 0.00309 0.451 0.00439
#> 5 Maldiv… 1457 0.00338 0.0925 0.00646
#> 6 Uzbeki… 3369 0.00408 0.115 0.00569
#> 7 Oman 8373 0.00480 0.0814 0.00840
#> 8 Iceland 1805 0.00488 0.236 0.00575
#> 9 Guinea… 1195 0.00545 0.110 0.0107
#> 10 Guinea 3275 0.00563 0.104 0.00908
#> # … with 98 more rows, and 1 more variable: fatality.rate.max.cv <dbl>
ggplot(data.country.avg) + geom_histogram(aes(x = fatality.rate.min.mean), bins = 60)
least.letality <- data.country.avg %>%
arrange(fatality.rate.min.mean) %>%
filter(fatality.rate.min.mean <= 0.05 & confirmed >= 30000) %>%
arrange(desc(confirmed))
least.letality
#> # A tibble: 17 x 6
#> country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m…
#> <chr> <int> <dbl> <dbl> <dbl>
#> 1 US 1699176 0.0452 0.358 0.0828
#> 2 Russia 370680 0.00868 0.164 0.0160
#> 3 Germany 181524 0.0262 0.654 0.0328
#> 4 Turkey 159797 0.0241 0.154 0.0387
#> 5 India 158086 0.0314 0.0748 0.0543
#> 6 Peru 135905 0.0285 0.134 0.0463
#> 7 China 84106 0.0409 0.288 0.0488
#> 8 Chile 82289 0.0103 0.323 0.0163
#> 9 Saudi … 78541 0.00831 0.397 0.0146
#> 10 Pakist… 59151 0.0187 0.238 0.0328
#> 11 Qatar 48947 0.00118 0.646 0.00223
#> 12 Belarus 38956 0.00704 0.264 0.0127
#> 13 Bangla… 38292 0.0220 0.425 0.0415
#> 14 Singap… 32876 0.00170 0.750 0.00304
#> 15 United… 31969 0.00774 0.236 0.0134
#> 16 Portug… 31292 0.0331 0.309 0.0612
#> 17 Switze… 30776 0.0431 0.454 0.0541
#> # … with 1 more variable: fatality.rate.max.cv <dbl>
most.letality <- data.country.avg %>%
arrange(fatality.rate.min.mean) %>%
filter(fatality.rate.min.mean > 0.05 & confirmed >= 30000) %>%
arrange(desc(confirmed))
most.letality
#> # A tibble: 11 x 6
#> country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m…
#> <chr> <int> <dbl> <dbl> <dbl>
#> 1 Brazil 411821 0.0561 0.289 0.0903
#> 2 United… 268619 0.124 0.311 0.231
#> 3 Spain 236259 0.0941 0.315 0.137
#> 4 Italy 231139 0.112 0.313 0.175
#> 5 France 183067 0.109 0.465 0.173
#> 6 Iran 141591 0.0590 0.195 0.0806
#> 7 Mexico 78023 0.0842 0.294 0.117
#> 8 Belgium 57592 0.117 0.474 0.190
#> 9 Nether… 45970 0.102 0.325 0.192
#> 10 Ecuador 38103 0.0532 0.376 0.0971
#> 11 Sweden 35088 0.0874 0.491 0.157
#> # … with 1 more variable: fatality.rate.max.cv <dbl>
compared.countries <- unique(c(least.letality$country,
"Argentina", "Brazil", "Chile", "US", "Japan", "Korea, South", "Germany", "Japan"))
compared.countries
#> [1] "US" "Russia" "Germany"
#> [4] "Turkey" "India" "Peru"
#> [7] "China" "Chile" "Saudi Arabia"
#> [10] "Pakistan" "Qatar" "Belarus"
#> [13] "Bangladesh" "Singapore" "United Arab Emirates"
#> [16] "Portugal" "Switzerland" "Argentina"
#> [19] "Brazil" "Japan" "Korea, South"
rg <- ReportGeneratorEnhanced$new(data.processor = processor)
ggplot <- rg$ggplotCountriesLines(included.countries = compared.countries,
min.confirmed = 100,
field.description = "Death Rates min",
field = "fatality.rate.min", countries.text = "Compared Countries",
log.scale = FALSE)
ggplot
ggplot <- rg$ggplotCrossSection(included.countries = compared.countries,
field.x = "confirmed",
field.y = "fatality.rate.min",
plot.description = "Cross section Confirmed vs Death rate min",
log.scale.x = TRUE,
log.scale.y = FALSE)
ggplot
ggplot <- rg$ggplotCrossSection(included.countries = most.letality$country,
field.x = "confirmed",
field.y = "fatality.rate.min",
plot.description = "Cross section Confirmed vs Death rate min",
log.scale.x = TRUE,
log.scale.y = FALSE)
ggplot
Created on 2020-05-28 by the reprex package (v0.3.0)