diff --git a/R/chagas2012.R b/R/chagas2012.R deleted file mode 100644 index 6c7ea6f8..00000000 --- a/R/chagas2012.R +++ /dev/null @@ -1,17 +0,0 @@ -# TODO Check if we really need to have the package `qtl` installed. Otherwise -# remove all entries of the form `see \code{\link[qtl]...` -#' Seroprevalence data on serofoi -#' -#' Data from a serological surveys -#' -#' @docType data -#' -#' @usage chagas2012 -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' chagas2012 -"chagas2012" diff --git a/R/chik2015.R b/R/chik2015.R deleted file mode 100644 index 30142601..00000000 --- a/R/chik2015.R +++ /dev/null @@ -1,15 +0,0 @@ -#' Seroprevalence data on serofoi -#' -#' Data from a serological surveys -#' -#' @docType data -#' -#' @usage chik2015 -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' chik2015 -"chik2015" diff --git a/R/seroprevalence_data.R b/R/seroprevalence_data.R deleted file mode 100644 index 94d56450..00000000 --- a/R/seroprevalence_data.R +++ /dev/null @@ -1,147 +0,0 @@ -# TODO Complete @param documentation - - -#' Prepare data from a serological survey for modelling -#' -#' This function adds the necessary additional variables to the given dataset -#' `serodata` corresponding to a serological survey. -#' @param serodata A data frame containing the data from a serological survey. -#' This data frame must contain the following columns: -#' \describe{ -#' \item{`survey`}{survey Label of the current survey} -#' \item{`total`}{Number of samples for each age group} -#' \item{`counts`}{Number of positive samples for each age group} -#' \item{`age_min`}{age_min} -#' \item{`age_max`}{age_max} -#' \item{`tsur`}{Year in which the survey took place} -#' \item{`country`}{The country where the survey took place} -#' \item{`test`}{The type of test taken} -#' \item{`antibody`}{antibody} -#' } -#' Alternatively to `age_min` and `age_max`, the dataset could already include -#' the age group marker `age_mean_f`, representing the middle point between -#' `age_min` and `age_max`. If `afe_mean_f` is missing, it will be generated -#' by the function. -#' @param alpha probability of a type I error. For further details refer to -#' [binconf][Hmisc::binconf]. -#' @return serodata with additional columns necessary for the analysis. These -#' columns are: -#' \describe{ -#' \item{`age_mean_f`}{Floor value of the average between age_min and age_max -#' for the age groups delimited by `age_min` and `age_max`} -#' \item{`sample_size`}{The size of the sample} -#' \item{`birth_year`}{Years in which the subject was born according to the -#' age group marker `age_mean_f`} -#' \item{`prev_obs`}{Observed prevalence} -#' \item{`prev_obs_lower`}{Lower limit of the confidence interval for the -#' observed prevalence} -#' \item{`prev_obs_upper`}{Upper limit of the confidence interval for the -#' observed prevalence} -#' } -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' @export -prepare_serodata <- function(serodata = serodata, - alpha = 0.05) { - checkmate::assert_numeric(alpha, lower = 0, upper = 1) - serodata <- validate_serodata(serodata) - - if (!any(colnames(serodata) == "age_mean_f")) { - serodata <- serodata %>% - dplyr::mutate( - age_mean_f = floor((.data$age_min + .data$age_max) / 2), - sample_size = sum(.data$total) - ) - } - - if (!any(colnames(serodata) == "birth_year")) { - serodata <- dplyr::mutate( - serodata, - birth_year = .data$tsur - .data$age_mean_f - ) - } - - serodata <- serodata %>% - cbind( - Hmisc::binconf( - serodata$counts, - serodata$total, - alpha = alpha, - method = "exact", - return.df = TRUE - ) - ) %>% - dplyr::rename( - prev_obs = "PointEst", - prev_obs_lower = "Lower", - prev_obs_upper = "Upper" - ) %>% - dplyr::arrange(.data$age_mean_f) - - return(serodata) -} - - -#' Prepare pre-processed serological survey dataset to plot the -#' binomial confidence intervals of the seroprevalence by age group -#' -#' This function prepapares a given pre-processed serological dataset (see -#' [prepare_serodata()]) to plot the binomial confidence intervals of its -#' corresponding seroprevalence grouped by age group. -#' @inheritParams run_seromodel -#' @return data set with the binomial confidence intervals -#' @examples -#' data(chagas2012) -#' serodata <- prepare_serodata(chagas2012) -#' prepare_bin_data(serodata) -#' @keywords internal -#' @noRd -prepare_bin_data <- function(serodata, - bin_step = 5, - alpha = 0.05) { - if (!any(colnames(serodata) == "age_mean_f")) { - serodata <- serodata %>% - dplyr::mutate( - age_mean_f = floor((.data$age_min + .data$age_max) / 2), - sample_size = sum(.data$total) - ) - } - serodata$age_group <- get_age_group( - age = serodata$age_mean_f, - step = bin_step - ) - - serodata_bin <- serodata %>% - dplyr::group_by(.data$age_group) %>% - dplyr::summarise( - total = sum(.data$total), - counts = sum(.data$counts) - ) %>% - dplyr::mutate( - survey = unique(serodata$survey), - tsur = unique(serodata$tsur), - age_min = as.integer(gsub("[(]|\\,.*", "\\1", .data$age_group)) + 1, - age_max = as.integer(gsub(".*\\,|[]]", "\\1", .data$age_group)), - age_mean_f = floor((.data$age_min + .data$age_max) / 2) - ) - - serodata_bin <- cbind( - serodata_bin, - Hmisc::binconf( - serodata_bin$counts, - serodata_bin$total, - alpha = alpha, - method = "exact", - return.df = TRUE - ) - ) %>% - dplyr::rename( - prev_obs = "PointEst", - prev_obs_lower = "Lower", - prev_obs_upper = "Upper" - ) %>% - dplyr::arrange(.data$age_mean_f) - - return(serodata_bin) -} diff --git a/R/veev2012.R b/R/veev2012.R deleted file mode 100644 index 49e4d6fd..00000000 --- a/R/veev2012.R +++ /dev/null @@ -1,15 +0,0 @@ -#' Seroprevalence data on serofoi -#' -#' Data from a serological surveys -#' -#' @docType data -#' -#' @usage veev2012 -#' -#' @format An object of class `"cross"`; see [qtl::read.cross()]. -#' -#' @keywords datasets -#' -#' @examples -#' veev2012 -"veev2012" diff --git a/tests/testthat/clean_expected_files.R b/tests/testthat/clean_expected_files.R deleted file mode 100644 index 72ad5a0a..00000000 --- a/tests/testthat/clean_expected_files.R +++ /dev/null @@ -1,14 +0,0 @@ -# This script removes the svg and csv files used in the automatic tests -# This is usually required when a new version of rstan is released -# Random number generation changes for each new version of -# Rstan, https://mc-stan.org/docs/2_18/reference-manual/reproducibility-chapter.html -# thus all files with expected results (both tables and graphs) become -# obsolete and need to be regenerated - -library(testthat) -paths <- c(testthat::test_path("_snaps", "*"), testthat::test_path("extdata", "dataframes", "expected", "*")) - -for (path in paths) { - cat("Deleting", path, "\n") - unlink(path, recursive = TRUE) -} diff --git a/tests/testthat/extdata/haiti_ssa_sample.RDS b/tests/testthat/extdata/haiti_ssa_sample.RDS deleted file mode 100644 index 04f9fa75..00000000 Binary files a/tests/testthat/extdata/haiti_ssa_sample.RDS and /dev/null differ diff --git a/tests/testthat/test_fit_seromodel.R b/tests/testthat/test_fit_seromodel.R deleted file mode 100644 index 541f48bc..00000000 --- a/tests/testthat/test_fit_seromodel.R +++ /dev/null @@ -1,164 +0,0 @@ -test_that("fit_seromodel constant model estimates the right force of infection", { - - foi_df <- data.frame( - year = 1970:1999, - foi = rep(0.03, 30) - ) - survey_features <- data.frame( - age_min = seq(1, 21, 10), - age_max = seq(10, 30, 10), - sample_size = c(1000, 1500, 2000) - ) - - simdata <- simulate_serosurvey( - model = "time", - foi = foi_df, - survey_features = survey_features - ) %>% - mutate( - survey = "constant_foi", - tsur = max(foi_df$year) + 1 - ) %>% - rename( - total = sample_size, - counts = n_seropositive - ) %>% prepare_serodata() - - model_object <- fit_seromodel( - serodata = simdata, - foi_model = "constant", - iter = 1000 - ) - - foi_central_estimates <- get_foi_central_estimates( - seromodel_object = model_object, - serodata = simdata - ) %>% - mutate( - # calculates tolerance as half the confidence interval size - tol = (upper - lower)/2 - ) %>% - left_join(foi_df, by = "year") - - expect_true( - all( - dplyr::near( - foi_central_estimates$medianv, - foi_central_estimates$foi, - tol = max(foi_central_estimates$tol, 0.05) - ) - ) - ) -}) - -test_that("fit_seromodel tv_normal model estimates the right force of infection", { - - foi_df <- data.frame( - year = 1940:1999, - foi = rep(c(0.06, 0.03, 0.01), c(20, 20, 20)) - ) - survey_features <- data.frame( - age_min = seq(1, 51, 10), - age_max = seq(10, 60, 10), - sample_size = 100 - ) - - simdata <- simulate_serosurvey( - model = "time", - foi = foi_df, - survey_features = survey_features - ) %>% - mutate( - survey = "sw_dec_foi", - tsur = max(foi_df$year) + 1 - ) %>% - rename( - total = sample_size, - counts = n_seropositive - ) %>% prepare_serodata() - - model_object <- fit_seromodel( - serodata = simdata, - foi_model = "tv_normal", - chunks = rep(c(1, 2, 3), c(15, 20, 20)), - iter = 800 - ) - - foi_central_estimates <- get_foi_central_estimates( - seromodel_object = model_object, - serodata = simdata - ) %>% - mutate( - # calculates tolerance as half the confidence interval size - tol = (upper - lower) / 2 - ) %>% - left_join(foi_df, by = "year") - - expect_true( - all( - dplyr::near( - foi_central_estimates$medianv, - foi_central_estimates$foi, - tol = max(foi_central_estimates$tol, 0.05) - ) - ) - ) -}) - -test_that("fit_seromodel tv_normal_log model estimates the right force of infection", { - - foi_df <- data.frame( - year = 1950:1999, - foi = rep( - c(0.001, 0.4, 0.001), - c(30, 5, 15) - ) - ) - - survey_features <- data.frame( - age_min = seq(1, 41, 10), - age_max = seq(10, 50, 10), - sample_size = 100 - ) - - simdata <- simulate_serosurvey( - model = "time", - foi = foi_df, - survey_features = survey_features - ) %>% - mutate( - survey = "big_outbreak", - tsur = max(foi_df$year) + 1 - ) %>% - rename( - total = sample_size, - counts = n_seropositive - ) %>% prepare_serodata() - - model_object <- fit_seromodel( - serodata = simdata, - foi_model = "tv_normal_log", - chunks = rep(c(1, 2, 3), c(25, 5, 15)), - iter = 700 - ) - - foi_central_estimates <- get_foi_central_estimates( - seromodel_object = model_object, - serodata = simdata - ) %>% - mutate( - # calculates tolerance as half the confidence interval size - tol = (upper - lower) / 2 - ) %>% - left_join(foi_df, by = "year") - - expect_true( - all( - dplyr::near( - foi_central_estimates$medianv, - foi_central_estimates$foi, - tol = max(foi_central_estimates$tol, 0.05) - ) - ) - ) -}) diff --git a/tests/testthat/test_issue_47.R b/tests/testthat/test_issue_47.R deleted file mode 100644 index d4bb27eb..00000000 --- a/tests/testthat/test_issue_47.R +++ /dev/null @@ -1,24 +0,0 @@ -test_that("issue 47", { - skip_on_os(c("windows", "mac")) - source("testing_utils.R") - - library(dplyr) - - # Load data - ## This dataset is already prepared - serodata_path <- testthat::test_path("extdata", "haiti_ssa_sample.RDS") - serodata <- readRDS(serodata_path) - - # Error reproduction - model_test <- fit_seromodel( - serodata = serodata, - foi_model = "tv_normal", - print_summary = FALSE - ) - foi <- rstan::extract(model_test, "foi", inc_warmup = FALSE)[[1]] - prev_expanded <- get_prev_expanded(foi, serodata = serodata) - - # Test - age_max <- max(serodata$age_mean_f) - expect_length(prev_expanded$age, n = age_max) -}) diff --git a/tests/testthat/testing_utils.R b/tests/testthat/testing_utils.R deleted file mode 100644 index 3119eddf..00000000 --- a/tests/testthat/testing_utils.R +++ /dev/null @@ -1,69 +0,0 @@ -# TODO Move to separate package ### -# TODO Document all functions and provide examples -library(testthat) -library(vdiffr) -equal_with_tolerance <- function(tolerance = 2e-1) { - function(a, b) { - c <- base::mapply(function(x, y) { - if (is.na(x) && is.na(y)) { - return(0) - } else if (is.na(x) || is.na(y)) { - return(tolerance + 1) - } else { - return(abs(x - y)) - } - }, a, b) - return(base::all(c < tolerance)) - } -} -equal_exact <- function() { - function(a, b) { - x <- base::mapply(function(x, y) x == y || (is.na(x) && is.na(y)), a, b) - return(base::all(x == TRUE)) - } -} - -# TODO use testthat snapshots -expect_similar_dataframes <- function(name, actual_df, column_comparation_functions) { - actual_df_filename <- file.path(tempdir(), paste(name, "csv", sep = ".")) - write.csv(actual_df, actual_df_filename) - compare_fun <- function(expected_df_filename, actual_df_filename) { - return(compare_dataframes(expected_df_filename, actual_df_filename, column_comparation_functions)) - } - expect_snapshot_file(actual_df_filename, compare = compare_fun) -} - - -compare_dataframes <- function(expected_df_filename, actual_df_filename, column_comparation_functions) { - expected_df <- read.csv(expected_df_filename) - actual_df <- read.csv(actual_df_filename) - - all_columns_ok <- TRUE - for (col in base::names(column_comparation_functions)) { - if (col %in% colnames(expected_df) && col %in% colnames(actual_df)) { - compare_function <- column_comparation_functions[[col]] - col_ok <- compare_function(expected_df[[col]], actual_df[[col]]) - if (!col_ok) { - cat("Column", col, "differs ", expected_df[[col]], "!=", actual_df[[col]], "\n") - } - all_columns_ok <- all_columns_ok && col_ok - } else { - if (!(col %in% colnames(expected_df))) { - cat("Column", col, "not in first dataframe") - } - if (!(col %in% colnames(expected_df))) { - cat("Column", col, "not in second dataframe") - } - } - } - return(all_columns_ok) -} - -expect_same_plot <- function(plot_name, actual_plot) { - if (per_platform_snapshots) { - title <- file.path(r_version_id(), plot_name) - } else { - title <- plot_name - } - return(vdiffr::expect_doppelganger(title, actual_plot)) -}