From d97c9213534ca0ceb12ff60ed3508cc61e3d86e5 Mon Sep 17 00:00:00 2001
From: ntorresd <ntorresd@unal.edu.co>
Date: Fri, 24 May 2024 11:56:55 -0500
Subject: [PATCH] clean R and tests folders

---
 R/chagas2012.R                              |  17 --
 R/chik2015.R                                |  15 --
 R/seroprevalence_data.R                     | 147 ------------------
 R/veev2012.R                                |  15 --
 tests/testthat/clean_expected_files.R       |  14 --
 tests/testthat/extdata/haiti_ssa_sample.RDS | Bin 1150 -> 0 bytes
 tests/testthat/test_fit_seromodel.R         | 164 --------------------
 tests/testthat/test_issue_47.R              |  24 ---
 tests/testthat/testing_utils.R              |  69 --------
 9 files changed, 465 deletions(-)
 delete mode 100644 R/chagas2012.R
 delete mode 100644 R/chik2015.R
 delete mode 100644 R/seroprevalence_data.R
 delete mode 100644 R/veev2012.R
 delete mode 100644 tests/testthat/clean_expected_files.R
 delete mode 100644 tests/testthat/extdata/haiti_ssa_sample.RDS
 delete mode 100644 tests/testthat/test_fit_seromodel.R
 delete mode 100644 tests/testthat/test_issue_47.R
 delete mode 100644 tests/testthat/testing_utils.R

diff --git a/R/chagas2012.R b/R/chagas2012.R
deleted file mode 100644
index 6c7ea6f8..00000000
--- a/R/chagas2012.R
+++ /dev/null
@@ -1,17 +0,0 @@
-# TODO Check if we really need to have the package `qtl` installed. Otherwise
-# remove all entries of the form `see \code{\link[qtl]...`
-#' Seroprevalence data on serofoi
-#'
-#' Data from a serological surveys
-#'
-#' @docType data
-#'
-#' @usage chagas2012
-#'
-#' @format An object of class `"cross"`; see [qtl::read.cross()].
-#'
-#' @keywords datasets
-#'
-#' @examples
-#' chagas2012
-"chagas2012"
diff --git a/R/chik2015.R b/R/chik2015.R
deleted file mode 100644
index 30142601..00000000
--- a/R/chik2015.R
+++ /dev/null
@@ -1,15 +0,0 @@
-#' Seroprevalence data on serofoi
-#'
-#' Data from a serological surveys
-#'
-#' @docType data
-#'
-#' @usage chik2015
-#'
-#' @format An object of class `"cross"`; see [qtl::read.cross()].
-#'
-#' @keywords datasets
-#'
-#' @examples
-#' chik2015
-"chik2015"
diff --git a/R/seroprevalence_data.R b/R/seroprevalence_data.R
deleted file mode 100644
index 94d56450..00000000
--- a/R/seroprevalence_data.R
+++ /dev/null
@@ -1,147 +0,0 @@
-# TODO Complete @param documentation
-
-
-#' Prepare data from a serological survey for modelling
-#'
-#' This function adds the necessary additional variables to the given dataset
-#' `serodata` corresponding to a serological survey.
-#' @param serodata A data frame containing the data from a serological survey.
-#'  This data frame must contain the following columns:
-#' \describe{
-#'   \item{`survey`}{survey Label of the current survey}
-#'   \item{`total`}{Number of samples for each age group}
-#'   \item{`counts`}{Number of positive samples for each age group}
-#'   \item{`age_min`}{age_min}
-#'   \item{`age_max`}{age_max}
-#'   \item{`tsur`}{Year in which the survey took place}
-#'   \item{`country`}{The country where the survey took place}
-#'   \item{`test`}{The type of test taken}
-#'   \item{`antibody`}{antibody}
-#' }
-#' Alternatively to `age_min` and `age_max`, the dataset could already include
-#' the age group marker `age_mean_f`, representing the middle point between
-#' `age_min` and `age_max`. If `afe_mean_f` is missing, it will be generated
-#' by the function.
-#' @param alpha probability of a type I error. For further details refer to
-#'   [binconf][Hmisc::binconf].
-#' @return serodata with additional columns necessary for the analysis. These
-#'   columns are:
-#' \describe{
-#'   \item{`age_mean_f`}{Floor value of the average between age_min and age_max
-#'     for the age groups delimited by `age_min` and `age_max`}
-#'   \item{`sample_size`}{The size of the sample}
-#'   \item{`birth_year`}{Years in which the subject was born according to the
-#'     age group marker `age_mean_f`}
-#'   \item{`prev_obs`}{Observed prevalence}
-#'   \item{`prev_obs_lower`}{Lower limit of the confidence interval for the
-#'     observed prevalence}
-#'   \item{`prev_obs_upper`}{Upper limit of the confidence interval for the
-#'     observed prevalence}
-#' }
-#' @examples
-#' data(chagas2012)
-#' serodata <- prepare_serodata(chagas2012)
-#' @export
-prepare_serodata <- function(serodata = serodata,
-                             alpha = 0.05) {
-  checkmate::assert_numeric(alpha, lower = 0, upper = 1)
-  serodata <- validate_serodata(serodata)
-
-  if (!any(colnames(serodata) == "age_mean_f")) {
-    serodata <- serodata %>%
-      dplyr::mutate(
-        age_mean_f = floor((.data$age_min + .data$age_max) / 2),
-        sample_size = sum(.data$total)
-      )
-  }
-
-  if (!any(colnames(serodata) == "birth_year")) {
-    serodata <- dplyr::mutate(
-      serodata,
-      birth_year = .data$tsur - .data$age_mean_f
-    )
-  }
-
-  serodata <- serodata %>%
-    cbind(
-      Hmisc::binconf(
-        serodata$counts,
-        serodata$total,
-        alpha = alpha,
-        method = "exact",
-        return.df = TRUE
-      )
-    ) %>%
-    dplyr::rename(
-      prev_obs = "PointEst",
-      prev_obs_lower = "Lower",
-      prev_obs_upper = "Upper"
-    ) %>%
-    dplyr::arrange(.data$age_mean_f)
-
-  return(serodata)
-}
-
-
-#' Prepare pre-processed serological survey dataset to plot the
-#' binomial confidence intervals of the seroprevalence by age group
-#'
-#' This function prepapares a given pre-processed serological dataset (see
-#' [prepare_serodata()]) to plot the binomial confidence intervals of its
-#' corresponding seroprevalence grouped by age group.
-#' @inheritParams run_seromodel
-#' @return data set with the binomial confidence intervals
-#' @examples
-#' data(chagas2012)
-#' serodata <- prepare_serodata(chagas2012)
-#' prepare_bin_data(serodata)
-#' @keywords internal
-#' @noRd
-prepare_bin_data <- function(serodata,
-                             bin_step = 5,
-                             alpha = 0.05) {
-  if (!any(colnames(serodata) == "age_mean_f")) {
-    serodata <- serodata %>%
-      dplyr::mutate(
-        age_mean_f = floor((.data$age_min + .data$age_max) / 2),
-        sample_size = sum(.data$total)
-      )
-  }
-  serodata$age_group <- get_age_group(
-    age = serodata$age_mean_f,
-    step = bin_step
-  )
-
-  serodata_bin <- serodata %>%
-    dplyr::group_by(.data$age_group) %>%
-    dplyr::summarise(
-      total = sum(.data$total),
-      counts = sum(.data$counts)
-    ) %>%
-    dplyr::mutate(
-      survey = unique(serodata$survey),
-      tsur = unique(serodata$tsur),
-      age_min = as.integer(gsub("[(]|\\,.*", "\\1", .data$age_group)) + 1,
-      age_max = as.integer(gsub(".*\\,|[]]", "\\1", .data$age_group)),
-      age_mean_f = floor((.data$age_min + .data$age_max) / 2)
-    )
-
-    serodata_bin <- cbind(
-      serodata_bin,
-      Hmisc::binconf(
-        serodata_bin$counts,
-        serodata_bin$total,
-        alpha = alpha,
-        method = "exact",
-        return.df = TRUE
-      )
-    ) %>%
-    dplyr::rename(
-      prev_obs = "PointEst",
-      prev_obs_lower = "Lower",
-      prev_obs_upper = "Upper"
-    ) %>%
-    dplyr::arrange(.data$age_mean_f)
-
-  return(serodata_bin)
-}
diff --git a/R/veev2012.R b/R/veev2012.R
deleted file mode 100644
index 49e4d6fd..00000000
--- a/R/veev2012.R
+++ /dev/null
@@ -1,15 +0,0 @@
-#' Seroprevalence data on serofoi
-#'
-#' Data from a serological surveys
-#'
-#' @docType data
-#'
-#' @usage veev2012
-#'
-#' @format An object of class `"cross"`; see [qtl::read.cross()].
-#'
-#' @keywords datasets
-#'
-#' @examples
-#' veev2012
-"veev2012"
diff --git a/tests/testthat/clean_expected_files.R b/tests/testthat/clean_expected_files.R
deleted file mode 100644
index 72ad5a0a..00000000
--- a/tests/testthat/clean_expected_files.R
+++ /dev/null
@@ -1,14 +0,0 @@
-# This script removes the svg and csv files used in the automatic tests
-# This is usually required when a new version of rstan is released
-# Random number generation changes for each new version of
-# Rstan, https://mc-stan.org/docs/2_18/reference-manual/reproducibility-chapter.html
-# thus all files with expected results (both tables and graphs) become
-# obsolete and need to be regenerated
-
-library(testthat)
-paths <- c(testthat::test_path("_snaps", "*"), testthat::test_path("extdata", "dataframes", "expected", "*"))
-
-for (path in paths) {
-  cat("Deleting", path, "\n")
-  unlink(path, recursive = TRUE)
-}
diff --git a/tests/testthat/extdata/haiti_ssa_sample.RDS b/tests/testthat/extdata/haiti_ssa_sample.RDS
deleted file mode 100644
index 04f9fa750e7d714fa65c62758a106ead078306aa..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 1150
zcmV-^1cCb>iwFP!000001MQY=Oj~6X$1hMOFa(T@#RL~HOeSFsg+fa<IOV-SH(onh
z1J=v9KqItWdl|5egnjUfF>WK81;}7ojIr6`GIdNdjGz<ThY3pDP+93K%cR+GqFE$c
zyzrc-FZj_fi}NNozyJT7d!PHiC+X9xiDDQgj1h%~F=63?WWv?;rMu-q?Rmk(3TK>f
z#xo)&QedQ|%Y2yYm1JlAU-<V0hKX69t)j=YK6plP>?C0?L)ceMcogg18HDB)LKEKi
z##uuB1bV!`SwYAm8~X_lpfB$a=F50rN-%vn+mcA#fY%$*o9%>$Xdl^5pO5|5FrNn5
zqzKMahwE11^Gnd@GQmtl#v|i`b5i5$)Atc0rJL5vM;74oiqV&%FGpXAen0wZ%w2=^
zT70j1?5pYfVZUJ_C@0OWcnO(~%tFeMc{Crg2-hn`uS8#oejoa3^fj2H7VCAGQ;mH(
z{|4-9Hsu`cA@wJeOz<8Ru4MJR{%JLvW>KOluPb<3P4Fzj%G=8Rdy@V}@rU5s8+Rck
z$Q<MzB;KGX>?bTiUxvN{y$U_uldC5PYth${g%tI8of`WM*w@fxnDf#WQh!3}%Q<Qy
z_0dM85-CL{A)^UL9wObyS>!nKL*%<ioYzPPQiCi;W*`%h5rk(}gK~mR8NSywe+3-d
z=C50?TEVsa=8n&5yx<-d-5t3V2QJ5KYuf5Va9=;>N%%$!?!KtCskas2XqBFf9asa$
zj8-zE%>>)zZK>q42^{K;16;=^>?V04I1>uL)ox7&N1IAJm(T^a<=aOucHM_X-Nxag
zyJ>sL_k0uBNjUgvaQzta<@_b4NHr6ofyK0?-s019;8fo{5h=b3u3ckqrTwuCPTiwB
zMQQ1R&#<1mp90SFDwq03GuUQ6a@)?xV6jkks6BoRepWKuKe%KCTS0Pa!gpVQeReR#
z%6<j5;Zyw7`z$P+Po3T=4u$!c)OYT!{RuboJYM^-+sC<2FaK3NSOC)xjIS=F*TR=O
zqWf5Jh%cx8T7J!V^eu1>oPDwE6Fqp$dqY=h)ZlrneQ+fH26zr+tCLiju+$KJ;?$KE
zSdu??txuZ(uEsWR;p76iRvr!H|0oB?x#NquyT`!QKQuL-_c|;+`>}KJx39rHUA67v
zdzIkkJTEjxMu79)o*ypAs=!?luecye_R-sa_xIvSEBqSP@``fR3wMSFD?Z;Te8Gr9
zg`6RDqcgCboP{A9T`U&2kS|Pn32)@t4%#|s>@x5cbkDGDoUT)E2(oMsS`ja-Lt7j7
znR|UL&cdUPU=6&!+1NsRQ5&mptU=d`m8}-G)6~J~EczoHy{K7l=G%3>97~@NVKQ@v
zb;f3zErxnshq0S8|D)PvG6~g9(>U0{TIe)rF{*{-S#hgb7}TcABhAKcao_;yKShX!
z?ywrM#4j`bGRrTeewpo;IesbgOSxa>(oqVJ$OzDq0G%12vjVg<KxYT&oB%Bg(DDGi
Q{>}UNZ-Z3CnDq?+06khk`Tzg`

diff --git a/tests/testthat/test_fit_seromodel.R b/tests/testthat/test_fit_seromodel.R
deleted file mode 100644
index 541f48bc..00000000
--- a/tests/testthat/test_fit_seromodel.R
+++ /dev/null
@@ -1,164 +0,0 @@
-test_that("fit_seromodel constant model estimates the right force of infection", {
-
-  foi_df <- data.frame(
-    year = 1970:1999,
-    foi = rep(0.03, 30)
-  )
-  survey_features <- data.frame(
-    age_min = seq(1, 21, 10),
-    age_max = seq(10, 30, 10),
-    sample_size = c(1000, 1500, 2000)
-  )
-
-  simdata <- simulate_serosurvey(
-    model = "time",
-    foi = foi_df,
-    survey_features = survey_features
-  )  %>%
-    mutate(
-      survey = "constant_foi",
-      tsur = max(foi_df$year) + 1
-    ) %>%
-    rename(
-      total = sample_size,
-      counts = n_seropositive
-  ) %>% prepare_serodata()
-
-  model_object <- fit_seromodel(
-    serodata = simdata,
-    foi_model = "constant",
-    iter = 1000
-  )
-
-  foi_central_estimates <- get_foi_central_estimates(
-    seromodel_object = model_object,
-    serodata = simdata
-  ) %>%
-    mutate(
-      # calculates tolerance as half the confidence interval size
-      tol = (upper - lower)/2
-    ) %>%
-    left_join(foi_df, by = "year")
-
-  expect_true(
-    all(
-      dplyr::near(
-        foi_central_estimates$medianv,
-        foi_central_estimates$foi,
-        tol = max(foi_central_estimates$tol, 0.05)
-      )
-    )
-  )
-})
-
-test_that("fit_seromodel tv_normal model estimates the right force of infection", {
-
-  foi_df <- data.frame(
-    year = 1940:1999,
-    foi = rep(c(0.06, 0.03, 0.01), c(20, 20, 20))
-  )
-  survey_features <- data.frame(
-    age_min = seq(1, 51, 10),
-    age_max = seq(10, 60, 10),
-    sample_size = 100
-  )
-
-  simdata <- simulate_serosurvey(
-    model = "time",
-    foi = foi_df,
-    survey_features = survey_features
-  )  %>%
-    mutate(
-      survey = "sw_dec_foi",
-      tsur = max(foi_df$year) + 1
-    ) %>%
-    rename(
-      total = sample_size,
-      counts = n_seropositive
-    ) %>% prepare_serodata()
-
-  model_object <- fit_seromodel(
-    serodata = simdata,
-    foi_model = "tv_normal",
-    chunks = rep(c(1, 2, 3), c(15, 20, 20)),
-    iter = 800
-  )
-
-  foi_central_estimates <- get_foi_central_estimates(
-    seromodel_object = model_object,
-    serodata = simdata
-  ) %>%
-    mutate(
-      # calculates tolerance as half the confidence interval size
-      tol = (upper - lower) / 2
-    ) %>%
-    left_join(foi_df, by = "year")
-
-  expect_true(
-    all(
-      dplyr::near(
-        foi_central_estimates$medianv,
-        foi_central_estimates$foi,
-        tol = max(foi_central_estimates$tol, 0.05)
-      )
-    )
-  )
-})
-
-test_that("fit_seromodel tv_normal_log model estimates the right force of infection", {
-
-  foi_df <- data.frame(
-    year = 1950:1999,
-    foi = rep(
-      c(0.001, 0.4, 0.001),
-      c(30, 5, 15)
-    )
-  )
-
-  survey_features <- data.frame(
-    age_min = seq(1, 41, 10),
-    age_max = seq(10, 50, 10),
-    sample_size = 100
-  )
-
-  simdata <- simulate_serosurvey(
-    model = "time",
-    foi = foi_df,
-    survey_features = survey_features
-  )  %>%
-    mutate(
-      survey = "big_outbreak",
-      tsur = max(foi_df$year) + 1
-    ) %>%
-    rename(
-      total = sample_size,
-      counts = n_seropositive
-    ) %>% prepare_serodata()
-
-  model_object <- fit_seromodel(
-    serodata = simdata,
-    foi_model = "tv_normal_log",
-    chunks = rep(c(1, 2, 3), c(25, 5, 15)),
-    iter = 700
-  )
-
-  foi_central_estimates <- get_foi_central_estimates(
-    seromodel_object = model_object,
-    serodata = simdata
-  ) %>%
-    mutate(
-      # calculates tolerance as half the confidence interval size
-      tol = (upper - lower) / 2
-    ) %>%
-    left_join(foi_df, by = "year")
-
-  expect_true(
-    all(
-      dplyr::near(
-        foi_central_estimates$medianv,
-        foi_central_estimates$foi,
-        tol = max(foi_central_estimates$tol, 0.05)
-      )
-    )
-  )
-})
diff --git a/tests/testthat/test_issue_47.R b/tests/testthat/test_issue_47.R
deleted file mode 100644
index d4bb27eb..00000000
--- a/tests/testthat/test_issue_47.R
+++ /dev/null
@@ -1,24 +0,0 @@
-test_that("issue 47", {
-  skip_on_os(c("windows", "mac"))
-  source("testing_utils.R")
-
-  library(dplyr)
-
-  # Load data
-  ## This dataset is already prepared
-  serodata_path <- testthat::test_path("extdata", "haiti_ssa_sample.RDS")
-  serodata <- readRDS(serodata_path)
-
-  # Error reproduction
-  model_test <- fit_seromodel(
-    serodata = serodata,
-    foi_model = "tv_normal",
-    print_summary = FALSE
-  )
-  foi <- rstan::extract(model_test, "foi", inc_warmup = FALSE)[[1]]
-  prev_expanded <- get_prev_expanded(foi, serodata = serodata)
-
-  # Test
-  age_max <- max(serodata$age_mean_f)
-  expect_length(prev_expanded$age, n = age_max)
-})
diff --git a/tests/testthat/testing_utils.R b/tests/testthat/testing_utils.R
deleted file mode 100644
index 3119eddf..00000000
--- a/tests/testthat/testing_utils.R
+++ /dev/null
@@ -1,69 +0,0 @@
-# TODO Move to separate package ###
-# TODO Document all functions and provide examples
-library(testthat)
-library(vdiffr)
-equal_with_tolerance <- function(tolerance = 2e-1) {
-  function(a, b) {
-    c <- base::mapply(function(x, y) {
-      if (is.na(x) && is.na(y)) {
-        return(0)
-      } else if (is.na(x) || is.na(y)) {
-        return(tolerance + 1)
-      } else {
-        return(abs(x - y))
-      }
-    }, a, b)
-    return(base::all(c < tolerance))
-  }
-}
-equal_exact <- function() {
-  function(a, b) {
-    x <- base::mapply(function(x, y) x == y || (is.na(x) && is.na(y)), a, b)
-    return(base::all(x == TRUE))
-  }
-}
-
-# TODO use testthat snapshots
-expect_similar_dataframes <- function(name, actual_df, column_comparation_functions) {
-  actual_df_filename <- file.path(tempdir(), paste(name, "csv", sep = "."))
-  write.csv(actual_df, actual_df_filename)
-  compare_fun <- function(expected_df_filename, actual_df_filename) {
-    return(compare_dataframes(expected_df_filename, actual_df_filename, column_comparation_functions))
-  }
-  expect_snapshot_file(actual_df_filename, compare = compare_fun)
-}
-
-
-compare_dataframes <- function(expected_df_filename, actual_df_filename, column_comparation_functions) {
-  expected_df <- read.csv(expected_df_filename)
-  actual_df <- read.csv(actual_df_filename)
-
-  all_columns_ok <- TRUE
-  for (col in base::names(column_comparation_functions)) {
-    if (col %in% colnames(expected_df) && col %in% colnames(actual_df)) {
-      compare_function <- column_comparation_functions[[col]]
-      col_ok <- compare_function(expected_df[[col]], actual_df[[col]])
-      if (!col_ok) {
-        cat("Column", col, "differs ", expected_df[[col]], "!=", actual_df[[col]], "\n")
-      }
-      all_columns_ok <- all_columns_ok && col_ok
-    } else {
-      if (!(col %in% colnames(expected_df))) {
-        cat("Column", col, "not in first dataframe")
-      }
-      if (!(col %in% colnames(expected_df))) {
-        cat("Column", col, "not in second dataframe")
-      }
-    }
-  }
-  return(all_columns_ok)
-}
-
-expect_same_plot <- function(plot_name, actual_plot) {
-  if (per_platform_snapshots) {
-    title <- file.path(r_version_id(), plot_name)
-  } else {
-    title <- plot_name
-  }
-  return(vdiffr::expect_doppelganger(title, actual_plot))
-}