diff --git a/DESCRIPTION b/DESCRIPTION index de6d4ec4..b06d8618 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -54,7 +54,8 @@ Imports: loo, purrr, tidyr, - tibble + tibble, + Matrix LinkingTo: BH (>= 1.66.0), Rcpp (>= 0.12.0), diff --git a/NAMESPACE b/NAMESPACE index 45f597e3..6dad2c97 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,10 +15,12 @@ export(plot_seroprev) export(plot_seroprev_fitted) export(prepare_serodata) export(probability_seropositive_by_age) +export(probability_seropositive_general_model_by_age) export(run_seromodel) export(simulate_serosurvey) export(simulate_serosurvey_age_and_time_model) export(simulate_serosurvey_age_model) +export(simulate_serosurvey_general_model) export(simulate_serosurvey_time_model) import(Rcpp) import(dplyr) diff --git a/R/simulate_serosurvey.R b/R/simulate_serosurvey.R index e5da1c10..de0e98a9 100644 --- a/R/simulate_serosurvey.R +++ b/R/simulate_serosurvey.R @@ -223,30 +223,50 @@ probability_seropositive_by_age <- function( return(df) } -#' Generate probabilities of seropositivity by age based on an age-varying FOI model. +sum_of_A <- function(t, tau, construct_A_fn, ...) { + k <- 1 + for(t_primed in (tau + 1):t) { + if(k == 1) + A <- construct_A_fn(t_primed, tau, ...) + else { + tmp <- construct_A_fn(t_primed, tau, ...) + A <- A + tmp + } + k <- k + 1 + } + A +} + +#' Generate probabilities of seropositivity by age based on a general FOI model. #' -#' This function calculates the probabilities of seropositivity by age based on an age-varying FOI model. -#' It takes into account the FOI and the rate of seroreversion. +#' This function calculates the probabilities of seropositivity by age based on +#' an abstract model of the serocatalytic system. #' -#' @param foi A dataframe containing the force of infection (FOI) values for different ages. -#' It should have two columns: 'age' and 'foi'. -#' @param seroreversion_rate A non-negative numeric value representing the rate of seroreversion. +#' @param construct_A_fn A function that constructs a matrix that defines the multiplier +#' term in the linear ODE system. +#' @param calculate_seropositivity_function A function which takes the state vector +#' and returns the seropositive fraction. +#' @param initial_conditions The initial state vector proportions for each birth cohort. +#' @param max_age The maximum age to simulate seropositivity for. #' #' @return A dataframe with columns 'age' and 'seropositivity'. +#' @export probability_seropositive_general_model_by_age <- function( - foi, - seroreversion_rate) { - - ages <- seq_along(foi$age) - - probabilities <- probability_exact_age_varying( - ages = ages, - fois = foi$foi, - seroreversion_rate = seroreversion_rate - ) + construct_A_function, + calculate_seropositivity_function, + initial_conditions, + max_age, + ...) { + + probabilities <- vector(length = max_age) + for(i in seq_along(probabilities)) { + A_sum <- sum_of_A(max_age, max_age - i, construct_A, ...) + Y <- Matrix::expm(A_sum) %>% as.matrix() %*% initial_conditions + probabilities[i] <- calculate_seropositivity_function(Y) + } df <- data.frame( - age = ages, + age = 1:max_age, seropositivity = probabilities ) @@ -760,4 +780,46 @@ simulate_serosurvey <- function( return(serosurvey) } +#' Simulate serosurvey data based on general serocatalytic model. +#' +#' This simulation method assumes only that the model system can be written as a piecewise- +#' linear ordinary differential equation system. +#' +#' @inheritParams probability_seropositive_general_model_by_age +#' @inheritParams simulate_serosurvey +#' +#' @return A dataframe with simulated serosurvey data, including age group information, overall +#' sample sizes, the number of seropositive individuals, and other survey features. +#' +#' @export +simulate_serosurvey_general_model <- function( + construct_A_function, + calculate_seropositivity_function, + initial_conditions, + survey_features, + ... +) { + + # Input validation + validate_survey(survey_features) + + probability_serop_by_age <- probability_seropositive_general_model_by_age( + construct_A_function, + calculate_seropositivity_function, + initial_conditions, + max(survey_features$age_max), + ... + ) + sample_size_by_age_random <- sample_size_by_individual_age_random( + survey_features = survey_features + ) + + grouped_df <- generate_seropositive_counts_by_age_bin( + probability_serop_by_age, + sample_size_by_age_random, + survey_features + ) + + return(grouped_df) +} diff --git a/man/probability_seropositive_by_age.Rd b/man/probability_seropositive_by_age.Rd index d860ea21..e416a8b4 100644 --- a/man/probability_seropositive_by_age.Rd +++ b/man/probability_seropositive_by_age.Rd @@ -4,7 +4,7 @@ \alias{probability_seropositive_by_age} \title{Generate probabilities of seropositivity by age based user's choice of model.} \usage{ -probability_seropositive_by_age(model, foi, seroreversion_rate) +probability_seropositive_by_age(model, foi, seroreversion_rate = 0) } \arguments{ \item{model}{A string specifying the model type which can be one of \link{'age', 'time', 'age-time'}.} diff --git a/man/probability_seropositive_general_model_by_age.Rd b/man/probability_seropositive_general_model_by_age.Rd new file mode 100644 index 00000000..d80e3d8a --- /dev/null +++ b/man/probability_seropositive_general_model_by_age.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_serosurvey.R +\name{probability_seropositive_general_model_by_age} +\alias{probability_seropositive_general_model_by_age} +\title{Generate probabilities of seropositivity by age based on a general FOI model.} +\usage{ +probability_seropositive_general_model_by_age( + construct_A_function, + calculate_seropositivity_function, + initial_conditions, + max_age, + ... +) +} +\arguments{ +\item{calculate_seropositivity_function}{A function which takes the state vector +and returns the seropositive fraction.} + +\item{initial_conditions}{The initial state vector proportions for each birth cohort.} + +\item{max_age}{The maximum age to simulate seropositivity for.} + +\item{construct_A_fn}{A function that constructs a matrix that defines the multiplier +term in the linear ODE system.} +} +\value{ +A dataframe with columns 'age' and 'seropositivity'. +} +\description{ +This function calculates the probabilities of seropositivity by age based on +an abstract model of the serocatalytic system. +} diff --git a/man/simulate_serosurvey.Rd b/man/simulate_serosurvey.Rd index be3b0706..2e250102 100644 --- a/man/simulate_serosurvey.Rd +++ b/man/simulate_serosurvey.Rd @@ -60,5 +60,18 @@ model = "age", foi = foi_df, survey_features = survey_features) -# age-and-time-varying model TODO +# age-and-time varying model +foi_df <- expand.grid( + year = seq(1990, 2009, 1), + age = seq(1, 20, 1) +) \%>\% +mutate(foi = rnorm(20 * 20, 0.1, 0.01)) +survey_features <- data.frame( + age_min = c(1, 3, 15), + age_max = c(2, 14, 20), + sample_size = c(1000, 2000, 1500)) +serosurvey <- simulate_serosurvey( +model = "age", +foi = foi_df, +survey_features = survey_features) } diff --git a/man/simulate_serosurvey_general_model.Rd b/man/simulate_serosurvey_general_model.Rd new file mode 100644 index 00000000..0d7ca514 --- /dev/null +++ b/man/simulate_serosurvey_general_model.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulate_serosurvey.R +\name{simulate_serosurvey_general_model} +\alias{simulate_serosurvey_general_model} +\title{Simulate serosurvey data based on general serocatalytic model.} +\usage{ +simulate_serosurvey_general_model( + construct_A_function, + calculate_seropositivity_function, + initial_conditions, + survey_features, + ... +) +} +\arguments{ +\item{calculate_seropositivity_function}{A function which takes the state vector +and returns the seropositive fraction.} + +\item{initial_conditions}{The initial state vector proportions for each birth cohort.} + +\item{survey_features}{A dataframe containing information about the binned age groups and sample +sizes for each. It should contain columns: \link{'age_min', 'age_max', 'sample_size'}.} +} +\value{ +A dataframe with simulated serosurvey data, including age group information, overall +sample sizes, the number of seropositive individuals, and other survey features. +} +\description{ +This simulation method assumes only that the model system can be written as a piecewise- +linear ordinary differential equation system. +} diff --git a/tests/testthat/test-simulate_serosurvey.R b/tests/testthat/test-simulate_serosurvey.R index 61bc7941..b0f640eb 100644 --- a/tests/testthat/test-simulate_serosurvey.R +++ b/tests/testthat/test-simulate_serosurvey.R @@ -655,3 +655,173 @@ test_that("simulate_serosurvey handles invalid model inputs", { "model must be one of 'age', 'time', or 'age-time'.") }) +test_that("probability_seropositive_general_model_by_age reduces to time-varying model under + appropriate limits", { + + # simple time-varying FOI model + construct_A <- function(t, tau, lambda) { + A <- matrix(0, ncol = 2, nrow = 2) + A[1, 1] <- -lambda[t] + A[2, 1] <- lambda[t] + A + } + + # determines the sum of seropositive compartments of those still alive + calculate_seropositivity_function <- function(Y) { + Y[2] + } + + # initial conditions in 12D state vector + initial_conditions <- rep(0, 2) + initial_conditions[1] <- 1 + + # random FOIs + lambda <- runif(70, 0, 0.01) + + # solve linear system of ODEs + seropositive_linear_system <- probability_seropositive_general_model_by_age( + construct_A, + calculate_seropositivity_function, + initial_conditions, + max_age=length(lambda), + lambda + ) + + foi_df <- data.frame( + year=seq_along(lambda), + foi=lambda + ) + + seropositive_true <- probability_seropositive_by_age( + model = "time", + foi = foi_df, + seroreversion_rate = 0 + ) + + expect_equal(seropositive_true, seropositive_linear_system) + +}) + +test_that("probability_seropositive_general_model_by_age reduces to age-varying model under + appropriate limits", { + + # simple age-varying FOI model + construct_A <- function(t, tau, lambda) { + A <- matrix(0, ncol = 2, nrow = 2) + A[1, 1] <- -lambda[t - tau] + A[2, 1] <- lambda[t - tau] + A + } + + # determines the sum of seropositive compartments of those still alive + calculate_seropositivity_function <- function(Y) { + Y[2] + } + + # initial conditions in 12D state vector + initial_conditions <- rep(0, 2) + initial_conditions[1] <- 1 + + # random FOIs + lambda <- runif(70, 0, 0.01) + + # solve linear system of ODEs + seropositive_linear_system <- probability_seropositive_general_model_by_age( + construct_A, + calculate_seropositivity_function, + initial_conditions, + max_age=length(lambda), + lambda + ) + + foi_df <- data.frame( + age=seq_along(lambda), + foi=lambda + ) + + seropositive_true <- probability_seropositive_by_age( + model = "age", + foi = foi_df, + seroreversion_rate = 0 + ) + + expect_equal(seropositive_true, seropositive_linear_system) + +}) + + +test_that("probability_seropositive_general_model_by_age reduces to age- and time-varying model under + appropriate limits", { + + # age- and time-varying FOI model + construct_A <- function(t, tau, u, v) { + A <- matrix(0, ncol = 2, nrow = 2) + u_bar <- u[t - tau] + v_bar <- v[t] + + A[1, 1] <- -u_bar * v_bar + A[2, 1] <- u_bar * v_bar + A + } + + # determines the sum of seropositive compartments of those still alive + calculate_seropositivity_function <- function(Y) { + Y[2] + } + + # initial conditions in 12D state vector + initial_conditions <- rep(0, 2) + initial_conditions[1] <- 1 + + # age and time-varying FOIs + ages <- seq(1, 70, 1) + foi_age <- 2 * dlnorm( + ages, meanlog = 3.5, sdlog = 0.5) + + foi_df_age <- data.frame( + age = ages, + foi = foi_age + ) + + foi_time <- c(rep(0, 30), rep(1, 40)) + foi_df_time <- data.frame( + year = seq(1956, 2025, 1), + foi = foi_time + ) + + u <- foi_df_age$foi + v <- foi_df_time$foi + + # solve linear system of ODEs + seropositive_linear_system <- probability_seropositive_general_model_by_age( + construct_A, + calculate_seropositivity_function, + initial_conditions, + max_age=length(lambda), + u, + v + ) + + foi_df <- expand.grid( + year=foi_df_time$year, + age=foi_df_age$age + ) %>% + left_join(foi_df_age, by="age") %>% + rename(foi_age=foi) %>% + left_join(foi_df_time, by="year") %>% + rename(foi_time=foi) %>% + mutate(foi = foi_age * foi_time) %>% + select(-c("foi_age", "foi_time")) %>% + mutate(birth_year = year - age) %>% + filter(birth_year >= 1955) %>% + arrange(birth_year, age) + + seropositive_true <- probability_seropositive_by_age( + model = "age-time", + foi = foi_df %>% select(-birth_year), + seroreversion_rate = 0 + ) + + expect_equal(seropositive_true, seropositive_linear_system) + +}) diff --git a/vignettes/simulating_serosurveys.Rmd b/vignettes/simulating_serosurveys.Rmd index d6e870ed..29eca41e 100644 --- a/vignettes/simulating_serosurveys.Rmd +++ b/vignettes/simulating_serosurveys.Rmd @@ -321,7 +321,7 @@ serosurvey_serorevert %>% ``` # Simulating from a general serological model -We now illustrate an example age- and time-dependent FOI model for representing the prevalence of HIV (in the absence of treatment). In the absence of treatment, HIV invariably ends in death, and death usually occurs after the onset of AIDS which can occur typically many years after death. This means that the time from infection to death has a characteristic waiting time, which year we model as being around 10 years long. +We now illustrate an example age- and time-dependent FOI model for representing the prevalence of HIV (in the absence of treatment). In the absence of treatment, HIV invariably ends in death, and death usually occurs after the onset of AIDS which occurs typically many years after infection. This means that the time from infection to death has a characteristic waiting time, which year we model as being around 10 years long. Our model considers a single susceptible group, $S^{\tau}(t)$ indexed by their birth year, $\tau$, and calendar time, $t$. There are then a series of consecutive seropositive states, $X_i^{\tau}(t)$, for $i\in[1,10]$; then finally a dead state, $D^{\tau}(t)$. By distributing the seropositive population across a range of states, this model effectively institutes a delay from infection until death. Our model takes the following form: @@ -336,7 +336,7 @@ Our model considers a single susceptible group, $S^{\tau}(t)$ indexed by their b where $S^{\tau}(0)=1$ and all other compartments have initial conditions set to zero. We assume that $u(.)$ and $v(.)$ are piecewise-constant with one-year pieces, with their values the same as those from the sexually transmitted disease example above. -This model does not fit neatly into our other classes of serocatalytic models and, more generally, we cannot produce specific methods for any foreseeable model structure. But it is possible to produce a general strategy for solving such models by rewriting them as linear systems of ordinary differential equations. +This model does not fit neatly into our other classes of serocatalytic models, and, more generally, we cannot produce specific methods for any foreseeable model structure. But it is possible to find a general strategy for solving such models by rewriting them as piecewise-linear systems of ordinary differential equations. Since the system is linear, we can write it as a vector differential equation by defining $\boldsymbol{Y}^{\tau}(t):=[S^{\tau}(t),X_1^{\tau}(t),...,X_{10}^{\tau}(t),D^{\tau}(t)]'$ as a vector of states and $\bar{\boldsymbol{A}}_{t,\tau}$ as a matrix of constants that are fixed for a given year and year of birth: @@ -363,12 +363,14 @@ where we have used matrix exponentials to find the solution because the individu \boldsymbol{Y}^{\tau}(t) = \exp\left(\sum_{t'=1}^{t}\bar{\boldsymbol{A}}_{t',\tau}\right) \boldsymbol{Y}^{\tau}(0). \end{equation} -We now illustrate how such a system can be solved and used to simulate a serosurvey. +We now illustrate how such a system can be solved and used to simulate a serosurvey using `serofoi`. The key to this is writing a method for constructing the matrix $\boldsymbol{A}}_{t',\tau}$ in the above expression. ```{r} +# use same age- and time-specific multipliers u <- foi_df_age$foi v <- foi_df_time$foi +# function to construct A matrix for one piece construct_A <- function(t, tau, u, v) { u_bar <- u[t - tau] v_bar <- v[t] @@ -382,175 +384,49 @@ construct_A <- function(t, tau, u, v) { A } -initial_conditions <- rep(0, 12) -initial_conditions[1] <- 1 - -sum_of_A <- function(t, tau, construct_A_fn, ...) { - k <- 1 - for(i in (tau + 1):t) { - if(k == 1) - A <- construct_A_fn(i, tau, ...) - else { - tmp <- construct_A_fn(i, tau, ...) - A <- A + tmp - } - k <- k + 1 - } - A -} - -library(Matrix) -# solving for all groups born between tau=1 and tau=80 -prop_seropositive_of_alive <- vector(length = 80) -for(i in seq_along(prop_seropositive_of_alive)) { - A_sum <- sum_of_A(80, 80 - i, construct_A, u, v) - Y <- Matrix::expm(A_sum) %>% as.matrix() %*% initial_conditions - prop_seropositive_of_alive[i] <- sum(Y[2:11]) / (1 - Y[12]) -} - -plot(1:80, prop_seropositive_of_alive) -``` - -Compare with time-varying model -```{r} -construct_A <- function(t, tau, lambda) { - A <- matrix(0, ncol = 2, nrow = 2) - A[1, 1] <- -lambda[t] - A[2, 1] <- lambda[t] - A +# determines the sum of seropositive compartments of those still alive +calculate_seropositivity_function <- function(Y) { + sum(Y[2:11]) / (1 - Y[12]) } -lambda <- runif(80, 0, 0.01) -initial_conditions <- c(1, 0) -prop_seropositive_of_alive <- vector(length = 80) -for(i in seq_along(prop_seropositive_of_alive)) { - A_sum <- sum_of_A(80, 80 - i, construct_A, lambda) - Y <- Matrix::expm(A_sum) %>% as.matrix() %*% initial_conditions - prop_seropositive_of_alive[i] <- Y[2] -} - -plot(1:80, prop_seropositive_of_alive) -seroreversion_system <- data.frame( - age=1:80, - seropositivity=prop_seropositive_of_alive -) %>% - mutate(type="system") - - -foi_df <- data.frame( - year=seq(1, 80, 1), - foi=lambda -) - -seropositive_true <- probability_seropositive_by_age( - model = "time", - foi = foi_df, - seroreversion_rate = 0 -) %>% - mutate(type="official") - -seroreversion_system %>% - bind_rows(seropositive_true) %>% - ggplot(aes(x=age, y=seropositivity)) + - geom_line(aes(colour=type)) - -``` - -Compare with an age-varying model -```{r} -construct_A <- function(t, tau, lambda) { - A <- matrix(0, ncol = 2, nrow = 2) - A[1, 1] <- -lambda[t - tau] - A[2, 1] <- lambda[t - tau] - A -} - -lambda <- runif(80, 0, 0.01) -initial_conditions <- c(1, 0) -prop_seropositive_of_alive <- vector(length = 80) -for(i in seq_along(prop_seropositive_of_alive)) { - A_sum <- sum_of_A(80, 80 - i, construct_A, lambda) - Y <- Matrix::expm(A_sum) %>% as.matrix() %*% initial_conditions - prop_seropositive_of_alive[i] <- Y[2] -} - -plot(1:80, prop_seropositive_of_alive) -seroreversion_system <- data.frame( - age=1:80, - seropositivity=prop_seropositive_of_alive -) %>% - mutate(type="system") - - -foi_df <- data.frame( - age=seq(1, 80, 1), - foi=lambda -) - -seropositive_true <- probability_seropositive_by_age( - model = "age", - foi = foi_df, - seroreversion_rate = 0 -) %>% - mutate(type="official") +# initial conditions in 12D state vector +initial_conditions <- rep(0, 12) +initial_conditions[1] <- 1 -seroreversion_system %>% - bind_rows(seropositive_true) %>% +# solve +seropositive_hiv <- probability_seropositive_general_model_by_age( + construct_A, + calculate_seropositivity_function, + initial_conditions, + max_age=80, + u, + v + ) +seropositive_hiv %>% ggplot(aes(x=age, y=seropositivity)) + - geom_line(aes(colour=type)) + geom_line() + + scale_y_continuous(labels=scales::percent) + + ylab("Seropositivity") + + xlab("Age") ``` -Check age and time +We can also simulate a serosurvey assuming the above model. ```{r} -construct_A <- function(t, tau, u, v) { - A <- matrix(0, ncol = 2, nrow = 2) - u_bar <- u[t - tau] - v_bar <- v[t] - - A[1, 1] <- -u_bar * v_bar - A[2, 1] <- u_bar * v_bar - A -} - -u <- foi_df_age$foi -v <- foi_df_time$foi -initial_conditions <- c(1, 0) -prop_seropositive_of_alive <- vector(length = 80) -for(i in seq_along(prop_seropositive_of_alive)) { - A_sum <- sum_of_A(80, 80 - i, construct_A, u, v) - Y <- Matrix::expm(A_sum) %>% as.matrix() %*% initial_conditions - prop_seropositive_of_alive[i] <- Y[2] -} - -seroreversion_system <- data.frame( - age=1:80, - seropositivity=prop_seropositive_of_alive -) %>% - mutate(type="system") - -foi_df <- expand.grid( - year=foi_df_time$year, - age=foi_df_age$age -) %>% - left_join(foi_df_age, by="age") %>% - rename(foi_age=foi) %>% - left_join(foi_df_time, by="year") %>% - rename(foi_time=foi) %>% - mutate(foi = foi_age * foi_time) %>% - select(-c("foi_age", "foi_time")) %>% - mutate(birth_year = year - age) %>% - filter(birth_year >= 1945) %>% - arrange(birth_year, age) +serosurvey <- simulate_serosurvey_general_model( + construct_A, + calculate_seropositivity_function, + initial_conditions, + survey_features, + u, + v +) -serosurvey_true <- probability_seropositive_by_age( - model = "age-time", - foi = foi_df %>% select(-birth_year) -) %>% - mutate(type="official") - -seroreversion_system %>% - bind_rows(serosurvey_true) %>% - ggplot(aes(x=age, y=seropositivity)) + - geom_line(aes(colour=type)) +# plot +serosurvey %>% + add_posterior_quantiles() %>% + ggplot(aes(x=age_min, y=middle)) + + geom_pointrange(aes(ymin=lower, ymax=upper)) + + geom_smooth(se=FALSE) + + scale_y_continuous(labels=scales::percent) + + ylab("Seropositivity") ``` -