99
1010# ' Validate a version bound arg
1111# '
12- # ' Expected to be used on `clobberable_versions_start`, `versions_end`,
13- # ' and similar arguments. Some additional context-specific checks may be needed.
12+ # ' Expected to be used on `clobberable_versions_start`, `versions_end`, and
13+ # ' similar arguments. Some additional context-specific checks may be needed.
14+ # ' Side effects: raises an error if version bound appears invalid.
1415# '
1516# ' @param version_bound the version bound to validate
1617# ' @param x a data frame containing a version column with which to check
2021# ' @param version_bound_arg optional string; what to call the version bound in
2122# ' error messages
2223# '
23- # ' @section Side effects: raises an error if version bound appears invalid
24- # '
25- # ' @noRd
24+ # ' @keywords internal
2625validate_version_bound <- function (version_bound , x , na_ok = FALSE ,
2726 version_bound_arg = rlang :: caller_arg(version_bound ),
2827 x_arg = rlang :: caller_arg(x )) {
@@ -75,9 +74,7 @@ validate_version_bound <- function(version_bound, x, na_ok = FALSE,
7574# ' @return `max(x$version)` if it has any rows; raises error if it has 0 rows or
7675# ' an `NA` version value
7776# '
78- # ' @importFrom checkmate check_names
79- # '
80- # ' @export
77+ # ' @keywords internal
8178max_version_with_row_in <- function (x ) {
8279 if (nrow(x ) == 0L ) {
8380 cli_abort(
@@ -108,72 +105,71 @@ max_version_with_row_in <- function(x) {
108105# ' @param x the starting "value"(s)
109106# ' @return same class, typeof, and length as `x`
110107# '
111- # ' @export
108+ # ' @keywords internal
# S3 generic: dispatches on the class of `x` to the matching `next_after.*`
# method. The string passed to `UseMethod()` must match the generic's name
# exactly (no surrounding whitespace) for dispatch to find the methods.
next_after <- function(x) UseMethod("next_after")
113110
114111
115- # ' @export
112+ # ' @keywords internal
# Integer method: the value after `x` is `x + 1L`. Adding the integer literal
# `1L` (rather than `1`) keeps the result integer-typed; works elementwise on
# vectors.
next_after.integer <- function(x) x + 1L
117114
118115
119- # ' @export
116+ # ' @keywords internal
# Date method: adding `1L` to a `Date` advances it by one day, and the result
# is still a `Date`; works elementwise on vectors.
next_after.Date <- function(x) x + 1L
121118
122119
123- # ' Compactify
124- # '
125- # ' This section describes the internals of how compactification works in an
126- # ' `epi_archive()`. Compactification can potentially improve code speed or
127- # ' memory usage, depending on your data.
128- # '
129- # ' In general, the last version of each observation is carried forward (LOCF) to
130- # ' fill in data between recorded versions, and between the last recorded
131- # ' update and the `versions_end`. One consequence is that the `DT` doesn't
132- # ' have to contain a full snapshot of every version (although this generally
133- # ' works), but can instead contain only the rows that are new or changed from
134- # ' the previous version (see `compactify`, which does this automatically).
135- # ' Currently, deletions must be represented as revising a row to a special
136- # ' state (e.g., making the entries `NA` or including a special column that
137- # ' flags the data as removed and performing some kind of post-processing), and
138- # ' the archive is unaware of what this state is. Note that `NA`s *can* be
139- # ' introduced by `epi_archive` methods for other reasons, e.g., in
140- # ' [`epix_fill_through_version`] and [`epix_merge`], if requested, to
141- # ' represent potential update data that we do not yet have access to; or in
142- # ' [`epix_merge`] to represent the "value" of an observation before the
143- # ' version in which it was first released, or if no version of that
144- # ' observation appears in the archive data at all.
120+ # ' `epi_archive` object
145121# '
146- # ' @name compactify
147- NULL
148-
149-
150- # ' Epi Archive
151- # '
152- # ' @title `epi_archive` object
122+ # ' @description The second main data structure for storing time series in
123+ # ' `epiprocess`. It is similar to `epi_df` in that it is fundamentally a table with
124+ # ' a few required columns that stores epidemiological time series data. An
125+ # ' `epi_archive` requires a `geo_value`, `time_value`, and `version` column (and
126+ # ' possibly other key columns) along with measurement values. In brief, an
127+ # ' `epi_archive` is a history of the time series data, where the `version`
128+ # ' column tracks the time at which the data was available. This allows for
129+ # ' version-aware forecasting.
153130# '
154- # ' @description An `epi_archive` is an S3 class which contains a data table
155- # ' along with several relevant pieces of metadata. The data table can be seen
156- # ' as the full archive (version history) for some signal variables of
157- # ' interest.
131+ # ' `new_epi_archive` is the constructor for `epi_archive` objects that assumes
132+ # ' all arguments have been validated. Most users should use `as_epi_archive`.
158133# '
159- # ' @details An `epi_archive` contains a data table `DT`, of class `data.table`
160- # ' from the `data.table` package, with (at least) the following columns:
134+ # ' @details An `epi_archive` contains a `data.table` object `DT` (from the
135+ # ' `{data.table}` package), with (at least) the following columns:
161136# '
162- # ' * `geo_value`: the geographic value associated with each row of measurements.
163- # ' * `time_value`: the time value associated with each row of measurements.
137+ # ' * `geo_value`: the geographic value associated with each row of measurements,
138+ # ' * `time_value`: the time value associated with each row of measurements,
164139# ' * `version`: the time value specifying the version for each row of
165140# ' measurements. For example, if in a given row the `version` is January 15,
166141# ' 2022 and `time_value` is January 14, 2022, then this row contains the
167142# ' measurements of the data for January 14, 2022 that were available one day
168143# ' later.
169144# '
170- # ' The data table `DT` has key variables `geo_value`, `time_value`, `version`,
171- # ' as well as any others (these can be specified when instantiating the
172- # ' `epi_archive` object via the `other_keys` argument, and/or set by operating
173- # ' on `DT` directly). Note that there can only be a single row per unique
174- # ' combination of key variables.
145+ # ' The variables `geo_value`, `time_value`, and `version` serve as key variables for
146+ # ' the data table (in addition to any other keys specified in the metadata).
147+ # ' There can only be a single row per unique combination of key variables. The
148+ # ' keys for an `epi_archive` can be viewed with `key(epi_archive$DT)`.
149+ # '
150+ # ' ## Compactification
151+ # '
152+ # ' By default, an `epi_archive` will compactify the data table to remove
153+ # ' redundant rows. This is done by not storing rows that have the same value,
154+ # ' except for the `version` column (this is essentially a last observation
155+ # ' carried forward, or LOCF, but along the version index). This is done to save space and
156+ # ' improve performance. If you do not want to compactify the data, you can set
157+ # ' `compactify = FALSE` in `as_epi_archive()`.
158+ # '
159+ # ' Note that in some data scenarios, LOCF may not be appropriate. For instance,
160+ # ' if you expected data to be updated on a given day, but your data source did
161+ # ' not update, then it could be reasonable to code the data as `NA` for that
162+ # ' day, instead of assuming LOCF.
163+ # '
164+ # ' `NA`s *can* be introduced by `epi_archive` methods for other
165+ # ' reasons, e.g., in [`epix_fill_through_version`] and [`epix_merge`], if
166+ # ' requested, to represent potential update data that we do not yet have access
167+ # ' to; or in [`epix_merge`] to represent the "value" of an observation before
168+ # ' the version in which it was first released, or if no version of that
169+ # ' observation appears in the archive data at all.
170+ # '
171+ # ' ## Metadata
175172# '
176- # ' @section Metadata:
177173# ' The following pieces of metadata are included as fields in an `epi_archive`
178174# ' object:
179175# '
@@ -187,20 +183,6 @@ NULL
187183# ' archive. Unexpected behavior may result from modifying the metadata
188184# ' directly.
189185# '
190- # ' @section Generating Snapshots:
191- # ' An `epi_archive` object can be used to generate a snapshot of the data in
192- # ' `epi_df` format, which represents the most up-to-date time series values up
193- # ' to a point in time. This is accomplished by calling `epix_as_of()`.
194- # '
195- # ' @section Sliding Computations:
196- # ' We can run a sliding computation over an `epi_archive` object, much like
197- # ' `epi_slide()` does for an `epi_df` object. This is accomplished by calling
198- # ' the `slide()` method for an `epi_archive` object, which works similarly to
199- # ' the way `epi_slide()` works for an `epi_df` object, but with one key
200- # ' difference: it is version-aware. That is, for an `epi_archive` object, the
201- # ' sliding computation at any given reference time point t is performed on
202- # ' **data that would have been available as of t**.
203- # '
204186# ' @param x A data.frame, data.table, or tibble, with columns `geo_value`,
205187# ' `time_value`, `version`, and then any additional number of columns.
206188# ' @param geo_type DEPRECATED Has no effect. Geo value type is inferred from the
@@ -239,10 +221,11 @@ NULL
239221# ' value of `clobberable_versions_start` does not fully trust these empty
240222# ' updates, and assumes that any version `>= max(x$version)` could be
241223# ' clobbered.) If `nrow(x) == 0`, then this argument is mandatory.
242- # ' @param compactify_tol double. the tolerance used to detect approximate equality for compactification
224+ # ' @param compactify_tol double. the tolerance used to detect approximate
225+ # ' equality for compactification
243226# ' @return An `epi_archive` object.
244227# '
245- # ' @importFrom data.table as.data.table key setkeyv
228+ # ' @seealso [`epix_as_of`] [`epix_merge`] [`epix_slide`]
246229# ' @importFrom dplyr if_any if_all everything
247230# ' @importFrom utils capture.output
248231# '
@@ -356,12 +339,13 @@ new_epi_archive <- function(
356339 )
357340}
358341
359- # ' given a tibble as would be found in an epi_archive, remove duplicate entries.
360- # ' @description
361- # ' works by shifting all rows except the version, then comparing values to see
342+ # ' Given a tibble as would be found in an epi_archive, remove duplicate entries.
343+ # '
344+ # ' Works by shifting all rows except the version, then comparing values to see
362345# ' if they've changed. We need to arrange in descending order, but note that
363346# ' we don't need to group, since at least one column other than version has
364347# ' changed, and so is kept.
348+ # '
365349# ' @keywords internal
366350# ' @importFrom dplyr filter
367351apply_compactify <- function (df , keys , tolerance = .Machine $ double.eps ^ .5 ) {
@@ -466,6 +450,7 @@ validate_epi_archive <- function(
466450
467451# ' `as_epi_archive` converts a data frame, data table, or tibble into an
468452# ' `epi_archive` object.
453+ # '
469454# ' @param ... used for specifying column names, as in [`dplyr::rename`]. For
470455# ' example `version = release_date`
471456# ' @param .versions_end location based versions_end, used to avoid prefix
0 commit comments