Skip to content

Commit 04a8a75

Browse files
authored
Merge branch 'dev' into optAllOfFix
2 parents b37160c + 1f44295 commit 04a8a75

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1403
-230
lines changed

DESCRIPTION

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: epiprocess
33
Title: Tools for basic signal processing in epidemiology
4-
Version: 0.8.0
4+
Version: 0.8.3
55
Authors@R: c(
66
person("Jacob", "Bien", role = "ctb"),
77
person("Logan", "Brooks", email = "[email protected]", role = c("aut", "cre")),
@@ -29,9 +29,11 @@ Imports:
2929
checkmate,
3030
cli,
3131
data.table,
32-
dplyr (>= 1.0.0),
32+
dplyr (>= 1.0.8),
3333
genlasso,
34+
glue,
3435
ggplot2,
36+
glue,
3537
lifecycle (>= 1.0.1),
3638
lubridate,
3739
magrittr,
@@ -82,6 +84,7 @@ Collate:
8284
'methods-epi_df.R'
8385
'outliers.R'
8486
'reexports.R'
87+
'revision_analysis.R'
8588
'slide.R'
8689
'utils.R'
8790
'utils_pipe.R'

NAMESPACE

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ S3method(as_tsibble,epi_df)
1111
S3method(autoplot,epi_df)
1212
S3method(clone,epi_archive)
1313
S3method(clone,grouped_epi_archive)
14+
S3method(complete,epi_df)
1415
S3method(dplyr_col_modify,col_modify_recorder_df)
1516
S3method(dplyr_col_modify,epi_df)
1617
S3method(dplyr_reconstruct,epi_df)
@@ -37,7 +38,6 @@ S3method(next_after,integer)
3738
S3method(print,epi_archive)
3839
S3method(print,epi_df)
3940
S3method(print,grouped_epi_archive)
40-
S3method(select,epi_df)
4141
S3method(summary,epi_df)
4242
S3method(ungroup,epi_df)
4343
S3method(ungroup,grouped_epi_archive)
@@ -50,6 +50,7 @@ export(as_epi_df)
5050
export(as_tsibble)
5151
export(autoplot)
5252
export(clone)
53+
export(complete)
5354
export(detect_outlr)
5455
export(detect_outlr_rm)
5556
export(detect_outlr_stl)
@@ -64,6 +65,7 @@ export(epix_merge)
6465
export(epix_slide)
6566
export(epix_truncate_versions_after)
6667
export(filter)
68+
export(full_seq)
6769
export(geo_column_names)
6870
export(group_by)
6971
export(group_modify)
@@ -79,6 +81,7 @@ export(new_epi_df)
7981
export(next_after)
8082
export(relocate)
8183
export(rename)
84+
export(revision_summary)
8285
export(slice)
8386
export(time_column_names)
8487
export(ungroup)
@@ -109,6 +112,7 @@ importFrom(checkmate,vname)
109112
importFrom(cli,cat_line)
110113
importFrom(cli,cli_abort)
111114
importFrom(cli,cli_inform)
115+
importFrom(cli,cli_li)
112116
importFrom(cli,cli_vec)
113117
importFrom(cli,cli_warn)
114118
importFrom(cli,format_message)
@@ -126,9 +130,11 @@ importFrom(data.table,set)
126130
importFrom(data.table,setDF)
127131
importFrom(data.table,setkeyv)
128132
importFrom(dplyr,"%>%")
133+
importFrom(dplyr,across)
129134
importFrom(dplyr,all_of)
130135
importFrom(dplyr,arrange)
131136
importFrom(dplyr,bind_rows)
137+
importFrom(dplyr,c_across)
132138
importFrom(dplyr,dplyr_col_modify)
133139
importFrom(dplyr,dplyr_reconstruct)
134140
importFrom(dplyr,dplyr_row_slice)
@@ -141,11 +147,17 @@ importFrom(dplyr,group_vars)
141147
importFrom(dplyr,groups)
142148
importFrom(dplyr,if_all)
143149
importFrom(dplyr,if_any)
150+
importFrom(dplyr,if_else)
151+
importFrom(dplyr,lag)
144152
importFrom(dplyr,mutate)
153+
importFrom(dplyr,near)
154+
importFrom(dplyr,pick)
155+
importFrom(dplyr,pull)
145156
importFrom(dplyr,relocate)
146157
importFrom(dplyr,rename)
147158
importFrom(dplyr,select)
148159
importFrom(dplyr,slice)
160+
importFrom(dplyr,summarize)
149161
importFrom(dplyr,tibble)
150162
importFrom(dplyr,ungroup)
151163
importFrom(ggplot2,autoplot)
@@ -177,6 +189,7 @@ importFrom(rlang,is_formula)
177189
importFrom(rlang,is_function)
178190
importFrom(rlang,is_missing)
179191
importFrom(rlang,is_quosure)
192+
importFrom(rlang,list2)
180193
importFrom(rlang,missing_arg)
181194
importFrom(rlang,new_function)
182195
importFrom(rlang,quo_get_expr)
@@ -195,6 +208,8 @@ importFrom(stats,median)
195208
importFrom(tibble,as_tibble)
196209
importFrom(tibble,new_tibble)
197210
importFrom(tibble,validate_tibble)
211+
importFrom(tidyr,complete)
212+
importFrom(tidyr,full_seq)
198213
importFrom(tidyr,unnest)
199214
importFrom(tidyselect,any_of)
200215
importFrom(tidyselect,eval_select)

NEWS.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,18 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
44

55
# epiprocess 0.9
66

7+
## Improvements
8+
9+
- Added `complete.epi_df`, which fills in missing values in an `epi_df` with
10+
`NA`s. Uses `tidyr::complete` underneath and preserves `epi_df` metadata.
11+
- Inclusion of the function `revision_summary` to provide basic revision information for `epi_archive`s out of the box. (#492)
12+
13+
## Bug fixes
14+
15+
- Fix `epi_slide_opt` (and related functions) to correctly handle `before=Inf`.
16+
- Disallow `after=Inf` in slide functions, since it doesn't seem like a likely
17+
use case and complicates code.
18+
719
# epiprocess 0.8
820

921
## Breaking changes

R/archive.R

Lines changed: 71 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ NULL
240240
#' value of `clobberable_versions_start` does not fully trust these empty
241241
#' updates, and assumes that any version `>= max(x$version)` could be
242242
#' clobbered.) If `nrow(x) == 0`, then this argument is mandatory.
243+
#' @param compactify_tol double. the tolerance used to detect approximate equality for compactification
243244
#' @return An `epi_archive` object.
244245
#'
245246
#' @importFrom data.table as.data.table key setkeyv
@@ -295,15 +296,16 @@ new_epi_archive <- function(
295296
additional_metadata,
296297
compactify,
297298
clobberable_versions_start,
298-
versions_end) {
299+
versions_end,
300+
compactify_tol = .Machine$double.eps^0.5) {
299301
# Create the data table; if x was an un-keyed data.table itself,
300302
# then the call to as.data.table() will fail to set keys, so we
301303
# need to check this, then do it manually if needed
302304
key_vars <- c("geo_value", "time_value", other_keys, "version")
303-
DT <- as.data.table(x, key = key_vars) # nolint: object_name_linter
304-
if (!identical(key_vars, key(DT))) setkeyv(DT, cols = key_vars)
305+
data_table <- as.data.table(x, key = key_vars) # nolint: object_name_linter
306+
if (!identical(key_vars, key(data_table))) setkeyv(data_table, cols = key_vars)
305307

306-
if (anyDuplicated(DT, by = key(DT)) != 0L) {
308+
if (anyDuplicated(data_table, by = key(data_table)) != 0L) {
307309
cli_abort("`x` must have one row per unique combination of the key variables. If you
308310
have additional key variables other than `geo_value`, `time_value`, and
309311
`version`, such as an age group column, please specify them in `other_keys`.
@@ -313,38 +315,17 @@ new_epi_archive <- function(
313315
)
314316
}
315317

316-
# Checks to see if a value in a vector is LOCF
317-
is_locf <- function(vec) { # nolint: object_usage_linter
318-
dplyr::if_else(!is.na(vec) & !is.na(dplyr::lag(vec)),
319-
vec == dplyr::lag(vec),
320-
is.na(vec) & is.na(dplyr::lag(vec))
321-
)
322-
}
323-
324-
# LOCF is defined by a row where all values except for the version
325-
# differ from their respective lag values
326-
327-
# Checks for LOCF's in a data frame
328-
rm_locf <- function(df) {
329-
dplyr::filter(df, if_any(c(everything(), -version), ~ !is_locf(.))) # nolint: object_usage_linter
330-
}
331-
332-
# Keeps LOCF values, such as to be printed
333-
keep_locf <- function(df) {
334-
dplyr::filter(df, if_all(c(everything(), -version), ~ is_locf(.))) # nolint: object_usage_linter
335-
}
336-
318+
nrow_before_compactify <- nrow(data_table)
337319
# Runs compactify on data frame
338320
if (is.null(compactify) || compactify == TRUE) {
339-
elim <- keep_locf(DT)
340-
DT <- rm_locf(DT) # nolint: object_name_linter
321+
compactified <- apply_compactify(data_table, key_vars, compactify_tol)
341322
} else {
342-
# Create empty data frame for nrow(elim) to be 0
343-
elim <- tibble::tibble()
323+
compactified <- data_table
344324
}
345-
346-
# Warns about redundant rows
347-
if (is.null(compactify) && nrow(elim) > 0) {
325+
# Warns about redundant rows if the number of rows decreased, and we didn't
326+
# explicitly say to compactify
327+
if (is.null(compactify) && nrow(compactified) < nrow_before_compactify) {
328+
elim <- removed_by_compactify(data_table, key_vars, compactify_tol)
348329
warning_intro <- cli::format_inline(
349330
"Found rows that appear redundant based on
350331
last (version of each) observation carried forward;
@@ -366,7 +347,7 @@ new_epi_archive <- function(
366347

367348
structure(
368349
list(
369-
DT = DT,
350+
DT = compactified,
370351
geo_type = geo_type,
371352
time_type = time_type,
372353
additional_metadata = additional_metadata,
@@ -377,6 +358,63 @@ new_epi_archive <- function(
377358
)
378359
}
379360

361+
#' given a tibble as would be found in an epi_archive, remove duplicate entries.
362+
#' @description
363+
#' works by shifting all rows except the version, then comparing values to see
364+
#' if they've changed. We need to arrange in descending order, but note that
365+
#' we don't need to group, since at least one column other than version has
366+
#' changed, and so is kept.
367+
#' @keywords internal
368+
#' @importFrom dplyr filter
369+
apply_compactify <- function(df, keys, tolerance = .Machine$double.eps^.5) {
370+
df %>%
371+
arrange(!!!keys) %>%
372+
filter(if_any(
373+
c(everything(), -version), # all non-version columns
374+
~ !is_locf(., tolerance)
375+
))
376+
}
377+
378+
#' get the entries that `compactify` would remove
379+
#' @keywords internal
380+
#' @importFrom dplyr filter if_all everything
381+
removed_by_compactify <- function(df, keys, tolerance) {
382+
df %>%
383+
arrange(!!!keys) %>%
384+
filter(if_all(
385+
c(everything(), -version),
386+
~ is_locf(., tolerance)
387+
)) # nolint: object_usage_linter
388+
}
389+
390+
#' Checks to see if a value in a vector is LOCF
391+
#' @description
392+
#' LOCF meaning last observation carried forward. lags the vector by 1, then
393+
#' compares with itself. For doubles it uses float comparison via
394+
#' [`dplyr::near`], otherwise it uses equality. `NA`'s and `NaN`'s are
395+
#' considered equal to themselves and each other.
396+
#' @importFrom dplyr lag if_else near
397+
#' @keywords internal
398+
is_locf <- function(vec, tolerance) { # nolint: object_usage_linter
399+
lag_vec <- dplyr::lag(vec)
400+
if (typeof(vec) == "double") {
401+
res <- if_else(
402+
!is.na(vec) & !is.na(lag_vec),
403+
near(vec, lag_vec, tol = tolerance),
404+
is.na(vec) & is.na(lag_vec)
405+
)
406+
return(res)
407+
} else {
408+
res <- if_else(
409+
!is.na(vec) & !is.na(lag_vec),
410+
vec == lag_vec,
411+
is.na(vec) & is.na(lag_vec)
412+
)
413+
return(res)
414+
}
415+
}
416+
417+
380418
#' `validate_epi_archive` ensures correctness of arguments fed to `as_epi_archive`.
381419
#'
382420
#' @rdname epi_archive

R/epi_df.R

Lines changed: 31 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -81,10 +81,7 @@
8181
#'
8282
#' An unrecognizable time type is labeled "custom".
8383
#'
84-
#' @template epi_df-params
85-
#' @rdname epi_df
86-
#'
87-
#' @export
84+
#' @name epi_df
8885
#' @examples
8986
#' # Convert a `tsibble` that has county code as an extra key
9087
#' # Notice that county code should be a character string to preserve any leading zeroes
@@ -154,6 +151,30 @@
154151
#' as_epi_df(additional_metadata = list(other_keys = c("state", "pol")))
155152
#'
156153
#' attr(ex3, "metadata")
154+
NULL
155+
156+
#' Create an `epi_df` object
157+
#'
158+
#' @rdname epi_df
159+
#' @param geo_type DEPRECATED Has no effect. Geo value type is inferred from the
160+
#' location column and set to "custom" if not recognized.
161+
#' @param time_type DEPRECATED Has no effect. Time value type inferred from the time
162+
#' column and set to "custom" if not recognized. Unpredictable behavior may result
163+
#' if the time type is not recognized.
164+
#' @param as_of Time value representing the time at which the given data were
165+
#' available. For example, if `as_of` is January 31, 2022, then the `epi_df`
166+
#' object that is created would represent the most up-to-date version of the
167+
#' data available as of January 31, 2022. If the `as_of` argument is missing,
168+
#' then the current day-time will be used.
169+
#' @param additional_metadata List of additional metadata to attach to the
170+
#' `epi_df` object. The metadata will have `geo_type`, `time_type`, and
171+
#' `as_of` fields; named entries from the passed list will be included as
172+
#' well. If your tibble has additional keys, be sure to specify them as a
173+
#' character vector in the `other_keys` component of `additional_metadata`.
174+
#' @param ... Additional arguments passed to methods.
175+
#' @return An `epi_df` object.
176+
#'
177+
#' @export
157178
new_epi_df <- function(x = tibble::tibble(), geo_type, time_type, as_of,
158179
additional_metadata = list()) {
159180
# Define metadata fields
@@ -180,20 +201,24 @@ new_epi_df <- function(x = tibble::tibble(), geo_type, time_type, as_of,
180201
}
181202

182203
#' @rdname epi_df
204+
#' @param x An `epi_df`, `data.frame`, [tibble::tibble], or [tsibble::tsibble]
205+
#' to be converted
206+
#' @param ... used for specifying column names, as in [`dplyr::rename`]. For
207+
#' example, `geo_value = STATEFP, time_value = end_date`.
183208
#' @export
184209
as_epi_df <- function(x, ...) {
185210
UseMethod("as_epi_df")
186211
}
187212

188-
#' @method as_epi_df epi_df
189213
#' @rdname epi_df
214+
#' @method as_epi_df epi_df
190215
#' @export
191216
as_epi_df.epi_df <- function(x, ...) {
192217
return(x)
193218
}
194219

195-
#' @method as_epi_df tbl_df
196220
#' @rdname epi_df
221+
#' @method as_epi_df tbl_df
197222
#' @importFrom rlang .data
198223
#' @importFrom tidyselect any_of
199224
#' @importFrom cli cli_inform
@@ -217,7 +242,6 @@ as_epi_df.tbl_df <- function(
217242
must be present in `x`."
218243
)
219244
}
220-
221245
if (lifecycle::is_present(geo_type)) {
222246
cli_warn("epi_archive constructor argument `geo_type` is now ignored. Consider removing.")
223247
}

R/group_by_epi_df_methods.R

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,3 @@
33
# `epi_df`s. It would be nice if there were a way to replace these with a
44
# generic core that automatically fixed all misbehaving methods; see
55
# brainstorming within Issue #223.
6-
7-
#' @importFrom dplyr select
8-
#' @export
9-
select.epi_df <- function(.data, ...) {
10-
selected <- NextMethod(.data)
11-
might_decay <- reclass(selected, attr(selected, "metadata"))
12-
return(dplyr_reconstruct(might_decay, might_decay))
13-
}

0 commit comments

Comments
 (0)