Skip to content

Commit 80966e2

Browse files
committed
Attempt rewriting combined opt slide logic to match old archive perf chars
Something still seems different; some Date arithmetic stuck out, and attempts to avoid helped somewhat, but this still seems slower for archives than pre-unified approach.
1 parent c568774 commit 80966e2

File tree

4 files changed

+35
-8
lines changed

4 files changed

+35
-8
lines changed

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,7 @@ importFrom(vctrs,vec_recycle_common)
284284
importFrom(vctrs,vec_rep)
285285
importFrom(vctrs,vec_rep_each)
286286
importFrom(vctrs,vec_seq_along)
287+
importFrom(vctrs,vec_set_intersect)
287288
importFrom(vctrs,vec_set_names)
288289
importFrom(vctrs,vec_size)
289290
importFrom(vctrs,vec_size_common)

R/epi_slide_opt_archive.R

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,8 +69,7 @@ epi_slide_opt_archive_one_epikey <- function(
6969
# input *snapshot*.
7070
out_update_min_t <- time_minus_slide_window_arg(inp_update_min_t, after, time_type)
7171
out_update_max_t <- time_plus_slide_window_arg(inp_update_max_t, before, time_type, max(inp_snapshot$time_value))
72-
out_update_ts <- vec_slice(inp_snapshot$time_value, between(inp_snapshot$time_value, out_update_min_t, out_update_max_t))
73-
out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, out_update_ts, in_colnames, out_colnames)
72+
out_update <- epi_slide_opt_one_epikey(inp_snapshot, f_dots_baked, f_from_package, before, after, unit_step, time_type, c(out_update_min_t, out_update_max_t), NULL, in_colnames, out_colnames)
7473
out_diff <- tbl_diff2(prev_out_snapshot, out_update, "time_value", "update")
7574
prev_inp_snapshot <<- inp_snapshot
7675
prev_out_snapshot <<- tbl_patch(prev_out_snapshot, out_diff, "time_value")

R/epi_slide_opt_edf.R

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -169,13 +169,26 @@ across_ish_names_info <- function(.x, time_type, col_names_quo, .f_namer,
169169

170170
epi_slide_opt_one_epikey <- function(inp_tbl,
171171
f_dots_baked, f_from_package, before, after, unit_step, time_type,
172-
out_time_values,
172+
out_filter_time_range, out_filter_time_set,
173173
in_colnames, out_colnames) {
174174
# TODO try converting time values to reals, do work on reals, convert back at very end?
175175
#
176-
# FIXME min time_value for this epikey vs. entire edf; match existing behavior, or complete changeover
177-
slide_t_min <- time_minus_slide_window_arg(min(out_time_values), before, time_type, min(inp_tbl$time_value))
178-
slide_t_max <- time_plus_slide_window_arg(max(out_time_values), after, time_type)
176+
# TODO loosen restrictions here. each filter optional?
177+
if (!is.null(out_filter_time_range) && is.null(out_filter_time_set)) {
178+
out_filter_time_style <- "range"
179+
out_t_min <- out_filter_time_range[[1L]]
180+
out_t_max <- out_filter_time_range[[2L]]
181+
} else if (is.null(out_filter_time_range) && !is.null(out_filter_time_set)) {
182+
# FIXME min time_value for this epikey vs. entire edf; match existing behavior, or complete changeover
183+
out_filter_time_style <- "set"
184+
out_time_values <- vec_set_intersect(inp_tbl$time_value, out_filter_time_set)
185+
out_t_min <- min(out_time_values)
186+
out_t_max <- max(out_time_values)
187+
} else {
188+
cli_abort("Exactly one of `out_filter_time_range` and `out_filter_time_set` must be non-`NULL`.")
189+
}
190+
slide_t_min <- time_minus_slide_window_arg(out_t_min, before, time_type, min(inp_tbl$time_value))
191+
slide_t_max <- time_plus_slide_window_arg(out_t_max, after, time_type)
179192
slide_nrow <- time_delta_to_n_steps(slide_t_max - slide_t_min, time_type) + 1L
180193
slide_time_values <- slide_t_min + 0L:(slide_nrow - 1L) * unit_step
181194
slide_inp_backrefs <- vec_match(slide_time_values, inp_tbl$time_value)
@@ -212,7 +225,20 @@ epi_slide_opt_one_epikey <- function(inp_tbl,
212225
class = "epiprocess__epi_slide_opt_archive__f_from_package_invalid"
213226
)
214227
}
215-
rows_should_keep <- vec_match(out_time_values, slide_time_values)
228+
rows_should_keep1 <- !is.na(slide_inp_backrefs)
229+
rows_should_keep2 <- switch(
230+
out_filter_time_style,
231+
range = vec_rep_each(
232+
c(FALSE, TRUE, FALSE),
233+
time_minus_time_in_n_steps(
234+
vctrs::vec_c(out_t_min, out_t_max, slide_t_max),
235+
vctrs::vec_c(slide_t_min, out_t_min, out_t_max),
236+
time_type
237+
) + c(0L, 1L, 0L)
238+
),
239+
set = vec_in(slide_time_values, out_time_values)
240+
)
241+
rows_should_keep <- rows_should_keep1 & rows_should_keep2
216242
out_tbl <- vec_slice(slide, rows_should_keep)
217243
out_tbl
218244
}
@@ -498,7 +524,7 @@ epi_slide_opt.epi_df <- function(.x, .col_names, .f, ...,
498524

499525
result <- .x %>%
500526
group_modify(function(grp_data, grp_key) {
501-
epi_slide_opt_one_epikey(grp_data, f_dots_baked, f_from_package, before, after, unit_step, time_type, vctrs::vec_set_intersect(ref_time_values, grp_data$time_value), names_info$input_col_names, names_info$output_col_names)
527+
epi_slide_opt_one_epikey(grp_data, f_dots_baked, f_from_package, before, after, unit_step, time_type, NULL, ref_time_values, names_info$input_col_names, names_info$output_col_names)
502528
}) %>%
503529
arrange_col_canonical()
504530

R/epiprocess-package.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@
6161
#' @importFrom vctrs vec_rep
6262
#' @importFrom vctrs vec_rep_each
6363
#' @importFrom vctrs vec_seq_along
64+
#' @importFrom vctrs vec_set_intersect
6465
#' @importFrom vctrs vec_set_names
6566
#' @importFrom vctrs vec_size_common
6667
#' @importFrom vctrs vec_slice

0 commit comments

Comments
 (0)