Skip to content

inner_split() methods for ordered resamples #569

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ S3method(inner_split,group_mc_split)
S3method(inner_split,group_val_split)
S3method(inner_split,group_vfold_split)
S3method(inner_split,mc_split)
S3method(inner_split,sliding_index_split)
S3method(inner_split,sliding_period_split)
S3method(inner_split,sliding_window_split)
S3method(inner_split,time_val_split)
S3method(inner_split,val_split)
S3method(inner_split,vfold_split)
Expand Down
261 changes: 261 additions & 0 deletions R/inner_split.R
Original file line number Diff line number Diff line change
Expand Up @@ -287,3 +287,264 @@
class(split_inner) <- c(class_inner, class(x))
split_inner
}


# slide ------------------------------------------------------------------

#' @rdname inner_split
#' @export
inner_split.sliding_window_split <- function(x, split_args, ...) {
check_dots_empty()

analysis_set <- analysis(x)

if (nrow(analysis_set) < 2) {
cli::cli_abort(
"This set cannot be split into an analysis and a calibration set as there
is only one row."
)
}

split_args_inner <- translate_window_definition(
split_args$lookback,
split_args$assess_start,
split_args$assess_stop
)

lookback <- split_args_inner$lookback
assess_start <- split_args_inner$assess_start
assess_stop <- split_args_inner$assess_stop

lookback <- check_lookback(lookback)
assess_start <- check_assess(assess_start, "assess_start")
assess_stop <- check_assess(assess_stop, "assess_stop")
if (assess_start > assess_stop) {
cli_abort(
"{.arg assess_start} must be less than or equal to {.arg assess_stop}."
)

Check warning on line 324 in R/inner_split.R

View check run for this annotation

Codecov / codecov/patch

R/inner_split.R#L322-L324

Added lines #L322 - L324 were not covered by tests
}

seq <- vctrs::vec_seq_along(analysis_set)

id_in <- slider::slide(
.x = seq,
.f = identity,
.before = lookback,
.after = 0L,
.step = 1L,
.complete = split_args$complete
)

id_out <- slider::slide(
.x = seq,
.f = identity,
.before = -assess_start,
.after = assess_stop,
.step = 1L,
.complete = TRUE
)

indices <- compute_complete_indices(id_in, id_out)

if (length(indices) < 1) {
cli::cli_abort("No calibration split possible.")
}

# no need to use skip and step args since they don't apply to _within_ an rsplit

splits <- purrr::map(
indices,
~ make_splits(.x, data = analysis_set, class = "sliding_window_split")
)
split_inner <- splits[[length(splits)]]

class_inner <- "sliding_window_split_inner"
split_inner <- add_class(split_inner, class_inner)
split_inner
}

#' @rdname inner_split
#' @export
inner_split.sliding_index_split <- function(x, split_args, ...) {
check_dots_empty()

analysis_set <- analysis(x)

if (nrow(analysis_set) < 2) {
cli::cli_abort(
"This set cannot be split into an analysis and a calibration set as there
is only one row."
)
}

split_args_inner <- translate_window_definition(
split_args$lookback,
split_args$assess_start,
split_args$assess_stop
)

lookback <- split_args_inner$lookback
assess_start <- split_args_inner$assess_start
assess_stop <- split_args_inner$assess_stop
Comment on lines +380 to +388
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i like this pattern. Given how complex the code is in translate_window_definition() it makes good sense


lookback <- check_lookback(lookback)
assess_start <- check_assess(assess_start, "assess_start")
assess_stop <- check_assess(assess_stop, "assess_stop")
if (assess_start > assess_stop) {
cli_abort(
"{.arg assess_start} must be less than or equal to {.arg assess_stop}."
)

Check warning on line 396 in R/inner_split.R

View check run for this annotation

Codecov / codecov/patch

R/inner_split.R#L394-L396

Added lines #L394 - L396 were not covered by tests
}
Comment on lines +390 to +397
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would you consider doing this inside translate_window_definition()? given that these checks always follows a call to translate_window_definition().


loc <- tidyselect::eval_select(split_args$index, analysis_set)
index <- analysis_set[[loc]]

seq <- vctrs::vec_seq_along(analysis_set)

id_in <- slider::slide_index(
.x = seq,
.i = index,
.f = identity,
.before = lookback,
.after = 0L,
.complete = split_args$complete
)

id_out <- slider::slide_index(
.x = seq,
.i = index,
.f = identity,
.before = -assess_start,
.after = assess_stop,
.complete = TRUE
)

indices <- compute_complete_indices(id_in, id_out)

if (length(indices) < 1) {
cli::cli_abort("No calibration split possible.")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would prefer a more informative error here

}

# no need to use skip and step args since they don't apply to _within_ an rsplit

splits <- purrr::map(
indices,
~ make_splits(.x, data = analysis_set, class = "sliding_index_split")
)
split_inner <- splits[[length(splits)]]
Comment on lines +430 to +434
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

could we avoid the map completely?

Suggested change
splits <- purrr::map(
indices,
~ make_splits(.x, data = analysis_set, class = "sliding_index_split")
)
split_inner <- splits[[length(splits)]]
indices <- indices[[length(indices)]]
split_inner <-make_splits(indices, data = analysis_set, class = "sliding_index_split")

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same question for other methods


class_inner <- "sliding_index_split_inner"
split_inner <- add_class(split_inner, class_inner)
split_inner
}

#' @rdname inner_split
#' @export
inner_split.sliding_period_split <- function(x, split_args, ...) {
check_dots_empty()

analysis_set <- analysis(x)

if (nrow(analysis_set) < 2) {
# TODO this should return an empty split with a warning
cli::cli_abort(
"This set cannot be split into an analysis and a calibration set as there
is only one row."
)

Check warning on line 453 in R/inner_split.R

View check run for this annotation

Codecov / codecov/patch

R/inner_split.R#L450-L453

Added lines #L450 - L453 were not covered by tests
}

split_args_inner <- translate_window_definition(
split_args$lookback,
split_args$assess_start,
split_args$assess_stop
)

lookback <- split_args_inner$lookback
assess_start <- split_args_inner$assess_start
assess_stop <- split_args_inner$assess_stop

lookback <- check_lookback(lookback)
assess_start <- check_assess(assess_start, "assess_start")
assess_stop <- check_assess(assess_stop, "assess_stop")
if (assess_start > assess_stop) {
cli_abort(
"{.arg assess_start} must be less than or equal to {.arg assess_stop}."
)

Check warning on line 472 in R/inner_split.R

View check run for this annotation

Codecov / codecov/patch

R/inner_split.R#L470-L472

Added lines #L470 - L472 were not covered by tests
}

loc <- tidyselect::eval_select(split_args$index, analysis_set)
index <- analysis_set[[loc]]

seq <- vctrs::vec_seq_along(analysis_set)

id_in <- slider::slide_period(
.x = seq,
.i = index,
.period = split_args$period,
.f = identity,
.every = split_args$every,
.origin = split_args$origin,
.before = lookback,
.after = 0L,
.complete = split_args$complete
)

id_out <- slider::slide_period(
.x = seq,
.i = index,
.period = split_args$period,
.f = identity,
.every = split_args$every,
.origin = split_args$origin,
.before = -assess_start,
.after = assess_stop,
.complete = TRUE
)

indices <- compute_complete_indices(id_in, id_out)

if (length(indices) < 1) {
cli::cli_abort("No calibration split possible.")

Check warning on line 507 in R/inner_split.R

View check run for this annotation

Codecov / codecov/patch

R/inner_split.R#L507

Added line #L507 was not covered by tests
}

# no need to use skip and step args since they don't apply to _within_ an rsplit

splits <- purrr::map(
indices,
~ make_splits(.x, data = analysis_set, class = "sliding_period_split")
)
split_inner <- splits[[length(splits)]]

class_inner <- "sliding_period_split_inner"
split_inner <- add_class(split_inner, class_inner)
split_inner
}

translate_window_definition <- function(lookback, assess_start, assess_stop) {
length_window <- lookback + 1 + assess_stop
length_analysis <- lookback + 1

prop_analysis <- length_analysis / length_window
prop_assess <- (assess_stop - assess_start + 1) /
length_window

length_inner_analysis <- ceiling(prop_analysis * length_analysis)
length_calibration <- ceiling(prop_assess * length_analysis)
if (length_inner_analysis + length_calibration > length_analysis) {
if (length_calibration > 1) {
length_calibration <- length_calibration - 1

Check warning on line 535 in R/inner_split.R

View check run for this annotation

Codecov / codecov/patch

R/inner_split.R#L535

Added line #L535 was not covered by tests
} else {
length_inner_analysis <- length_inner_analysis - 1
}
}

lookback <- length_inner_analysis - 1
assess_stop <- length_analysis - length_inner_analysis
assess_start <- assess_stop - length_calibration + 1

list(
lookback = lookback,
assess_start = assess_start,
assess_stop = assess_stop
)
}
Loading