Skip to content

Add check for threshold, warn if NA in predictors, and minor styling cleanup #29

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
.Rproj.user
.DS_Store
inst/doc
docs/
pkgdown/
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ Depends:
ggplot2,
R (>= 3.4)
Imports:
cli,
dplyr,
glue,
hardhat (>= 0.1.2),
Expand Down Expand Up @@ -46,6 +47,6 @@ Encoding: UTF-8
Language: en-US
LazyData: true
Roxygen: list(markdown = TRUE)
RoxygenNote: 7.1.2
RoxygenNote: 7.2.3
Config/Needs/website: tidyverse/tidytemplate
Config/testthat/edition: 3
47 changes: 0 additions & 47 deletions R/0.R

This file was deleted.

42 changes: 42 additions & 0 deletions R/applicable-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,47 @@
# The following block is used by usethis to automatically manage
# roxygen namespace tags. Modify with care!
## usethis namespace: start
#' @importFrom dplyr %>%
#' @importFrom dplyr select
#' @importFrom dplyr slice
#' @importFrom dplyr matches
#' @importFrom dplyr starts_with
#' @importFrom dplyr rename_all
#' @importFrom dplyr mutate
#' @importFrom dplyr mutate_all
#' @importFrom dplyr group_by
#' @importFrom dplyr ungroup
#' @importFrom dplyr count
#' @importFrom dplyr sample_n
#' @importFrom glue glue
#' @importFrom tibble as_tibble
#' @importFrom tibble tibble
#' @importFrom purrr map_dfc
#' @importFrom purrr map2_dfc
#' @importFrom rlang abort
#' @importFrom rlang enquos
#' @importFrom rlang arg_match
#' @importFrom stats predict
#' @importFrom stats prcomp
#' @importFrom stats approx
#' @importFrom stats quantile
#' @importFrom stats ecdf
#' @importFrom stats setNames
#' @importFrom hardhat validate_prediction_size
#' @importFrom hardhat forge
#' @importFrom hardhat mold
#' @importFrom hardhat new_model
#' @importFrom ggplot2 ggplot geom_step xlab ylab aes autoplot
#' @importFrom Matrix Matrix colSums
#' @importFrom tidyselect vars_select
#' @importFrom tidyr gather
#' @importFrom proxyC simil
## usethis namespace: end
NULL

# Reduce false positives when R CMD check runs its "no visible binding for
# global variable" check
#' @importFrom utils globalVariables
utils::globalVariables(
c("cumulative", "n", "sim", "percentile", "component", "value")
)
14 changes: 6 additions & 8 deletions R/hat_values-fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ apd_hat_values_bridge <- function(processed, ...) {
predictors <- processed$predictors

if (ncol(predictors) >= nrow(predictors)) {
rlang::abort("The number of columns must be less than the number of rows.")
cli::cli_abort("The number of columns must be less than number of rows.")
}

fit <- apd_hat_values_impl(predictors)
Expand Down Expand Up @@ -110,13 +110,11 @@ apd_hat_values <- function(x, ...) {
#' @export
#' @rdname apd_hat_values
apd_hat_values.default <- function(x, ...) {
cls <- class(x)[1]
message <-
"`x` is not of a recognized type.
Only data.frame, matrix, recipe, and formula objects are allowed.
A {cls} was specified."
message <- glue::glue(message)
rlang::abort(message = message)
cli::cli_abort(c(
"`x` is not of a recognized type.",
"i", "Only data.frame, matrix, recipe, and formula objects are allowed.",
"i", "A {class(x)[1]} was specified."
))
}

# Data frame method
Expand Down
2 changes: 1 addition & 1 deletion R/hat_values-score.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

score_apd_hat_values_numeric <- function(model, predictors) {
if (!("XtX_inv" %in% names(model))) {
rlang::abort("The model must contain an XtX_inv argument.")
cli::cli_abort("The model must contain an XtX_inv argument.")
}

proj_matrix <- predictors %*% model$XtX_inv %*% t(predictors)
Expand Down
13 changes: 5 additions & 8 deletions R/helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,12 @@ get_inv <- function(X) {
if (inherits(XpX_inv, "try-error")) {
message <- as.character(XpX_inv)
if (message == "Error in qr.solve(XpX) : singular matrix 'a' in solve\n") {
message <- paste(
"Unable to compute the hat values of the matrix X of",
"predictors because the matrix resulting from multiplying",
"the transpose of X by X is singular.",
sep = "\n"
)
cli::cli_abort(c(
"Unable to compute the hat values of the matrix X.",
"i", "Singular matrix results from multiplying transpose of X by X."
))
}

rlang::abort(message = message)
cli::cli_abort(message)
}

dimnames(XpX_inv) <- NULL
Expand Down
2 changes: 1 addition & 1 deletion R/misc.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# from recipes:::names0
names0 <- function(num, prefix = "x") {
if (num < 1) {
rlang::abort("`num` should be > 0")
cli::cli_abort("`num` should be > 0")
}
ind <- format(1:num)
ind <- gsub(" ", "0", ind)
Expand Down
17 changes: 9 additions & 8 deletions R/pca-fit.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ apd_pca_impl <- function(predictors, threshold) {
retx = TRUE
)

# TODO: verify threshold \in (0, 1]
if (threshold <= 0 || threshold > 1) {
cli::cli_abort("threshold must be between 0 and 1: (0, 1]")
}

eigs <- pcs$sdev^2
cum_sum <- cumsum(eigs) / sum(eigs)
num_comp <- sum(cum_sum <= threshold) + 1
Expand Down Expand Up @@ -140,13 +143,11 @@ apd_pca <- function(x, ...) {
#' @export
#' @rdname apd_pca
apd_pca.default <- function(x, ...) {
cls <- class(x)[1]
message <-
"`x` is not of a recognized type.
Only data.frame, matrix, recipe, and formula objects are allowed.
A {cls} was specified."
message <- glue::glue(message)
rlang::abort(message = message)
cli::cli_abort(c(
"`x` is not of a recognized type.",
"i", "Only data.frame, matrix, recipe, and formula objects are allowed.",
"i", "A {class(x)[1]} was specified."
))
}

# Data frame method
Expand Down
13 changes: 12 additions & 1 deletion R/pca-score.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,22 @@

score_apd_pca_numeric <- function(model, predictors) {
if (!("pcs" %in% names(model))) {
rlang::abort("The model must contain a pcs argument.")
cli::cli_abort("The model must contain a pcs argument.")
}

incomplete_rows <- which(!stats::complete.cases(predictors))

if (length(incomplete_rows) > 0) {
cols_with_na <- names(which(colSums(is.na(predictors)) > 0))
cli::cli_warn(c("Missing values found in {.code new_data}",
"i" = "This will result in scores of NA",
"i" = "Found missing values in rows: {incomplete_rows}",
"i" = "Found missing values in columns: {cols_with_na}"))
}

# Predict output and subset using `num_comp`
predicted_output <- stats::predict(model$pcs, predictors)
cli::cat_print(predicted_output)
predicted_output <- predicted_output[, 1:model$num_comp, drop = FALSE]

# Compute distances between new pca values and the pca means
Expand Down
12 changes: 5 additions & 7 deletions R/score.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,9 @@ score <- function(object, ...) {
#' @export score.default
#' @rdname score
score.default <- function(object, ...) {
cls <- class(object)[1]
message <-
"`object` is not of a recognized type.
Only data.frame, matrix, recipe, and formula objects are allowed.
A {cls} was specified."
message <- glue::glue(message)
rlang::abort(message = message)
cli::cli_abort(c(
"`object` is not of a recognized type.",
"i", "Only data.frame, matrix, recipe, and formula objects are allowed.",
"i", "A {class(object)[1]} was specified."
))
}
34 changes: 12 additions & 22 deletions R/similarity.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,12 @@ apd_similarity_impl <- function(predictors, quantile, options) {
apd_similarity_bridge <- function(processed, quantile = NA_real_, ...) {
opts <- list(...)

msg <- "The `quantile` argument should be NA or a single numeric value in [0, 1]."
msg <- "`quantile` should be NA or a single numeric value in [0, 1]."
if (!is.na(quantile) && (!is.numeric(quantile) || length(quantile) != 1)) {
rlang::abort(msg)
cli::cli_abort(msg)
}
if (!is.na(quantile) && (quantile < 0 | quantile > 1)) {
rlang::abort(msg)
cli::cli_abort(msg)
}

predictors <- processed$predictors
Expand All @@ -60,12 +60,7 @@ apd_similarity_bridge <- function(processed, quantile = NA_real_, ...) {
not_bin <- apply(predictors, 2, function(x) any(x != 1 & x != 0))
if (any(not_bin)) {
bad_x <- colnames(predictors)[not_bin]
bad_x <- glue::glue_collapse(bad_x, sep = ", ", last = ", and ")
rlang::abort(
glue(
"The following variables are not binary: {bad_x}"
)
)
cli::cli_abort("The following variables are not binary: {bad_x}")
}

if (!inherits(predictors, "dgCMatrix")) {
Expand All @@ -76,15 +71,12 @@ apd_similarity_bridge <- function(processed, quantile = NA_real_, ...) {

zv <- Matrix::colSums(predictors)
if (all(zv == 0)) {
rlang::abort("All variables have a single unique value.")
cli::cli_abort("All variables have a single unique value.")
} else {
if (any(zv == 0)) {
bad_x <- colnames(predictors)[zv == 0]
bad_x <- glue::glue_collapse(bad_x, sep = ", ", last = ", and ")
rlang::warn(
glue(
cli::cli_warn(
"The following variables had zero variance and were removed: {bad_x}"
)
)
predictors <- predictors[, zv > 0, drop = FALSE]
}
Expand Down Expand Up @@ -199,13 +191,11 @@ apd_similarity <- function(x, ...) {
#' @export
#' @rdname apd_similarity
apd_similarity.default <- function(x, quantile = NA_real_, ...) {
cls <- class(x)[1]
message <-
"`x` is not of a recognized type.
Only data.frame, matrix, recipe, and formula objects are allowed.
A {cls} was specified."
message <- glue::glue(message)
rlang::abort(message = message)
cli::cli_abort(c(
"`x` is not of a recognized type.",
"i" = "Only data.frame, matrix, recipe, and formula objects are allowed.",
"i" = "A {class(x)[1]} was specified.")
)
}

# Data frame method
Expand Down Expand Up @@ -294,7 +284,7 @@ score_apd_similarity_bridge <- function(type, model, predictors) {

get_sim_score_function <- function(type) {
switch(type,
numeric = score_apd_similarity_numeric
numeric = score_apd_similarity_numeric
)
}

Expand Down
Loading