From 8030b4bc6e14a23e3c88d1116997a2a988ba435a Mon Sep 17 00:00:00 2001 From: Matt Dancho Date: Mon, 19 Mar 2018 10:47:20 -0400 Subject: [PATCH] initial commit --- .Rbuildignore | 7 + .gitignore | 4 + DESCRIPTION | 38 ++++ NAMESPACE | 45 +++++ NEWS.md | 3 + R/anomalize-package.R | 19 ++ R/anomalize.R | 173 +++++++++++++++++ R/anomalize_methods.R | 228 ++++++++++++++++++++++ R/global_vars.R | 30 +++ R/plot_anomaly_decomposition.R | 99 ++++++++++ R/prep_tbl_time.R | 56 ++++++ R/tidyverse_cran_downloads.R | 30 +++ R/time_apply.R | 155 +++++++++++++++ R/time_decompose.R | 207 ++++++++++++++++++++ R/time_decompose_methods.R | 153 +++++++++++++++ R/time_frequency.R | 271 +++++++++++++++++++++++++++ R/time_recompose.R | 142 ++++++++++++++ R/utils.R | 178 ++++++++++++++++++ README.Rmd | 49 +++++ README.md | 47 +++++ _pkgdown.yml | 0 anomalize.Rproj | 20 ++ cran-comments.md | 10 + data-raw/tidyverse_cran_downloads.R | 17 ++ data/tidyverse_cran_downloads.rda | Bin 0 -> 41305 bytes man/anomalize.Rd | 98 ++++++++++ man/anomalize_methods.Rd | 54 ++++++ man/anomalize_package.Rd | 19 ++ man/decompose_methods.Rd | 57 ++++++ man/figures/README-pressure-1.png | Bin 0 -> 3744 bytes man/plot_anomaly_decomposition.Rd | 61 ++++++ man/prep_tbl_time.Rd | 39 ++++ man/tidyverse_cran_downloads.Rd | 39 ++++ man/time_apply.Rd | 61 ++++++ man/time_decompose.Rd | 115 ++++++++++++ man/time_frequency.Rd | 106 +++++++++++ man/time_recompose.Rd | 50 +++++ tests/testthat.R | 4 + tests/testthat/test-anomalize.R | 61 ++++++ tests/testthat/test-time_frequency.R | 77 ++++++++ 40 files changed, 2822 insertions(+) create mode 100644 .Rbuildignore create mode 100644 .gitignore create mode 100644 DESCRIPTION create mode 100644 NAMESPACE create mode 100644 NEWS.md create mode 100644 R/anomalize-package.R create mode 100644 R/anomalize.R create mode 100644 R/anomalize_methods.R create mode 100644 R/global_vars.R create mode 100644 R/plot_anomaly_decomposition.R create mode 100644 R/prep_tbl_time.R create mode 
100644 R/tidyverse_cran_downloads.R create mode 100644 R/time_apply.R create mode 100644 R/time_decompose.R create mode 100644 R/time_decompose_methods.R create mode 100644 R/time_frequency.R create mode 100644 R/time_recompose.R create mode 100644 R/utils.R create mode 100644 README.Rmd create mode 100644 README.md create mode 100644 _pkgdown.yml create mode 100644 anomalize.Rproj create mode 100644 cran-comments.md create mode 100644 data-raw/tidyverse_cran_downloads.R create mode 100644 data/tidyverse_cran_downloads.rda create mode 100644 man/anomalize.Rd create mode 100644 man/anomalize_methods.Rd create mode 100644 man/anomalize_package.Rd create mode 100644 man/decompose_methods.Rd create mode 100644 man/figures/README-pressure-1.png create mode 100644 man/plot_anomaly_decomposition.Rd create mode 100644 man/prep_tbl_time.Rd create mode 100644 man/tidyverse_cran_downloads.Rd create mode 100644 man/time_apply.Rd create mode 100644 man/time_decompose.Rd create mode 100644 man/time_frequency.Rd create mode 100644 man/time_recompose.Rd create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-anomalize.R create mode 100644 tests/testthat/test-time_frequency.R diff --git a/.Rbuildignore b/.Rbuildignore new file mode 100644 index 0000000..aa06f51 --- /dev/null +++ b/.Rbuildignore @@ -0,0 +1,7 @@ +^.*\.Rproj$ +^\.Rproj\.user$ +^README\.Rmd$ +^cran-comments\.md$ +^_pkgdown\.yml$ +^docs$ +^data-raw$ diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5b6a065 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/DESCRIPTION b/DESCRIPTION new file mode 100644 index 0000000..4bf582a --- /dev/null +++ b/DESCRIPTION @@ -0,0 +1,38 @@ +Package: anomalize +Type: Package +Title: Tidy anomaly detection +Version: 0.1.0 +Author: c( + person("Matt", "Dancho", email = "mdancho@business-science.io", role = c("aut", "cre")), + person("Davis", "Vaughan", email = "dvaughan@business-science.io", role 
= c("aut")) + ) +Maintainer: Matt Dancho +Description: + The `anomalize` package enables a "tidy" workflow for detecting anomalies in data. + The main functions are `time_decompose()`, `anomalize()`, and `time_recompose()`. + When combined, it's quite simple to decompose time series, detect anomalies, + and create bands separating the "normal" data from the anomalous data. +URL: https://github.com/business-science/anomalize +BugReports: https://github.com/business-science/anomalize/issues +License: GPL (>= 3) +Encoding: UTF-8 +LazyData: true +Depends: + R (>= 3.0.0) +Imports: + dplyr, + glue, + tidyquant, + timetk, + sweep, + tibbletime, + purrr, + rlang, + tibble, + stringr, + tidyr, + ggplot2 +RoxygenNote: 6.0.1 +Roxygen: list(markdown = TRUE) +Suggests: + testthat diff --git a/NAMESPACE b/NAMESPACE new file mode 100644 index 0000000..38afec3 --- /dev/null +++ b/NAMESPACE @@ -0,0 +1,45 @@ +# Generated by roxygen2: do not edit by hand + +S3method(anomalize,default) +S3method(anomalize,grouped_df) +S3method(anomalize,tbl_df) +S3method(plot_anomaly_decomposition,default) +S3method(plot_anomaly_decomposition,grouped_tbl_time) +S3method(plot_anomaly_decomposition,tbl_time) +S3method(prep_tbl_time,data.frame) +S3method(prep_tbl_time,tbl_time) +S3method(time_apply,data.frame) +S3method(time_decompose,default) +S3method(time_decompose,grouped_df) +S3method(time_decompose,grouped_tbl_time) +S3method(time_decompose,tbl_df) +S3method(time_decompose,tbl_time) +S3method(time_recompose,default) +S3method(time_recompose,grouped_df) +S3method(time_recompose,grouped_tbl_time) +S3method(time_recompose,tbl_df) +S3method(time_recompose,tbl_time) +export(anomalize) +export(decompose_multiplicative) +export(decompose_stl) +export(decompose_twitter) +export(gesd) +export(iqr) +export(plot_anomaly_decomposition) +export(prep_tbl_time) +export(time_apply) +export(time_decompose) +export(time_frequency) +export(time_recompose) +export(time_scale_template) +export(time_trend) 
+importFrom(dplyr,"%>%") +importFrom(dplyr,contains) +importFrom(dplyr,n) +importFrom(dplyr,row_number) +importFrom(rlang,"!!!") +importFrom(rlang,"!!") +importFrom(rlang,":=") +importFrom(stats,mad) +importFrom(stats,median) +importFrom(stats,qt) diff --git a/NEWS.md b/NEWS.md new file mode 100644 index 0000000..575762b --- /dev/null +++ b/NEWS.md @@ -0,0 +1,3 @@ +# anomalize 0.1.0 + +* Added a `NEWS.md` file to track changes to the package. diff --git a/R/anomalize-package.R b/R/anomalize-package.R new file mode 100644 index 0000000..3c3129b --- /dev/null +++ b/R/anomalize-package.R @@ -0,0 +1,19 @@ +#' anomalize: Tidy anomaly detection +#' +#' @details +#' The `anomalize` package enables a tidy workflow for detecting anomalies in data. +#' The main functions are `time_decompose()`, `anomalize()`, and `time_recompose()`. +#' When combined, it's quite simple to decompose time series, detect anomalies, +#' and create bands separating the "normal" data from the anomalous data. +#' +#' To learn more about `anomalize`, start with the vignettes: +#' `browseVignettes(package = "anomalize")` +#' +#' @docType package +#' @name anomalize_package +#' +#' @importFrom rlang := !! !!! +#' @importFrom dplyr %>% n row_number contains +#' @importFrom stats median mad qt + +NULL diff --git a/R/anomalize.R b/R/anomalize.R new file mode 100644 index 0000000..520638a --- /dev/null +++ b/R/anomalize.R @@ -0,0 +1,173 @@ +#' Detect anomalies using the tidyverse +#' +#' @inheritParams time_apply +#' @param data A `tibble` or `tbl_time` object. +#' @param method The anomaly detection method. One of `"iqr"` or `"gesd"`. +#' The IQR method is faster at the expense of possibly not being quite as accurate. +#' The GESD method has the best properties for outlier detection, but is loop-based +#' and therefore a bit slower. +#' @param alpha Controls the width of the "normal" range. 
+#' Lower values are more conservative while higher values are less prone +#' to incorrectly classifying "normal" observations. +#' @param max_anoms The maximum percent of anomalies permitted to be identified. +#' @param verbose A boolean. If `TRUE`, will return a list containing useful information +#' about the anomalies. If `FALSE`, just returns the data expanded with the anomalies and +#' the lower (l1) and upper (l2) bounds. +#' +#' @return Returns a `tibble` / `tbl_time` object or list depending on the value of `verbose`. +#' +#' @details +#' The `anomalize()` function is used to detect outliers in a distribution +#' with no trend or seasonality present. The return has three columns: +#' "remainder_l1" (lower limit for anomalies), "remainder_l2" (upper limit for +#' anomalies), and "anomaly" (Yes/No). +#' +#' Use [time_decompose()] to decompose a time series prior to performing +#' anomaly detection with `anomalize()`. Typically, `anomalize()` is +#' performed on the "remainder" of the time series decomposition. +#' +#' For non-time series data (data without trend), the `anomalize()` function can +#' be used without time series decomposition. +#' +#' The `anomalize()` function uses two methods for outlier detection +#' each with benefits. +#' +#' __IQR__: +#' +#' The IQR Method uses an innerquartile range of 25% and 75% to establish a baseline distribution around +#' the median. With the default `alpha = 0.05`, the limits are established by expanding +#' the 25/75 baseline by an IQR Factor of 3 (3X). The IQR Factor = 0.15 / alpha (hense 3X with alpha = 0.05). +#' To increase the IQR Factor controling the limits, decrease the alpha, which makes +#' it more difficult to be an outlier. Increase alpha to make it easier to be an outlier. +#' +#' __GESD__: +#' +#' The GESD Method (Generlized Extreme Studentized Deviate Test) progressively +#' eliminates outliers using a Student's T-Test comparing the test statistic to a critical value. 
+#' Each time an outlier is removed, the test statistic is updated. Once test statistic +#' drops below the critical value, all outliers are considered removed. Because this method +#' involves continuous updating via a loop, it is slower than the IQR method. However, it +#' tends to be the best performing method for outlier removal. +#' +#' @seealso +#' Anomaly Detection Methods (Powers `anomalize`) +#' - [iqr()] +#' - [gesd()] +#' +#' Time Series Anomaly Detection Functions (anomaly detection workflow): +#' - [time_decompose()] +#' - [time_recompose()] +#' +#' @examples +#' +#' library(dplyr) +#' +#' data(tidyverse_cran_downloads) +#' +#' tidyverse_cran_downloads %>% +#' time_decompose(count, method = "stl") %>% +#' anomalize(remainder, method = "iqr") +#' +#' @references +#' - The IQR method is used in [`forecast::tsoutliers()`](https://github.com/robjhyndman/forecast/blob/master/R/clean.R) +#' - The GESD method is used in Twitter's [`AnomalyDetection`](https://github.com/twitter/AnomalyDetection) package and is also available as a function in [@raunakms's GESD method](https://github.com/raunakms/GESD/blob/master/runGESD.R) +#' +#' @export +anomalize <- function(data, target, method = c("iqr", "gesd"), + alpha = 0.05, max_anoms = 0.20, verbose = FALSE) { + UseMethod("anomalize", data) +} + +#' @export +anomalize.default <- function(data, target, method = c("iqr", "gesd"), + alpha = 0.05, max_anoms = 0.20, verbose = FALSE) { + stop("Object is not of class `tbl_df` or `tbl_time`.", call. = FALSE) +} + +#' @export +anomalize.tbl_df <- function(data, target, method = c("iqr", "gesd"), + alpha = 0.05, max_anoms = 0.20, verbose = FALSE) { + + # Checks + if (missing(target)) stop('Error in anomalize(): argument "target" is missing, with no default', call. = FALSE) + + # Setup + target_expr <- rlang::enquo(target) + + method <- tolower(method[[1]]) + x <- data %>% dplyr::pull(!! 
target_expr) + + # Detect Anomalies + # method <- tolower(method[[1]]) + # args <- list(x = data %>% dplyr::pull(!! target_expr), + # alpha = alpha, + # max_anoms = max_anoms, + # verbose = TRUE) + # + # outlier_list <- do.call(method, args) + + # Explicitly call functions + if (method == "iqr") { + outlier_list <- anomalize::iqr(x = x, + alpha = alpha, + max_anoms = max_anoms, + verbose = TRUE) + } else if (method == "gesd") { + outlier_list <- anomalize::gesd(x = x, + alpha = alpha, + max_anoms = max_anoms, + verbose = TRUE) + + } else { + stop("The `method` selected is invalid.", call. = FALSE) + } + + outlier <- outlier_list$outlier + limit_lower <- outlier_list$critical_limits[[1]] + limit_upper <- outlier_list$critical_limits[[2]] + + # Returns + ret <- data %>% + dplyr::mutate(!! paste0(dplyr::quo_name(target_expr), "_l1") := limit_lower, + !! paste0(dplyr::quo_name(target_expr), "_l2") := limit_upper) %>% + tibble::add_column(anomaly = outlier) + + if (verbose) { + ret <- list( + anomalized_tbl = ret, + anomaly_details = outlier_list + ) + + return(ret) + + } else { + return(ret) + } + +} + +#' @export +anomalize.grouped_df <- function(data, target, method = c("iqr", "gesd"), + alpha = 0.05, max_anoms = 0.20, verbose = FALSE, ...) { + + # Checks + if (missing(target)) stop('Error in anomalize(): argument "target" is missing, with no default', call. = FALSE) + if (verbose) warning(glue::glue("Cannot use 'verbose = TRUE' with grouped data.")) + + # Setup + target_expr <- dplyr::enquo(target) + + ret <- data %>% + grouped_mapper( + .f = anomalize, + target = !! target_expr, + method = method[[1]], + alpha = alpha, + max_anoms = max_anoms, + verbose = F, + ...) + + return(ret) + +} + diff --git a/R/anomalize_methods.R b/R/anomalize_methods.R new file mode 100644 index 0000000..39731f0 --- /dev/null +++ b/R/anomalize_methods.R @@ -0,0 +1,228 @@ +#' Methods that power anomalize() +#' +#' @inheritParams anomalize +#' @param x A vector of numeric data. 
+#' @param verbose A boolean. If `TRUE`, will return a list containing useful information +#' about the anomalies. If `FALSE`, just returns a vector of "Yes" / "No" values. +#' +#' @return Returns character vector or list depending on the value of `verbose`. +#' +#' +#' @seealso [anomalize()] +#' +#' @examples +#' +#' set.seed(100) +#' x <- rnorm(100) +#' idx_outliers <- sample(100, size = 5) +#' x[idx_outliers] <- x[idx_outliers] + 10 +#' +#' iqr(x, alpha = 0.05, max_anoms = 0.2) +#' iqr(x, alpha = 0.05, max_anoms = 0.2, verbose = TRUE) +#' +#' gesd(x, alpha = 0.05, max_anoms = 0.2) +#' gesd(x, alpha = 0.05, max_anoms = 0.2, verbose = TRUE) +#' +#' +#' @references +#' - The IQR method is used in [`forecast::tsoutliers()`](https://github.com/robjhyndman/forecast/blob/master/R/clean.R) +#' - The GESD method is used in Twitter's [`AnomalyDetection`](https://github.com/twitter/AnomalyDetection) package and is also available as a function in [@raunakms's GESD method](https://github.com/raunakms/GESD/blob/master/runGESD.R) +#' +#' @name anomalize_methods + +# 1A. 
IQR Method ---- + +#' @export +#' @rdname anomalize_methods +iqr <- function(x, alpha = 0.05, max_anoms = 0.2, verbose = FALSE) { + + quantile_x <- stats::quantile(x, prob = c(0.25, 0.75), na.rm = TRUE) + iq_range <- quantile_x[[2]] - quantile_x[[1]] + limits <- quantile_x + (0.15/alpha) * iq_range * c(-1, 1) + + outlier_idx <- ((x < limits[1]) | (x > limits[2])) + outlier_vals <- x[outlier_idx] + outlier_response <- ifelse(outlier_idx == TRUE, "Yes", "No") + + vals_tbl <- tibble::tibble(value = x) %>% + tibble::rownames_to_column(var = "index") %>% + # Establish limits and assess if outside of limits + dplyr::mutate( + limit_lower = limits[1], + limit_upper = limits[2], + abs_diff_lower = ifelse(value <= limit_lower, abs(value - limit_lower), 0), + abs_diff_upper = ifelse(value >= limit_upper, abs(value - limit_upper), 0), + max_abs_diff = ifelse(abs_diff_lower > abs_diff_upper, abs_diff_lower, abs_diff_upper) + ) %>% + dplyr::select(index, dplyr::everything()) %>% + dplyr::select(-c(abs_diff_lower, abs_diff_upper)) %>% + # Sort by absolute distance from centerline of limits + dplyr::mutate( + centerline = (limit_upper + limit_lower) / 2, + sorting = abs(value - centerline) + ) %>% + dplyr::arrange(dplyr::desc(sorting)) %>% + dplyr::select(-c(centerline, sorting)) %>% + tibble::rownames_to_column(var = "rank") %>% + dplyr::mutate( + rank = as.numeric(rank), + index = as.numeric(index) + ) %>% + # Identify outliers + dplyr::arrange(dplyr::desc(max_abs_diff)) %>% + dplyr::mutate( + outlier = ifelse(max_abs_diff > 0, "Yes", "No") , + below_max_anoms = ifelse(row_number() / n() > max_anoms, + "No", "Yes"), + outlier_reported = ifelse(outlier == "Yes" & below_max_anoms == "Yes", + "Yes", "No"), + direction = dplyr::case_when( + (outlier_reported == "Yes") & (value > limit_upper) ~ "Up", + (outlier_reported == "Yes") & (value < limit_lower) ~ "Down", + TRUE ~ "NA"), + direction = ifelse(direction == "NA", NA, direction) + ) + + vals_tbl_filtered <- vals_tbl %>% + 
dplyr::filter(below_max_anoms == "Yes") %>% + dplyr::select(-c(max_abs_diff:below_max_anoms)) %>% + dplyr::rename(outlier = outlier_reported) + + # Critical Limits + if (any(vals_tbl$outlier == "No")) { + # Non outliers identified, pick first limit + limit_tbl <- vals_tbl %>% + dplyr::filter(outlier == "No") %>% + dplyr::slice(1) + limits_vec <- c(limit_lower = limit_tbl$limit_lower, + limit_upper = limit_tbl$limit_upper) + } else { + # All outliers, pick last limits + limit_tbl <- vals_tbl %>% + dplyr::slice(n()) + limits_vec <- c(limit_lower = limit_tbl$limit_lower, + limit_upper = limit_tbl$limit_upper) + } + + # Return results + if (verbose) { + outlier_list <- list( + outlier = vals_tbl %>% dplyr::arrange(index) %>% dplyr::pull(outlier_reported), + outlier_idx = vals_tbl %>% dplyr::filter(outlier_reported == "Yes") %>% dplyr::pull(index), + outlier_vals = vals_tbl %>% dplyr::filter(outlier_reported == "Yes") %>% dplyr::pull(value), + outlier_direction = vals_tbl %>% dplyr::filter(outlier_reported == "Yes") %>% dplyr::pull(direction), + critical_limits = limits_vec, + outlier_report = vals_tbl_filtered) + return(outlier_list) + } else { + return(vals_tbl %>% dplyr::arrange(index) %>% dplyr::pull(outlier_reported)) + } +} + + + +# 1B. 
GESD: Generlized Extreme Studentized Deviate Test ---- + +#' @export +#' @rdname anomalize_methods +gesd <- function(x, alpha = 0.05, max_anoms = 0.2, verbose = FALSE) { + + # Variables + n <- length(x) + # r <- floor(n/2) # by default, set upper bound on number of outliers 'r' to 1/2 sample size + r <- trunc(n * max_anoms) # use max anoms to limit loop + R <- numeric(length=r) # test statistics for 'r' outliers + + lambda <- numeric(length = r) # critical values for 'r' outliers + outlier_ind <- numeric(length = r) # removed outlier observation values + outlier_val <- numeric(length = r) # removed outlier observation values + m <- 0 # number of outliers + x_new <- x # temporary observation values + median_new <- numeric(length = r) + mad_new <- numeric(length = r) + + # Outlier detection + for(i in 1:r){ + + # Compute test statistic + # z <- abs(x_new - mean(x_new))/sd(x_new) # Z-scores + median_new[i] <- median(x_new) + mad_new[i] <- mad(x_new) + + z <- abs(x_new - median(x_new))/mad(x_new) # Z-scores + + max_ind <- which(z == max(z),arr.ind=T)[1] # in case of ties, return first one + R[i] <- z[max_ind] # max Z-score + outlier_val[i] <- x_new[max_ind] # removed outlier observation values + outlier_ind[i] <- which(x_new[max_ind] == x, arr.ind=T)[1] # index of removed outlier observation values + x_new <- x_new[-max_ind] # remove observation that maximizes |x_i - x_mean| + + # Compute critical values + p <- 1 - alpha/(2*(n-i+1)) # probability + t_pv <- qt(p, df = (n-i-1)) # Critical value from Student's t distribution + lambda[i] <- ((n-i)*t_pv) / (sqrt((n-i-1+t_pv^2)*(n-i+1))) + + # Find exact number of outliers + # largest 'i' such that R_i > lambda_i + if(!is.na(R[i]) & !is.na(lambda[i])) { # qt can produce NaNs + if (R[i] > lambda[i]) { + m <- i + } + } + } + + vals_tbl <- tibble::tibble( + rank = as.numeric(1:r), + index = outlier_ind, + value = outlier_val, + test_statistic = R, + critical_value = lambda, + median = median_new, + mad = mad_new, + limit_lower 
= median - critical_value * mad, + limit_upper = critical_value * mad - median + ) %>% + dplyr::mutate( + outlier = ifelse(test_statistic > critical_value, "Yes", "No"), + direction = dplyr::case_when( + (outlier == "Yes") & (value > limit_upper) ~ "Up", + (outlier == "Yes") & (value < limit_lower) ~ "Down", + TRUE ~ "NA"), + direction = ifelse(direction == "NA", NA, direction) + ) %>% + dplyr::select(-c(test_statistic:mad)) + + outlier_index <- vals_tbl %>% dplyr::filter(outlier == "Yes") %>% dplyr::pull(index) + outlier_idx <- seq_along(x) %in% outlier_index + outlier_response <- ifelse(outlier_idx == TRUE, "Yes", "No") + + # Critical Limits + if (any(vals_tbl$outlier == "No")) { + # Non outliers identified, pick first limit + limit_tbl <- vals_tbl %>% + dplyr::filter(outlier == "No") %>% + dplyr::slice(1) + limits_vec <- c(limit_lower = limit_tbl$limit_lower, + limit_upper = limit_tbl$limit_upper) + } else { + # All outliers, pick last limits + limit_tbl <- vals_tbl %>% + dplyr::slice(n()) + limits_vec <- c(limit_lower = limit_tbl$limit_lower, + limit_upper = limit_tbl$limit_upper) + } + + # Return results + if (verbose) { + outlier_list <- list( + outlier = outlier_response, + outlier_idx = outlier_index, + outlier_vals = vals_tbl %>% dplyr::filter(outlier == "Yes") %>% dplyr::pull(value), + outlier_direction = vals_tbl %>% dplyr::filter(outlier == "Yes") %>% dplyr::pull(direction), + critical_limits = limits_vec, + outlier_report = vals_tbl) + return(outlier_list) + } else { + return(outlier_response) + } +} diff --git a/R/global_vars.R b/R/global_vars.R new file mode 100644 index 0000000..4e74892 --- /dev/null +++ b/R/global_vars.R @@ -0,0 +1,30 @@ +globalVariables(c( + "n", + ".", + ".period_groups", + "data", + "abs_diff_lower", + "abs_diff_upper", + "below_max_anoms", + "centerline", + "critical_value", + "direction", + "index", + "limit_lower", + "limit_upper", + "max_abs_diff", + "outlier", + "outlier_reported", + "sorting", + "test_statistic", + 
"value", + "observed", + "random", + "remainder", + "seasadj", + "season", + "trend", + "target", + "anomaly", + "key" + )) diff --git a/R/plot_anomaly_decomposition.R b/R/plot_anomaly_decomposition.R new file mode 100644 index 0000000..32b3087 --- /dev/null +++ b/R/plot_anomaly_decomposition.R @@ -0,0 +1,99 @@ +#' Visualize the time series decomposition with anomalies shown +#' +#' @param data A `tibble` or `tbl_time` object. +#' @param ncol Number of columns to display. Set to 1 for single column by default. +#' @param color_no Color for non-anomalous data. +#' @param color_yes Color for anomalous data. +#' @param alpha_dots Controls the transparency of the dots. Reduce when too many dots on the screen. +#' @param alpha_circles Controls the transparency of the circles that identify anomalies. +#' @param size_dots Controls the size of the dots. +#' @param size_circles Controls the size of the circles that identify anomalies. +#' @param strip.position Controls the placement of the strip that identifies the time series decomposition components. +#' +#' @return Returns a `ggplot` object. +#' +#' @details +#' The first step in reviewing the anomaly detection process is to evaluate +#' a single times series to observe how the algorithm is selecting anomalies. +#' The `plot_anomaly_decomposition()` function is used to gain +#' an understanding as to whether or not the method is detecting anomalies correctly and +#' whether or not parameters such as decomposition method, anomalize method, +#' alpha, frequency, and so on should be adjusted. 
+#' +#' @seealso [plot_anomalies()] +#' +#' @examples +#' +#' library(dplyr) +#' library(ggplot2) +#' +#' data(tidyverse_cran_downloads) +#' +#' tidyverse_cran_downloads %>% +#' filter(package == "tidyquant") %>% +#' ungroup() %>% +#' time_decompose(count, method = "stl") %>% +#' anomalize(remainder, method = "iqr") %>% +#' plot_anomaly_decomposition() +#' +#' @export +plot_anomaly_decomposition <- function(data, ncol = 1, color_no = "#2c3e50", color_yes = "#e31a1c", + alpha_dots = 1, alpha_circles = 1, size_dots = 1.5, size_circles = 4, + strip.position = "right") { + UseMethod("plot_anomaly_decomposition", data) + +} + +#' @export +plot_anomaly_decomposition.default <- function(data, ncol = 1, color_no = "#2c3e50", color_yes = "#e31a1c", + alpha_dots = 1, alpha_circles = 1, size_dots = 1.5, size_circles = 4, + strip.position = "right") { + stop("Object is not of class `tbl_time`.", call. = FALSE) + + +} + +#' @export +plot_anomaly_decomposition.grouped_tbl_time <- function(data, ncol = 1, color_no = "#2c3e50", color_yes = "#e31a1c", + alpha_dots = 1, alpha_circles = 1, size_dots = 1.5, size_circles = 4, + strip.position = "right") { + stop("Object cannot be grouped. Select a single time series for evaluation, and use `dplyr::ungroup()`.", call. = FALSE) + + +} + +#' @export +plot_anomaly_decomposition.tbl_time <- function(data, ncol = 1, color_no = "#2c3e50", color_yes = "#e31a1c", + alpha_dots = 1, alpha_circles = 1, size_dots = 1.5, size_circles = 4, + strip.position = "right") { + + # Checks + column_names <- names(data) + check_names <- c("observed", "remainder", "anomaly", "remainder_l1", "remainder_l2") %in% column_names + if (!all(check_names)) stop('Error in plot_anomaly_decomposition(): key names are missing. Make sure observed:remainder, remainder_l1, and remainder_l2 are present', call. = FALSE) + + + # Setup + date_expr <- tibbletime::get_index_quo(data) + date_col <- tibbletime::get_index_char(data) + + data_anomaly_tbl <- data %>% + dplyr::select(!! 
date_expr, observed:remainder, anomaly) %>% + tidyr::gather(key = key, value = value, -c(!! date_col, anomaly), factor_key = T) + + g <- data_anomaly_tbl %>% + ggplot2::ggplot(ggplot2::aes_string(x = date_col, y = "value", color = "anomaly")) + + # Points + ggplot2::geom_point(size = size_dots, alpha = alpha_dots) + + # Circles + ggplot2::geom_point(size = size_circles, shape = 1, alpha = alpha_circles, + data = data_anomaly_tbl %>% dplyr::filter(anomaly == "Yes")) + + # Horizontal Line at Y = 0 + ggplot2::geom_hline(yintercept = 0, color = tidyquant::palette_light()[[1]]) + + tidyquant::theme_tq() + + ggplot2::facet_wrap(~ key, ncol = ncol, scales = "free_y", strip.position = strip.position) + + ggplot2::scale_color_manual(values = c("No" = color_no, "Yes" = color_yes)) + + return(g) + +} diff --git a/R/prep_tbl_time.R b/R/prep_tbl_time.R new file mode 100644 index 0000000..5cc21eb --- /dev/null +++ b/R/prep_tbl_time.R @@ -0,0 +1,56 @@ +#' Automatically create tibbletime objects from tibbles +#' +#' @param data A `tibble`. +#' @param message A boolean. If `TRUE`, returns a message indicating any +#' conversion details important to know during the conversion to `tbl_time` class. +#' +#' @return Returns a `tibbletime` object of class `tbl_time`. +#' +#' @details +#' Detects a date or datetime index column and automatically +#' +#' @seealso [tibbletime::as_tbl_time()] +#' +#' @examples +#' +#' library(dplyr) +#' library(tibbletime) +#' +#' data_tbl <- tibble( +#' date = seq.Date(from = as.Date("2018-01-01"), by = "day", length.out = 10), +#' value = rnorm(10) +#' ) +#' +#' prep_tbl_time(data_tbl) +#' +#' @export +prep_tbl_time <- function(data, message = FALSE) { + UseMethod("prep_tbl_time", data) +} + + +prep_tbl_time.default <- function(data, message = FALSE) { + stop("Object is not of class `data.frame`.", call. 
= FALSE) +} + + +#' @export +prep_tbl_time.data.frame <- function(data, message = FALSE) { + + cl <- class(data)[[1]] + idx <- timetk::tk_get_timeseries_variables(data)[[1]] + + data <- data %>% + tibbletime::as_tbl_time(index = !! rlang::sym(idx)) + + if (message) message(glue::glue("Converting from {cl} to {class(data)[[1]]}. + Auto-index message: index = {idx}")) + + return(data) +} + +#' @export +prep_tbl_time.tbl_time <- function(data, message = FALSE) { + return(data) +} + diff --git a/R/tidyverse_cran_downloads.R b/R/tidyverse_cran_downloads.R new file mode 100644 index 0000000..0bff1f9 --- /dev/null +++ b/R/tidyverse_cran_downloads.R @@ -0,0 +1,30 @@ +#' Downloads of various "tidyverse" packages from CRAN +#' +#' A dataset containing the daily download counts from 2017-01-01 to 2018-03-01 +#' for the following tidyverse packages: +#' - `tidyr` +#' - `lubridate` +#' - `dplyr` +#' - `broom` +#' - `tidyquant` +#' - `tidytext` +#' - `ggplot2` +#' - `purrr` +#' - `stringr` +#' - `forcats` +#' - `knitr` +#' - `readr` +#' - `tibble` +#' - `tidyverse` +#' +#' +#' @format A grouped `tbl_time` object with 6,375 rows and 3 variables: +#' \describe{ +#' \item{date}{Date of the daily observation} +#' \item{count}{Number of downloads that day} +#' \item{package}{The package corresponding to the daily download number} +#' } +#' +#' @source +#' The package downloads come from CRAN by way of the `cranlogs` package. +"tidyverse_cran_downloads" diff --git a/R/time_apply.R b/R/time_apply.R new file mode 100644 index 0000000..a032575 --- /dev/null +++ b/R/time_apply.R @@ -0,0 +1,155 @@ +#' Apply a function to a time series by period +#' +#' @inheritParams tibbletime::collapse_by +#' @param data A `tibble` with a date or datetime index. +#' @param target A column to apply the function to +#' @param period A time-based definition (e.g. "2 weeks"). +#' or a numeric number of observations per frequency (e.g. 10). +#' See [tibbletime::collapse_by()] for period notation. 
+#' @param .fun A function to apply (e.g. `median`) +#' @param ... Additional parameters passed to the function, `.fun` +#' @param message A boolean. If `message = TRUE`, the frequency used is output +#' along with the units in the scale of the data. +#' +#' @return Returns a `tibbletime` object of class `tbl_time`. +#' +#' @details +#' Uses a time-based period to apply functions to. This is useful in circumstances where you want to +#' compare the observation values to aggregated values such as `mean()` or `median()` +#' during a set time-based period. The returned output extends the +#' length of the data frame so the differences can easily be computed. +#' +#' +#' @examples +#' +#' library(dplyr) +#' +#' data(tidyverse_cran_downloads) +#' +#' # Basic Usage +#' tidyverse_cran_downloads %>% +#' time_apply(count, period = "1 week", .fun = mean, na.rm = TRUE) +#' +#' @export +time_apply <- function(data, target, period, .fun, ..., + start_date = NULL, side = "end", clean = FALSE, message = TRUE) { + + UseMethod("time_apply", data) + +} + + +#' @export +time_apply.data.frame <- function(data, target, period, .fun, ..., + start_date = NULL, side = "end", clean = FALSE, message = TRUE) { + + # Checks + if (missing(target)) stop('Error in time_apply(): argument "target" is missing, with no default', call. = FALSE) + if (missing(period)) stop('Error in time_apply(): argument "period" is missing, with no default', call. = FALSE) + if (missing(.fun)) stop('Error in time_apply(): argument ".fun" is missing, with no default', call. = FALSE) + + + # Setup inputs + data <- prep_tbl_time(data, message = F) + + date_col_expr <- tibbletime::get_index_quo(data) + date_col_name <- dplyr::quo_name(date_col_expr) + + target_expr <- dplyr::enquo(target) + + # Function apply logic + if (is.character(period)) { + # See collapse_by for valid character sequences (e.g. 
"1 Y") + ret <- data %>% + tibbletime::collapse_by(period = period, clean = clean, start_date = start_date, side = side) %>% + dplyr::group_by(!! tibbletime::get_index_quo(.)) %>% + dplyr::mutate(time_apply = .fun(!! target_expr, ...)) %>% + dplyr::ungroup() %>% + dplyr::mutate(!! date_col_name := data %>% dplyr::pull(!! date_col_expr)) + + } else { + # Numeric (e.g. 15 data points) + ret <- data %>% + dplyr::mutate( + .period_groups = rep(1:period, length.out = nrow(.)) %>% sort() + ) %>% + dplyr::group_by(.period_groups) %>% + dplyr::mutate( + time_apply = .fun(!! target_expr, ...) + ) %>% + dplyr::ungroup() %>% + dplyr::select(-.period_groups) + } + + return(ret) + +} + +time_apply.grouped_df <- function(data, target, period, .fun, ..., + start_date = NULL, side = "end", clean = FALSE, message = TRUE) { + + # Checks + if (missing(target)) stop('Error in time_apply(): argument "target" is missing, with no default', call. = FALSE) + if (missing(period)) stop('Error in time_apply(): argument "period" is missing, with no default', call. = FALSE) + if (missing(.fun)) stop('Error in time_apply(): argument ".fun" is missing, with no default', call. = FALSE) + + + # Setup + data <- prep_tbl_time(data, message = F) + + target_expr <- dplyr::enquo(target) + + # Map time_apply.data.frame + ret <- data %>% + grouped_mapper( + .f = time_apply, + target = !! target_expr, + period = period, + .fun = .fun, + ... 
= ..., + start_date = start_date, + side = side, + clean = clean, + message = message) + + return(ret) + +} + +# Helper function for decompose_twitter +time_median <- function(data, target, period = "auto", template = time_scale_template(), message = TRUE) { + + # Setup inputs + data <- prep_tbl_time(data, message = F) + + date_col_expr <- tibbletime::get_index_quo(data) + date_col_name <- dplyr::quo_name(date_col_expr) + + target_expr <- dplyr::enquo(target) + + # For median_span = "auto" use template + if (period == "auto") { + + # Get timeseries summary attributes + ts_summary <- data %>% + tibbletime::get_index_col() %>% + timetk::tk_get_timeseries_summary() + + ts_scale <- ts_summary$scale + + period <- template %>% + target_time_decomposition_scale(ts_scale, "trend", index_shift = 0) + + } + + # Use time_apply() + ret <- data %>% + time_apply(!! target_expr, period = period, + .fun = median, na.rm = T, clean = F, message = message) %>% + dplyr::rename(median_spans = time_apply) + + if (message) message(glue::glue("median_span = {period}")) + + return(ret) + +} diff --git a/R/time_decompose.R b/R/time_decompose.R new file mode 100644 index 0000000..f91ba88 --- /dev/null +++ b/R/time_decompose.R @@ -0,0 +1,207 @@ +#' Decompose a time series in preparation for anomaly detection +#' +#' @inheritParams anomalize +#' @param data A `tibble` or `tbl_time` object. +#' @param method The time series decomposition method. One of `"stl"`, `"twitter"`, or +#' `"multiplicative"`. The STL method uses seasonal decomposition (see [decompose_stl()]). +#' The Twitter method uses `median_spans` to remove the trend (see [decompose_twitter()]). +#' The Multiplicative method uses multiplicative decomposition (see [decompose_multiplicative()]). +#' @param frequency Controls the seasonal adjustment (removal of seasonality). +#' Input can be either "auto", a time-based definition (e.g. "2 weeks"), +#' or a numeric number of observations per frequency (e.g. 10). 
+#' Refer to [time_frequency()]. +#' @param ... Additional parameters passed to the underlying method functions. +#' @param merge A boolean. `FALSE` by default. If `TRUE`, will append results to the original data. +#' @param message A boolean. If `TRUE`, will output information related to `tbl_time` conversions, frequencies, +#' and median spans (if applicable). +#' +#' @return Returns a `tbl_time` object. +#' +#' @details +#' The `time_decompose()` function generates a time series decomposition on +#' `tbl_time` objects. The function is "tidy" in the sense that it works +#' on data frames. It is designed to work with time-based data, and as such +#' must have a column that contains date or datetime information. The function +#' also works with grouped data. The function implements several methods +#' of time series decomposition, each with benefits. +#' +#' __STL__: +#' +#' The STL method (`method = "stl"`) implements time series decomposition using +#' the underlying [decompose_stl()] function. If you are familiar with [stats::stl()], +#' the function is a "tidy" version that is designed to work with `tbl_time` objects. +#' The decomposition separates the "season" and "trend" components from +#' the "observed" values leaving the "remainder" for anomaly detection. +#' The main parameter that controls the seasonal adjustment is `frequency`. +#' Setting `frequency = "auto"` will let the [time_frequency()] function +#' automatically determine the frequency based on the scale of the time series. +#' +#' __Twitter__: +#' +#' The Twitter method (`method = "twitter"`) implements time series decomposition using +#' the methodology from the Twitter [AnomalyDetection](https://github.com/twitter/AnomalyDetection) package. +#' The decomposition separates the "seasonal" component and then removes +#' the median data, which is a different approach than the STL method for removing +#' the trend. This approach works very well for low-growth + high seasonality data.
+#' STL may be a better approach when trend is a large factor. +#' The user can control two parameters: `frequency` and `median_spans`. +#' The `frequency` parameter adjusts the "season" component that is removed +#' from the "observed" values. The `median_spans` parameter adjusts the +#' number of median spans that are used. The user may supply both `frequency` +#' and `median_spans` as time-based durations (e.g. "6 weeks") or numeric values +#' (e.g. 180) or "auto", which predetermines the frequency and/or median spans +#' based on the scale of the time series. +#' +#' __Multiplicative__: +#' +#' The Multiplicative method (`method = "multiplicative"`) time series decomposition +#' uses the [stats::decompose()] function with `type = "multiplicative"`. This +#' method is useful in circumstances where variance is non-constant and typically +#' growing in a multiplicative fashion. The parameters are the same as the STL method. +#' Alternatively, users may wish to try a transformation (e.g. `log()` or `sqrt()`) in combination +#' with the STL method to get near-constant variance.
+#' +#' @seealso +#' Decomposition Methods (Powers `time_decompose`) +#' - [decompose_stl()] +#' - [decompose_twitter()] +#' - [decompose_multiplicative()] +#' +#' Time Series Anomaly Detection Functions (anomaly detection workflow): +#' - [anomalize()] +#' - [time_recompose()] +#' +#' @examples +#' +#' library(dplyr) +#' +#' data(tidyverse_cran_downloads) +#' +#' # Basic Usage +#' tidyverse_cran_downloads %>% +#' time_decompose(count, method = "stl") +#' +#' # twitter + median_spans +#' tidyverse_cran_downloads %>% +#' time_decompose(count, +#' method = "twitter", +#' frequency = "1 week", +#' median_spans = "3 months", +#' merge = TRUE, +#' message = FALSE) +#' +#' @export +time_decompose <- function(data, target, method = c("stl", "twitter", "multiplicative"), + frequency = "auto", ..., merge = FALSE, message = TRUE) { + UseMethod("time_decompose", data) +} + +#' @export +time_decompose.default <- function(data, target, method = c("stl", "twitter", "multiplicative"), + frequency = "auto", ..., merge = FALSE, message = TRUE) { + stop("Object is not of class `tbl_df` or `tbl_time`.", call. = FALSE) +} + +#' @export +time_decompose.tbl_time <- function(data, target, method = c("stl", "twitter", "multiplicative"), + frequency = "auto", ..., merge = FALSE, message = TRUE) { + + # Checks + if (missing(target)) stop('Error in time_decompose(): argument "target" is missing, with no default', call. = FALSE) + + # Setup + target_expr <- dplyr::enquo(target) + method <- tolower(method[[1]]) + + # Set method + if (method == "twitter") { + decomp_tbl <- data %>% + decompose_twitter(!! target_expr, frequency = frequency, message = message, ...) + } else if (method == "stl") { + decomp_tbl <- data %>% + decompose_stl(!! target_expr, frequency = frequency, message = message, ...) + } else if (method == "multiplicative") { + decomp_tbl <- data %>% + decompose_multiplicative(!! target_expr, frequency = frequency, message = message, ...) 
+ } else { + stop(paste0("method = '", method[[1]], "' is not a valid option.")) + } + + # Merge if desired + if (merge) { + ret <- merge_two_tibbles(data, decomp_tbl, .f = time_decompose) + } else { + ret <- decomp_tbl + } + + return(ret) + +} + +#' @export +time_decompose.tbl_df <- function(data, target, method = c("stl", "twitter", "multiplicative"), + frequency = "auto", ..., merge = FALSE, message = TRUE) { + + # Checks + if (missing(target)) stop('Error in time_decompose(): argument "target" is missing, with no default', call. = FALSE) + + # Prep + data <- prep_tbl_time(data, message = message) + + # Send to time_decompose.tbl_time + time_decompose(data = data, + target = !! dplyr::enquo(target), + method = method[[1]], + frequency = frequency, + ... = ..., + merge = merge, + message = message) + +} + + + + +#' @export +time_decompose.grouped_tbl_time <- function(data, target, method = c("stl", "twitter", "multiplicative"), + frequency = "auto", merge = FALSE, message = FALSE, ...) { + + # Checks + if (missing(target)) stop('Error in time_decompose(): argument "target" is missing, with no default', call. = FALSE) + + # Setup + target_expr <- dplyr::enquo(target) + + # Mapping + ret <- data %>% + grouped_mapper( + .f = time_decompose, + target = !! target_expr, + method = method[[1]], + frequency = frequency, + merge = merge, + message = message, + ...) + + return(ret) + +} + +#' @export +time_decompose.grouped_df <- function(data, target, method = c("stl", "twitter", "multiplicative"), + frequency = "auto", merge = FALSE, message = FALSE, ...) { + + data <- prep_tbl_time(data, message = message) + + # Send to grouped_tbl_time + time_decompose(data = data, + target = !! dplyr::enquo(target), + method = method[[1]], + frequency = frequency, + ... 
= ..., + merge = merge, + message = message) + +} + + diff --git a/R/time_decompose_methods.R b/R/time_decompose_methods.R new file mode 100644 index 0000000..c73a7e1 --- /dev/null +++ b/R/time_decompose_methods.R @@ -0,0 +1,153 @@ +#' Methods that power time_decompose() +#' +#' @inheritParams time_decompose +#' +#' @return A `tbl_time` object containing the time series decomposition. +#' +#' @seealso [time_decompose()] +#' +#' @examples +#' +#' library(dplyr) +#' +#' tidyverse_cran_downloads %>% +#' ungroup() %>% +#' filter(package == "tidyquant") %>% +#' decompose_stl(count) +#' +#' +#' @references +#' - The "twitter" method is used in Twitter's [`AnomalyDetection` package](https://github.com/twitter/AnomalyDetection) +#' +#' @name decompose_methods + +# 2A. Twitter ---- + +#' @export +#' @rdname decompose_methods +#' @param median_spans Applies to the "twitter" method only. +#' The median spans are used to remove the trend and center the remainder. +decompose_twitter <- function(data, target, frequency = "auto", median_spans = "auto", message = TRUE) { + + # Checks + if (missing(target)) stop('Error in decompose_twitter(): argument "target" is missing, with no default', call. = FALSE) + + + data <- prep_tbl_time(data) + date_col_vals <- tibbletime::get_index_col(data) + + target_expr <- dplyr::enquo(target) + + date_col_name <- timetk::tk_get_timeseries_variables(data)[[1]] + date_col_expr <- rlang::sym(date_col_name) + + # Time Series Decomposition + decomp_tbl <- data %>% + dplyr::pull(!! target_expr) %>% + stats::ts(frequency = time_frequency(data, period = frequency, message = message)) %>% + stats::stl(s.window = "periodic", robust = TRUE) %>% + sweep::sw_tidy_decomp() %>% + dplyr::select(-c(index, seasadj)) %>% + # forecast::mstl() %>% + # as.tibble() %>% + tibble::add_column(!! 
date_col_name := date_col_vals, .after = 0) %>% + purrr::set_names(c(date_col_name, "observed", "season", "trend", "remainder")) %>% + dplyr::mutate(seasadj = observed - season) %>% + dplyr::select(!! date_col_expr, observed, season, seasadj, trend, remainder) %>% + + # Median Groups + time_median(observed, period = median_spans, message = message) %>% + + # Observed transformations + dplyr::mutate( + remainder = observed - season - median_spans + ) %>% + dplyr::select(!! date_col_expr, observed, season, median_spans, remainder) + + decomp_tbl <- anomalize::prep_tbl_time(decomp_tbl) + + return(decomp_tbl) + +} + + + +# 2B. STL ---- + +#' @export +#' @rdname decompose_methods +decompose_stl <- function(data, target, frequency = "auto", message = TRUE) { + + # Checks + if (missing(target)) stop('Error in decompose_stl(): argument "target" is missing, with no default', call. = FALSE) + + + data <- prep_tbl_time(data) + date_col_vals <- tibbletime::get_index_col(data) + + target_expr <- dplyr::enquo(target) + + date_col_name <- timetk::tk_get_timeseries_variables(data)[[1]] + date_col_expr <- rlang::sym(date_col_name) + + # Time Series Decomposition + decomp_tbl <- data %>% + dplyr::pull(!! target_expr) %>% + stats::ts(frequency = time_frequency(data, period = frequency, message = message)) %>% + stats::stl(s.window = "periodic", robust = TRUE) %>% + sweep::sw_tidy_decomp() %>% + # forecast::mstl() %>% + # as.tibble() %>% + tibble::add_column(!! date_col_name := date_col_vals, .after = 0) %>% + dplyr::select(!! date_col_expr, observed, season, trend, remainder) + + decomp_tbl <- anomalize::prep_tbl_time(decomp_tbl) + + return(decomp_tbl) + +} + + + + +# 2C. Multiplicative ---- + +#' @export +#' @rdname decompose_methods +decompose_multiplicative <- function(data, target, frequency = "auto", message = TRUE) { + + # Checks + if (missing(target)) stop('Error in decompose_multiplicative(): argument "target" is missing, with no default', call. 
= FALSE) + + # Setup inputs + data <- prep_tbl_time(data) + date_col_vals <- tibbletime::get_index_col(data) + + target_expr <- dplyr::enquo(target) + + date_col_name <- timetk::tk_get_timeseries_variables(data)[[1]] + date_col_expr <- rlang::sym(date_col_name) + + frequency <- anomalize::time_frequency(data, period = frequency, message = message) + + # Time Series Decomposition + decomp_tbl <- data %>% + dplyr::pull(!! target_expr) %>% + stats::ts(frequency = frequency) %>% + stats::decompose(type = "multiplicative") %>% + sweep::sw_tidy_decomp() %>% + dplyr::select(-index) %>% + dplyr::rename(remainder = random) %>% + dplyr::select(observed, season, seasadj, trend, remainder) %>% + tibble::add_column(!! date_col_name := date_col_vals, .after = 0) %>% + # Fix trend and remainder + dplyr::mutate( + trend = stats::supsmu(seq_along(observed), seasadj)$y, + remainder = observed / (trend * season) + ) + + decomp_tbl <- anomalize::prep_tbl_time(decomp_tbl) + + return(decomp_tbl) + +} diff --git a/R/time_frequency.R b/R/time_frequency.R new file mode 100644 index 0000000..ab87589 --- /dev/null +++ b/R/time_frequency.R @@ -0,0 +1,271 @@ +#' Generate a time series frequency from a periodicity +#' +#' @param data A `tibble` with a date or datetime index. +#' @param period Either "auto", a time-based definition (e.g. "2 weeks"), +#' or a numeric number of observations per frequency (e.g. 10). +#' See [tibbletime::collapse_by()] for period notation. +#' @param template A template that converts the scale of the data to +#' the expected frequency and trend spans. +#' @param message A boolean. If `message = TRUE`, the frequency used is output +#' along with the units in the scale of the data. +#' +#' @return Returns a scalar numeric value indicating the number of observations in the frequency or trend span. +#' +#' @details +#' A frequency is loosely defined as the number of observations that comprise a cycle +#' in a data set. 
The trend is loosely defined as time span that can +#' be aggregated across to visualize the central tendency of the data. +#' It's often easiest to think of frequency and trend in terms of the time-based units +#' that the data is already in. __This is what `time_frequency()` and `time_trend()` +#' enable: using time-based periods to define the frequency or trend.__ +#' +#' __Frequency__: +#' +#' As an example, a weekly cycle is often 5-days (for working +#' days) or 7-days (for calendar days). Rather than specify a frequency of 5 or 7, +#' the user can specify `period = "1 week"`, and +#' time_frequency()` will detect the scale of the time series and return 5 or 7 +#' based on the actual data. +#' +#' The `period` argument has three basic options for returning a frequency. +#' Options include: +#' - `"auto"`: A target frequency is determined using a pre-defined template (see `template` below). +#' - `time-based duration`: (e.g. "1 week" or "2 quarters" per cycle) +#' - `numeric number of observations`: (e.g. 5 for 5 observations per cycle) +#' +#' The `template` argument is only used when `period = "auto"`. The template is a tibble +#' of three features: `time_scale`, `frequency`, and `trend`. The algorithm will inspect +#' the scale of the time series and select the best frequency that matches the scale and +#' number of observations per target frequency. A frequency is then chosen on be the +#' best match. The predefined template is stored in a function `time_scale_template()`. +#' However, the user can come up with his or her own template changing the values +#' for frequency in the data frame. +#' +#' __Trend__: +#' +#' As an example, the trend of daily data is often best aggregated by evaluating +#' the moving average over a quarter or a month span. 
Rather than specify the number +#' of days in a quarter or month, the user can specify "1 quarter" or "1 month", +#' and the `time_trend()` function will return the correct number of observations +#' per trend cycle. In addition, there is an option, `period = "auto"`, to +#' auto-detect an appropriate trend span depending on the data. The `template` +#' is used to define the appropriate trend span. +#' +#' @examples +#' +#' library(dplyr) +#' +#' data(tidyverse_cran_downloads) +#' +#' #### FREQUENCY DETECTION #### +#' +#' # period = "auto" +#' tidyverse_cran_downloads %>% +#' filter(package == "tidyquant") %>% +#' ungroup() %>% +#' time_frequency(period = "auto", template = time_scale_template()) +#' +#' time_scale_template() +#' +#' # period = "1 month" +#' tidyverse_cran_downloads %>% +#' filter(package == "tidyquant") %>% +#' ungroup() %>% +#' time_frequency(period = "1 month") +#' +#' #### TREND DETECTION #### +#' +#' tidyverse_cran_downloads %>% +#' filter(package == "tidyquant") %>% +#' ungroup() %>% +#' time_trend(period = "auto") + + +#' @export +#' @rdname time_frequency +time_frequency <- function(data, period = "auto", template = time_scale_template(), message = TRUE) { + + # Checks + if (dplyr::is.grouped_df(data)) + stop(glue::glue("Cannot use on a grouped data frame. + Frequency should be performed on a single time series.")) + + # Setup inputs + data <- prep_tbl_time(data, message = F) + + index_expr <- data %>% tibbletime::get_index_quo() + index_name <- dplyr::quo_name(index_expr) + + # Get timeseries summary attributes + ts_summary <- data %>% + tibbletime::get_index_col() %>% + timetk::tk_get_timeseries_summary() + + ts_nobs <- ts_summary$n.obs + ts_scale <- ts_summary$scale + + + if (is.numeric(period)) { + # 1. Numeric Periods + freq <- period + + } else if (period != "auto") { + # 2. Text (e.g. period = "2 Weeks") + freq <- data %>% + tibbletime::collapse_by(period = period) %>% + dplyr::count(!! 
index_expr) %>% + dplyr::pull(n) %>% + stats::median(na.rm = T) + + } else { + # 3. period = "auto" + + periodicity_target <- time_scale_template() %>% + target_time_decomposition_scale(time_scale = ts_scale, target = "frequency", index_shift = 0) + + freq <- data %>% + tibbletime::collapse_by(period = periodicity_target) %>% + dplyr::count(!! index_expr) %>% + dplyr::pull(n) %>% + stats::median(na.rm = T) + + # Insufficient observations: nobs-to-freq should be at least 3-1 + if (ts_nobs < 3*freq) { + periodicity_target <- time_scale_template() %>% + target_time_decomposition_scale(time_scale = ts_scale, target = "frequency", index_shift = 1) + + freq <- data %>% + tibbletime::collapse_by(period = periodicity_target) %>% + dplyr::count(!! index_expr) %>% + dplyr::pull(n) %>% + stats::median(na.rm = T) + } + + if (ts_nobs < 3*freq) { + freq <- 1 + } + } + + if (message) { + freq_string <- glue::glue("frequency = {freq} {ts_scale}s") + message(freq_string) + } + + return(freq) +} + +#' @export +#' @rdname time_frequency +time_trend <- function(data, period = "auto", template = time_scale_template(), message = TRUE) { + + # Checks + if (dplyr::is.grouped_df(data)) + stop(glue::glue("Cannot use on a grouped data frame. + Frequency should be performed on a single time series.")) + + # Setup inputs + data <- prep_tbl_time(data, message = F) + + index_expr <- data %>% tibbletime::get_index_quo() + index_name <- dplyr::quo_name(index_expr) + + # Get timeseries summary attributes + ts_summary <- data %>% + tibbletime::get_index_col() %>% + timetk::tk_get_timeseries_summary() + + ts_nobs <- ts_summary$n.obs + ts_scale <- ts_summary$scale + + + if (is.numeric(period)) { + # 1. Numeric Periods + trend <- period + + } else if (period != "auto") { + # 2. Text (e.g. period = "2 Weeks") + trend <- data %>% + tibbletime::collapse_by(period = period) %>% + dplyr::count(!! index_expr) %>% + dplyr::pull(n) %>% + stats::median(na.rm = T) + + } else { + # 3. 
period = "auto" + + periodicity_target <- time_scale_template() %>% + target_time_decomposition_scale(time_scale = ts_scale, target = "trend", index_shift = 0) + + trend <- data %>% + tibbletime::collapse_by(period = periodicity_target) %>% + dplyr::count(!! index_expr) %>% + dplyr::pull(n) %>% + stats::median(na.rm = T) + + # Insufficient observations: nobs-to-trend should be at least 2-1 + if (ts_nobs / trend < 2) { + periodicity_target <- time_scale_template() %>% + target_time_decomposition_scale(time_scale = ts_scale, target = "trend", index_shift = 1) + + trend <- data %>% + tibbletime::collapse_by(period = periodicity_target) %>% + dplyr::count(!! index_expr) %>% + dplyr::pull(n) %>% + stats::median(na.rm = T) + + trend <- ceiling(trend) + + } + + if (ts_nobs / trend < 2) { + trend <- ts_nobs + } + } + + if (message) { + trend_string <- glue::glue("trend = {trend} {ts_scale}s") + message(trend_string) + } + + return(trend) +} + +#' @export +#' @rdname time_frequency +time_scale_template <- function() { + + tibble::tribble( + ~ "time_scale", ~ "frequency", ~ "trend", + "second", "1 hour", "12 hours", + "minute", "1 day", "14 days", + "hour", "1 day", "1 month", + "day", "1 week", "3 months", + "week", "1 quarter", "1 year", + "month", "1 year", "5 years", + "quarter", "1 year", "10 years", + "year", "5 years", "30 years" + ) + +} + +# Helper function to get the time decomposition scale +target_time_decomposition_scale <- function(template, time_scale, target = c("frequency", "trend"), index_shift = 0) { + + target_expr <- rlang::sym(target[[1]]) + + idx <- which(template$time_scale == time_scale) - index_shift + key_value <- template$time_scale[idx] + + template %>% + dplyr::filter(time_scale == key_value) %>% + dplyr::pull(!! 
target_expr) +} + +# time_scale_template() %>% +# target_time_decomposition_scale(time_scale = "minute", target = "frequency") +# +# time_scale_template() %>% +# target_time_decomposition_scale(time_scale = "minute", target = "trend") + + + diff --git a/R/time_recompose.R b/R/time_recompose.R new file mode 100644 index 0000000..8adb3e7 --- /dev/null +++ b/R/time_recompose.R @@ -0,0 +1,142 @@ +#' Recompose bands separating anomalies from "normal" observations +#' +#' @param data A `tibble` or `tbl_time` object that has been +#' processed with `time_decompose()` and `anomalize()`. +#' +#' @return Returns a `tbl_time` object. +#' +#' @details +#' The `time_recompose()` function is used to generate bands around the +#' "normal" levels of observed values. The function uses the remainder_l1 +#' and remainder_l2 levels produced during the [anomalize()] step +#' and the season and trend/median_spans values from the [time_decompose()] +#' step to reconstruct bands around the normal values. +#' +#' The following key names are required: observed:remainder from the +#' `time_decompose()` step and remainder_l1 and remainder_l2 from the +#' `anomalize()` step. +#' +#' +#' @seealso +#' Time Series Anomaly Detection Functions (anomaly detection workflow): +#' - [time_decompose()] +#' - [anomalize()] +#' +#' @examples +#' +#' library(dplyr) +#' +#' data(tidyverse_cran_downloads) +#' +#' # Basic Usage +#' tidyverse_cran_downloads %>% +#' time_decompose(count, method = "stl") %>% +#' anomalize(remainder, method = "iqr") %>% +#' time_recompose() +#' +#' +#' @export +time_recompose <- function(data) { + UseMethod("time_recompose", data) +} + +#' @export +time_recompose.default <- function(data) { + stop("Object is not of class `tbl_df` or `tbl_time`.", call. 
= FALSE) +} + +#' @export +time_recompose.tbl_time <- function(data) { + + # Checks + column_names <- names(data) + check_names <- c("observed", "remainder", "remainder_l1", "remainder_l2") %in% column_names + if (!all(check_names)) stop('Error in time_recompose(): key names are missing. Make sure observed:remainder, remainder_l1, and remainder_l2 are present', call. = FALSE) + + # Setup + target_expr <- dplyr::enquo(target) + method <- tolower(method[[1]]) + + l1 <- data %>% + dplyr::select(observed:remainder, contains("_l1")) %>% + dplyr::select(-c(observed, remainder)) %>% + apply(MARGIN = 1, FUN = sum) + + l2 <- data %>% + dplyr::select(observed:remainder, contains("_l2")) %>% + dplyr::select(-c(observed, remainder)) %>% + apply(MARGIN = 1, FUN = sum) + + ret <- data %>% + # add_column(!! paste0(quo_name(target_expr), "_l1") := l1) + tibble::add_column( + recomposed_l1 = l1, + recomposed_l2 = l2 + ) + + return(ret) + +} + +#' @export +time_recompose.tbl_df <- function(data) { + + # Prep + data <- prep_tbl_time(data, message = FALSE) + + # Send to time_recompose.tbl_time + time_recompose(data = data) + +} + + +#' @export +time_recompose.grouped_tbl_time <- function(data) { + + # Checks + column_names <- names(data) + check_names <- c("observed", "remainder", "remainder_l1", "remainder_l2") %in% column_names + if (!all(check_names)) stop('Error in time_recompose(): key names are missing. Make sure observed:remainder, remainder_l1, and remainder_l2 are present', call. 
= FALSE) + + # Setup + group_names <- dplyr::groups(data) + group_vars_expr <- rlang::syms(group_names) + + # Recompose l1 and l2 bands + l1 <- data %>% + dplyr::ungroup() %>% + dplyr::select(observed:remainder, contains("_l1")) %>% + dplyr::select(-c(observed, remainder)) %>% + apply(MARGIN = 1, FUN = sum) + + l2 <- data %>% + dplyr::ungroup() %>% + dplyr::select(observed:remainder, contains("_l2")) %>% + dplyr::select(-c(observed, remainder)) %>% + apply(MARGIN = 1, FUN = sum) + + ret <- data %>% + dplyr::ungroup() %>% + tibble::add_column( + recomposed_l1 = l1, + recomposed_l2 = l2 + ) %>% + dplyr::group_by(!!! group_vars_expr) + + return(ret) + +} + +#' @export +time_recompose.grouped_df <- function(data) { + + data <- prep_tbl_time(data, message = message) + + # Send to grouped_tbl_time + time_recompose(data = data) + +} + + + + diff --git a/R/utils.R b/R/utils.R new file mode 100644 index 0000000..562951f --- /dev/null +++ b/R/utils.R @@ -0,0 +1,178 @@ +# UTILITY FUNCTIONS ---- + +# 1. Mapping Functions ----- + +grouped_mapper <- function(data, target, .f, ...) { + + data <- prep_tbl_time(data, message = F) + + target_expr <- dplyr::enquo(target) + + group_names <- dplyr::groups(data) + group_vars_expr <- rlang::syms(group_names) + + ret <- data %>% + tidyr::nest() %>% + dplyr::mutate(nested.col = purrr::map( + .x = data, + .f = .f, + target = !! target_expr, + ...) + ) %>% + dplyr::select(-data) %>% + tidyr::unnest() %>% + dplyr::group_by(!!! group_vars_expr) + + # if (merge) { + # ret <- merge_two_tibbles(tib1 = data, tib2 = ret, .f = .f) + # } + + return(ret) + +} + +# 2. 
Merging Time-Based Tibbles ----- + +merge_two_tibbles <- function(tib1, tib2, .f) { + + # Merge results + if (identical(nrow(tib1), nrow(tib2))) { + + # Arrange dates - Possibility of issue if dates not decending in tib1 + tib1 <- arrange_by_date(tib1) + + # Drop date column and groups + tib2 <- drop_date_and_group_cols(tib2) + + # Replace bad names + tib2 <- replace_bad_names(tib2, .f) + + # Replace duplicate names + tib2 <- replace_duplicate_colnames(tib1, tib2) + + ret <- dplyr::bind_cols(tib1, tib2) + + } else { + + stop("Could not join. Incompatible structures.") + } + + return(ret) +} + +replace_duplicate_colnames <- function(tib1, tib2) { + + # Collect column names + name_list_tib1 <- colnames(tib1) + name_list_tib2 <- colnames(tib2) + name_list <- c(name_list_tib1, name_list_tib2) + + duplicates_exist <- detect_duplicates(name_list) + + # Iteratively add .1, .2, .3 ... onto end of column names + if (duplicates_exist) { + + i <- 1 + + while (duplicates_exist) { + + dup_names_stripped <- + stringr::str_split(name_list[duplicated(name_list)], + pattern = "\\.\\.", + simplify = FALSE) %>% + sapply(function(x) x[[1]]) + + name_list[duplicated(name_list)] <- + stringr::str_c(dup_names_stripped, "..", i) + + i <- i + 1 + + duplicates_exist <- detect_duplicates(name_list) + + } + + name_list_tib2 <- name_list[(ncol(tib1) + 1):length(name_list)] + + colnames(tib2) <- name_list_tib2 + } + + return(tib2) +} + +detect_duplicates <- function(name_list) { + + name_list %>% + duplicated() %>% + any() +} + +# bad / restricted names are names that get selected unintetionally by OHLC functions +replace_bad_names <- function(tib, fun_name) { + + bad_names_regex <- "open|high|low|close|volume|adjusted|price" + + name_list_tib <- colnames(tib) + name_list_tib_lower <- stringr::str_to_lower(name_list_tib) + + detect_bad_names <- stringr::str_detect(name_list_tib_lower, bad_names_regex) + + if (any(detect_bad_names)) { + + len <- length(name_list_tib_lower[detect_bad_names]) + 
name_list_tib[detect_bad_names] <- rep(fun_name, length.out = len) + + } + + colnames(tib) <- name_list_tib + + return(tib) +} + +arrange_by_date <- function(tib) { + + if (dplyr::is.grouped_df(tib)) { + + group_names <- dplyr::groups(tib) + + arrange_date <- function(tib) { + date_col <- timetk::tk_get_timeseries_variables(tib)[[1]] + tib %>% + dplyr::arrange_(date_col) + } + + tib <- tib %>% + tidyr::nest() %>% + dplyr::mutate(nested.col = + purrr::map(data, arrange_date) + ) %>% + dplyr::select(-data) %>% + tidyr::unnest() %>% + dplyr::group_by_(.dots = group_names) + + + } else { + + tib <- tib %>% + dplyr::arrange_(timetk::tk_get_timeseries_variables(tib)[[1]]) + + } + + return(tib) +} + +drop_date_and_group_cols <- function(tib) { + + date_col <- timetk::tk_get_timeseries_variables(tib)[[1]] + group_cols <- dplyr::groups(tib) %>% + as.character() + cols_to_remove <- c(date_col, group_cols) + tib_names <- colnames(tib) + cols_to_remove_logical <- tib_names %in% cols_to_remove + tib_names_without_date_or_group <- tib_names[!cols_to_remove_logical] + + tib <- tib %>% + dplyr::ungroup() %>% + dplyr::select_(.dots = as.list(tib_names_without_date_or_group)) + + return(tib) +} diff --git a/README.Rmd b/README.Rmd new file mode 100644 index 0000000..556db55 --- /dev/null +++ b/README.Rmd @@ -0,0 +1,49 @@ +--- +output: github_document +--- + + + +```{r setup, include = FALSE} +knitr::opts_chunk$set( + collapse = TRUE, + comment = "#>", + fig.path = "man/figures/README-", + out.width = "100%" +) +``` +# anomalize + +The goal of anomalize is to ... + +## Installation + +You can install the released version of anomalize from [CRAN](https://CRAN.R-project.org) with: + +``` r +install.packages("anomalize") +``` + +## Example + +This is a basic example which shows you how to solve a common problem: + +```{r example} +## basic example code +``` + +What is special about using `README.Rmd` instead of just `README.md`? 
You can include R chunks like so: + +```{r cars} +summary(cars) +``` + +You'll still need to render `README.Rmd` regularly, to keep `README.md` up-to-date. + +You can also embed plots, for example: + +```{r pressure, echo = FALSE} +plot(pressure) +``` + +In that case, don't forget to commit and push the resulting figure files, so they display on GitHub! diff --git a/README.md b/README.md new file mode 100644 index 0000000..7d798fe --- /dev/null +++ b/README.md @@ -0,0 +1,47 @@ + + + +# anomalize + +The goal of anomalize is to … + +## Installation + +You can install the released version of anomalize from +[CRAN](https://CRAN.R-project.org) with: + +``` r +install.packages("anomalize") +``` + +## Example + +This is a basic example which shows you how to solve a common problem: + +``` r +## basic example code +``` + +What is special about using `README.Rmd` instead of just `README.md`? +You can include R chunks like so: + +``` r +summary(cars) +#> speed dist +#> Min. : 4.0 Min. : 2.00 +#> 1st Qu.:12.0 1st Qu.: 26.00 +#> Median :15.0 Median : 36.00 +#> Mean :15.4 Mean : 42.98 +#> 3rd Qu.:19.0 3rd Qu.: 56.00 +#> Max. :25.0 Max. :120.00 +``` + +You’ll still need to render `README.Rmd` regularly, to keep `README.md` +up-to-date. + +You can also embed plots, for example: + + + +In that case, don’t forget to commit and push the resulting figure +files, so they display on GitHub\! 
diff --git a/_pkgdown.yml b/_pkgdown.yml new file mode 100644 index 0000000..e69de29 diff --git a/anomalize.Rproj b/anomalize.Rproj new file mode 100644 index 0000000..a648ce1 --- /dev/null +++ b/anomalize.Rproj @@ -0,0 +1,20 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 4 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source diff --git a/cran-comments.md b/cran-comments.md new file mode 100644 index 0000000..a95d65e --- /dev/null +++ b/cran-comments.md @@ -0,0 +1,10 @@ +## Test environments +* local OS X install, R 3.4.4 +* ubuntu 14.04 (on travis-ci), R 3.4.4 +* win-builder (devel and release) + +## R CMD check results + +0 errors | 0 warnings | 1 note + +* This is a new release. diff --git a/data-raw/tidyverse_cran_downloads.R b/data-raw/tidyverse_cran_downloads.R new file mode 100644 index 0000000..e22f1a5 --- /dev/null +++ b/data-raw/tidyverse_cran_downloads.R @@ -0,0 +1,17 @@ +library(dplyr) +library(tibbletime) +library(cranlogs) + +pkgs <- c( + "tidyr", "lubridate", "dplyr", + "broom", "tidyquant", "tidytext", + "ggplot2", "purrr", "glue", + "stringr", "forcats", "knitr", + "readr", "tibble", "tidyverse" +) + +tidyverse_cran_downloads <- cran_downloads(pkgs, from = "2017-01-01", to = "2018-03-01") %>% + group_by(package) %>% + as_tbl_time(date) + +tidyverse_cran_downloads diff --git a/data/tidyverse_cran_downloads.rda b/data/tidyverse_cran_downloads.rda new file mode 100644 index 0000000000000000000000000000000000000000..2bc683f3bdbb4ffc95ed0d82fb83a6a3442f7285 GIT binary patch literal 41305 zcmdSB2V4|M@F={<_Es>CHMIW5Tm zNCpWD!h*ylXIPT@dUgTfPVesh-}}Aq_qAhBPj_{7RdrQ$P4_GkYWIY=uc)#ps(w#& zTiY=|{P6p~r7zQ3Fk%u@I-E!x%7C|$p`<2sipoKgaXim4@g4Rc4GzDaSJ0p>Z*cQ!_p 
z02^ie^R?I6ePEhZS144E_)+i7aU2ag+%@HNZra=S0M%Zy~@+&cLg*rvC zVx@#Q_)Xorw7XZ50_N#1pbP=l_^(Q3q1-gGM=m!uy`d;`)4iu}?Cy9~R`#;7 z=c|~iQl-uAa@No0(x@;VO`ncz}=bsYiQAshd-8pj5U{E5zOuTK&b~2wuGU_WR zwm68!pPk_slj9a+G0uJ(P%U;LfO6C|DNK%UUe;vSKSEU!!w$VSCx zH3|BEmj4VT3GqMWhX8%#>vZKOyFtYM^Oe$}|2wPhRz@^^m2~%OtwKX;F4Xovqm8&5`rhY$y zaLn1q71wIQ66S^dRDNT`RU&8qQLfwNEes}VIZ`IiQ|&VVf*ccxQIlXr<>bQ$HA0jF zP2z4--^k42-k=y0S zfvCY=2U!&^)Q9q3gjRK{Zh@;7wlW`=R=aP}v+cE$*}j!TGnL%EH!Z!)BrkTn+`JXDY2is91SXg+Ht7lV`E}Z|gRLnJ&wOlkf=%lNt*g8&C-ow_u z^TApfD#X3~1_fF>P$uWk96NdI+WoMi^c42rKX8uoJb2}zqr4wOQG7=3!Z`1!0Q(9* z?#!VNXk#C?q(ASr`cSRCWp83v!wP@kVjJvr)M?v~Y0#pHo1uy`tP{tW1xh_uL09i@ z?$f5RUGUP4{BwM$)dZjZ{eaB4r<>NDL8m>Lcd3d6DoQi<$c#u?3mWDT=_mRjgAb%6SmdN08GK4U zW>-a3V1ky3!k}*$eAOuy4_iyrrhS^>Zy2J??4qb2a#312UH4Yzc+(TMz`fNuBd=9? zYTnh!x$7QPINyK@WIKOjSJMTPr_9G!Ry~geNgKZ7MddQ+R4{6GCuan7-1Lp zhH8c|FBp0{8#QsX+gbT&x5JS+?rIs9wsOu2wSLO#A9+S%vRQSJwm|W=Nqa(Wp;Efv zgiciYJyoMx-EPLUcNrr!SvPCY*FWP|C9^d!$9!s%xc0~Iyz-~k9qJmjJS}S3_Zo?2 z$65v!IvCet-?LHI2H;avpWY4DY;UVY87hx`dazQNbE8@C#Y1>XkhwKW5wE#~?qkk$ z)szk?nQu{Zt*C^ZemY9QcC_wykB)Z=C);>gCJuIz|5lYgaon3{{K)5>;VCxBv*pr` zlDYb-73Lns*oAk4x^MXp-VUJp%(Z`ZastU~hMa89tgm~iqsHHSb5-^XXSxxsU5Agn zfm)t;8IPY;hM}g{h<*dNZ1u<^ofA*bWvXS#@&x#+u}6lz9nNSNZ{a$qww9#bZYbrY z6Iw3wDqXUSy(0%Tc>Hak5sv-EgTo(7(J82<#SS}o=v`Wa*WtTL`DJpY^wX{C!dZ$0 zhp0oNvwY{0na_)Tnp8D!X^|;E@mu^=lR@ z4b(3zl=s7}gROlY+jVn!GcO#kYAT5JdMf5`Oq)P}pYQVOcz)}_p+Ps5w4)a02@eip zU~TAnF&zwAw)YHQ*9BR&S8;0Rm<^QWJzuME$=4}2r5|)Gey6DO2}L(IlRk73VVyQ3 zRS4(vOUkiO3*vXY+sV@LQSb)!>r}Zkb9L$t7c1)X?~!-gS$Fzd)tdg^a6wulizQ#C zFZB+!;_X0N_gAWXbK5&EQ;eCx*DjlUFit z^i2=8`Z9iND~>I{v8N(!g119KtBZOj!cJbOp&*YfN%9?!+^TMtY4G9h3TA`Qt2xRY z@xnorXv^S3uWC^zPyB|tD4*2A>q*DjX{8cxuo|Y{mgHd>@FHWlA;~5Yi998(4!cA z?HBG=zn?I)eW^{!v;QK4i+1}iT=~_~^wIdczDlSF!?-tQ!T5`x*i@g5+xfidxEIbs zN!8n!R(IyPVYo`O)ue6c{g;6UymFdL=61aoO#Z|yGg4a`e!PQ+y|3yBBffo+xw-^{ zHK)7!=p$!HuE9u-&pvC8_o7FcU*0nB;tfJrhJ2s9Kd6DkeaugJ%<+0C+B5a)aEH?3 zh^Hpj#-de!2)D!9y*K&j#2K5899eRod}At}^Ior%e5W%k3O;7b>J4RDyo%Kek-~k` 
z?(kC;WtC_vct$^bvRbP8?UDQ4kusB|XwDo*n`9P)`pncDN=e4*gSy@!=0|( zyd%s`HObA=_22wD#&V@9RYQR1#gT!6CjvXWxXs?jYu^c$sp5=i{yj~VZ>)KsIKK7i z-R{SQW?gf#xfVTZA70;8C6o>Jgcj&(s@qSfR3vqX*y8GWEt!jqJDyaBhsliN!ov(h zRfbGyE5y{Y?GB8FJ65AQ0%xQ#PN{5b22uNF(~UZV-iF`28@7{q;bW;|PD_T@!SGb` z&b%?>;|TlKJPRtEv)1w8##{HhF79PZ?Z_B^9nOg@HoD#;m>j>e4P(cjZ^sdq(a+@( z!HQf{h#*8h4mCR-Yx1PoexL(UeIJ=GE8>Aa#n3KN5gMf|8n?^1nq8GJ>=Jj(Y*M?4 zee_cQy*jZWhW8Gf%k8C)FMldDNOq3Ex2O*-*f}0l-!~jdg$!G4&hY7ft`};g6Y|wE zzPX5Sl|U~-NI4#?wmOlO?LxRylrvgcf~UW6GyUF4nO8XoPWEtXc9r`*rOopt$HpHH z6}UV4X2m;r2S51A)>hmoOt=saFu1qzno7#`msp&nQgQ1Q^xt_R$=34A8Y=IrwiA z=gaapKCAMojyl>_i{a(X;*|y1YCFG<1c&Oyb0G?rN2h3&@Zkx5om~F4>5hkdBb6?2 zoo^|?vW%(7GUC=~b|aS$f37%t0sW}DaIof3pv71Jwz=PX)@q8BnX5$+MN@Ejt2YmG zTc2rl=2DxjjxI{0ToJlWIg>N)hNsG-^+MZnoQyh+g5H*TtrZInHXohRepJbGs9;(+ zuT{Oo>A<4aN_k%{SN%kf@7$d{x`GG3iVOJ!wXb8%^P+f`DK#nUBuyT)@s(`nY|E1^ zoSNak*YZ~%ykR&ISu<8Wg;-0hZ$8eTv%nM&N zfP-5JI`p=mQ#-DaC)EjglWPjT9D-FEI?NYU z>RTv(Gt%yWNVAhddvU&HOI^Cc75m$!8rB}G=nutd5i3<=+=o1yOV>stvS_}JOz`Gg z20xp)zSL$Z*UM<_moX`d_4tbB?zn`>G>tZBx@eBMiWOXR@O6+*hW}wQPE~ZQ+A#7) zxXaVlPq{wqV;RyK-YlOk_`gQmTOM3cOV}wrQPNpz&Gl&U#5IoiK8qPXj-S(0XYJAK zuAvyB#VRdy2DMg+;%f{#V?|javEnt$g_R~T zlQfl#Uu9np4rkXD)-_zJNxw&H_~N~5ImYH|{oRk8VXSE(?=DOgji1EG%qk}b)k%F6 zf0Ll#il=<)l=fs1M_zA@cDa{)w{v&Bc2`a7y{Jx&x1wzf`L@Z|Dn(ki{!U>v1!3G=;Pli#95OvNYgQz?0aQKvws8Qt8s zf`r5j#|G)1qF}~va{G5JMK~q7FfV*AL}V^E&`x36ia%XaQ)#_NgAZy_ca8$GEX%@St|zovBtfT>qfMdvyL{?wSVF(`Va+Q9TtCL~utAF$ zb?=E*Wig}JsqW6IyJ~rz4^=KU2r6WEmFmT5D<02FnG3^n4$)qtoGK2!|036+>o- zN^rYso_xmw_8>b=Z?1e?fiecaGr)fT`_tnm2!GD@FsyVrRhviO^3V=r$;h>LvQ1qD+cQXsA-0i9|xJBbZ8D=&Ixw{WUzI>QX}+U1$rN;2Z3&D5j$U=Ci6 z>F7|DGk(PJVDtUa=!vg^hpymDPt-aSbQU`C^LN!NtlVr;O&!0<>XkeCXLq43A4OX4 z^-o!(*K2e=%HZrY7W{geo?I)>;c&#VLgQkOl$*R7)5F~sbBZ(uMDC@zUea`|5f5!x2UU&WD7i3WGWf^OTEOGb{5r zUVCHZ9B#Z^bjerd^O7&n7>P_4Z_i6RlW7oSt{z~McDG-+(xa(lcUg`;ZHpykZo;k~ULV6HC)kbdXKmArtf_x^t=2`q zvP*K3Uhj58_T-t&ySEFEc`ayp?;~g%#Mt@Q4@4gs5}0g?MI72cBOLrL<>4Skc1e+L 
zPCP^IwzYy-PvO&~cv~Ng-`$hm2kMHRntuyTcv*PcYV7KQ;exi*H~3`nbE9yq@W3Zt z1jN-K3VOE8cc!<>%ICS zEMHaLRQ&fj{CPSjw7t)psY0yBq;}jOy5UQMNRIcADB|)*zLXExMytl1nb-C~=^?W48zC|>cWdf&b+T&_O zDd+3Jl{rM8b@n?xY_*}bR(SW$Fl}wyg-D5|@0RDZUJRo-HOov+AOxq>7E?`<6zvq+ zmv6A_jIsAWSN1@`s`b+TNF(a;6ZuYTT+~zieqZ%J@=oyw*!a$MKS-$&O}t0L!XIW| z7eLdQ?WdZ5Up?M0IGO!1 zFMRdua-{mb#hxopcTWaPi+kf)FsZ4JZzP*LtU#f-w&I&iB@X0l#JE7RxX?NWZ2ye&&wX~+|4wg zWjdB1S~+9yGuHV~;Q6?=aiPo8RBTZ&%Urm@oB3k9CrU*k{21kt?i1(-2Oq~fSTwEt ze##ShTle$s2IpXoO4QMm{r;{M+z*!2RbH-eNx2~o^P6Pq{;tML(I<=#!(gy7xh{FL za(a>EbC})p%B6elU3Pn|Snh1SXp%e9F6h_fJo!sq*4)HO`AU97z(r=-7>bW+ud?aD~B zJ75~4d&0)wp{k(LXZGY+jr|S%G@-B2Vp2V5`MN4`{u{$@tY!P_?nb}u6C=FFX2>T6 znRc+Q$y60CraIbiosWHJ7uO(27`H+9O;KCk9m86RhHrT7iAmh;-Lj;s_MyZDtO%d4bRW_}sHb1mYi?SfXo^#!eFww08z)k^`4 zQ(F7R3Ao1%Cas*Ri?}zR+SfZGl`l#kugr} zt2;W;@+W3XSl_)B$&+m2M?1VT`K*%pQSeZP&1h(@SiUw+ap9tjrPPG*OEc?6+r!+= z=ZcQ85NKKzIZ&3=@=tQiPKE^pSe)!kbq!|BGd&rwSQ@)*puGg2uPz!z2f?S=N~@E< znH0~=kCxz6Lg4sTmuj?Gat&%xp&PZjpe?w{tgVwgS6%w_n=ty~yiCz&bO&k$@e#Es zQ&RqoS^M}>oE8tdn{9<(Yd(3L`eC_BE!)zyBsjsV^m+1dt3#Z2FMI)2@{0_d>sx6>*OIf|PoO?K@U!}l5DOljum)$cUr`u6YWMw0Hknw)e_n zF`us~f&I=dvfJ&YN2c=*nq;WGf0aU*=VFBz7g6EFXhW#T~c{gq}M??P zcnOMc^#tUzwxKmw)hx-us2fQStyyZ=!he_54YwFOw#d(~F$(;ii=j|;{z30Dm#0== zE;Xk=3@##Q*0>FJIbjg8!n@8Uj?v>9k-Xl#>Vn_;9Hf*a8dm2K zr8g#=(a34%00pL()h{hs9XQc8h{&<(oG&s-rd9LyVo^EG#f%KtH-r*g8~pbyoU6*) z#mQ=<>cSzV-x@4tlVgz@M!y-rZ;gK!vKkb{WC(L!3K?=jh$cK*3LRooG+a=K?<~rz zcCCoSiYh04A1ZIl;rjT#UPsp1KC;H4Qo8r$s47+9)Ix5{~x_Qera?vMyOTy)6aH~c36Ms}{N1b^( zqj}sL|75%@c;R^k-7-)9UI)j!Dkojs)7*w}RK?d56QZ6Iy@F z3S&E55@f&2zgp}$vCxw;ZMkRslGRYRd2c{@(V22l5rX#fhFtr+C-#UZ2>f&V${^Rd z9EVCj*VT%W<-X9)(kn}c8|H2fK%JX>_ej_1vDT(9jv{!^`iwSw>s)@pcg_gG@0ItBG^1Q==eWKa%7VUI(?UvZeXdP? 
zXtS`9>_{YUJUF1Wv#^fneOp^gPzDd%5+caZC}q9I!ilv zX`FoIACb~b$bY<|RfGkXj3kM0v*)^;3b#T#dV4l~81A#N5*Cc4llNo2cI1sG6B1D9 zcmD@g#MU0tOp`@Fnusl-|2;*zT8nhsSckS58a)JJ%6bEsToQP&Rbrsra97Y;m`*72 z5MBMV{RPi4p_^Ic7RZJV(qf#Oq$vbSi(eV3`iU^2_x*+5zdj=Bka!$5hd`{#F&YovPduiNQU(Cr6v@HYaOkP2S^!ZxcVYsZma zasBxa71VvZ`9`5bBVSa~Sp!Py=Y)X}ej)V#UQKXEO0!yuj0ju(T_;IAMM7R@3n!DY z0hwDKw>BZAmy@=^Zf_*ciS3+-qVRvGZv(P^YP%%1Jw?xhfdcaXVQ6BL0_<;eFBJS+ zr_C<18>IZ#`!@mJ1iGK=(zaSrs?9Ng1OK$;VVn3A{D0N5Q6HJ~Yi9mI`TxgFMg_4D zh4SxgwsNqI=nc|Hm61c+pEp4Rpb7lNMbsKVBjVfr`iOs_)#zUTHpL)4j57SnvJziYC=?`*hQ-#7Y8H4_j2$82s=*zAR7s=R1F}dA2=`}m7Wt- zU>Bna)}^Vf5Y=2@&~1%CK%dvb)_vo1U6xb??xZOyRU}9gLgEIs?Ip?Q|0f-|8t^@ zfEd$OUPXnqVKlS=tB|6>_Id)P+cBiSO_7vyx-vXj2Et{f3z)(|*BwL=BCo8gbzmxj z10Z!@7ye#IzDVf}Wg>QFLLQ-#W^x04NsUZ2p2?boYic8E*asEsWAD&*>2WMpqgW3C ziL*TEm-8tnZO{jhOq{*HnX*h9CJ0Qt#5z;I5r<}aOv!IrGCZcqt~Pn+15%pE6c`(j z0JX1MIyg!!&;wO0pb|7f!(N8;1_8uYsp5#iY>s;1##G8e%KXGVW&$=rlMw$r@Ai#_ zE=Or*0m@VKIu^miN4nva@I9wHign^jjq%);O z=jxy_k&~NvdWzO~rlxC_nd_}MIn_p)9$1#Ffw#OU=fA)~y^mkEm~2R5LTci0lS76V zJ5fJKkG}mA+@0oy0s2S>d^jc#fndgMq0yG5ifOJv0AVpXc>}YB#!)$FY>P=8t}EDX zlg3W-mj5qALu6Sl4})2P-uVCTFg=U%LW|CUDcizaA?_CwCet;?D>_%j^8ZvCkox$t zbmDL;;Cs?;O5>D_n7N?vQ3kc>=;tn#nv54wu`Q zY73w>A~?X`i(DKWesP280xsi`)oQL28YAo;WaDIX896*w zhMd- zgZx^lj1L_V|1Mg^lxX9Xc%T~fh9{em6={-48=L5V-Z%WBSx1(E(`?GiBnA0-3$H?0 zbKVhDk-wso71h%H{6uCd~M+6O4aU{1?5(s_P&6q=x* zv^UAQo$#^fT88VD_T0@baaKoz?BC zjB(U>zM-msLDD?k9*-|q732a~WJ7uMtG3=;=-nlDFzmi0ah#8m?knty7FDVYeEM0e z^9fX1I-Qlu*&`jVJ3I{T#D%-|WU&n}?CX=7ah5BIz5aqx(vym5^L7U2&aj7JkIJh( zQahDcV_fgz>7NX1WsG$1L;to5o=7WtnEXnoZRfMQuiE53yFO}jb*h!T7UCHnAqRf? 
zwe@QB@bKu7REtgS?)Gg`xlF4Z<61C%Sa!jZtnQY2;2`4}^%~-mLt1#&bKyN69$zG{ zdU))SX!cZ0uSY$1XLFNuXDE|(JEAbJh3wmYdE^cZLJad)yg#UL7{f1MD#+c$Wbhwp`5mBtI5i5@t>#S;l#3U`Jlp0g3w z_TwFRi)AC-@qm*7d3KyRCFVHAfxHRz2dz0CFNCplpZI*qH9E8E{?wE0%#(Yg@hRt^ zN!=caI}-Z?!v-G|Xr!sI2Uh>3z(EH?OHg>YpT4I^;eIhGMzGVv;}f~(OkNl~wp;S* z%qOc11H*wUYVP-%Z~bMW zuKJ$X1Y$C+n=)9r0Ln|a_p-{)LZU`L2SXZ1x9 zPSaxsgT=o|I#O(SxOGQIxZF3!vssxaPraznq zGwf2>R^L_Y8X+ ze1yz$)z1gj7-V+*z3^Cq!u@>E9*HBTcX{5=HH4p)09}>IJrX4-LVu&sQ?sphx@mM= zng7Vmkl3`{LB)=Q(Ymc}5tPe3{HYd^j3Q{wncPyXi7cms2M;q6gkCu%wuNVR*+yTP z;S*o|SU1dg<_0jF?M}q&>=(&vpPVL}yDeT&Iwsgo7kUc3p!}9wRlWW}0Sp)p&(hZf%kJo&pZ<$pvg383^7A)wPQ4yfu{8}*Vs!Z(%Q2-Zs}AE%_$jg`N3+e}^Rjm5`23&uFPj~pG$6MOz})w|Qi@4Ib{B`T92 zitio8({rBsZ8CDRFO=OGwz}>gjvUNDQ?6yZt+z8uf+8EZlO~~KOaom3EOptkqjBZd z#;9DmYDUeVlyIiGTa@V=WNoLl%)>`D`vqmr@>I!?`09tYHaz2**b(9DKk)%YM^ge0 zfGKMfdcrTjQpr*v73MW!sKT(vTGmSJS4rmp=0HW}=)RW>2lp{C9OyVBQ6MuAXciU+ z6ss!jM-a6oqyox#A<;wPcxAm+_-?z;=V{}T`G(+6JE)RwrkE(#8^1LAY{23YA#2!H zOGWWiBlcIRmIENF<f;&54zDw1YGPAVg)i*+<3iQh#9yQo3U{8XFJ4$sQhsRrY#W6! 
z?J;roZ!e0Ky!X@L)PO^IKEm$k+rn@_4aj^h!m#7ag_b0KS>5Rr&DMH$dgdt4U(6R` zNN%xMFg8z>PulycgGnP#oFdJSc+zLX_(6clbN-0oK>cx6vL;5W@QceIg?HhiE!Fet zP1LVOR%ZK*4kl?HXNhz$=+KvWhI2-rU)1VLC|TAz}rGOLYT@lOQT8hY?DtZ_fgNKpTiCIRa*Vk~U%+}5E4yxSOA7lekYc1;=@DkDTf zLj$89S9Nr>B%@QS7KpWN#M;REpMXM@My=%?9Rmjh*uxX9s@@RBwBou(=fECC30fc= zgIO;Ybk)|TOd@!B*+swy1$Frt5z$9a2hUH2HnYSZ<1*3C_WK5|_c(LZ_6qFB% zP8x-V`iq1P8S2#!IfRBp8PbLrS&M=Z4Gp@jtgH*33Dq;fkQ$fi`ZTVEaY~aX7i&%l zL%T9iA_IR6Vk2>0hVk6o|M9maP#tp=RtEhoi}YQv9pnw7L|B*r*zE)9GLVlB47SVd z4mqgj+fhbC?E{DFQN!<)!$q*>M(BX<>tZ_$ zBB`Y9^>^HTzW3>67Ft*cRQ!Fng_q-d4)0?>sy13D<)rOHpYQqj+xs&y3l~1_f^9Tx zV(@X3-F;=mjU2WMb^v^4(#GU=0j9J2wyqLV?lW>feEBo|QbBn=b0l79Ni#`@Omt3k z&arJFbxggs6z-L=57(NjpW*4N4sU1P{=4{Pn;DD5S$?9dusDqHN?zC~-Zy zJdaMEF5T~)#D2;$@AH^>E^dZxeN(0k2HQWA@OgqAFO1Ag8__;?8>m3wj4iXRiVYfc zKpWDN{B4AS+ySI?E|oM>K_t!zgAti@ zft?ZzFpt3BCiJ)vdR$A2kN&N_Dm*e8IVIRAw}-8f?dvFpZ+ZiQ96WIy-W4w@a*(*w>W`hhqm|lar7N zs=!qst;-}ntm~u2DAc08AJY>p?le7?q2&fBc-*Mo`+EH_hIshoMx+j8|M9R-z+|0B z0v;L`p=q9ea?>_CWx%$cwopqT4)>{qGZyG9WEcnp(J+2SHUa_3wv?Awbfq*}Y)J#0 zmBC;%fX;QYfOwG`1j0^MC6Po7P8Ntq1Dh={;H6qXPx2&YX(nN$^2xF6OF;rt^?(G_ zczk;&;sXlWZUl0q`Ue@3EEj_zS}x$A)W?XX(#00*f|&8?rxBs5r9GM1*j_M@~f{dcS;K2EL(Hvl*ITX3_?3 z2|EfE=ZwFt@>eQ;Ek|f}Ij2g+otNjPh9sn!h!zZ^2{j^;!XHVx6Cc#tth>{Q$Ge2$ zxd!22arAO?$b!_wZ~QBBzgI|hrpxVT($wX$W_!+=AcJZBjc(R7^uovU>tvLzZ3 z24M?)wsT?fGQ^euSR)Y7PCL?K42P>*UKoIQQNn40?R88ah*u!3>H|CWD)s-O=`x9o z)Wnd6ZKJ?{Q<7OV!$*&%UEpA0Q;?eQpDabW<|oQ~)73L&k$h%s?{}f~Ls_Je+qPSU zf5(3c1-{2%eWxxFL`2y;KqNpihuenIEs@oSYBs|($QM1**-IrYjJ!# z%Jp~fkkP_?ygncuLy=IKthKfpDV=3mhqKJ(gG9zRi#sZGV3LUQze)s;P@SSToBL%L&Va< zc+)A}e$zWmR~Zd03io#j)x&hi&ZvLYB$o2C4{tdi{TeffkP+FrZUE?K`cg)sbpw zRnH?W@hHeq1T4EEdH}6akcYnx6K%^Y(QEKA08a1jA9^QeST_z_dLkwQmCzBRGANS6 z{+Xix#a)h_Tm!U=PL-_AHHglY0ZIj29q?EzWVpaf&E&q_RKU3;O~wD9fxucZg@Ec- z9|RCF_90C@x4+0#$r`{v4TJ@0IzGz9#c2-2F_{0M5dp@&aYyK#1k3-Xy_SlC5x^?} z(L(`gEi4dy^Wxs~m zhQlww*gY?{!^q0za+9H1O+tai_}_BuQJ~ydO;a$P4WdMg-5o}nPZUZArb1<&4kPyp zH&+2GC|nT^sQR+b+YW;X`HORdC;(pqxfW}P^c}UP-ODp$&6b`H 
z{YIoW$K9!=Am();AZ)<SUMHL6o|m0+#_2z%dS z(60wE;RNELgpyS#n%gjho)lXs02xawT=3hjlO%)c!YwzCwE~-EL!m`=c3N?r&;hs| zs&}<=67`&v|4QebfsYX$ODonxgK(#!Xbc588}JIdYqE+dn6*AyYmKPb7v>uF^~vRe ziAz#6M`?`Ss79Ft3nR^&|4B_%G#$qvPm0&Hg#-%;iCnWGH%ZUXFq0EHrhl0Sfzo{i9-j*bm?oMdTOc z`GrvVFM0e3WdB|X8X$5&p&@j0;43$fhG9Dzc-8UIz}5N{j-jCd09(O5)~o~DO?~lm z9MLXG`LC=qje|7zSYq%~I7W0HFxXL3$X&Z4#{TY*lB|I$#FO%5NG;-jktb+Ct1VV@ zL+#1h=b{A*k<+DjVALPN&0w&;_)et)$PJP5f0}E!B4D7DRommnyzPotQ<$W@L9M{Y z2yfwmu(&tkd4lXxHIk)lw7GHz6p!Y7=o>C(0f+B~b;L7x(yfr*zuooyd`a-973~`% zE~kOE90VUAUR`$-upJ=C50GYx1b_0Al!!hg{WmulwBJNZ#3wM3q&Ea%B$c#)9l<6I ztT}RVAA-M(-iYYaevB4PStU%lRxv$?!5C(pqgf%zY86|z!ggA*bi!bQNxA>YbciN3 z2Hp{0j*(O-3!0M4FtWZ9b`kJ};iMV}8;RHRA)-^Z#tK8;!voN|yVxQpRR#DIEc#BW zat>-Er8%NCegrT6kjqIUhAI@G(_zxPZd5VxZ30w~BV4gXn$nJ6(ek&!YMGKu)Jq#^ zX;UIb)N zM1vj)XmK|&s6dKViQA0TC`fQ_pi*F}VOujXq*h@|hTg!4Z{aU6Ltmp@V2%(5Xp<2J zc5a|l00+c1I1^tkPhbl3CO|9|1GPgm?AWS|U-FVB&ae$?-jHleI(L3=N(cW76D-@% zN!|D998~4fe@(OfgO#l%;UgvCgUhbk!63T{#ezGeId9(j3=II%;mgDKcDfOZFxa7i z7E-jnuH21C)ZnSl(Nezp;RC0OtH+Lx)umA zf0@K3KtSFo238+D^fBM*y-WoPwqs5`PMtbbU}xetq(=N$8=-`d3w`^}0vuBn%O5VM z8rL^ycc4(oIJgtR_3KpoZ$Lt1Ou^?07mBBfC(_qhQT!XSQKD2KxoUmwDRo*T+MG0V zwNt`8*)WNnOQJep*(1&n9Gd>BdZ#ZW^q~h=!Qk`6u$F9H31DL6Jw_#8S3#TwZI=o_ zuU$MR7B9sdz6_xQsEEmNcS!>f1F$sWTNzmUk@^DT4=cAeHcY=NMwo&^#ttwd7&lY;c~8dIEy@GX0kJMRP4z%z#eml(r+NY$ zUcJsdYS;RByJMU{pgsiwWr9s!KpprNPhcWi%MAi3gEEl5a=X@-@@nTZFFUTqxyF`? 
zYh|I9qMu-0@M%lkonKs!eQ`d-&FoDd@MMc$BzeiMHLFU|Ra$(O*!oe*Wf<%o;0}B+ zJvjha+)c4`0^+`NJtk4DlQD)+G^Z)}Ef8 zHn97sPeL87USi$wws{P4E|9o@LK0IBTy5Pm=z#ho-7`>3Mo0%`fg<8p+~c5l$Z~9$ zjBI`P{*>z)mCjRalYpvnz{Sr-kM5n%7g z>vB%1E2uYsYp{m~WsvQWobLM2Q*Kb@M(=Qo+Jq#5;gH1~lv(%@#-Td{7rb}=xmKD{ zYfV?lUH#+m|4sC0v9|I{5a5$O4+2<#z5(_5o8ozbtvO~*T+!x+@rJSx^>bbCz?Z#} zkez^;&jw!sZv!D}1EZ`olP9nsu)#y@2j#S({=6HHGPQHWt$HswqC_0h2k^TEY|q7J1s0(Bf&mR7ARVhUt-adNB0z0ZQ^C=SX$;UUJF!$gi;1&Ak7mO z8q#FU5D}(>+HK0_ghG|WP1V*F2;9)NUT9ZHi=dV6M7IFc4f+|yE`l$SA~&El#Z$KH zdZmFP0fRsT%xNil-MzucmZ4b!iUTE}B0yvCU>c#fa@TJqC2q%F5jA`-z(&2Hq8g|) z03}!kgF!wFx4N>j0-Fa|AkEwGyWo$wlU7GC??kBWU>m5weN$Hiz?dKrhuyLgZ`#2m z*uL4Cwv7u74_kMKg?SJj@HVur_8WtZLe>E#;PRn2rdCKFVj$b&V(u}MYRFBIY%S4i@s%YmU7LGtwOfdgUQCn zuNBVG-5Wdm8jtF?%I~reIN@9Iqfpu4AAL$(JazuN!u52!-pzB&LsO_aj|p&#<1y1 zL2$K^2H`+kgv$2w0l|~7=p4NV93#nWU|E-8{kV33`w2hCx#TIb6u0q7pa#8jSuC=wE+-M;-8xkTWj;!(0YGRQl1q z8|?RO;(tJN#~Zt!WW#h&h`psV==!6t4AG`Q4S6>LxZ|4;PGCF4Bm@PKn_ZWQetdJ# znqv+Ve5G>JVwJaE6@^R8J90WjEl=@p|F{l3b**w_Li|r{{n*kC_I$@6Fsi>QUVwgZ zWNjChH9hGUGCw>bNp0lQ7QinRu7h6*q&4$ERm?xZ0da#H3VYFwVhzpFz8rin93SG)O_(m4`9=YWV1wKJY?uhV*fgkNa%zLL z!3-)|sskmNNC(9ib(Xg9JE&CCO$C6%CsD<~OLl*CgTa3D9fPzET6U=fphbNVa-P?_ z^T1X*F@d!W(xe_Dt|@Kp_4samOn?C&YuVqi}| z$OBxCUdPM6BLTliL9#XOg_d+qp^Eq0r2VAc+}2l->olv#H94?KS9t)$3qPU-(znNN zCIq9x4|HZ8e1G(66<PLaUC+#Ic@rhxN357c+g}cxI!$BUjPI6zX zeRc7I%4g9`^sG38N-5h?xi(dDAKdk=ieGXa+cya`gG!6&6D*|;6A`MH(K;Wv$`Zn#Ur^u;?NQJoq7XmhbXW&E<%MbEzNAo6$I; z-}Mm!s&d%X6ZsI8Da@MnFd^%@_Ba7W_JMX2<4x1%CJz;k>-h;cy+SK`&!O3^6!B8`zgMebTS{0*-Yc(6m1{z<2M^QapF`I82~{@k0A9N~zygM6p#_fW)}!WnR_Xh!(&Yg@w^ z=P$-L$#dyKu~-9Yy>%288r9Ut&SgN&7s+R`4lx9JQ1y9(&?qYPlFfD)6`zQO!(mvA zkTuQ%pD01a8{%>G>KLFn0mZOm;Sy9sd=wsQ7=i_7hYf4l>aFpKhWL1iu_v`mO>j6n zd^^?>i`7@*TKfqME<$aBfmo`h>;R6s2B*^m#j{^>6@%IYy>Yd~Fv6(0@oX4=s9_=$ zs~uI+pdt?|%X5tv(T#e*j*EIm&5eYrH%X{dYbCJoy%W{hE6a=hZT{}h0*qhjHVQZ9 zxVN}?5U60 zl6$j+atTA+fj)#`p~ZPzn6f^kf|7?5kCn5_%V z%`lX05WWqE6_%K?L2E`C;v-=(^}12`SR6G2kH%WU;rpQqTrRf_CYn-X@$BI^EHuO# 
zD?+JPe}-Xef=j>;VkFA)Wb42K^4an6F!**0JkTtv_mmx@j5Xq@hBv9;ew#A~s3q1| z>`@h23or{_vmpol1ea|EKZwOjVAb=+27#-QiAJ1>1rBRy1;$m7kU^736tzjdWRQva ztlsb%3~QZ;h1DA-T2k}H>bbB`JQf2Yp`ho9)WT}nqw3*M>_O0V*unFga&KWJ8bjc0 zpj*P(E%0!*S|%)8J)AuW6J8IliBK^xDi#B02U)R>syDcBA``4!X<-I;OvqO8yuvwmJ+BlFMKVyAH$9VXNfkz zW6R4ogK&|2AuODYieYnoInV||6e<=Z3unq_a=m2@(oV#%FLYM~)t6_dbAg#{_) zGr>SwpzBcCExwkCS_{QTS;T|(Vx!gryES%PZWJC4qt;tQQK3~>T{KID5mgi4^~ zt=++d!lT!s1=oICCVTTuw%+%Mo8P{_#eBVYEx2|&ND(BCw#@#PB8a2pv##X^WDLZi zaNlx+MSagH#M*!e%YoEse$ClhLO&n(HAuc1y`kNsKdZ|Ok54OiBR z^!wA+W$VjRIqs}s8TBm=055p=!^LR^~VGeQ%w@8#}G;@wHb2SqQZ9%(dtr)2lnc;DG^XXaGUplfAP-q_)g zqRpvzaDkIu(QD_^tZ1L+ztj1FTK(H!Or+NI?43WoXG^qBv*^O6-a%5t*rnxE4K@?E z_6b-WNc_7w*`S&u@@xY7+W$5@Y0UzwUi3h+kkWNb^3j{odkt4!b(~nb6Mr$>W7XMp zEK^d8+6HUhB1FrdM$DMNvTNtlF4SB9YOMl#HYG3n=Yd>LsaUd*3KfEDMD%fr5fJMP zy#BKikSWBQ@LSvn)x7tz>QMBwjH$lHkawTV<99;y=8D@ku~YJ0ob6YXDOhHejpTl; zphbO=ZYbFpQD|>p@YAI5KS2}N8P0bd#K+s=JP!2G)59|zxvtrFOWQ>hRhDAAhHD1W zjk6@r1)m!*%4~&&M@MeI8nIDW{(2?lSo$O0{bIemiI)451s0yFCl@k&Ek)na_3f{E zbW4w`g3|%+?8*Dt%Lb)tM~+ssPZc~mAzsnzcEb5i(62U&Ylb@A9Uh2l>e1LXLYwSJ zxWRu_v%bD+CBk&=Oj6SPTI{CN+{iu+iVZ9iRqh_Wa$a-J`wbJbN&`E)B#Pa3%-bY5 z_8{A27!x|gI%JuQ7%!FeITag|Iwv;?__Ddn*Jk2hDF0*ElR+Wfu{Dmd&05!+zn83P zpF#Vz*n6Q*b7!kru05Knd|>T&$qQpt_}Wk;QT10#w6A9sbG9_3X?jCC_c1?WazeHEB4}JT`-O0wtO2yX&)z|6{j^Zdp?#seL?S6TP?N` zxfl!%!7VJ`2v?0bccYGEEF|diDla0VtH6<+=C*T_Jn3Z_qFJ^xveo88{_T-ra!YP5 zwZ%&(#EZ~9_nfhwDO8wcbZz!|=5{PCdhs{YFfN9vY?r&YCd);KRJ61)C2uRyZ#S4W zbc#k=zAO?oQ_^@Qt>M?}PMXBxaHCvAl{w>s1zA=jm7STBB_H=pHaU8mI47()U19sr z#CM&|OO@BVl~JUu(N?3Pi|sV%T@yakPx~6FPwOph^pm%L^;wOSy)K3Nu|**&MeIR(uu!z0Tr-c9tx1~#p)X?lN7;OJvNLQ?t0rqsP!zSdyKXGIS-GIP8 zqDes96wP1bOL!Yl1Y55=g-3+(T;R?4wYs}?NZoMGhBMJ@Em)-m|-o&XrxVFMR&8{jw z5T`bG-Ev9X&o3!a=Ipf_*jv}6!;?&8?dvrtu!v!)KatOc@IBP4JN@s*FAx`=#=ZX= z7!$-T%<8oy_)Yfh__Uj0bp}&kSMpAdyU5$1klzO|Dw#V!EqvqF2G0_ff*{`gxF^ah zy@n(_TFF!SzDIN%pMXkgb1~z!M7y5tbq3>y=|cfB{H`Ak7lpR@w(M{9zH7R+$yKKQ 
zn;JmAhxY2BS`PDnMjNsb%oS?wDIh=Z`BXifo~eq5_zMrby5eoxT7(PI;A007qr@p)sDT43~x{+?sW+Bnl`?+#J;W(bS2h z$>;_1JfIdTco8pR)!42I%<+@HTjk4c(_B5br{`s0rEz*;h44>fk#%gUVzNdHttzfP zxl<6-X*hkbvWM!qkAYV^y=~m>DYMc}Qn}8(ip(^n>$WCcQAS0C(21(a>C)#N&!(zu zY8n^K?3TXSxogG?X0L@_{LoN+jr0RJP_Cy?)H74vT#hL^4!ELACNFZmtisn@`u`l3 zt)vG3r?i#%c`hj}-5{2C&L||c!YwA4VAY*Cp6F~q=V_8#&^Y(Ghg|!2!y{nL-M+!1 zyWfC?MfT%1$W;cOkS5iJ8%kScykQ{Re-S+QA!rFZT*m7VRFd6(-S#zmY4kP->ccNq z(*xi)BFvT=PX%yvTh?P-*o;C1U}N2xZH>is$$~l<2>oHDO~L$cBhT8%;GE{se!aY9 z`pYamdU`7_!$&J#hR@Q2#lvzW0A%o4k(~b7Xa2^=9n||%Fn{qaWur$}4``2SpE><2 z2lFszM4Z&^-zhj&ZTKO^PN2$7Rb1|sK+;qB)Cx+ejnFf32C?bM+a8YmIhVxntyLJp zMbj-oleZ`q2E(_|xs6q7o+HW2M~00$FHxkX1`!o}E+%<%*(&$%{K2w(&DPvSn0BIs zKGrBWp(;oluk4L8%b4mv@Xq*IDCJ|r@`jGR)ehG9YTo7^&MgT-;Tv~ToC`**U(4S~kF#gp{6+KGSS08)qRxTfrD zRae^G8(Ptdmoo*DGUu)0-d#SgDlVWZ))F(ODt>(UXv(sVst-)nvFI&m)V|{EycK1x zumbFXm`kyGaJz_6xGye!vTQbgY|_D1s<5z3)bSSri+X%*%L|IK2vYF-3hT=Ro`~iO9DwDs^XHWVl}ERac26DD{ab7GQ^pkk2{lyV2(5! zoRQ;dlb%=(c;Ur0JV`HO>|Iv!3#eLeexIXv9T&vQ{*YN|tK2l}HpH=`V_`g3*+XI* z#i&H|MPn+Y|S6Yf@c)V4Ypsr>36Hnn4nth=W$(VB!8f{xp3_~#RCf~Y6 zlZ>d@$8WIq6wD92imIK_Qv%UM-S$?I|Vysqh7>+G z>~fKCs`7!3PBd7vcK#@qa=HUEo8OPz<5AxR6`F{WiZHFIdbY4o7 zyVmC#nRK#QmMLGoqhOqPI^Uy1QUg+wTmqfy&e$*6I8$j8z1TAyy@4W8gtc}cb`le> zNQNAjtJZ_WlGHow`^2a^>qnv=0w2cTBB~r#OJMHfQxl6%C-$21@}nSvG3ZRg>ID6*e}osyGTTYd#yF)rgT=qbFojj*S!V$z?*9MzN-3uqsk1JTMvt0XODNj zh1KwwGa;sVwtRtW@zSe(D`Wka4W@4s@jiRT=@kwCzJ8=7SLdU_{}fdA z)sK6+=Ge}}jP%+p1Me9_hWbvE`oZoC3Jo+%)Ql2v`>1=@+ujK^XO@C9KcYNxf^PCD zdwx%zrJ3$c=R59!eRV1^iO$2*xs*`f_XqB7 zSvk4m;|1>FG_Tp#FD-d1yYpQ>Qsc4X2DEpBCv_7q4>v{Rc!;%qII`b(`&jF(5N28WmpsRI#@a<4^@kkls^N@hwAuaugmLA z=S-Dv1lIX=xz7wdaSNYm%kxiq+@%=WYi2U2Igs1JK9m=|+T_q*8tr^{%dG5P*0vLe zytDn7=HtDiGeP7w7FGKPR1kd!&XW zMN`=EDX=pm7r^3QAO`MeJNr_;emLKIQlC}h{oOu;YzH%Cvr;dyy0&WF#gq9*K4cSX zT>AGPc-y-gIMn~yU4)497T&=#e)o94fbU-R$)ZVV)7J|1K40!i&7PdUW0SC_^h0+@ zbCF3^{=15f(m3Se^7H27WBKHj#|lEKjVB{N zNkr9(y?>?lS(?Q7*e?EH%;MXso($jXj(s9#^R52NX>E#UT737&0ncr3rJY}NSLIKZ 
zjW$`b&RUdPau26o-qGYA8u9u0ZG$^Ytj%h()eddG_8LufF7(h*bg6XkOkQM&BMm0n z^LGLIRN31uQQ*Hxew6M{t4*`U2j#fVb2CmB?B3sKSeC94qSToC1JK=*1WE_dQ!dAbB|p>Abs1HJMlSzq3Aad?n?!RLUI{>4@`S)iYB;v zO5OJ!PPgheeM{=+eQ2j^rtr_ln5@|oMcW?x7uyaS?=Z^q(R_!)^zD;!sL{4E@_f=n z7G)%O%mOd8rzsBqzK|PTbv+#aSs|o(0so-w0ypnnKXHvj@7bN5;@1@;wSA{nZyPHf zecEwcO07d|*w}^^@ZfALgYsl5g9$G~VA6BoN`e8wxggv^_~J`mg?yYNV~MSVk5a&q z2o<>ZR~P|um{m7D;pMT1ev zwqmV{@$RIu*CP6L*Upb~`^=V~c^P=Ps->$Wr2%0yha9`@ti+&lU}fdCR5(tq(wl2 zyYz?}$m&e)59kU$?4e5o-WsNNJ#YbuS56O-C%fN_Gf4?W5BxGF; zs9QW=E^$0#cwBLG!c3nA+yD~|q(1x`xDD~PT0;HrkCh_0I^}MZ2%G-Ra>${PZBQ?B z*1aR)Ugq(vkt=cU$v%{Xt8)gAc6G9hDFWABkc9C7_g%rPMi{2*Q-q`UK8v4*j^$B? zG0y`My58SS7>8+z#(TDyLbH&cqJ>RfO3!wFR?D9|c_?%FRW8}@y40|O@k(lq`()^` zDXzJ;DVgTMkrVZeslGYmy|raXqIq9OnV05xpTdJ}eG+2heX`HDteuB~|EF#{Rs#>s z1WGCGr55-f3w$sL*@z&twx&UBg)xMk+>Tl?U%ZKYc$t>}0@K@(;@c!+cJI!gC}F$d z4eVh;TzcqdD?NNd@@d@zbx^f$m}%=4RB|x^EGvyv;{!!Tm4Ze)LITmQaem^#J)~|y zX;YdgDw{ZbDr)d@&6g`ZR#PP=(MzkCKT!ZzG(%RG-C_FYP*+UJpqCL>)P9Y!d2YFp zb#QCzkV2f(PTg2E9p(Hiy?f})vzMM-e4iiw_T{;!N9mQXrv9SSnb->Wu^SDFi7wCBhK;8F1JhVPa|386MWS~N2|FHz zGp!1=XFbR;$_pomL=5YtwdPXeuKtO1*xIkAaHSWZc4Wu*UfQ?G8dI($Z@W}-yHg4W z%TlkiY3e6s8Yw4`=Im@#w_%ZN~p=;)C>O}E4{KI~G6@Nnw`;H;@(9!mc zaCZAu&8COVROgd2!6nlb&c`=)vo*Xvndk8K{%3wPhWtM2(C zy?6J!6X=0kZj>UL!GIk0Qv*lQhaB7IN_T8IRcMbL^%;X}23$%^#13r?Fy(D0xwRFG zw5JCN6;svv19t>}AiF+aH@64P|KqHm_pQ9@35^#ji9zzSkC*#lTKxP*S?7{Fg!1fZ z+s3o4I1(FG^(y{|=8_x6TZjxgfJ?IYV}2NeLoCiKGdyyg(mt zXlX8nLJj<#eva2E0izc6t6InVeP_2sSAG7p!SZ8G!&&C9J*x=b*M7%?c2XxL!^4pe zH(kCHW{qTi3S~l6Ze_Q=wy%l63^4H22k{M@7Cly_8PkfzBXZoX%Vy;xc0O8+zhb|A zKim3L@u&sm9Z4_Ytg?6bR%leYN?j)C#gV@wsePYUJdWPCUA^Yfa9rv>0S1Tu#{DMU z(vH{8wvc)RG3r#AX$8DX%od%V>ihO(rjU4)T8BI(vxt;(zSuhUR771qmeY81GWm-! 
z9eL>3jm{jlkT$7m+x;k&gn&(#{4JPyiO7FnN?i;m+nXGyZ#)f+buttxwQVD@OK0Ud za9;XdQHJ#gZDm8ARn}~PiDLM|b(Dp|8Kj&TFBF-^L2yN)kw3c0I-5P?*OY8dN!5q{ z3=|wo@$4#f>duUXy3q0_>Q&>g$uf4Y+)ZtQ7hi6~)dO`apRTgkbh4V!BWxTNkQhpJ z^lh$xw{ZtS(03~biZTq{@Ae$3?qF16&l+8mYWwV%AoiulhAHTe3p_(E1!*ju-gi=t z*4)p$@b<@J*YkKxdRV9hW!N{m)swvNIVP(^DD`}JomS`Th6v;GAqAA2JODKFFI1gfkQgDz+D55cMmD`-gG{;b`c=ux!VS@ zg+Plnwn|5*(G(l9N|-SAuSs}o%hc;=0w?Smwj4&d7-^4tBFx1q_y-b_V=(4JY*%{X z82)fu{DFGuDb?d8T6)F1I+!=*;ACcr=Zw3W2nx~(RGW9N;cJhSu&dDwXB^|ZJ@6F{n=-&7$$TQVd-o%lqnWR9jIyUY zNE=pJCkPAK;cQq5^@`Ax&A`@v!mow#R|H0$;~li+`MBuJ_~L}P52bhGkfEx>^WCp@~C=eDcYwww|UD2x}4dLc3*LRA*b4nu%%$lGUuwauuCz+ z;o{)&3a3@&cV$|CYw*M)G`1%1vsD$k8E2=ODv+$BnjsVffJ&ff zLn3`ik$vc-A#AUIc)r`oqt8-?{9`c2W5YTfcHT zNlX~~v5@c+BlfGj;~b=1v_b`r=fWpXh@q46UTn%&RLK8uEbhCi;zPD(msZrF!^4g0 zlIOrio|@nuu?g)6NN0#Eyd1=G~A@SB+A}_)eD3y&}|Q11v_R9H->_Je@`LpCI-q zM4?sQriGMgW7(7XHqBo5EYGc%5RM|jY{M)V1yWSE?+>!XU+rl;2BMtlt#Dz_3q9~i z>EH%}f=Bj-|_-pTE{Y`~TMI>`6@(Sa&?NX9KJoWiR;4km5Tb2ns z53ZNreh5OyaF%-r8q6%DdVC*dqJrBHQJ z&@yBFPUwIAydsF_5g&61ie-!Zknw_Ax+Yj9OG6T@x}-1N)Vr6^ znW`TjANu4oTQ+94|FzEyXSD}q&+WL&^{B>@<(|tAgx(&W z{3vryeD}qb=K&%9;;p?Bk+rG*#VwU;&#UaF`fc?C^q)cAq5QU)^AKMr0Hs2dpCFjB zXQJlLRqi6k0biuP!RB1Vv4lVk{dcqg1i23~4d&`ow17O|KXolwsQ(nQ7*UQ2NXwy_ z63S7YglW_p@V^#gPhR|b&qr`Q;7tzAeB%4%3&h!kwHs%}-H5$}*>aROrNY|mi9Qkh z^j*T&6BxN9Fbn9~HlPLx?`Yl=6_j}Y4afB2rY0Qxd!$ATy+_9h}_~G#y*t~lm|E7nroBJ4(;a{&F;=Qq-FZD zx;)kXu4|%?gMN#3>(r5sl}vJs;dFI`Wd=I=(`@TQRCP@OF5GA_{*X1M*Ywpyh5;^l z1|>|HnJ{nk3`DuT|Kw2no|9*5ed}e$RI5m>Sw{9Z7PVwYySj<-Rx#iF&cXDa^+OX`S z>Ao_3ifXUTO2?1LOv)TSeMAN5X^ftruV_iJ9I@MA#<2#R?B~yag$3ZIuGZf~+95#T z-oc_=KP^M|diN{s9=8QH;909g%l$P#sPWfrm4~bz)srQ%E|bykmmdX8j-?z7zU3cm zyhP$e$xPU>Qabx$eY4X@dgbt`NZCT{;bKq|o6-^fr{339LPLnXvY})TR9o8_)(d)z zy~M+#mzt4hHW^jo3P8<8Z{_Cf@OssQ+hfU7n8CuK&HPTDto@cV;@GgiSb4Xdy<3GN z-%PeGJ)QTD>LWYP2eBb}8G<;rC&V}y3+gZj>oBjOMJsU%6xb)DH8lPR%IhK#20RyA zGGE)59K<;aHN(=KZ|xhS(bT@>aG^i#uz-=VdUJOyfU$TEl_sy7tx4=;;&AqAZ7H zG&(%`z+t1@Z+Rnxg#jVJ>>K*X=1hvUDazV_{M5f`apu%tGv-%Um-|m`E0Eq5)Udn4 
zCHY_}w4q>D1FwFOBe8k^byK*8+sc=WXDclMI(&-l#R0z{ldeB`{O>|6ODfB5hOpMK zkRYTj4^LQDyZ2uH$Fr5igjMn5EJt+sHjXL+{Recs?H`e47sW#^pvH|l-M@Ve*)+^ zy-1Zo(2Mw|q$RZTjq)$9q{uguk5BpD1A2jI#}c&o7~+859|-mOo)OH`_+i)=pHwQ~ zJE7KKYnT4$)s8FF-$!mx>$>d#3SV2%>LG&pA(A0ENo3h%`xTOXw@@wt*)AE?gw`uQ zP7HqHLd}mv*87+x;~HTkK2oWL(#M1A@d}mrdYVn117O`XL%z<@Z%>GC8I-$z zWtfj|v%N;k`V~H?2n2maNvDO6mjNZ|=yzQg`K&O)9umx||9!=x#bba`{tAdDQW7mSOo#RtX3_=?82GnaY3e+LS{~u^+3lBu$F?N-1`lWRJRdl{r}V{0phw%6~r40n)N$HgG=>@(v(NS3Y$%E+8SY7*qzQvNstk z*)oq`2;wC>a4bCm?i6wyLA%Ig$0j-9mIeA`kLM8Q&3#tXxf-Up?M7wp*z>{Mxo`dM zIN{qgW_-(ZZeXNKSd40?@qD<>38 zfsaQKHAl{e6WrA&jzYqya83TR1Rd)x^iBX0Zd+g^dNA^I+9m-rCgKSkg1K2XdcRmR zftpNWK)AVW##*YvLSln03^Ofs5X|?|Ux>wPAH8*-Egv2k+{J(ZYkcJK4GII~tpLUQ zpcp^!(hy~z#i0Ckg>iEQ!d&JRStRey5K)OB-nP=t@lr=lRIU7&zDSQe!Q*1SQ;szU zt?yzvpQ)4`Nugw^Cr3Z{wRca8#j^pV%4YAdj2&=Bu{XUPdD=PpX=z4@TaE&W9zb~! z7QTT?YhL9O2~a-ACmPt8!NPV*bmAmh+>WwB=0CSAM+=a-lKNJ0xeVZ^uW1th`qzq7 z5e1{Dl$M3=qt+GD5K?>Om)RLEgg4BRZSBAccpVNo)Xp4osl+{Nx+C2PqAl7LEEUi5 z3`$|LUkc*=6(-;5{=*6RnEMac_(tDWK8`*l@KG^VCKj1CmVJ!O~)&^eLhh=m~DUvl+h&IsHd^kf6RBLoJQX#RZ|j(S5N zT|bfg1A*Jq!-9CJC1Sa?ZaH>35B)>Wh3gPKwBC%}nIJv&m@X}`iRZeHwLQ#l+bo?u zC`~JxhLzrO>3Sn{+anvBkaPM0md@1C=S?r4WmF@CMfPyToZ*jsHsmJvBLI&Ylzsm7 zAlw>s@~VtLIN6yd089qiGQ+FwlS5e)i}j8sS@G@!Ea>&xa!Op|*CmIa3er`BfInKr z0;dF5c^nLdG@P19sNQ+xtsWY=xn~0Mwga~D3`p&l7e4J! z?J*lk<9A)ZoU8ehfW)mrDHbtGWl(krXHfQREt=-P>1>jWIr&=Vv|w5aj8Zb9p;N`n zfb53?+=t(|@%<|-+{teN>i)n*?As#tavQsO$wJ@_W<%ffU}5R@Z{c)hIsD?Ewql}6 zhT98hQ-b7+MZ~a45UXlCgeUt;0XJi0&p=^IzsTR#Kp z3VUiFO|2bl5eFdlz7tMoqL<7Dy_XM_J`2m*z#~9! 
z1YR+s-zRIcuygaK(iQd)Mq^((3A8VA3+GIpc#2g0&Dqk|NeB2zc57J7!60P7(Z}PJ z^b%4<*!8Cm0UP?Ab#)|+A=rYi*#dERccjU^m#KJ6H%<2dC2yC8o6q@~sxubzc)9C} zFP#;W+K^}7{Hg{2O&5`WGB-daae#%>^t4*9V*`{0ticdAyi&UN!lJraSmq?R(EXq5 z56e5@F@za~$OQX%kvmlJvxxn#^CFJj)6&w=*eJqkX_>#2\% + time_decompose(count, method = "stl") \%>\% + anomalize(remainder, method = "iqr") + +} +\references{ +\itemize{ +\item The IQR method is used in \href{https://github.com/robjhyndman/forecast/blob/master/R/clean.R}{forecast::tsoutliers()} +\item The GESD method is used in Twitter's \href{https://github.com/twitter/AnomalyDetection}{AnomalyDetection} package and is also available as a function in \href{https://github.com/raunakms/GESD/blob/master/runGESD.R}{@raunakms's GESD method} +} +} +\seealso{ +Anomaly Detection Methods (Powers \code{anomalize}) +\itemize{ +\item \code{\link[=iqr]{iqr()}} +\item \code{\link[=gesd]{gesd()}} +} + +Time Series Anomaly Detection Functions (anomaly detection workflow): +\itemize{ +\item \code{\link[=time_decompose]{time_decompose()}} +\item \code{\link[=time_recompose]{time_recompose()}} +} +} diff --git a/man/anomalize_methods.Rd b/man/anomalize_methods.Rd new file mode 100644 index 0000000..75752ab --- /dev/null +++ b/man/anomalize_methods.Rd @@ -0,0 +1,54 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/anomalize_methods.R +\name{anomalize_methods} +\alias{anomalize_methods} +\alias{iqr} +\alias{gesd} +\title{Methods that power anomalize()} +\usage{ +iqr(x, alpha = 0.05, max_anoms = 0.2, verbose = FALSE) + +gesd(x, alpha = 0.05, max_anoms = 0.2, verbose = FALSE) +} +\arguments{ +\item{x}{A vector of numeric data.} + +\item{alpha}{Controls the width of the "normal" range. +Lower values are more conservative while higher values are less prone +to incorrectly classifying "normal" observations.} + +\item{max_anoms}{The maximum percent of anomalies permitted to be identified.} + +\item{verbose}{A boolean. 
If \code{TRUE}, will return a list containing useful information +about the anomalies. If \code{FALSE}, just returns a vector of "Yes" / "No" values.} +} +\value{ +Returns character vector or list depending on the value of \code{verbose}. +} +\description{ +Methods that power anomalize() +} +\examples{ + +set.seed(100) +x <- rnorm(100) +idx_outliers <- sample(100, size = 5) +x[idx_outliers] <- x[idx_outliers] + 10 + +iqr(x, alpha = 0.05, max_anoms = 0.2) +iqr(x, alpha = 0.05, max_anoms = 0.2, verbose = TRUE) + +gesd(x, alpha = 0.05, max_anoms = 0.2) +gesd(x, alpha = 0.05, max_anoms = 0.2, verbose = TRUE) + + +} +\references{ +\itemize{ +\item The IQR method is used in \href{https://github.com/robjhyndman/forecast/blob/master/R/clean.R}{forecast::tsoutliers()} +\item The GESD method is used in Twitter's \href{https://github.com/twitter/AnomalyDetection}{AnomalyDetection} package and is also available as a function in \href{https://github.com/raunakms/GESD/blob/master/runGESD.R}{@raunakms's GESD method} +} +} +\seealso{ +\code{\link[=anomalize]{anomalize()}} +} diff --git a/man/anomalize_package.Rd b/man/anomalize_package.Rd new file mode 100644 index 0000000..e39c015 --- /dev/null +++ b/man/anomalize_package.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/anomalize-package.R +\docType{package} +\name{anomalize_package} +\alias{anomalize_package} +\alias{anomalize_package-package} +\title{anomalize: Tidy anomaly detection} +\description{ +anomalize: Tidy anomaly detection +} +\details{ +The \code{anomalize} package enables a tidy workflow for detecting anomalies in data. +The main functions are \code{time_decompose()}, \code{anomalize()}, and \code{time_recompose()}. +When combined, it's quite simple to decompose time series, detect anomalies, +and create bands separating the "normal" data from the anomalous data. 
+ +To learn more about \code{anomalize}, start with the vignettes: +\code{browseVignettes(package = "anomalize")} +} diff --git a/man/decompose_methods.Rd b/man/decompose_methods.Rd new file mode 100644 index 0000000..b0ca5f0 --- /dev/null +++ b/man/decompose_methods.Rd @@ -0,0 +1,57 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/time_decompose_methods.R +\name{decompose_methods} +\alias{decompose_methods} +\alias{decompose_twitter} +\alias{decompose_stl} +\alias{decompose_multiplicative} +\title{Methods that power time_decompose()} +\usage{ +decompose_twitter(data, target, frequency = "auto", median_spans = "auto", + message = TRUE) + +decompose_stl(data, target, frequency = "auto", message = TRUE) + +decompose_multiplicative(data, target, frequency = "auto", message = TRUE) +} +\arguments{ +\item{data}{A \code{tibble} or \code{tbl_time} object.} + +\item{target}{A column to apply the function to} + +\item{frequency}{Controls the seasonal adjustment (removal of seasonality). +Input can be either "auto", a time-based definition (e.g. "2 weeks"), +or a numeric number of observations per frequency (e.g. 10). +Refer to \code{\link[=time_frequency]{time_frequency()}}.} + +\item{median_spans}{Applies to the "twitter" method only. +The median spans are used to remove the trend and center the remainder.} + +\item{message}{A boolean. If \code{TRUE}, will output information related to \code{tbl_time} conversions, frequencies, +and median spans (if applicable).} +} +\value{ +A \code{tbl_time} object containing the time series decomposition. 
+} +\description{ +Methods that power time_decompose() +} +\examples{ + +library(dplyr) + +tidyverse_cran_downloads \%>\% + ungroup() \%>\% + filter(package == "tidyquant") \%>\% + decompose_stl(count) + + +} +\references{ +\itemize{ +\item The "twitter" method is used in Twitter's \href{https://github.com/twitter/AnomalyDetection}{AnomalyDetection package} +} +} +\seealso{ +\code{\link[=time_decompose]{time_decompose()}} +} diff --git a/man/figures/README-pressure-1.png b/man/figures/README-pressure-1.png new file mode 100644 index 0000000000000000000000000000000000000000..c092055f2d496ba61e9f3073c00bd2087bd1a77b GIT binary patch literal 3744 zcmds4cTiL57LOQdLJ$E#L6EY5NQqd0NC_$eYfz-C6cLHUm7<80gc714O@g|FA|@(} zC`fS;5DmQ-6G0FOgqnb1Nq|6rfaG1=owqx)Z|3dW_s5%gcg~&jednBe?sw1c{Jw8) zg8eCLN%39cAP`8>*5O_AS4X9@HFH%9}uyJo{3CzC)tm=#HaZ9JLhciPdH(6D0PI!MOnR~l;_c^~o z&cnD(?fPmYO=IBuZF7@y#|F70E{r$Y^0p;+jJh6J%uo;q>a^*{Bh9QRS}gB+Y2YCu zju-Xpk^DZaEcLg!)TKoo?;~TBgk7vf$sV~~>eMw9k&JO#0(o3` zYeQRucf9TU(a5<|_|vFFl}E0B6p1l}>VU@w*eQm+waFGpFF-4W9jG&E{`5)G(8cR% z6-i~`o=vH`P#M6l$R**Kp^&PPn`wt@3YND3&J}l!OeTRDAdljj#P)3sGiF*!tH|5p zPmu7<=S_3NRV-5o--8ov*M`TWa2-m*L;BrJ6>TE2UTAGIgG+LY2?fg4e|`aXgBraa5X-|TYxlAzyqtMZPcGOLQ~-#(Ui=T>ZV|@ zm9{cg+0)$oQ^ALDDtj0g1QA!Pq2QQSF;BUq8c$nkZOyr@3-*m|rrJA3xI1cK0Ubw@ zW(!U=PKKmSJmNhY@*^TXswW#0MSPzqW1&EooMBqVYq2~o(*x9;fiXa{2gY0y5QMk+ zrX-k_s(Rg~ew8z0^jl=1yL`@=#V1_FH8rX=gBHF;Pfj2kU2V0bpT@K^0vCia7{s}5jNv$ zVjEk*yGhIFRWpZtz!_CztFzvD^}r%fL=4k%8kOf2Ew6$EFmG{QBl4+jh6mkZ1kyLA z(j~APus+1O5YJ@IqVo38tOg`x7W3Xj2sLSR;*RdZ{x@>5x@2Eha-h!g=0HQGblI?% zMP3zhybT6yy-v1}A8>0Gp*u*p{7^}5;W1c>&Ac_ z!3HdTKVN1so**T3+jcO4-;fCR-u^|YtZlw>H{VnSd-0y1>)c}Y8p@g!`uzlVdnazp z`XoIXC4n8&N}|^VY-sSVo5tW1B=7iReh+a+6c0JxPROZ}X&r=m_RvsgTQ zv-zxMgIR1^#Wf+x<=SkCP=gcicUY@qrlv_}(uB-r+!-s8%|TCHU-*35RDC+xa4Jxm zPfoFz525D&^T^Y&VTf@yQ$Hf-PreAe>nrlLgctYVduacx#g2vxG zm61QxbQTX!cwD})Sxh90O z5y?8TRA!W7Km6c^ui`Y9$Zd*z`=`G6$>A`Q{3UMxI_6d25$|1^tp&X@N2u@m zTL#OC#9}npt;IjY@oNB>@;J*Ifd*RobZa|-T4qG>zG^`%Tvqhp-15)ZR_x+60iUJ~ 
zsJNu>AK0QO;lo($J~brz=-8o#e%@5D2i=++X6cXak@TTukKc4O5B6RAe52u3*z2O> zkcD}l1;WchA4Y)V*TE|3wYV$sVkOp$(xytI8R#Znp6=Cu+ z%2OeMyY@9&b+5qeIc8V-CSJ4c`n>GAc-*}>@t`&ri{5uet_#mHSv4qncKTded8v9%E|j(Cov1TcxH*L$7>L)l{9=}| zo8HJPx~#zU<*G}b3*EP}6&gwoH2T4-kR@<1=k^Pqi+mQTR1Lf{1nKM4n^SDmWtUn& zBNGIN&?{5b9+B79elF(x7`c0jiP_04k5;z@vkDM})lDI9!IrDm!9?CoE5evy<~@u% zZ?+DjR&aeYXkYy=N#_^qQ2FMA?31M8nR~}(DyHE{GF_8`$Rn}UIR*`be2&H)c%;W& z(&Wi(FCqUL58r+>bT&)tNIndj6xx=$%)AY6MmZ5Ly$-4)_sbj6n-%+EB4NjIm z7-qk>B=UUY7zdJ^hP-IJ`RC!lms4jem89;#8`P;Wq6bZdB)=VxfB$c5CUD(esJc1` Szq;|KX=`=rc+t@dvHt+rJImPs literal 0 HcmV?d00001 diff --git a/man/plot_anomaly_decomposition.Rd b/man/plot_anomaly_decomposition.Rd new file mode 100644 index 0000000..3aae88d --- /dev/null +++ b/man/plot_anomaly_decomposition.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plot_anomaly_decomposition.R +\name{plot_anomaly_decomposition} +\alias{plot_anomaly_decomposition} +\title{Visualize the time series decomposition with anomalies shown} +\usage{ +plot_anomaly_decomposition(data, ncol = 1, color_no = "#2c3e50", + color_yes = "#e31a1c", alpha_dots = 1, alpha_circles = 1, + size_dots = 1.5, size_circles = 4, strip.position = "right") +} +\arguments{ +\item{data}{A \code{tibble} or \code{tbl_time} object.} + +\item{ncol}{Number of columns to display. Set to 1 for single column by default.} + +\item{color_no}{Color for non-anomalous data.} + +\item{color_yes}{Color for anomalous data.} + +\item{alpha_dots}{Controls the transparency of the dots. Reduce when too many dots on the screen.} + +\item{alpha_circles}{Controls the transparency of the circles that identify anomalies.} + +\item{size_dots}{Controls the size of the dots.} + +\item{size_circles}{Controls the size of the circles that identify anomalies.} + +\item{strip.position}{Controls the placement of the strip that identifies the time series decomposition components.} +} +\value{ +Returns a \code{ggplot2} object. 
+} +\description{ +Visualize the time series decomposition with anomalies shown +} +\details{ +The first step in reviewing the anomaly detection process is to evaluate +a single times series to observe how the algorithm is selecting anomalies. +The \code{plot_anomaly_decomposition()} function is used to gain +an understanding as to whether or not the method is detecting anomalies correctly and +whether or not parameters such as decomposition method, anomalize method, +alpha, frequency, and so on should be adjusted. +} +\examples{ + +library(dplyr) +library(ggplot2) + +data(tidyverse_cran_downloads) + +tidyverse_cran_downloads \%>\% + filter(package == "tidyquant") \%>\% + ungroup() \%>\% + time_decompose(count, method = "stl") \%>\% + anomalize(remainder, method = "iqr") \%>\% + plot_anomaly_decomposition() + +} +\seealso{ +\code{\link[=plot_anomalies]{plot_anomalies()}} +} diff --git a/man/prep_tbl_time.Rd b/man/prep_tbl_time.Rd new file mode 100644 index 0000000..666c2e1 --- /dev/null +++ b/man/prep_tbl_time.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep_tbl_time.R +\name{prep_tbl_time} +\alias{prep_tbl_time} +\title{Automatically create tibbletime objects from tibbles} +\usage{ +prep_tbl_time(data, message = FALSE) +} +\arguments{ +\item{data}{A \code{tibble}.} + +\item{message}{A boolean. If \code{TRUE}, returns a message indicating any +conversion details important to know during the conversion to \code{tbl_time} class.} +} +\value{ +Returns a \code{tibbletime} object of class \code{tbl_time}. 
+} +\description{ +Automatically create tibbletime objects from tibbles +} +\details{ +Detects a date or datetime index column and automatically converts the \code{tibble} to a \code{tbl_time} object. +} +\examples{ + +library(dplyr) +library(tibbletime) + +data_tbl <- tibble( + date = seq.Date(from = as.Date("2018-01-01"), by = "day", length.out = 10), + value = rnorm(10) + ) + +prep_tbl_time(data_tbl) + +} +\seealso{ +\code{\link[tibbletime:as_tbl_time]{tibbletime::as_tbl_time()}} +} diff --git a/man/tidyverse_cran_downloads.Rd b/man/tidyverse_cran_downloads.Rd new file mode 100644 index 0000000..4242468 --- /dev/null +++ b/man/tidyverse_cran_downloads.Rd @@ -0,0 +1,39 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/tidyverse_cran_downloads.R +\docType{data} +\name{tidyverse_cran_downloads} +\alias{tidyverse_cran_downloads} +\title{Downloads of various "tidyverse" packages from CRAN} +\format{A grouped \code{tbl_time} object with 6,375 rows and 3 variables: +\describe{ +\item{date}{Date of the daily observation} +\item{count}{Number of downloads that day} +\item{package}{The package corresponding to the daily download number} +}} +\source{ +The package downloads come from CRAN by way of the \code{cranlogs} package. 
+} +\usage{ +tidyverse_cran_downloads +} +\description{ +A dataset containing the daily download counts from 2017-01-01 to 2018-03-01 +for the following tidyverse packages: +\itemize{ +\item \code{tidyr} +\item \code{lubridate} +\item \code{dplyr} +\item \code{broom} +\item \code{tidyquant} +\item \code{tidytext} +\item \code{ggplot2} +\item \code{purrr} +\item \code{stringr} +\item \code{forcats} +\item \code{knitr} +\item \code{readr} +\item \code{tibble} +\item \code{tidyverse} +} +} +\keyword{datasets} diff --git a/man/time_apply.Rd b/man/time_apply.Rd new file mode 100644 index 0000000..a0315c5 --- /dev/null +++ b/man/time_apply.Rd @@ -0,0 +1,61 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/time_apply.R +\name{time_apply} +\alias{time_apply} +\title{Apply a function to a time series by period} +\usage{ +time_apply(data, target, period, .fun, ..., start_date = NULL, side = "end", + clean = FALSE, message = TRUE) +} +\arguments{ +\item{data}{A \code{tibble} with a date or datetime index.} + +\item{target}{A column to apply the function to} + +\item{period}{A time-based definition (e.g. "2 weeks"). +or a numeric number of observations per frequency (e.g. 10). +See \code{\link[tibbletime:collapse_by]{tibbletime::collapse_by()}} for period notation.} + +\item{.fun}{A function to apply (e.g. \code{median})} + +\item{...}{Additional parameters passed to the function, \code{.fun}} + +\item{start_date}{Optional argument used to +specify the start date for the +first group. The default is to start at the closest period boundary +below the minimum date in the supplied index.} + +\item{side}{Whether to return the date at the beginning or the end of +the new period. By default, the "end" of the period. +Use "start" to change to the start of the period.} + +\item{clean}{Whether or not to round the collapsed index up / down to the next +period boundary. 
The decision to round up / down is controlled by the side +argument.} + +\item{message}{A boolean. If \code{message = TRUE}, the frequency used is output +along with the units in the scale of the data.} +} +\value{ +Returns a \code{tibbletime} object of class \code{tbl_time}. +} +\description{ +Apply a function to a time series by period +} +\details{ +Uses a time-based period to apply functions to. This is useful in circumstances where you want to +compare the observation values to aggregated values such as \code{mean()} or \code{median()} +during a set time-based period. The returned output extends the +length of the data frame so the differences can easily be computed. +} +\examples{ + +library(dplyr) + +data(tidyverse_cran_downloads) + +# Basic Usage +tidyverse_cran_downloads \%>\% + time_apply(count, period = "1 week", .fun = mean, na.rm = TRUE) + +} diff --git a/man/time_decompose.Rd b/man/time_decompose.Rd new file mode 100644 index 0000000..031c06f --- /dev/null +++ b/man/time_decompose.Rd @@ -0,0 +1,115 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/time_decompose.R +\name{time_decompose} +\alias{time_decompose} +\title{Decompose a time series in preparation for anomaly detection} +\usage{ +time_decompose(data, target, method = c("stl", "twitter", "multiplicative"), + frequency = "auto", ..., merge = FALSE, message = TRUE) +} +\arguments{ +\item{data}{A \code{tibble} or \code{tbl_time} object.} + +\item{target}{A column to apply the function to} + +\item{method}{The time series decomposition method. One of \code{"stl"}, \code{"twitter"}, or +\code{"multiplicative"}. The STL method uses seasonal decomposition (see \code{\link[=decompose_stl]{decompose_stl()}}). +The Twitter method uses \code{median_spans} to remove the trend (see \code{\link[=decompose_twitter]{decompose_twitter()}}). 
+The Multiplicative method uses multiplicative decomposition (see \code{\link[=decompose_multiplicative]{decompose_multiplicative()}}).} + +\item{frequency}{Controls the seasonal adjustment (removal of seasonality). +Input can be either "auto", a time-based definition (e.g. "2 weeks"), +or a numeric number of observations per frequency (e.g. 10). +Refer to \code{\link[=time_frequency]{time_frequency()}}.} + +\item{...}{Additional parameters passed to the underlying method functions.} + +\item{merge}{A boolean. \code{FALSE} by default. If \code{TRUE}, will append results to the original data.} + +\item{message}{A boolean. If \code{TRUE}, will output information related to \code{tbl_time} conversions, frequencies, +and median spans (if applicable).} +} +\value{ +Returns a \code{tbl_time} object. +} +\description{ +Decompose a time series in preparation for anomaly detection +} +\details{ +The \code{time_decompose()} function generates a time series decomposition on +\code{tbl_time} objects. The function is "tidy" in the sense that it works +on data frames. It is designed to work with time-based data, and as such +must have a column that contains date or datetime information. The function +also works with grouped data. The function implements several methods +of time series decomposition, each with benefits. + +\strong{STL}: + +The STL method (\code{method = "stl"}) implements time series decomposition using +the underlying \code{\link[=decompose_stl]{decompose_stl()}} function. If you are familiar with \code{\link[stats:stl]{stats::stl()}}, +the function is a "tidy" version that is designed to work with \code{tbl_time} objects. +The decomposition separates the "season" and "trend" components from +the "observed" values leaving the "remainder" for anomaly detection. +The main parameter that controls the seasonal adjustment is \code{frequency}. 
+Setting \code{frequency = "auto"} will let the \code{\link[=time_frequency]{time_frequency()}} function +automatically determine the frequency based on the scale of the time series. + +\strong{Twitter}: + +The Twitter method (\code{method = "twitter"}) implements time series decomposition using +the methodology from the Twitter \href{https://github.com/twitter/AnomalyDetection}{AnomalyDetection} package. +The decomposition separates the "seasonal" component and then removes +the median data, which is a different approach than the STL method for removing +the trend. This approach works very well for low-growth + high seasonality data. +STL may be a better approach when trend is a large factor. +The user can control two parameters: \code{frequency} and \code{median_spans}. +The \code{frequency} parameter adjusts the "season" component that is removed +from the "observed" values. The \code{median_spans} parameter adjusts the +number of median spans that are used. The user may supply both \code{frequency} +and \code{median_spans} as time-based durations (e.g. "6 weeks") or numeric values +(e.g. 180) or "auto", which predetermines the frequency and/or median spans +based on the scale of the time series. + +\strong{Multiplicative}: + +The Multiplicative method (\code{method = "multiplicative"}) time series decomposition +uses the \code{\link[stats:decompose]{stats::decompose()}} function with \code{type = "multiplicative"}. This +method is useful in circumstances where variance is non-constant and typically +growing in a multiplicative fashion. The parameters are the same as the STL method. +Alternatively, users may wish to try a transformation (e.g. \code{log()} or \code{sqrt()}) in combination +with the STL method to get near-constant variance. 
+} +\examples{ + +library(dplyr) + +data(tidyverse_cran_downloads) + +# Basic Usage +tidyverse_cran_downloads \%>\% + time_decompose(count, method = "stl") + +# twitter + median_spans +tidyverse_cran_downloads \%>\% + time_decompose(count, + method = "twitter", + frequency = "1 week", + median_spans = "3 months", + merge = TRUE, + message = FALSE) + +} +\seealso{ +Decomposition Methods (Powers \code{time_decompose}) +\itemize{ +\item \code{\link[=decompose_stl]{decompose_stl()}} +\item \code{\link[=decompose_twitter]{decompose_twitter()}} +\item \code{\link[=decompose_multiplicative]{decompose_multiplicative()}} +} + +Time Series Anomaly Detection Functions (anomaly detection workflow): +\itemize{ +\item \code{\link[=anomalize]{anomalize()}} +\item \code{\link[=time_recompose]{time_recompose()}} +} +} diff --git a/man/time_frequency.Rd b/man/time_frequency.Rd new file mode 100644 index 0000000..91ca714 --- /dev/null +++ b/man/time_frequency.Rd @@ -0,0 +1,106 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/time_frequency.R +\name{time_frequency} +\alias{time_frequency} +\alias{time_trend} +\alias{time_scale_template} +\title{Generate a time series frequency from a periodicity} +\usage{ +time_frequency(data, period = "auto", template = time_scale_template(), + message = TRUE) + +time_trend(data, period = "auto", template = time_scale_template(), + message = TRUE) + +time_scale_template() +} +\arguments{ +\item{data}{A \code{tibble} with a date or datetime index.} + +\item{period}{Either "auto", a time-based definition (e.g. "2 weeks"), +or a numeric number of observations per frequency (e.g. 10). +See \code{\link[tibbletime:collapse_by]{tibbletime::collapse_by()}} for period notation.} + +\item{template}{A template that converts the scale of the data to +the expected frequency and trend spans.} + +\item{message}{A boolean. 
If \code{message = TRUE}, the frequency used is output +along with the units in the scale of the data.} +} +\value{ +Returns a scalar numeric value indicating the number of observations in the frequency or trend span. +} +\description{ +Generate a time series frequency from a periodicity +} +\details{ +A frequency is loosely defined as the number of observations that comprise a cycle +in a data set. The trend is loosely defined as the time span that can +be aggregated across to visualize the central tendency of the data. +It's often easiest to think of frequency and trend in terms of the time-based units +that the data is already in. \strong{This is what \code{time_frequency()} and \code{time_trend()} +enable: using time-based periods to define the frequency or trend.} + +\strong{Frequency}: + +As an example, a weekly cycle is often 5-days (for working +days) or 7-days (for calendar days). Rather than specify a frequency of 5 or 7, +the user can specify \code{period = "1 week"}, and +\code{time_frequency()} will detect the scale of the time series and return 5 or 7 +based on the actual data. + +The \code{period} argument has three basic options for returning a frequency. +Options include: +\itemize{ +\item \code{"auto"}: A target frequency is determined using a pre-defined template (see \code{template} below). +\item \code{time-based duration}: (e.g. "1 week" or "2 quarters" per cycle) +\item \code{numeric number of observations}: (e.g. 5 for 5 observations per cycle) +} + +The \code{template} argument is only used when \code{period = "auto"}. The template is a tibble +of three features: \code{time_scale}, \code{frequency}, and \code{trend}. The algorithm will inspect +the scale of the time series and select the best frequency that matches the scale and +number of observations per target frequency. A frequency is then chosen to be the +best match. The predefined template is stored in a function \code{time_scale_template()}. 
+However, the user can come up with his or her own template changing the values +for frequency in the data frame. + +\strong{Trend}: + +As an example, the trend of daily data is often best aggregated by evaluating +the moving average over a quarter or a month span. Rather than specify the number +of days in a quarter or month, the user can specify "1 quarter" or "1 month", +and the \code{time_trend()} function will return the correct number of observations +per trend cycle. In addition, there is an option, \code{period = "auto"}, to +auto-detect an appropriate trend span depending on the data. The \code{template} +is used to define the appropriate trend span. +} +\examples{ + +library(dplyr) + +data(tidyverse_cran_downloads) + +#### FREQUENCY DETECTION #### + +# period = "auto" +tidyverse_cran_downloads \%>\% + filter(package == "tidyquant") \%>\% + ungroup() \%>\% + time_frequency(period = "auto", template = time_scale_template()) + +time_scale_template() + +# period = "1 month" +tidyverse_cran_downloads \%>\% + filter(package == "tidyquant") \%>\% + ungroup() \%>\% + time_frequency(period = "1 month") + +#### TREND DETECTION #### + +tidyverse_cran_downloads \%>\% + filter(package == "tidyquant") \%>\% + ungroup() \%>\% + time_trend(period = "auto") +} diff --git a/man/time_recompose.Rd b/man/time_recompose.Rd new file mode 100644 index 0000000..bb800dc --- /dev/null +++ b/man/time_recompose.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/time_recompose.R +\name{time_recompose} +\alias{time_recompose} +\title{Recompose bands separating anomalies from "normal" observations} +\usage{ +time_recompose(data) +} +\arguments{ +\item{data}{A \code{tibble} or \code{tbl_time} object that has been +processed with \code{time_decompose()} and \code{anomalize()}.} +} +\value{ +Returns a \code{tbl_time} object. 
+} +\description{ +Recompose bands separating anomalies from "normal" observations +} +\details{ +The \code{time_recompose()} function is used to generate bands around the +"normal" levels of observed values. The function uses the remainder_l1 +and remainder_l2 levels produced during the \code{\link[=anomalize]{anomalize()}} step +and the season and trend/median_spans values from the \code{\link[=time_decompose]{time_decompose()}} +step to reconstruct bands around the normal values. + +The following key names are required: observed:remainder from the +\code{time_decompose()} step and remainder_l1 and remainder_l2 from the +\code{anomalize()} step. +} +\examples{ + +library(dplyr) + +data(tidyverse_cran_downloads) + +# Basic Usage +tidyverse_cran_downloads \%>\% + time_decompose(count, method = "stl") \%>\% + anomalize(remainder, method = "iqr") \%>\% + time_recompose() + + +} +\seealso{ +Time Series Anomaly Detection Functions (anomaly detection workflow): +\itemize{ +\item \code{\link[=time_decompose]{time_decompose()}} +\item \code{\link[=anomalize]{anomalize()}} +} +} diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..5a18383 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(anomalize) + +test_check("anomalize") diff --git a/tests/testthat/test-anomalize.R b/tests/testthat/test-anomalize.R new file mode 100644 index 0000000..13a3b36 --- /dev/null +++ b/tests/testthat/test-anomalize.R @@ -0,0 +1,61 @@ +context("test-anomalize.R") + +# Setup + +library(tidyverse) + +tq_dloads <- tidyverse_cran_downloads %>% + dplyr::ungroup() %>% + dplyr::filter(package == "tidyquant") + +# Tests + +test_that("iqr_tbl_df works", { + + iqr_tbl_df <- tq_dloads %>% + anomalize(count, method = "iqr") + + expect_equal(nrow(iqr_tbl_df), 425) + expect_equal(ncol(iqr_tbl_df), 6) + +}) + +test_that("gesd_tbl_df works", { + + gesd_tbl_df <- tq_dloads %>% + anomalize(count, method = "gesd") + + expect_equal(nrow(gesd_tbl_df), 
425) + expect_equal(ncol(gesd_tbl_df), 6) + +}) + +test_that("iqr_grouped_df works", { + + iqr_grouped_df <- tidyverse_cran_downloads %>% + dplyr::ungroup() %>% + dplyr::filter(package %in% c("tidyquant", "tidytext")) %>% + dplyr::group_by(package) %>% + anomalize(count, method = "iqr") + + expect_equal(nrow(iqr_grouped_df), 850) + expect_equal(ncol(iqr_grouped_df), 6) + +}) + +test_that("gesd_grouped_df works", { + + gesd_grouped_df <- tidyverse_cran_downloads %>% + dplyr::ungroup() %>% + dplyr::filter(package %in% c("tidyquant", "tidytext")) %>% + dplyr::group_by(package) %>% + anomalize(count, method = "gesd") + + expect_equal(nrow(gesd_grouped_df), 850) + expect_equal(ncol(gesd_grouped_df), 6) + +}) + + + + diff --git a/tests/testthat/test-time_frequency.R b/tests/testthat/test-time_frequency.R new file mode 100644 index 0000000..b5ba262 --- /dev/null +++ b/tests/testthat/test-time_frequency.R @@ -0,0 +1,77 @@ +context("test-time_frequency.R") + +# Setup + +tq_dloads <- tidyverse_cran_downloads %>% + dplyr::ungroup() %>% + dplyr::filter(package == "tidyquant") + +tq_dloads_small <- tq_dloads %>% + dplyr::slice(1:60) + +# Tests + +test_that("time_frequency works: period = 'auto'", { + + freq <- tq_dloads %>% + time_frequency() + + expect_equal(freq, 7) + +}) + +test_that("time_frequency works: period = '2 weeks'", { + + freq <- tq_dloads %>% + time_frequency(period = "2 weeks") + + expect_equal(freq, 14) + +}) + +test_that("time_frequency works: period = 5", { + + freq <- tq_dloads %>% + time_frequency(period = 5) + + expect_equal(freq, 5) + +}) + + + +test_that("time_trend works: period = 'auto'", { + + trend <- tq_dloads %>% + time_trend() + + expect_equal(trend, 91) + +}) + +test_that("time_trend works: period = '90 days'", { + + trend <- tq_dloads %>% + time_trend(period = "90 days") + + expect_equal(trend, 90) + +}) + +test_that("time_trend works: period = 90", { + + trend <- tq_dloads %>% + time_trend(period = 90) + + expect_equal(trend, 90) + +}) + 
+test_that("time_trend works with small data: period = 'auto'", { + + trend <- tq_dloads_small %>% + time_trend() + + expect_equal(trend, 28) + +})