-
Notifications
You must be signed in to change notification settings - Fork 61
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
198 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
Package: anomalize | ||
Type: Package | ||
Title: Tidy Anomaly Detection | ||
Version: 0.1.2 | ||
Version: 0.2.0 | ||
Authors@R: c( | ||
person("Matt", "Dancho", email = "[email protected]", role = c("aut", "cre")), | ||
person("Davis", "Vaughan", email = "[email protected]", role = c("aut")) | ||
|
@@ -44,7 +44,8 @@ Roxygen: list(markdown = TRUE) | |
Suggests: | ||
tidyverse, | ||
tidyquant, | ||
testthat, | ||
stringr, | ||
testthat (>= 2.1.0), | ||
covr, | ||
knitr, | ||
rmarkdown, | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
#' Clean anomalies from anomalized data | ||
#' | ||
#' @param data A `tibble` or `tbl_time` object. | ||
#' | ||
#' @return Returns a `tibble` / `tbl_time` object with a new column "observed_cleaned". | ||
#' | ||
#' @details | ||
#' The `clean_anomalies()` function is used to replace outliers with the seasonal and trend component. | ||
#' This is often desirable when forecasting with noisy time series data to improve trend detection. | ||
#' | ||
#' To clean anomalies, the input data must be detrended with `time_decompose()` and anomalized with `anomalize()`. | ||
#' The data can also be recomposed with `time_recompose()`. | ||
#' | ||
#' @seealso | ||
#' Time Series Anomaly Detection Functions (anomaly detection workflow): | ||
#' - [time_decompose()] | ||
#' - [anomalize()] | ||
#' - [time_recompose()] | ||
#' | ||
#' @examples | ||
#' | ||
#' library(dplyr) | ||
#' | ||
#' # Needed to pass CRAN check / This is loaded by default | ||
#' set_time_scale_template(time_scale_template()) | ||
#' | ||
#' data(tidyverse_cran_downloads) | ||
#' | ||
#' tidyverse_cran_downloads %>% | ||
#' time_decompose(count, method = "stl") %>% | ||
#' anomalize(remainder, method = "iqr") %>% | ||
#' clean_anomalies() | ||
#' | ||
#' | ||
#' @export | ||
clean_anomalies <- function(data) {
  # S3 generic: the method is selected from the class of `data`
  # (the first argument), which is UseMethod()'s default dispatch object.
  UseMethod("clean_anomalies")
}
|
||
#' @export
clean_anomalies.default <- function(data) {
  # Fallback method: reached when no class-specific method matches `data`.
  # Always fails with an error that names the supported classes.
  unsupported_msg <- "Error clean_anomalies(): Object is not of class `tbl_df` or `tbl_time`."
  stop(unsupported_msg, call. = FALSE)
}
|
||
#' @export
clean_anomalies.tbl_df <- function(data) {

  # Fail early if the required columns are missing
  check_clean_anomalies_input(data)

  # Name of the column that is added to `season` to rebuild anomalous points
  baseline_col <- get_method_col(data)

  # Anything other than "trend" uses the `median_spans` column
  if (baseline_col != "trend") {
    return(
      dplyr::mutate(
        data,
        observed_cleaned = ifelse(anomaly == "Yes", season + median_spans, observed)
      )
    )
  }

  # Rows flagged "Yes" are replaced by season + trend; all others keep
  # their observed value
  dplyr::mutate(
    data,
    observed_cleaned = ifelse(anomaly == "Yes", season + trend, observed)
  )

}
|
||
# Validate that `data` carries every column clean_anomalies() relies on.
# Stops with a descriptive error on the first failed check; returns NULL
# invisibly when all checks pass.
check_clean_anomalies_input <- function(data) {

  cols <- names(data)

  # Check 1 - at least one method column (STL or Twitter) must be present
  if (!any(c("trend", "median_spans") %in% cols)) {
    stop("Error clean_anomalies(): Output does not contain a column named trend or median_spans. This may occur if the output was not detrended with time_decompose().", call. = FALSE)
  }

  # Check 2 - columns expected from time_decompose()
  if (!all(c("observed", "season") %in% cols)) {
    stop("Error clean_anomalies(): Output does not contain columns named observed and season. This may occur if the output was not detrended with time_decompose().", call. = FALSE)
  }

  # Check 3 - column expected from anomalize()
  if (!("anomaly" %in% cols)) {
    stop("Error clean_anomalies(): Output does not contain columns named anomaly. This may occur if the output was not anomalized with anomalize().", call. = FALSE)
  }

  invisible(NULL)

}
|
||
|
||
# Return the name of the method column found in `data`.
#
# Looks for "trend" and "median_spans" among the column names (used to detect
# the method - STL or Twitter) and returns a character vector of length 0 or 1:
#   - character(0) when neither column is present;
#   - the single matching name when exactly one is present;
#   - "trend" when both are present.
#
# Returning at most one name matters because the result is used as an `if ()`
# condition by the caller; a length-2 condition is an error in R >= 4.2.
get_method_col <- function(data) {

  # Candidate columns, in priority order
  method_names <- c("trend", "median_spans")

  found <- method_names[method_names %in% names(data)]

  # Keep only the highest-priority match when both columns exist
  if (length(found) > 1L) found[1L] else found

}
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,5 +29,6 @@ globalVariables(c( | |
"key", | ||
"median_spans", | ||
"recomposed_l1", | ||
"recomposed_l2" | ||
"recomposed_l2", | ||
"data_names" | ||
)) |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
|
||
|
||
# Shared fixtures: the example downloads data, decomposed with each method
# and then anomalized on the remainder with the IQR method.
data_stl <- anomalize(
  time_decompose(tidyverse_cran_downloads, count, method = "stl"),
  remainder,
  method = "iqr"
)

data_twitter <- anomalize(
  time_decompose(tidyverse_cran_downloads, count, method = "twitter"),
  remainder,
  method = "iqr"
)
|
||
|
||
test_that("bad data returns error", {

  # A bare numeric is not a tibble, so clean_anomalies() is expected to fail
  bad_input <- 2
  expect_error(clean_anomalies(bad_input))

})
|
||
test_that("Clean Anomalies from STL Method", {

  # Exact column-name check: avoids relying on stringr being attached and
  # avoids partial regex matches against other column names.
  cleaned_names <- names(clean_anomalies(data_stl))
  expect_true("observed_cleaned" %in% cleaned_names)

})
|
||
test_that("Clean Anomalies from Twitter Method", {

  # Exact column-name check: avoids relying on stringr being attached and
  # avoids partial regex matches against other column names.
  cleaned_names <- names(clean_anomalies(data_twitter))
  expect_true("observed_cleaned" %in% cleaned_names)

})