diff --git a/NAMESPACE b/NAMESPACE index 6dcb382..b918485 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -8,20 +8,20 @@ export(event_terms) export(occurrence_terms) export(random_id) export(sequential_id) +export(set_abundance) +export(set_collection) +export(set_coordinates) +export(set_coordinates_sf) +export(set_datetime) +export(set_events) +export(set_locality) +export(set_measurements) +export(set_observer) +export(set_occurrences) +export(set_scientific_name) +export(set_taxonomy) export(suggest_workflow) -export(use_abundance) -export(use_collection) -export(use_coordinates) -export(use_datetime) -export(use_events) export(use_individual_traits) -export(use_locality) -export(use_measurements) -export(use_observer) -export(use_occurrences) -export(use_scientific_name) -export(use_sf) -export(use_taxonomy) importFrom(cli,ansi_align) importFrom(cli,ansi_collapse) importFrom(cli,ansi_nchar) diff --git a/R/check.R b/R/check.R index 1b3e6b4..9a062d1 100644 --- a/R/check.R +++ b/R/check.R @@ -49,7 +49,7 @@ wait <- function(seconds = 1) { #' #' @description #' Informs users which columns will be checked by `check_` functions. This includes -#' columns that have been specified in a `use_` function by the user, or columns +#' columns that have been specified in a `set_` function by the user, or columns #' that exist in the user dataframe that already match Darwin Core terms. #' #' @importFrom cli cli_progress_step diff --git a/R/corella-package.R b/R/corella-package.R index 045a8ce..7f1ec93 100644 --- a/R/corella-package.R +++ b/R/corella-package.R @@ -27,19 +27,19 @@ #' The following functions add single DwC fields, or collections of related #' fields, to an existing `tibble`. 
#' -#' * [use_events()] basic information on observation events (`eventID`, `parentEventID`, `eventType`) -#' * [use_occurrences()] basic information on observations (`occurrenceID`, `basisOfRecord`) -#' * [use_scientific_name()] record the highest level of taxonomic specificity in the dataset (`scientificName`, `scientificNameAuthorship`, `taxonRank`) -#' * [use_taxonomy()] to specify higher taxonomic columns (`kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species`, `specificEpithet`, `vernacularName`) -#' * [use_coordinates()] for spatial data (`decimalLatitude`, `decimalLongitude`, `geodeticDatum`, `coordinateUncertaintyInMeters`, `coordinatePrecision`) -#' * [use_sf()] for spatial data in `sf` format -#' * [use_locality()] for spatial descriptions (`continent`, `country`, `countryCode`, `stateProvince`, `locality`) -#' * [use_datetime()] for temporal data (`eventDate`, `year`, `month`, `day`, `eventTime`) -#' * [use_collection()] to give museum- or collection- specific information (`datasetID`, `datasetName`, `catalogNumber`) -#' * [use_observer()] to specify who made the observation (`recordedByID`, `recordedBy`) -#' * [use_abundance()] to state how many animals were seen during the observation (`individualCount`, `organismQuantity`, `organismQuantityType`) -#' * [use_individual_traits()] attributes of individuals measured (`individualID`, `lifeStage`, `sex`, `vitality`, `reproductiveCondition`) -#' * [use_measurements()] for 'Measurement or Fact' data (optional) +#' * [set_events()] basic information on observation events (`eventID`, `parentEventID`, `eventType`) +#' * [set_occurrences()] basic information on observations (`occurrenceID`, `basisOfRecord`) +#' * [set_scientific_name()] record the highest level of taxonomic specificity in the dataset (`scientificName`, `scientificNameAuthorship`, `taxonRank`) +#' * [set_taxonomy()] to specify higher taxonomic columns (`kingdom`, `phylum`, `class`, `order`, `family`, `genus`, `species`, 
`specificEpithet`, `vernacularName`) +#' * [set_coordinates()] for spatial data (`decimalLatitude`, `decimalLongitude`, `geodeticDatum`, `coordinateUncertaintyInMeters`, `coordinatePrecision`) +#' * [set_coordinates_sf()] for spatial data in `sf` format +#' * [set_locality()] for spatial descriptions (`continent`, `country`, `countryCode`, `stateProvince`, `locality`) +#' * [set_datetime()] for temporal data (`eventDate`, `year`, `month`, `day`, `eventTime`) +#' * [set_collection()] to give museum- or collection- specific information (`datasetID`, `datasetName`, `catalogNumber`) +#' * [set_observer()] to specify who made the observation (`recordedByID`, `recordedBy`) +#' * [set_abundance()] to state how many animals were seen during the observation (`individualCount`, `organismQuantity`, `organismQuantityType`) +#' * [set_individual_traits()] attributes of individuals measured (`individualID`, `lifeStage`, `sex`, `vitality`, `reproductiveCondition`) +#' * [set_measurements()] for 'Measurement or Fact' data (optional) #' #' **Checking data for Darwin Core compliance** #' diff --git a/R/use_abundance.R b/R/set_abundance.R similarity index 93% rename from R/use_abundance.R rename to R/set_abundance.R index 19f859d..2044649 100644 --- a/R/use_abundance.R +++ b/R/set_abundance.R @@ -1,10 +1,15 @@ -#' Add abundance fields to a `tibble` +#' Set, create or modify columns with abundance information using Darwin Core #' +#' @description #' In some field methods, it is common to observe more than one individual #' per observation; to observe abundance using non-integer measures such as #' mass or area; or to seek individuals but not find them (abundance of zero). -#' As these approaches use different DwC terms, this function assists in +#' As these approaches use different Darwin Core terms, this function assists in #' specifying abundances in a flexible way. 
+#' +#' In practice this is no different from using `mutate()`, but gives some +#' informative errors, and serves as a useful lookup for how columns with +#' abundance information are represented in the Darwin Core Standard. #' @param .df a `data.frame` or `tibble` that the column should be appended to. #' @param individualCount The number of individuals present #' @param organismQuantity A number or enumeration value for the quantity of @@ -33,12 +38,12 @@ #' ) #' #' df |> -#' use_abundance(individualCount = n_obs) +#' set_abundance(individualCount = n_obs) #' #' @importFrom dplyr mutate #' @importFrom rlang abort #' @export -use_abundance <- function(.df, +set_abundance <- function(.df, individualCount = NULL, organismQuantity = NULL, organismQuantityType = NULL, diff --git a/R/use_collection.R b/R/set_collection.R similarity index 96% rename from R/use_collection.R rename to R/set_collection.R index 338a71e..dc3c736 100644 --- a/R/use_collection.R +++ b/R/set_collection.R @@ -1,4 +1,4 @@ -#' Add museum- or collection-specific information to a `tibble` +#' Set, create or modify columns with museum- or collection-specific information using Darwin Core #' #' @description #' Format fields that specify the collection or catalog number of a @@ -37,7 +37,7 @@ #' @importFrom dplyr mutate #' @importFrom rlang abort #' @export -use_collection <- function( +set_collection <- function( .df, datasetID = NULL, datasetName = NULL, diff --git a/R/use_coordinates.R b/R/set_coordinates.R similarity index 98% rename from R/use_coordinates.R rename to R/set_coordinates.R index 86549ec..41b8800 100644 --- a/R/use_coordinates.R +++ b/R/set_coordinates.R @@ -1,4 +1,4 @@ -#' Add spatial fields to a `tibble` +#' Set, create or modify columns with spatial information using Darwin Core #' #' This function helps format standard location fields to a `tibble`. 
#' @@ -31,7 +31,7 @@ #' @importFrom dplyr mutate #' @importFrom rlang abort #' @export -use_coordinates <- function( +set_coordinates <- function( .df, decimalLatitude = NULL, decimalLongitude = NULL, diff --git a/R/use_sf.R b/R/set_coordinates_sf.R similarity index 95% rename from R/use_sf.R rename to R/set_coordinates_sf.R index 0631a1d..8471928 100644 --- a/R/use_sf.R +++ b/R/set_coordinates_sf.R @@ -1,7 +1,8 @@ -#' Add spatial fields to a `tibble` using `sf` `POINT` coordinates +#' Set, create or modify columns with `sf` spatial information using Darwin Core #' +#' @description #' This function helps format standard location fields to a `tibble`. It differs -#' from `use_coordinates()` by accepting `sf` geometry columns of class `POINT` +#' from `set_coordinates()` by accepting `sf` geometry columns of class `POINT` #' as coordinates (rather than `numeric` lat/lon coordinates). The advantage #' of using an `sf` geometry is that the Coordinate Reference System (CRS) is #' automatically formatted into the required `geodeticDatum` column. @@ -13,8 +14,8 @@ #' `"unused"`; i.e. only keeps Darwin Core fields, and not those fields used to #' generate them. #' @returns A tibble with the requested fields added. 
-#' @seealso [use_coordinates()] for providing numeric coordinates, -#' [use_locality()] for providing text-based spatial information +#' @seealso [set_coordinates()] for providing numeric coordinates, +#' [set_locality()] for providing text-based spatial information #' @importFrom rlang abort #' @importFrom rlang get_expr #' @importFrom sf st_drop_geometry @@ -22,7 +23,7 @@ #' @importFrom cli cli_warn #' @importFrom cli cli_abort #' @export -use_sf <- function( +set_coordinates_sf <- function( .df, geometry = NULL, .keep = "unused" diff --git a/R/use_datetime.R b/R/set_datetime.R similarity index 97% rename from R/use_datetime.R rename to R/set_datetime.R index d0f1ef7..3321f27 100644 --- a/R/use_datetime.R +++ b/R/set_datetime.R @@ -1,6 +1,7 @@ -#' Add date and time fields to a `tibble` +#' Set, create or modify columns with date and time information using Darwin Core #' -#' This function helps format standard date/time fields to a `tibble`. +#' @description +#' This function helps format standard date/time columns in a `tibble`. #' #' In practice this is no different from using `mutate()`, but gives some #' informative errors, and serves as a useful lookup for how spatial fields are @@ -27,7 +28,7 @@ #' @importFrom dplyr mutate #' @importFrom rlang abort #' @export -use_datetime <- function( +set_datetime <- function( .df, eventDate = NULL, year = NULL, diff --git a/R/use_events.R b/R/set_events.R similarity index 97% rename from R/use_events.R rename to R/set_events.R index 781796d..b5e41fd 100644 --- a/R/use_events.R +++ b/R/set_events.R @@ -1,7 +1,7 @@ -#' Add a sampling, collection or image-capture Event to a `tibble` +#' Set, create or modify columns with Event information using Darwin Core #' #' @description -#' Format fields that contain information about an +#' Format columns that contain information about an #' [Event](https://dwc.tdwg.org/list/#dwc_Event). An "Event" in Darwin Core #' standard refers to an action that occurs at a place and time. 
Examples #' include: @@ -62,7 +62,7 @@ #' @importFrom purrr map #' @importFrom purrr keep #' @export -use_events <- function( +set_events <- function( .df, eventID = NULL, eventType = NULL, diff --git a/R/use_individual_traits.R b/R/set_individual_traits.R similarity index 98% rename from R/use_individual_traits.R rename to R/set_individual_traits.R index 5a27c0e..f1371b7 100644 --- a/R/use_individual_traits.R +++ b/R/set_individual_traits.R @@ -1,4 +1,4 @@ -#' Add information of individual organisms to a `tibble` +#' Set, create or modify columns with information of individual organisms using Darwin Core #' #' @description #' Format fields that contain measurements or attributes of individual diff --git a/R/use_locality.R b/R/set_locality.R similarity index 93% rename from R/use_locality.R rename to R/set_locality.R index e6a5593..b1cb00b 100644 --- a/R/use_locality.R +++ b/R/set_locality.R @@ -1,7 +1,13 @@ -#' Add `locality` data to a `tibble` +#' Set, create or modify columns with locality information using Darwin Core #' +#' @description #' Locality information refers to a description of a place, rather than a -#' spatial coordinate. +#' spatial coordinate. This function helps specify or modify columns +#' with locality information in a flexible way. +#' +#' In practice this is no different from using `mutate()`, but gives some +#' informative errors, and serves as a useful lookup for fields in +#' the Darwin Core Standard. #' @param .df a `data.frame` or `tibble` that the column should be appended to. #' @param continent (string) Valid continent. See details. #' @param country Valid country name. See `country_codes`. 
@@ -30,7 +36,7 @@ #' @importFrom purrr map #' @importFrom purrr pluck #' @export -use_locality <- function(.df, +set_locality <- function(.df, continent = NULL, country = NULL, countryCode = NULL, diff --git a/R/use_measurements.R b/R/set_measurements.R similarity index 73% rename from R/use_measurements.R rename to R/set_measurements.R index d41b4fb..46fa2c6 100644 --- a/R/use_measurements.R +++ b/R/set_measurements.R @@ -1,8 +1,21 @@ -#' Add measurement data for an individual or event to a `tibble` +#' Convert columns with measurement data for an individual or event to Darwin Core standard #' #' @description +#' `r lifecycle::badge("experimental")` #' This function is a work in progress, and should be used with caution. #' +#' In raw collected data, many types of information can be captured in one +#' column. For example, the column name `LMA_g.m2` contains the measured trait +#' (Leaf Mass per Area, LMA) and the unit of measurement (grams per meter +#' squared, g/m2), and recorded in that column are the values themselves. In +#' Darwin Core, these different types of information must be separated into +#' multiple columns so that they can be ingested correctly and aggregated with +#' other sources of data accurately. +#' +#' This function converts information preserved in a single measurement column +#' into multiple columns (`measurementID`, `measurementUnit`, and +#' `measurementType`) as per Darwin Core standard. +#' #' @param .df a `data.frame` or `tibble` that the column should be appended to. #' @param cols vector of column names to be included as 'measurements'. Unquoted. #' @param unit vector of strings giving units for each variable @@ -13,7 +26,13 @@ #' generate them. #' @returns A tibble with the requested fields added. #' @details -#' add examples +#' Columns are nested in a +#' single column `measurementOrFact` that contains Darwin Core standard +#' measurement fields. 
By nesting three measurement columns within the +#' `measurementOrFact` column, despite measurement columns converting to long format +#' (one row per measurement, per occurrence), +#' data will remain organised by occurrences (one row per occurrence). Data +#' can be unnested into long format using `tidyr::unnest()` #' #' @importFrom dplyr mutate #' @importFrom rlang abort @@ -23,7 +42,7 @@ #' @importFrom dplyr row_number #' @importFrom purrr map_dfr #' @export -use_measurements <- function( +set_measurements <- function( .df, cols = NULL, unit = NULL, diff --git a/R/use_observer.R b/R/set_observer.R similarity index 96% rename from R/use_observer.R rename to R/set_observer.R index 6a13933..c601940 100644 --- a/R/use_observer.R +++ b/R/set_observer.R @@ -1,5 +1,6 @@ -#' Add who made an observation to a `tibble` +#' Set, create or modify columns with information of who made an observation (using Darwin Core) #' +#' @description #' Format fields that contain information about who made a specific observation #' of an organism. 
#' @@ -34,7 +35,7 @@ #' @importFrom purrr map #' @importFrom purrr keep #' @export -use_observer <- function( +set_observer <- function( .df, recordedBy = NULL, recordedByID = NULL, diff --git a/R/use_occurrences.R b/R/set_occurrences.R similarity index 98% rename from R/use_occurrences.R rename to R/set_occurrences.R index 9e3994b..6aaaeb6 100644 --- a/R/use_occurrences.R +++ b/R/set_occurrences.R @@ -1,4 +1,4 @@ -#' Add occurrence-specific information to a `tibble` +#' Set, create or modify columns with occurrence-specific information using Darwin Core #' #' @description #' Format fields uniquely identify each occurrence record and specify the type @@ -45,7 +45,7 @@ #' @importFrom dplyr mutate #' @importFrom rlang abort #' @export -use_occurrences <- function( +set_occurrences <- function( .df, occurrenceID = NULL, basisOfRecord = NULL, diff --git a/R/use_scientific_name.R b/R/set_scientific_name.R similarity index 97% rename from R/use_scientific_name.R rename to R/set_scientific_name.R index befb55f..a581e27 100644 --- a/R/use_scientific_name.R +++ b/R/set_scientific_name.R @@ -1,4 +1,4 @@ -#' Add scientific name and authorship to a `tibble` +#' Set, create or modify columns with scientific name and authorship information using Darwin Core #' #' Format the field `scientificName`, the lowest identified taxonomic name of an #' occurrence, along with the rank and authorship of the provided name. 
@@ -36,7 +36,7 @@ #' @importFrom dplyr mutate #' @importFrom rlang abort #' @export -use_scientific_name <- function( +set_scientific_name <- function( .df, scientificName = NULL, scientificNameAuthorship = NULL, diff --git a/R/use_taxonomy.R b/R/set_taxonomy.R similarity index 98% rename from R/use_taxonomy.R rename to R/set_taxonomy.R index 1305210..eb888d7 100644 --- a/R/use_taxonomy.R +++ b/R/set_taxonomy.R @@ -1,5 +1,6 @@ -#' Add taxonomic information to a `tibble` +#' Set, create or modify columns with taxonomic information using Darwin Core #' +#' @description #' Format fields that contain taxonomic name information from kingdom to #' species, as well as the common/vernacular name, to a `tibble`. #' @@ -32,7 +33,7 @@ #' @importFrom dplyr mutate #' @importFrom rlang abort #' @export -use_taxonomy <- function( +set_taxonomy <- function( .df, kingdom = NULL, phylum = NULL, diff --git a/R/suggest_workflow.R b/R/suggest_workflow.R index d8132e8..7666240 100644 --- a/R/suggest_workflow.R +++ b/R/suggest_workflow.R @@ -114,13 +114,13 @@ check_contains_terms <- function(.df, suggested_functions <- main_functions |> filter(!.data$dwc_term %in% matched_values) |> - distinct(.data$use_function) |> - pull("use_function") + distinct(.data$set_function) |> + pull("set_function") optional_functions <- other_functions |> filter(.data$dwc_term %in% matched_values) |> - distinct(.data$use_function) |> - pull("use_function") + distinct(.data$set_function) |> + pull("set_function") # this wraps text (which might not be optimal for this table) # withr::with_options( @@ -210,10 +210,10 @@ suggest_functions_message <- function(suggested_functions, is_sf, .envir = parent.frame()) { - # if POINT sf class, suggest `use_coordinates_sf()` + # if POINT sf class, suggest `set_coordinates_sf()` if(isTRUE(is_sf)) { # add - suggested_functions <- c("use_sf()", suggested_functions) + suggested_functions <- c("set_coordinates_sf()", suggested_functions) } # add pipe when there are multiple 
suggested functions @@ -274,7 +274,7 @@ additional_functions_message <- function(optional_functions, cli_text(paste0("Based on your matched terms, you can also add to your pipe: ", "\n")) cli_bullets(c("*" = optional_functions_message)) } - cli_bullets(c("i" = col_grey("See all `use_` functions at http://corella.ala.org.au/reference/index.html#add-rename-or-edit-columns-to-match-darwin-core-terms"))) + cli_bullets(c("i" = col_grey("See all `set_` functions at http://corella.ala.org.au/reference/index.html#add-rename-or-edit-columns-to-match-darwin-core-terms"))) } @@ -318,64 +318,64 @@ full_workflow_message <- function(matched_values, } -#' Table of Darwin Core terms and their corresponding `use_` function +#' Table of Darwin Core terms and their corresponding `set_` function #' #' @importFrom tibble lst #' @noRd #' @keywords Internal fn_to_term_table <- function() { main <- tibble::tribble( - ~"use_function", ~"dwc_term", - "use_occurrences()", "basisOfRecord", - "use_occurrences()", "occurrenceID", - "use_scientific_name()", "scientificName", - "use_coordinates()", "decimalLatitude", - "use_coordinates()", "decimalLongitude", - "use_coordinates()", "geodeticDatum", - "use_coordinates()", "coordinateUncertaintyInMeters", - "use_datetime()", "eventDate" + ~"set_function", ~"dwc_term", + "set_occurrences()", "basisOfRecord", + "set_occurrences()", "occurrenceID", + "set_scientific_name()", "scientificName", + "set_coordinates()", "decimalLatitude", + "set_coordinates()", "decimalLongitude", + "set_coordinates()", "geodeticDatum", + "set_coordinates()", "coordinateUncertaintyInMeters", + "set_datetime()", "eventDate" ) optional <- tibble::tribble( - ~"use_function", ~"dwc_term", - "use_locality()", "continent", - "use_locality()", "country", - "use_locality()", "countryCode", - "use_locality()", "stateProvince", - "use_locality()", "locality", - "use_taxonomy()", "kingdom", - "use_taxonomy()", "phylum", - "use_taxonomy()", "class", - "use_taxonomy()", "order", - 
"use_taxonomy()", "family", - "use_taxonomy()", "genus", - # "use_taxonomy()", "species", - "use_taxonomy()", "specificEpithet", - "use_taxonomy()", "vernacularName", - "use_abundance()", "individualCount", - "use_abundance()", "organismQuantity", - "use_abundance()", "organismQuantityType", - "use_abundance()", "organismQuantity", - "use_collection()", "datasetID", - "use_collection()", "datasetName", - "use_collection()", "catalogNumber", - "use_coordinates()", "coordinatePrecision", - "use_scientific_name()", "taxonRank", - "use_scientific_name()", "scientificNameAuthorship", - "use_datetime()", "year", - "use_datetime()", "month", - "use_datetime()", "day", - "use_datetime()", "eventTime", - "use_individual_traits()", "individualID", - "use_individual_traits()", "lifeStage", - "use_individual_traits()", "sex", - "use_individual_traits()", "vitality", - "use_individual_traits()", "reproductiveCondition", - "use_observer()", "recordedBy", - "use_observer()", "recordedByID", - "use_events()", "eventID", - "use_events()", "eventType", - "use_events()", "parentEventID" + ~"set_function", ~"dwc_term", + "set_locality()", "continent", + "set_locality()", "country", + "set_locality()", "countryCode", + "set_locality()", "stateProvince", + "set_locality()", "locality", + "set_taxonomy()", "kingdom", + "set_taxonomy()", "phylum", + "set_taxonomy()", "class", + "set_taxonomy()", "order", + "set_taxonomy()", "family", + "set_taxonomy()", "genus", + # "set_taxonomy()", "species", + "set_taxonomy()", "specificEpithet", + "set_taxonomy()", "vernacularName", + "set_abundance()", "individualCount", + "set_abundance()", "organismQuantity", + "set_abundance()", "organismQuantityType", + "set_abundance()", "organismQuantity", + "set_collection()", "datasetID", + "set_collection()", "datasetName", + "set_collection()", "catalogNumber", + "set_coordinates()", "coordinatePrecision", + "set_scientific_name()", "taxonRank", + "set_scientific_name()", "scientificNameAuthorship", + 
"set_datetime()", "year", + "set_datetime()", "month", + "set_datetime()", "day", + "set_datetime()", "eventTime", + "set_individual_traits()", "individualID", + "set_individual_traits()", "lifeStage", + "set_individual_traits()", "sex", + "set_individual_traits()", "vitality", + "set_individual_traits()", "reproductiveCondition", + "set_observer()", "recordedBy", + "set_observer()", "recordedByID", + "set_events()", "eventID", + "set_events()", "eventType", + "set_events()", "parentEventID" ) table <- lst(main, optional) # named list diff --git a/README.Rmd b/README.Rmd index 0560c8f..5b70e93 100644 --- a/README.Rmd +++ b/README.Rmd @@ -59,25 +59,25 @@ df <- tibble( df ``` -One of the most important aspects of Darwin Core standard is using standard column names (Darwin Core *terms*). We can update column names in our data to match Darwin Core terms with `use_` functions. +One of the most important aspects of Darwin Core standard is using standard column names (Darwin Core *terms*). We can update column names in our data to match Darwin Core terms with `set_` functions. -Each `use_` function name corresponds to the type of data, and argument names correspond to the available Darwin Core terms to use as column names. `use_` functions support data wrangling operations & `dplyr::mutate()` functionality, meaning columns can be changed or fixed in your pipe. `use_` functions will indicate if anything needs fixing because they also automatically run checks on each column data to make sure each column is in the correct format. +Each `set_` function name corresponds to the type of data, and argument names correspond to the available Darwin Core terms to use as column names. `set_` functions support data wrangling operations & `dplyr::mutate()` functionality, meaning columns can be changed or fixed in your pipe. 
`set_` functions will indicate if anything needs fixing because they also automatically run checks on each column's data to make sure each column is in the correct format. ```{r} suppressMessages( # for readability df |> - use_coordinates( + set_coordinates( decimalLatitude = as.numeric(latitude), # fix latitude decimalLongitude = longitude ) |> - use_scientific_name( + set_scientific_name( scientificName = species ) |> - use_datetime( + set_datetime( eventDate = lubridate::dmy(eventDate) # specify date format ) |> - use_occurrences(occurrenceStatus = status) + set_occurrences(occurrenceStatus = status) ) ``` diff --git a/corella.Rproj b/corella.Rproj index 69fafd4..a30a9b8 100644 --- a/corella.Rproj +++ b/corella.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: 406372db-8fbd-4fb7-98af-03f67b99cdd6 RestoreWorkspace: No SaveWorkspace: No diff --git a/man/corella-package.Rd b/man/corella-package.Rd index 7f0def9..3686212 100644 --- a/man/corella-package.Rd +++ b/man/corella-package.Rd @@ -29,19 +29,19 @@ the Little Corella (\emph{Cacatua sanguinea}), and was drawn by Dax Kellie. The following functions add single DwC fields, or collections of related fields, to an existing \code{tibble}. 
\itemize{ -\item \code{\link[=use_events]{use_events()}} basic information on observation events (\code{eventID}, \code{parentEventID}, \code{eventType}) -\item \code{\link[=use_occurrences]{use_occurrences()}} basic information on observations (\code{occurrenceID}, \code{basisOfRecord}) -\item \code{\link[=use_scientific_name]{use_scientific_name()}} record the highest level of taxonomic specificity in the dataset (\code{scientificName}, \code{scientificNameAuthorship}, \code{taxonRank}) -\item \code{\link[=use_taxonomy]{use_taxonomy()}} to specify higher taxonomic columns (\code{kingdom}, \code{phylum}, \code{class}, \code{order}, \code{family}, \code{genus}, \code{species}, \code{specificEpithet}, \code{vernacularName}) -\item \code{\link[=use_coordinates]{use_coordinates()}} for spatial data (\code{decimalLatitude}, \code{decimalLongitude}, \code{geodeticDatum}, \code{coordinateUncertaintyInMeters}, \code{coordinatePrecision}) -\item \code{\link[=use_sf]{use_sf()}} for spatial data in \code{sf} format -\item \code{\link[=use_locality]{use_locality()}} for spatial descriptions (\code{continent}, \code{country}, \code{countryCode}, \code{stateProvince}, \code{locality}) -\item \code{\link[=use_datetime]{use_datetime()}} for temporal data (\code{eventDate}, \code{year}, \code{month}, \code{day}, \code{eventTime}) -\item \code{\link[=use_collection]{use_collection()}} to give museum- or collection- specific information (\code{datasetID}, \code{datasetName}, \code{catalogNumber}) -\item \code{\link[=use_observer]{use_observer()}} to specify who made the observation (\code{recordedByID}, \code{recordedBy}) -\item \code{\link[=use_abundance]{use_abundance()}} to state how many animals were seen during the observation (\code{individualCount}, \code{organismQuantity}, \code{organismQuantityType}) -\item \code{\link[=use_individual_traits]{use_individual_traits()}} attributes of individuals measured (\code{individualID}, \code{lifeStage}, \code{sex}, \code{vitality}, 
\code{reproductiveCondition}) -\item \code{\link[=use_measurements]{use_measurements()}} for 'Measurement or Fact' data (optional) +\item \code{\link[=set_events]{set_events()}} basic information on observation events (\code{eventID}, \code{parentEventID}, \code{eventType}) +\item \code{\link[=set_occurrences]{set_occurrences()}} basic information on observations (\code{occurrenceID}, \code{basisOfRecord}) +\item \code{\link[=set_scientific_name]{set_scientific_name()}} record the highest level of taxonomic specificity in the dataset (\code{scientificName}, \code{scientificNameAuthorship}, \code{taxonRank}) +\item \code{\link[=set_taxonomy]{set_taxonomy()}} to specify higher taxonomic columns (\code{kingdom}, \code{phylum}, \code{class}, \code{order}, \code{family}, \code{genus}, \code{species}, \code{specificEpithet}, \code{vernacularName}) +\item \code{\link[=set_coordinates]{set_coordinates()}} for spatial data (\code{decimalLatitude}, \code{decimalLongitude}, \code{geodeticDatum}, \code{coordinateUncertaintyInMeters}, \code{coordinatePrecision}) +\item \code{\link[=set_coordinates_sf]{set_coordinates_sf()}} for spatial data in \code{sf} format +\item \code{\link[=set_locality]{set_locality()}} for spatial descriptions (\code{continent}, \code{country}, \code{countryCode}, \code{stateProvince}, \code{locality}) +\item \code{\link[=set_datetime]{set_datetime()}} for temporal data (\code{eventDate}, \code{year}, \code{month}, \code{day}, \code{eventTime}) +\item \code{\link[=set_collection]{set_collection()}} to give museum- or collection- specific information (\code{datasetID}, \code{datasetName}, \code{catalogNumber}) +\item \code{\link[=set_observer]{set_observer()}} to specify who made the observation (\code{recordedByID}, \code{recordedBy}) +\item \code{\link[=set_abundance]{set_abundance()}} to state how many animals were seen during the observation (\code{individualCount}, \code{organismQuantity}, \code{organismQuantityType}) +\item 
\code{\link[=set_individual_traits]{set_individual_traits()}} attributes of individuals measured (\code{individualID}, \code{lifeStage}, \code{sex}, \code{vitality}, \code{reproductiveCondition}) +\item \code{\link[=set_measurements]{set_measurements()}} for 'Measurement or Fact' data (optional) } \strong{Checking data for Darwin Core compliance} diff --git a/man/use_abundance.Rd b/man/set_abundance.Rd similarity index 77% rename from man/use_abundance.Rd rename to man/set_abundance.Rd index 7d9e8ac..c84a089 100644 --- a/man/use_abundance.Rd +++ b/man/set_abundance.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_abundance.R -\name{use_abundance} -\alias{use_abundance} -\title{Add abundance fields to a \code{tibble}} +% Please edit documentation in R/set_abundance.R +\name{set_abundance} +\alias{set_abundance} +\title{Set, create or modify columns with abundance information using Darwin Core} \usage{ -use_abundance( +set_abundance( .df, individualCount = NULL, organismQuantity = NULL, @@ -35,8 +35,12 @@ A tibble with the requested fields (see details). In some field methods, it is common to observe more than one individual per observation; to observe abundance using non-integer measures such as mass or area; or to seek individuals but not find them (abundance of zero). -As these approaches use different DwC terms, this function assists in +As these approaches use different Darwin Core terms, this function assists in specifying abundances in a flexible way. + +In practice this is no different from using \code{mutate()}, but gives some +informative errors, and serves as a useful lookup for how columns with +abundance information are represented in the Darwin Core Standard. 
} \details{ Examples of \code{organismQuantity} & \code{organismQuantityType} values: @@ -56,6 +60,6 @@ df <- tibble::tibble( ) df |> - use_abundance(individualCount = n_obs) + set_abundance(individualCount = n_obs) } diff --git a/man/use_collection.Rd b/man/set_collection.Rd similarity index 87% rename from man/use_collection.Rd rename to man/set_collection.Rd index f630a3c..36950c0 100644 --- a/man/use_collection.Rd +++ b/man/set_collection.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_collection.R -\name{use_collection} -\alias{use_collection} -\title{Add museum- or collection-specific information to a \code{tibble}} +% Please edit documentation in R/set_collection.R +\name{set_collection} +\alias{set_collection} +\title{Set, create or modify columns with museum- or collection-specific information using Darwin Core} \usage{ -use_collection( +set_collection( .df, datasetID = NULL, datasetName = NULL, diff --git a/man/use_coordinates.Rd b/man/set_coordinates.Rd similarity index 91% rename from man/use_coordinates.Rd rename to man/set_coordinates.Rd index efc15ab..8b91f46 100644 --- a/man/use_coordinates.Rd +++ b/man/set_coordinates.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_coordinates.R -\name{use_coordinates} -\alias{use_coordinates} -\title{Add spatial fields to a \code{tibble}} +% Please edit documentation in R/set_coordinates.R +\name{set_coordinates} +\alias{set_coordinates} +\title{Set, create or modify columns with spatial information using Darwin Core} \usage{ -use_coordinates( +set_coordinates( .df, decimalLatitude = NULL, decimalLongitude = NULL, diff --git a/man/use_sf.Rd b/man/set_coordinates_sf.Rd similarity index 69% rename from man/use_sf.Rd rename to man/set_coordinates_sf.Rd index abe2c59..7fc9360 100644 --- a/man/use_sf.Rd +++ b/man/set_coordinates_sf.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please 
edit documentation in R/use_sf.R -\name{use_sf} -\alias{use_sf} -\title{Add spatial fields to a \code{tibble} using \code{sf} \code{POINT} coordinates} +% Please edit documentation in R/set_coordinates_sf.R +\name{set_coordinates_sf} +\alias{set_coordinates_sf} +\title{Set, create or modify columns with \code{sf} spatial information using Darwin Core} \usage{ -use_sf(.df, geometry = NULL, .keep = "unused") +set_coordinates_sf(.df, geometry = NULL, .keep = "unused") } \arguments{ \item{.df}{a \code{data.frame} or \code{tibble} that the column should be appended to.} @@ -21,12 +21,12 @@ A tibble with the requested fields added. } \description{ This function helps format standard location fields to a \code{tibble}. It differs -from \code{use_coordinates()} by accepting \code{sf} geometry columns of class \code{POINT} +from \code{set_coordinates()} by accepting \code{sf} geometry columns of class \code{POINT} as coordinates (rather than \code{numeric} lat/lon coordinates). The advantage of using an \code{sf} geometry is that the Coordinate Reference System (CRS) is automatically formatted into the required \code{geodeticDatum} column. 
} \seealso{ -\code{\link[=use_coordinates]{use_coordinates()}} for providing numeric coordinates, -\code{\link[=use_locality]{use_locality()}} for providing text-based spatial information +\code{\link[=set_coordinates]{set_coordinates()}} for providing numeric coordinates, +\code{\link[=set_locality]{set_locality()}} for providing text-based spatial information } diff --git a/man/use_datetime.Rd b/man/set_datetime.Rd similarity index 85% rename from man/use_datetime.Rd rename to man/set_datetime.Rd index ba49068..a4cd03a 100644 --- a/man/use_datetime.Rd +++ b/man/set_datetime.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_datetime.R -\name{use_datetime} -\alias{use_datetime} -\title{Add date and time fields to a \code{tibble}} +% Please edit documentation in R/set_datetime.R +\name{set_datetime} +\alias{set_datetime} +\title{Set, create or modify columns with date and time information using Darwin Core} \usage{ -use_datetime( +set_datetime( .df, eventDate = NULL, year = NULL, @@ -40,13 +40,13 @@ generate them.} A tibble with the requested fields added. } \description{ -This function helps format standard date/time fields to a \code{tibble}. -} -\details{ +This function helps format standard date/time columns in a \code{tibble}. + In practice this is no different from using \code{mutate()}, but gives some informative errors, and serves as a useful lookup for how spatial fields are represented in the Darwin Core Standard. - +} +\details{ Example values are: \itemize{ \item \code{eventDate} should be class \code{Date} or \code{POSITct}. 
We suggest using the diff --git a/man/use_events.Rd b/man/set_events.Rd similarity index 92% rename from man/use_events.Rd rename to man/set_events.Rd index 7dd0b4c..5384994 100644 --- a/man/use_events.Rd +++ b/man/set_events.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_events.R -\name{use_events} -\alias{use_events} -\title{Add a sampling, collection or image-capture Event to a \code{tibble}} +% Please edit documentation in R/set_events.R +\name{set_events} +\alias{set_events} +\title{Set, create or modify columns with Event information using Darwin Core} \usage{ -use_events( +set_events( .df, eventID = NULL, eventType = NULL, @@ -38,7 +38,7 @@ meaning that deleting these columns by default is typically unwise.} A tibble with the requested fields added. } \description{ -Format fields that contain information about an +Format columns that contain information about an \href{https://dwc.tdwg.org/list/#dwc_Event}{Event}. An "Event" in Darwin Core standard refers to an action that occurs at a place and time. Examples include: diff --git a/man/use_locality.Rd b/man/set_locality.Rd similarity index 76% rename from man/use_locality.Rd rename to man/set_locality.Rd index e67c91c..dd69249 100644 --- a/man/use_locality.Rd +++ b/man/set_locality.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_locality.R -\name{use_locality} -\alias{use_locality} -\title{Add \code{locality} data to a \code{tibble}} +% Please edit documentation in R/set_locality.R +\name{set_locality} +\alias{set_locality} +\title{Set, create or modify columns with locality information using Darwin Core} \usage{ -use_locality( +set_locality( .df, continent = NULL, country = NULL, @@ -37,7 +37,12 @@ A tibble with the requested fields. } \description{ Locality information refers to a description of a place, rather than a -spatial coordinate. +spatial coordinate. 
This function helps specify or modify columns +with locality information in a flexible way. + +In practice this is no different from using \code{mutate()}, but gives some +informative errors, and serves as a useful lookup for fields in +the Darwin Core Standard. } \details{ Example values are: diff --git a/man/set_measurements.Rd b/man/set_measurements.Rd new file mode 100644 index 0000000..94ddd28 --- /dev/null +++ b/man/set_measurements.Rd @@ -0,0 +1,50 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/set_measurements.R +\name{set_measurements} +\alias{set_measurements} +\title{Convert columns with measurement data for an individual or event to Darwin Core standard} +\usage{ +set_measurements(.df, cols = NULL, unit = NULL, type = NULL, .keep = "unused") +} +\arguments{ +\item{.df}{a \code{data.frame} or \code{tibble} that the column should be appended to.} + +\item{cols}{vector of column names to be included as 'measurements'. Unquoted.} + +\item{unit}{vector of strings giving units for each variable} + +\item{type}{vector of strings giving a description for each variable} + +\item{.keep}{Control which columns from .data are retained in the output. +Note that unlike \code{\link[dplyr:mutate]{dplyr::mutate()}}, which defaults to \code{"all"} this defaults to +\code{"unused"}; i.e. only keeps Darwin Core fields, and not those fields used to +generate them.} +} +\value{ +A tibble with the requested fields added. +} +\description{ +\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} +This function is a work in progress, and should be used with caution. + +In raw collected data, many types of information can be captured in one +column. 
For example, the column name \code{LMA_g.m2} contains the measured trait +(Leaf Mass per Area, LMA) and the unit of measurement (grams per meter +squared, g/m2), and recorded in that column are the values themselves. In +Darwin Core, these different types of information must be separated into +multiple columns so that they can be ingested correctly and aggregated with +other sources of data accurately. + +This function converts information preserved in a single measurement column +into multiple columns (\code{measurementID}, \code{measurementUnit}, and +\code{measurementType}) as per Darwin Core standard. +} +\details{ +Columns are nested in a +single column \code{measurementOrFact} that contains Darwin Core standard +measurement fields. By nesting three measurement columns within the +\code{measurementOrFact}, despite measurement columns converting to long format +(one row per measurement, per occurrence), +data will remain organised by occurrences (one row per occurrence). Data +can be unnested into long format using \code{tidyr::unnest()}. +} diff --git a/man/use_observer.Rd b/man/set_observer.Rd similarity index 84% rename from man/use_observer.Rd rename to man/set_observer.Rd index 786fda2..54d5e57 100644 --- a/man/use_observer.Rd +++ b/man/set_observer.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_observer.R -\name{use_observer} -\alias{use_observer} -\title{Add who made an observation to a \code{tibble}} +% Please edit documentation in R/set_observer.R +\name{set_observer} +\alias{set_observer} +\title{Set, create or modify columns with information of who made an observation (using Darwin Core)} \usage{ -use_observer(.df, recordedBy = NULL, recordedByID = NULL, .keep = "unused") +set_observer(.df, recordedBy = NULL, recordedByID = NULL, .keep = "unused") } \arguments{ \item{.df}{a \code{data.frame} or \code{tibble} that the column should be appended to.} @@ -27,12 +27,12 @@ A tibble with the requested
fields added. \description{ Format fields that contain information about who made a specific observation of an organism. -} -\details{ + In practice this is no different from using \code{mutate()}, but gives some informative errors, and serves as a useful lookup for fields in the Darwin Core Standard. - +} +\details{ Examples of \code{recordedBy} values: \itemize{ \item \verb{José E. Crespo} diff --git a/man/use_occurrences.Rd b/man/set_occurrences.Rd similarity index 93% rename from man/use_occurrences.Rd rename to man/set_occurrences.Rd index 67ae56f..249f2ad 100644 --- a/man/use_occurrences.Rd +++ b/man/set_occurrences.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_occurrences.R -\name{use_occurrences} -\alias{use_occurrences} -\title{Add occurrence-specific information to a \code{tibble}} +% Please edit documentation in R/set_occurrences.R +\name{set_occurrences} +\alias{set_occurrences} +\title{Set, create or modify columns with occurrence-specific information using Darwin Core} \usage{ -use_occurrences( +set_occurrences( .df, occurrenceID = NULL, basisOfRecord = NULL, diff --git a/man/use_scientific_name.Rd b/man/set_scientific_name.Rd similarity index 88% rename from man/use_scientific_name.Rd rename to man/set_scientific_name.Rd index de95812..3de3481 100644 --- a/man/use_scientific_name.Rd +++ b/man/set_scientific_name.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_scientific_name.R -\name{use_scientific_name} -\alias{use_scientific_name} -\title{Add scientific name and authorship to a \code{tibble}} +% Please edit documentation in R/set_scientific_name.R +\name{set_scientific_name} +\alias{set_scientific_name} +\title{Set, create or modify columns with scientific name and authorship information using Darwin Core} \usage{ -use_scientific_name( +set_scientific_name( .df, scientificName = NULL, scientificNameAuthorship = NULL, diff --git 
a/man/use_taxonomy.Rd b/man/set_taxonomy.Rd similarity index 91% rename from man/use_taxonomy.Rd rename to man/set_taxonomy.Rd index 06cf131..b2d4d04 100644 --- a/man/use_taxonomy.Rd +++ b/man/set_taxonomy.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_taxonomy.R -\name{use_taxonomy} -\alias{use_taxonomy} -\title{Add taxonomic information to a \code{tibble}} +% Please edit documentation in R/set_taxonomy.R +\name{set_taxonomy} +\alias{set_taxonomy} +\title{Set, create or modify columns with taxonomic information using Darwin Core} \usage{ -use_taxonomy( +set_taxonomy( .df, kingdom = NULL, phylum = NULL, @@ -49,13 +49,13 @@ A tibble with the requested fields added. \description{ Format fields that contain taxonomic name information from kingdom to species, as well as the common/vernacular name, to a \code{tibble}. -} -\details{ + In practice this is no different from using \code{mutate()}, but gives some informative errors, and serves as a useful lookup for taxonomic names in the Darwin Core Standard. - +} +\details{ Examples of \code{specificEphithet}: \itemize{ \item If \code{scientificName} is \verb{Abies concolor}, the \code{specificEpithet} is \code{concolor}. 
diff --git a/man/use_individual_traits.Rd b/man/use_individual_traits.Rd index b28d6b0..d16848a 100644 --- a/man/use_individual_traits.Rd +++ b/man/use_individual_traits.Rd @@ -1,8 +1,8 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_individual_traits.R +% Please edit documentation in R/set_individual_traits.R \name{use_individual_traits} \alias{use_individual_traits} -\title{Add information of individual organisms to a \code{tibble}} +\title{Set, create or modify columns with information of individual organisms using Darwin Core} \usage{ use_individual_traits( .df, diff --git a/man/use_measurements.Rd b/man/use_measurements.Rd deleted file mode 100644 index 00622e9..0000000 --- a/man/use_measurements.Rd +++ /dev/null @@ -1,31 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/use_measurements.R -\name{use_measurements} -\alias{use_measurements} -\title{Add measurement data for an individual or event to a \code{tibble}} -\usage{ -use_measurements(.df, cols = NULL, unit = NULL, type = NULL, .keep = "unused") -} -\arguments{ -\item{.df}{a \code{data.frame} or \code{tibble} that the column should be appended to.} - -\item{cols}{vector of column names to be included as 'measurements'. Unquoted.} - -\item{unit}{vector of strings giving units for each variable} - -\item{type}{vector of strings giving a description for each variable} - -\item{.keep}{Control which columns from .data are retained in the output. -Note that unlike \code{\link[dplyr:mutate]{dplyr::mutate()}}, which defaults to \code{"all"} this defaults to -\code{"unused"}; i.e. only keeps Darwin Core fields, and not those fields used to -generate them.} -} -\value{ -A tibble with the requested fields added. -} -\description{ -This function is a work in progress, and should be used with caution. 
-} -\details{ -add examples -} diff --git a/tests/mytests.R b/tests/mytests.R new file mode 100644 index 0000000..ab71c0e --- /dev/null +++ b/tests/mytests.R @@ -0,0 +1,106 @@ + + +library(tibble) +library(readr) +library(dplyr) +library(tidyr) +library(here) + +df <- read_csv(here("inst", "extdata", "westerband_2022_wdate.csv")) + +# take a small sample +df_filtered <- df |> + select(Site, Species, Latitude, Longitude, LMA_g.m2, LeafN_area_g.m2, PNUE) |> + slice(200:300) + +df_nested <- df_filtered |> + group_split(row_number(), .keep = FALSE) %>% + purrr::map_dfr( ~ .x |> + nest(measurementOrFact = c(LMA_g.m2, LeafN_area_g.m2, PNUE))) + # nest(measurementOrFact = c(LMA_g.m2, LeafN_area_g.m2, PNUE)) + +test_string <- c("g/m2", "g/m2", "who knows") +another_string <- c("leaf mass per area", "leaf Nitrogen per area", "PNUE") + +df_nested |> + dplyr::mutate( + measurementOrFact = purrr::map( + measurementOrFact, + ~ .x |> + pivot_longer(names_to = "column_name", + values_to = "measurementValue", + cols = everything()) |> + mutate( + measurementUnit = test_string, + measurementType = another_string + ) + )) |> + unnest(measurementOrFact) + + +very_nested <- df_nested |> + nest(data = c(Species, Latitude, Longitude, measurementOrFact)) + +very_nested + + +very_nested |> + unnest(data) + + + + +test <- df_filtered |> + # slice(200:300) |> + set_measurements(cols = c(LMA_g.m2, LeafN_area_g.m2, PNUE), + unit = c("g/m2", "g/m2", "something else"), + type = c("leaf mass per area", "leaf nitrogen per area", "gibberish")) + +test + +test |> + slice(1:3) |> + unnest(measurementOrFact) + + +number <- 7490 + +floor(log10(number)) + 1 + + + + + + + + + + + + + + +library(tibble) +library(lubridate) + +df <- tibble( + latitude = c(-35.310, "-35.273"), # deliberate error for demonstration purposes + longitude = c(149.125, 149.133), + date = c("14-01-2023", "15-01-2023"), + time = c("10:23:00", "11:25:00"), + month = c("January", "February"), + day = c(100, 101), + species =
c("Callocephalon fimbriatum", "Eolophus roseicapilla"), + n = c(2, 3), + crs = c("WGS84", "WGS8d"), + country = c("Australia", "Denmark"), + continent = c("Oceania", "Europe") +) + + +df |> + set_occurrences(basisOfRecord = "humanObservation") + + + + diff --git a/tests/testthat/test-suggest_workflow.R b/tests/testthat/test-suggest_workflow.R index 52607e7..41d6673 100644 --- a/tests/testthat/test-suggest_workflow.R +++ b/tests/testthat/test-suggest_workflow.R @@ -27,19 +27,19 @@ test_that("suggest_workflow() doesn't error for common use cases", { no_error_check() }) -# test_that("use_basisOfRecord() works", { +# test_that("basisOfRecord() works", { # x <- tibble(x = 1) # # no error # x |> -# use_basisOfRecord("humanObservation") |> +# basisOfRecord("humanObservation") |> # expect_no_error() -# result <- use_basisOfRecord(x, "humanObservation") +# result <- basisOfRecord(x, "humanObservation") # expect_s3_class(result, c("tbl_df", "tbl", "data.frame")) # dwc_df case? # expect_equal(ncol(result), 2) # expect_equal(colnames(result), c("x", "basisOfRecord")) # # with error # x |> -# use_basisOfRecord("something") |> +# basisOfRecord("something") |> # expect_error(regexp = "basisOfRecord") # # ideally this would check for `Error in check_basisOfRecord()`; # # but this isn't a message, so it doesn't work. May need to snapshot?
diff --git a/tests/testthat/test-use_abundance.R b/tests/testthat/test-use_abundance.R index 378d7c4..582da85 100644 --- a/tests/testthat/test-use_abundance.R +++ b/tests/testthat/test-use_abundance.R @@ -1,72 +1,72 @@ -test_that("use_abundance errors when missing .df", { - expect_error(use_abundance(individualCount = individualCount), +test_that("set_abundance errors when missing .df", { + expect_error(set_abundance(individualCount = individualCount), ".df is missing") }) -test_that("use_abundance errors when no dwc columns are named or exist in the df", { +test_that("set_abundance errors when no dwc columns are named or exist in the df", { df <- tibble::tibble(borp = 23) - expect_warning(suppressMessages(use_abundance(df)), + expect_warning(suppressMessages(set_abundance(df)), "No Darwin Core terms detected") }) -test_that("use_abundance returns tibble with updated dwc column names", { - quiet_use_abundance <- purrr::quietly(use_abundance) +test_that("set_abundance returns tibble with updated dwc column names", { + quiet_set_abundance <- purrr::quietly(set_abundance) df <- tibble::tibble(user_col = 1:2) result <- df |> - quiet_use_abundance(individualCount = user_col) + quiet_set_abundance(individualCount = user_col) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_match(colnames(result$result), c("individualCount")) }) -test_that("use_abundance detects unnamed but existing dwc column names in df", { - quiet_use_abundance <- purrr::quietly(use_abundance) +test_that("set_abundance detects unnamed but existing dwc column names in df", { + quiet_set_abundance <- purrr::quietly(set_abundance) df <- tibble::tibble(individualCount = 2:3, col2 = 1:2) df2 <- tibble::tibble(individualCount = "borp", col2 = 1:2) result <- df |> - quiet_use_abundance() + quiet_set_abundance() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("individualCount", "col2")) expect_error( suppressMessages( - df2 |> 
use_abundance() + df2 |> set_abundance() ), "individualCount must be a numeric vector, not character") }) -test_that("use_abundance has progress messages", { - quiet_use_abundance <- purrr::quietly(use_abundance) +test_that("set_abundance has progress messages", { + quiet_set_abundance <- purrr::quietly(set_abundance) df <- tibble::tibble(individualCount = 1:2, col2 = 1:2) - result <- df |> quiet_use_abundance() + result <- df |> quiet_set_abundance() expect_false(is.null(result$messages)) }) -test_that("use_abundance checks individualCount format", { +test_that("set_abundance checks individualCount format", { df_dbl <- tibble::tibble(individualCount = c(1, 100, 265)) df_chr <- tibble::tibble(individualCount = c("bleep", "blorp")) expect_no_error(suppressMessages( - df_dbl |> use_abundance(individualCount = individualCount) + df_dbl |> set_abundance(individualCount = individualCount) )) expect_error(suppressMessages( - df_chr |> use_abundance(individualCount = individualCount)), + df_chr |> set_abundance(individualCount = individualCount)), "individualCount must be a numeric vector, not character" ) }) -test_that("use_abundance errors if individualCount = 0 & occurrenceStatus isn't in df", { +test_that("set_abundance errors if individualCount = 0 & occurrenceStatus isn't in df", { df <- tibble::tibble( species = c("Callocephalon fimbriatum", "Eolophus roseicapilla"), # occurrenceStatus = c("present", "present"), @@ -75,13 +75,13 @@ test_that("use_abundance errors if individualCount = 0 & occurrenceStatus isn't expect_error( suppressMessages( - df |> use_abundance(individualCount = individualCount) + df |> set_abundance(individualCount = individualCount) ), "individualCount of 0 detected" ) }) -test_that("use_abundance errors if `individualCount = 0` and `occurrenceStatus = 'absent'` don't match", { +test_that("set_abundance errors if `individualCount = 0` and `occurrenceStatus = 'absent'` don't match", { df <- tibble::tibble( species = c("Callocephalon fimbriatum", 
"Eolophus roseicapilla"), occurrenceStatus = c("present", "present"), @@ -90,13 +90,13 @@ test_that("use_abundance errors if `individualCount = 0` and `occurrenceStatus = expect_error( suppressMessages( - df |> use_abundance(individualCount = individualCount) + df |> set_abundance(individualCount = individualCount) ), "individualCount values do not match occurrenceStatus" ) }) -test_that("use_abundance requires organismQuantity is paired with organismQuantityType", { +test_that("set_abundance requires organismQuantity is paired with organismQuantityType", { df <- tibble::tibble( species = c("Callocephalon fimbriatum", "Eolophus roseicapilla"), organismQuantity = c(0, 2), @@ -108,18 +108,18 @@ test_that("use_abundance requires organismQuantity is paired with organismQuanti ) expect_no_error(suppressMessages( - df |> use_abundance(organismQuantity = organismQuantity) + df |> set_abundance(organismQuantity = organismQuantity) )) expect_error( suppressMessages( - df_wrong |> use_abundance(organismQuantity = organismQuantity) + df_wrong |> set_abundance(organismQuantity = organismQuantity) ), "Missing organismQuantityType in dataframe" ) }) -test_that("use_abundance requires organismQuantityType is paired with organismQuantity", { +test_that("set_abundance requires organismQuantityType is paired with organismQuantity", { df <- tibble::tibble( species = c("Callocephalon fimbriatum", "Eolophus roseicapilla"), organismQuantity = c(0, 2), @@ -131,12 +131,12 @@ test_that("use_abundance requires organismQuantityType is paired with organismQu ) expect_no_error(suppressMessages( - df |> use_abundance(organismQuantityType = organismQuantityType) + df |> set_abundance(organismQuantityType = organismQuantityType) )) expect_error( suppressMessages( - df_wrong |> use_abundance(organismQuantityType = organismQuantityType) + df_wrong |> set_abundance(organismQuantityType = organismQuantityType) ), "Missing organismQuantity in dataframe" ) diff --git 
a/tests/testthat/test-use_collection.R b/tests/testthat/test-use_collection.R index bb5a506..d1a8c9b 100644 --- a/tests/testthat/test-use_collection.R +++ b/tests/testthat/test-use_collection.R @@ -1,68 +1,68 @@ -test_that("use_collection errors when missing .df", { - expect_error(use_collection(datasetID = datasetID), +test_that("set_collection errors when missing .df", { + expect_error(set_collection(datasetID = datasetID), ".df is missing") }) -test_that("use_collection errors when no dwc columns are named or exist in the df", { +test_that("set_collection errors when no dwc columns are named or exist in the df", { df <- tibble::tibble(borp = "anID") - expect_warning(suppressMessages(use_collection(df)), + expect_warning(suppressMessages(set_collection(df)), "No Darwin Core terms detected") }) -test_that("use_collection returns tibble with updated dwc column names", { - quiet_use_collection <- purrr::quietly(use_collection) +test_that("set_collection returns tibble with updated dwc column names", { + quiet_set_collection <- purrr::quietly(set_collection) df <- tibble::tibble(user_col = "FrogID") result <- df |> - quiet_use_collection(datasetName = user_col) + quiet_set_collection(datasetName = user_col) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_match(colnames(result$result), c("datasetName")) }) -test_that("use_collection detects unnamed but existing dwc column names in df", { - quiet_use_collection <- purrr::quietly(use_collection) +test_that("set_collection detects unnamed but existing dwc column names in df", { + quiet_set_collection <- purrr::quietly(set_collection) df <- tibble::tibble(datasetName = "FrogID", col2 = 1:2) df2 <- tibble::tibble(datasetName = 1:2, col2 = 1:2) result <- df |> - quiet_use_collection() + quiet_set_collection() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("datasetName", "col2")) expect_error( suppressMessages( - df2 |> use_collection() + df2 
|> set_collection() ), "datasetName must be a character vector, not integer") }) -test_that("use_collection has progress messages", { - quiet_use_collection <- purrr::quietly(use_collection) +test_that("set_collection has progress messages", { + quiet_set_collection <- purrr::quietly(set_collection) df <- tibble::tibble(datasetName = "FrogID", col2 = 1:2) - result <- df |> quiet_use_collection() + result <- df |> quiet_set_collection() expect_false(is.null(result$messages)) }) -test_that("use_collection checks datasetName format", { +test_that("set_collection checks datasetName format", { df_chr <- tibble::tibble(datasetName = c("FrogID", "iNaturalist observations")) df_dbl <- tibble::tibble(datasetName =1:3) expect_no_error(suppressMessages( - df_chr |> use_collection(datasetName = datasetName) + df_chr |> set_collection(datasetName = datasetName) )) expect_error(suppressMessages( - df_dbl |> use_collection(datasetName = datasetName)), + df_dbl |> set_collection(datasetName = datasetName)), "datasetName must be a character vector, not integer" ) }) diff --git a/tests/testthat/test-use_coordinates.R b/tests/testthat/test-use_coordinates.R index f35d8ab..9d63660 100644 --- a/tests/testthat/test-use_coordinates.R +++ b/tests/testthat/test-use_coordinates.R @@ -1,117 +1,117 @@ -test_that("use_coordinates errors when missing .df", { - expect_error(use_coordinates(decimalLongitude = c(149.125, 149.133)), +test_that("set_coordinates errors when missing .df", { + expect_error(set_coordinates(decimalLongitude = c(149.125, 149.133)), ".df is missing") }) -test_that("use_coordinates errors when no dwc columns are named, or exist in the df", { +test_that("set_coordinates errors when no dwc columns are named, or exist in the df", { df <- tibble(borp = c(149.125, 149.133)) - - expect_warning(suppressMessages(use_scientific_name(df)), + + expect_warning(suppressMessages(set_scientific_name(df)), "No Darwin Core terms detected") }) -test_that("use_coordinates returns tibble with 
updated dwc column names", { - quiet_use_coordinates <- purrr::quietly(use_coordinates) +test_that("set_coordinates returns tibble with updated dwc column names", { + quiet_set_coordinates <- purrr::quietly(set_coordinates) df <- tibble(user_col = c(149.125, 149.133)) - + result <- df |> - quiet_use_coordinates(decimalLongitude = user_col) - + quiet_set_coordinates(decimalLongitude = user_col) + expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("decimalLongitude")) }) -test_that("use_coordinates detects unnamed but existing dwc column names in df", { - quiet_use_coordinates <- purrr::quietly(use_coordinates) +test_that("set_coordinates detects unnamed but existing dwc column names in df", { + quiet_set_coordinates <- purrr::quietly(set_coordinates) df <- tibble(decimalLongitude = c(149.125, 149.133), decimalLatitude = c(-35.310, -35.273), col2 = 1:2) - + result <- df |> - quiet_use_coordinates() - + quiet_set_coordinates() + expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("decimalLongitude", "decimalLatitude", "col2")) }) -test_that("use_coordinates has progress messages", { - quiet_use_coordinates <- purrr::quietly(use_coordinates) +test_that("set_coordinates has progress messages", { + quiet_set_coordinates <- purrr::quietly(set_coordinates) df <- tibble(decimalLongitude = c(149.125, 149.133), decimalLatitude = c(-35.310, -35.273), col2 = 1:2) - - result <- df |> quiet_use_coordinates() - + + result <- df |> quiet_set_coordinates() + expect_false(is.null(result$messages)) - + }) -test_that("use_coordinates checks decimalLongitude format", { - quiet_use_coordinates <- purrr::quietly(use_coordinates) +test_that("set_coordinates checks decimalLongitude format", { + quiet_set_coordinates <- purrr::quietly(set_coordinates) df <- tibble(decimalLongitude = c(149.125, 149.133), col_string = c("string", "string"), col_bignumber = c(190, 149.133)) - - result <- 
df |> quiet_use_coordinates() - + + result <- df |> quiet_set_coordinates() + expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("decimalLongitude", "col_string", "col_bignumber")) - + expect_error(suppressMessages( - df |> use_coordinates(decimalLongitude = col_string) + df |> set_coordinates(decimalLongitude = col_string) ), "decimalLongitude must be a numeric vector, not character" ) expect_error(suppressMessages( - df |> use_coordinates(decimalLongitude = col_bignumber) + df |> set_coordinates(decimalLongitude = col_bignumber) ), "Value is outside of expected range in decimalLongitude" ) }) -test_that("use_coordinates checks decimalLatitude format", { - quiet_use_coordinates <- purrr::quietly(use_coordinates) +test_that("set_coordinates checks decimalLatitude format", { + quiet_set_coordinates <- purrr::quietly(set_coordinates) df <- tibble(decimalLatitude = c(-35.310, -35.273), col_string = c("string", "string"), col_bignumber = c(97, -35.273)) - - result <- df |> quiet_use_coordinates() - + + result <- df |> quiet_set_coordinates() + expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("decimalLatitude", "col_string", "col_bignumber")) - + expect_error(suppressMessages( - df |> use_coordinates(decimalLatitude = col_string) + df |> set_coordinates(decimalLatitude = col_string) ), "decimalLatitude must be a numeric vector, not character" ) expect_error(suppressMessages( - df |> use_coordinates(decimalLatitude = col_bignumber) + df |> set_coordinates(decimalLatitude = col_bignumber) ), "Value is outside of expected range in decimalLatitude" ) }) -test_that("use_coordinates checks geodeticDatum for valid CRS", { - quiet_use_coordinates <- purrr::quietly(use_coordinates) +test_that("set_coordinates checks geodeticDatum for valid CRS", { + quiet_set_coordinates <- purrr::quietly(set_coordinates) df <- tibble(geodeticDatum = c("WGS84", "WGS84"), col_number = 
c(97, -35.273), col_not_real = c("WGS84", "WGS8d")) - - result <- df |> quiet_use_coordinates() - + + result <- df |> quiet_set_coordinates() + expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("geodeticDatum", "col_number", "col_not_real")) - + expect_error(suppressMessages( - df |> use_coordinates(geodeticDatum = col_not_real) + df |> set_coordinates(geodeticDatum = col_not_real) ), "geodeticDatum contains invalid Coordinate Reference System" ) expect_warning(suppressMessages( - df |> use_coordinates(geodeticDatum = col_number) + df |> set_coordinates(geodeticDatum = col_number) ), "geodeticDatum contains unrecognised Coordinate" ) diff --git a/tests/testthat/test-use_datetime.R b/tests/testthat/test-use_datetime.R index eef13b1..79dfa16 100644 --- a/tests/testthat/test-use_datetime.R +++ b/tests/testthat/test-use_datetime.R @@ -1,71 +1,71 @@ library(tibble) # create quiet function that captures side-effects -# NOTE: This must be re-run if changes are made to `use_datetime()` for bug-fixing -quiet_use_datetime <- purrr::quietly(use_datetime) +# NOTE: This must be re-run if changes are made to `set_datetime()` for bug-fixing +quiet_set_datetime <- purrr::quietly(set_datetime) -test_that("use_datetime errors when missing .df", { +test_that("set_datetime errors when missing .df", { expect_error( - use_datetime(eventDate = eventDate), + set_datetime(eventDate = eventDate), ".df is missing") }) -test_that("use_datetime errors when no dwc columns are named, or exist in the df", { +test_that("set_datetime errors when no dwc columns are named, or exist in the df", { df <- tibble(col1 = "value") - expect_warning(df |> use_datetime(), + expect_warning(df |> set_datetime(), "No Darwin Core terms detected") }) -test_that("use_datetime returns tibble with updated dwc column names", { +test_that("set_datetime returns tibble with updated dwc column names", { df <- tibble(user_col = lubridate::dmy(c("14-01-2023", 
"15-01-2023"))) suppressWarnings(suppressMessages( result <- df |> - use_datetime(eventDate = user_col) + set_datetime(eventDate = user_col) )) expect_s3_class(result, c("tbl_df", "tbl", "data.frame")) expect_match(colnames(result), c("eventDate")) }) -test_that("use_datetime detects unnamed but existing dwc column names in df", { +test_that("set_datetime detects unnamed but existing dwc column names in df", { df <- tibble(eventDate = lubridate::dmy(c("14-01-2023", "15-01-2023")), col2 = 1:2) df2 <- tibble(eventDate = "borp", col2 = 1:2) - result <- df |> quiet_use_datetime() + result <- df |> quiet_set_datetime() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("eventDate", "col2")) expect_error( suppressWarnings(suppressMessages( - df2 |> use_datetime() + df2 |> set_datetime() )), "eventDate must be a Date vector" ) }) -test_that("use_datetime has progress messages", { +test_that("set_datetime has progress messages", { df <- tibble(eventDate = lubridate::dmy(c("14-01-2023", "15-01-2023")), col2 = 1:2) - result <- df |> quiet_use_datetime() + result <- df |> quiet_set_datetime() expect_false(is.null(result$messages)) }) -test_that("use_datetime detects correct number of existing fields", { +test_that("set_datetime detects correct number of existing fields", { df <- tibble(eventDate = lubridate::dmy(c("14-01-2023", "15-01-2023")), col2 = 1:2) df2 <- tibble(eventDate = lubridate::dmy(c("14-01-2023", "15-01-2023")), year = c(2023, 2023), col2 = 1:2) - result <- df |> quiet_use_datetime() - result2 <- df2 |> quiet_use_datetime() + result <- df |> quiet_set_datetime() + result2 <- df2 |> quiet_set_datetime() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("eventDate", "col2")) @@ -76,7 +76,7 @@ test_that("use_datetime detects correct number of existing fields", { }) -test_that("use_datetime checks eventDate format", { +test_that("set_datetime checks 
eventDate format", { correct <- tibble(eventDate = lubridate::dmy(c("14-01-2023", "15-01-2023")), col2 = 1:2) not_a_date <- tibble(eventDate = "borp", @@ -85,33 +85,33 @@ test_that("use_datetime checks eventDate format", { col2 = 1:2) result <- correct |> - quiet_use_datetime() + quiet_set_datetime() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("eventDate", "col2")) - ## TODO: The code to generate this warning is commented out in `use_datetime()` + ## TODO: The code to generate this warning is commented out in `set_datetime()` ## Check whether to reinstate # expect_warning( # suppressMessages( - # correct |> use_datetime() + # correct |> set_datetime() # ), # "eventDate defaults to UTC standard" # ) expect_error( suppressWarnings(suppressMessages( - not_a_date |> use_datetime(eventDate = eventDate) + not_a_date |> set_datetime(eventDate = eventDate) )), "eventDate must be a Date vector" ) expect_error( suppressWarnings(suppressMessages( - not_a_time |> use_datetime(eventDate = eventDate) + not_a_time |> set_datetime(eventDate = eventDate) )), "eventDate must be a Date vector" ) }) -test_that("use_datetime checks time format", { +test_that("set_datetime checks time format", { correct_date <- tibble(eventTime = lubridate::hms(c("10:23:00", "11:25:32")), col2 = 1:2) correct_chr <- tibble(eventTime = c("10:23", "11:25"), @@ -122,9 +122,9 @@ test_that("use_datetime checks time format", { col2 = 1:2) result1 <- correct_date |> - quiet_use_datetime() + quiet_set_datetime() result2 <- correct_chr |> - quiet_use_datetime() + quiet_set_datetime() expect_s3_class(result1$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result1$result), c("eventTime", "col2")) @@ -136,19 +136,19 @@ test_that("use_datetime checks time format", { expect_error( suppressMessages( - chr_not_a_time |> use_datetime(eventTime = eventTime) + chr_not_a_time |> set_datetime(eventTime = eventTime) ), "Invalid time format" ) 
expect_error( suppressMessages( - date_and_time |> use_datetime(eventTime = eventTime) + date_and_time |> set_datetime(eventTime = eventTime) ), "Must format" ) }) -test_that("use_datetime checks year format", { +test_that("set_datetime checks year format", { correct_year <- tibble(year = c(2021, 105), col2 = 1:2) wrong_year <- tibble(year = c(2021, 2100), @@ -157,7 +157,7 @@ test_that("use_datetime checks year format", { col2 = 1:2) result <- correct_year |> - quiet_use_datetime(year = year) + quiet_set_datetime(year = year) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("year", "col2")) @@ -165,27 +165,27 @@ test_that("use_datetime checks year format", { expect_error( suppressMessages( - wrong_year |> use_datetime(year = year) + wrong_year |> set_datetime(year = year) ), "Value is outside" ) expect_error( suppressMessages( - wrong_class |> use_datetime(year = year) + wrong_class |> set_datetime(year = year) ), "year must be a numeric" ) }) -test_that("use_datetime checks month numeric range", { +test_that("set_datetime checks month numeric range", { correct_month <- tibble(month = c(1, 11), col2 = 1:2) wrong_month <- tibble(month = c(1, 13), col2 = 1:2) result <- correct_month |> - quiet_use_datetime(month = month) + quiet_set_datetime(month = month) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("month", "col2")) @@ -193,20 +193,20 @@ test_that("use_datetime checks month numeric range", { expect_error( suppressMessages( - wrong_month |> use_datetime(month = month) + wrong_month |> set_datetime(month = month) ), "Value is outside" ) }) -test_that("use_datetime checks month abbreviations", { +test_that("set_datetime checks month abbreviations", { correct_month <- tibble(month = c("Jan", "Nov"), col2 = 1:2) wrong_month <- tibble(month = c("Jan", "borp"), col2 = 1:2) result <- correct_month |> - quiet_use_datetime(month = month) + 
quiet_set_datetime(month = month) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("month", "col2")) @@ -214,20 +214,20 @@ test_that("use_datetime checks month abbreviations", { expect_warning( suppressMessages( - wrong_month |> use_datetime(month = month) + wrong_month |> set_datetime(month = month) ), "month contains 1 unrecognised month" ) }) -test_that("use_datetime checks month names", { +test_that("set_datetime checks month names", { correct_month <- tibble(month = c("January", "September"), col2 = 1:2) wrong_month <- tibble(month = c("September", "borp"), col2 = 1:2) result <- correct_month |> - quiet_use_datetime(month = month) + quiet_set_datetime(month = month) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("month", "col2")) @@ -235,13 +235,13 @@ test_that("use_datetime checks month names", { expect_warning( suppressMessages( - wrong_month |> use_datetime(month = month) + wrong_month |> set_datetime(month = month) ), "month contains 1 unrecognised month" ) }) -test_that("use_datetime checks day format", { +test_that("set_datetime checks day format", { correct_day <- tibble(day = c(1, 30), col2 = 1:2) wrong_day <- tibble(day = c(13, 50), @@ -250,7 +250,7 @@ test_that("use_datetime checks day format", { col2 = 1:2) result <- correct_day |> - quiet_use_datetime(day = day) + quiet_set_datetime(day = day) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("day", "col2")) @@ -258,14 +258,14 @@ test_that("use_datetime checks day format", { expect_error( suppressMessages( - wrong_day |> use_datetime(day = day) + wrong_day |> set_datetime(day = day) ), "Value is outside" ) expect_error( suppressMessages( - wrong_class |> use_datetime(day = day) + wrong_class |> set_datetime(day = day) ), "day must be a numeric" ) diff --git a/tests/testthat/test-use_events.R b/tests/testthat/test-use_events.R index 
6b77ba8..6daaf30 100644 --- a/tests/testthat/test-use_events.R +++ b/tests/testthat/test-use_events.R @@ -1,10 +1,10 @@ -test_that("use_events() keeps fields when composite_id() is called, but only for occurrenceID fields", { - quiet_use_events <- purrr::quietly(use_events) +test_that("set_events() keeps fields when composite_id() is called, but only for occurrenceID fields", { + quiet_set_events <- purrr::quietly(set_events) df <- tibble(obs_type = "survey", site = seq_len(10), year = 2024) result <- df |> - quiet_use_events(eventID = composite_id(sequential_id(), + quiet_set_events(eventID = composite_id(sequential_id(), site, year), eventType = obs_type) @@ -14,8 +14,8 @@ test_that("use_events() keeps fields when composite_id() is called, but only for # i.e. components of `eventID` are retained, but `obs_type` is not }) -test_that("setting .keep_composite = 'unused' affects use_occurrences()", { - quiet_use_events <- purrr::quietly(use_events) +test_that("setting .keep_composite = 'unused' affects set_occurrences()", { + quiet_set_events <- purrr::quietly(set_events) df <- tibble(user_col = "humanObservation", site = seq_len(10), year = 2024) @@ -23,7 +23,7 @@ test_that("setting .keep_composite = 'unused' affects use_occurrences()", { site = seq_len(10), year = 2024) result <- df |> - quiet_use_events(eventID = composite_id(sequential_id(), + quiet_set_events(eventID = composite_id(sequential_id(), site, year), eventType = obs_type, @@ -33,13 +33,13 @@ test_that("setting .keep_composite = 'unused' affects use_occurrences()", { c("eventID", "eventType")) }) -test_that("sequential_id() works with use_events()", { +test_that("sequential_id() works with set_events()", { input <- tibble(eventDate = paste0(rep(c(2020:2024), 3), "-01-01"), basisOfRecord = "humanObservation", site = rep(c("A01", "A02", "A03"), each = 5)) suppressMessages( result <- input |> - use_events(eventID = sequential_id()) + set_events(eventID = sequential_id()) ) expect_equal(colnames(result), 
c("eventDate", "basisOfRecord", "site", "eventID")) @@ -48,22 +48,22 @@ test_that("sequential_id() works with use_events()", { expect_true(all(nchar(result$eventID) == 3)) }) -test_that("sequential_id() accepts `width` argument works with use_events()", { +test_that("sequential_id() accepts `width` argument works with set_events()", { input <- tibble(eventDate = paste0(rep(c(2020:2024), 3), "-01-01"), basisOfRecord = "humanObservation", site = rep(c("A01", "A02", "A03"), each = 5)) suppressMessages(result <- input |> - use_events(eventID = sequential_id(width = 10)) + set_events(eventID = sequential_id(width = 10)) ) expect_true(all(nchar(result$eventID) == 10)) }) -test_that("random_id() works with use_events()", { +test_that("random_id() works with set_events()", { input <- tibble(eventDate = paste0(rep(c(2020:2024), 3), "-01-01"), basisOfRecord = "humanObservation", site = rep(c("A01", "A02", "A03"), each = 5)) suppressMessages(result <- input |> - use_events(eventID = random_id())) + set_events(eventID = random_id())) expect_equal(colnames(result), c("eventDate", "basisOfRecord", "site", "eventID")) @@ -71,12 +71,12 @@ test_that("random_id() works with use_events()", { nrow(result)) }) -test_that("composite_id() works with use_events()", { +test_that("composite_id() works with set_events()", { input <- tibble(eventDate = paste0(rep(c(2020:2024), 3), "-01-01"), basisOfRecord = "humanObservation", site = rep(c("A01", "A02", "A03"), each = 5)) suppressMessages(result <- input |> - use_events(eventID = composite_id(site, eventDate)) + set_events(eventID = composite_id(site, eventDate)) ) expect_equal(colnames(result), c("eventDate", "basisOfRecord", "site", "eventID")) @@ -89,7 +89,7 @@ test_that("sequential_id() works within composite_id()", { basisOfRecord = "humanObservation", site = rep(c("A01", "A02", "A03"), each = 5)) suppressMessages(result <- input |> - use_events(eventID = composite_id(sequential_id(), + set_events(eventID = composite_id(sequential_id(), 
site, eventDate)) ) diff --git a/tests/testthat/test-use_individual_traits.R b/tests/testthat/test-use_individual_traits.R index 717edac..f631cf4 100644 --- a/tests/testthat/test-use_individual_traits.R +++ b/tests/testthat/test-use_individual_traits.R @@ -1,130 +1,130 @@ -test_that("use_individual_traits errors when missing .df", { - expect_error(use_individual_traits(individualID = individualID), +test_that("set_individual_traits errors when missing .df", { + expect_error(set_individual_traits(individualID = individualID), ".df is missing") }) -test_that("use_individual_traits errors when no dwc columns are named or exist in the df", { +test_that("set_individual_traits errors when no dwc columns are named or exist in the df", { df <- tibble::tibble(borp = "Oceania") - expect_warning(suppressMessages(use_individual_traits(df)), + expect_warning(suppressMessages(set_individual_traits(df)), "No Darwin Core terms detected") }) -test_that("use_individual_traits returns tibble with updated dwc column names", { - quiet_use_individual_traits <- purrr::quietly(use_individual_traits) +test_that("set_individual_traits returns tibble with updated dwc column names", { + quiet_set_individual_traits <- purrr::quietly(set_individual_traits) df <- tibble::tibble(user_col = "thisIsAnID") result <- df |> - quiet_use_individual_traits(individualID = user_col) + quiet_set_individual_traits(individualID = user_col) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_match(colnames(result$result), c("individualID")) }) -test_that("use_individual_traits detects unnamed but existing dwc column names in df", { - quiet_use_individual_traits <- purrr::quietly(use_individual_traits) +test_that("set_individual_traits detects unnamed but existing dwc column names in df", { + quiet_set_individual_traits <- purrr::quietly(set_individual_traits) df <- tibble::tibble(lifeStage = "zygote", col2 = 1:2) result <- df |> - quiet_use_individual_traits() + 
quiet_set_individual_traits() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("lifeStage", "col2")) }) -test_that("use_individual_traits has progress messages", { - quiet_use_individual_traits <- purrr::quietly(use_individual_traits) +test_that("set_individual_traits has progress messages", { + quiet_set_individual_traits <- purrr::quietly(set_individual_traits) df <- tibble::tibble(lifeStage = "zygote", col2 = 1:2) - result <- df |> quiet_use_individual_traits() + result <- df |> quiet_set_individual_traits() expect_false(is.null(result$messages)) }) -test_that("use_individual_traits checks individualID format", { +test_that("set_individual_traits checks individualID format", { df <- tibble::tibble(individualID = c("thisIsAnID", "thisIsAnDifferentID")) df_dupes <- tibble::tibble(individualID = c("thisIsAnID", "thisIsAnID")) expect_no_error(suppressMessages( - df |> use_individual_traits(individualID = individualID) + df |> set_individual_traits(individualID = individualID) )) expect_error(suppressMessages( - df_dupes |> use_individual_traits(individualID = individualID)), + df_dupes |> set_individual_traits(individualID = individualID)), "Duplicate values in individualID" ) }) -test_that("use_individual_traits checks lifeStage format", { +test_that("set_individual_traits checks lifeStage format", { df_chr <- tibble::tibble(lifeStage = c("zygote", "seedling")) df_dbl <- tibble::tibble(lifeStage = 1:3) expect_no_error(suppressMessages( - df_chr |> use_individual_traits(lifeStage = lifeStage) + df_chr |> set_individual_traits(lifeStage = lifeStage) )) expect_error(suppressMessages( - df_dbl |> use_individual_traits(lifeStage = lifeStage)), + df_dbl |> set_individual_traits(lifeStage = lifeStage)), "lifeStage must be a character vector, not integer" ) }) -test_that("use_individual_traits checks sex format", { +test_that("set_individual_traits checks sex format", { df_chr <- tibble::tibble(sex = c("male", 
"female")) df_dbl <- tibble::tibble(sex = 1:3) expect_no_error(suppressMessages( - df_chr |> use_individual_traits(sex = sex) + df_chr |> set_individual_traits(sex = sex) )) expect_error(suppressMessages( - df_dbl |> use_individual_traits(sex = sex)), + df_dbl |> set_individual_traits(sex = sex)), "sex must be a character vector, not integer" ) }) -test_that("use_individual_traits checks sex format", { +test_that("set_individual_traits checks sex format", { df_chr <- tibble::tibble(sex = c("male", "female")) df_dbl <- tibble::tibble(sex = 1:3) expect_no_error(suppressMessages( - df_chr |> use_individual_traits(sex = sex) + df_chr |> set_individual_traits(sex = sex) )) expect_error(suppressMessages( - df_dbl |> use_individual_traits(sex = sex)), + df_dbl |> set_individual_traits(sex = sex)), "sex must be a character vector, not integer" ) }) -test_that("use_individual_traits checks vitality format", { +test_that("set_individual_traits checks vitality format", { df_chr <- tibble::tibble(vitality = c("alive", "dead")) df_dbl <- tibble::tibble(vitality = 1:3) expect_no_error(suppressMessages( - df_chr |> use_individual_traits(vitality = vitality) + df_chr |> set_individual_traits(vitality = vitality) )) expect_error(suppressMessages( - df_dbl |> use_individual_traits(vitality = vitality)), + df_dbl |> set_individual_traits(vitality = vitality)), "vitality must be a character vector, not integer" ) }) -test_that("use_individual_traits checks reproductiveCondition format", { +test_that("set_individual_traits checks reproductiveCondition format", { df_chr <- tibble::tibble(reproductiveCondition = c("alive", "dead")) df_dbl <- tibble::tibble(reproductiveCondition = 1:3) expect_no_error(suppressMessages( - df_chr |> use_individual_traits(reproductiveCondition = reproductiveCondition) + df_chr |> set_individual_traits(reproductiveCondition = reproductiveCondition) )) expect_error(suppressMessages( - df_dbl |> use_individual_traits(reproductiveCondition = 
reproductiveCondition)), + df_dbl |> set_individual_traits(reproductiveCondition = reproductiveCondition)), "reproductiveCondition must be a character vector, not integer" ) }) diff --git a/tests/testthat/test-use_locality.R b/tests/testthat/test-use_locality.R index fb06a47..66cdddf 100644 --- a/tests/testthat/test-use_locality.R +++ b/tests/testthat/test-use_locality.R @@ -1,58 +1,58 @@ -test_that("use_locality errors when missing .df", { - expect_error(use_locality(continent = continent), +test_that("set_locality errors when missing .df", { + expect_error(set_locality(continent = continent), ".df is missing") }) -test_that("use_locality errors when no dwc columns are named or exist in the df", { +test_that("set_locality errors when no dwc columns are named or exist in the df", { df <- tibble::tibble(borp = "Oceania") - expect_warning(suppressMessages(use_locality(df)), + expect_warning(suppressMessages(set_locality(df)), "No Darwin Core terms detected") }) -test_that("use_locality returns tibble with updated dwc column names", { - quiet_use_locality <- purrr::quietly(use_locality) +test_that("set_locality returns tibble with updated dwc column names", { + quiet_set_locality <- purrr::quietly(set_locality) df <- tibble::tibble(user_col = "New South Wales") result <- df |> - quiet_use_locality(stateProvince = user_col) + quiet_set_locality(stateProvince = user_col) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_match(colnames(result$result), c("stateProvince")) }) -test_that("use_locality detects unnamed but existing dwc column names in df", { - quiet_use_locality <- purrr::quietly(use_locality) +test_that("set_locality detects unnamed but existing dwc column names in df", { + quiet_set_locality <- purrr::quietly(set_locality) df <- tibble::tibble(continent = "Oceania", col2 = 1:2) df2 <- tibble::tibble(continent = "borp", col2 = 1:2) result <- df |> - quiet_use_locality() + quiet_set_locality() expect_s3_class(result$result, c("tbl_df", 
"tbl", "data.frame")) expect_equal(colnames(result$result), c("continent", "col2")) expect_error( suppressMessages( - df2 |> use_locality() + df2 |> set_locality() ), "Unexpected value in continent") }) -test_that("use_locality has progress messages", { - quiet_use_locality <- purrr::quietly(use_locality) +test_that("set_locality has progress messages", { + quiet_set_locality <- purrr::quietly(set_locality) df <- tibble::tibble(stateProvince = "Victoria", col2 = 1:2) - result <- df |> quiet_use_locality() + result <- df |> quiet_set_locality() expect_false(is.null(result$messages)) }) -test_that("use_locality only accepts valid values for continent", { +test_that("set_locality only accepts valid values for continent", { valid_values <- c("Africa", "Antarctica", "Asia", "Europe", "North America", "Oceania", "South America") df_right <- tibble::tibble(continent = valid_values) @@ -60,39 +60,39 @@ test_that("use_locality only accepts valid values for continent", { df_num <- tibble::tibble(continent = 1:3) expect_no_error(suppressMessages( - df_right |> use_locality(continent = continent) + df_right |> set_locality(continent = continent) )) expect_error(suppressMessages( - df_wrong |> use_locality(continent = continent)), + df_wrong |> set_locality(continent = continent)), "Unexpected value in continent" ) expect_error(suppressMessages( - df_num |> use_locality(continent = continent)), + df_num |> set_locality(continent = continent)), "continent must be a character vector, not integer" ) }) -test_that("use_locality only accepts valid values for country", { +test_that("set_locality only accepts valid values for country", { valid_values <- c("Australia", "United States of America", "New Zealand") df_right <- tibble::tibble(country = valid_values) df_wrong <- tibble::tibble(country = c(valid_values, "blop")) expect_no_error(suppressMessages( - df_right |> use_locality(country = country) + df_right |> set_locality(country = country) )) expect_error(suppressMessages( - 
df_wrong |> use_locality(country = country)), + df_wrong |> set_locality(country = country)), "Unexpected value in country" ) expect_error(suppressMessages( - df_wrong |> use_locality(country = 3)), + df_wrong |> set_locality(country = 3)), "country must be a character vector, not numeric" ) }) -test_that("use_locality only accepts valid values for country", { +test_that("set_locality only accepts valid values for country", { # subset of common values valid_values <- c("Australia", "United States of America", "New Zealand", "Indonesia") @@ -100,19 +100,19 @@ test_that("use_locality only accepts valid values for country", { df_wrong <- tibble::tibble(country = c(valid_values, "blop")) expect_no_error(suppressMessages( - df_right |> use_locality(country = country) + df_right |> set_locality(country = country) )) expect_error(suppressMessages( - df_wrong |> use_locality(country = country)), + df_wrong |> set_locality(country = country)), "Unexpected value in country" ) expect_error(suppressMessages( - df_wrong |> use_locality(country = 3)), + df_wrong |> set_locality(country = 3)), "country must be a character vector, not numeric" ) }) -test_that("use_locality only accepts valid values for countryCode", { +test_that("set_locality only accepts valid values for countryCode", { # subset of common values valid_values <- c("AU", "US", "NZ", "JP", "ID") @@ -120,42 +120,42 @@ test_that("use_locality only accepts valid values for countryCode", { df_wrong <- tibble::tibble(countryCode = c(valid_values, "blop")) expect_no_error(suppressMessages( - df_right |> use_locality(countryCode = countryCode) + df_right |> set_locality(countryCode = countryCode) )) expect_error(suppressMessages( - df_wrong |> use_locality(countryCode = countryCode)), + df_wrong |> set_locality(countryCode = countryCode)), "Unexpected value in countryCode" ) expect_error(suppressMessages( - df_wrong |> use_locality(countryCode = 3)), + df_wrong |> set_locality(countryCode = 3)), "countryCode must be a 
character vector, not numeric" ) }) -test_that("use_locality checks stateProvince format", { +test_that("set_locality checks stateProvince format", { df_chr <- tibble::tibble(stateProvince = c("Victoria", "Perth")) df_dbl <- tibble::tibble(stateProvince =1:3) expect_no_error(suppressMessages( - df_chr |> use_locality(stateProvince = stateProvince) + df_chr |> set_locality(stateProvince = stateProvince) )) expect_error(suppressMessages( - df_dbl |> use_locality(stateProvince = stateProvince)), + df_dbl |> set_locality(stateProvince = stateProvince)), "stateProvince must be a character vector, not integer" ) }) -test_that("use_locality checks locality format", { +test_that("set_locality checks locality format", { df_chr <- tibble::tibble(locality = c("a place", "another place")) df_dbl <- tibble::tibble(locality =1:3) expect_no_error(suppressMessages( - df_chr |> use_locality(locality = locality) + df_chr |> set_locality(locality = locality) )) expect_error(suppressMessages( - df_dbl |> use_locality(locality = locality)), + df_dbl |> set_locality(locality = locality)), "locality must be a character vector, not integer" ) }) diff --git a/tests/testthat/test-use_observer.R b/tests/testthat/test-use_observer.R index ab678b5..effa150 100644 --- a/tests/testthat/test-use_observer.R +++ b/tests/testthat/test-use_observer.R @@ -1,81 +1,81 @@ -test_that("use_observer errors when missing .df", { - expect_error(use_observer(recordedBy = recordedBy), +test_that("set_observer errors when missing .df", { + expect_error(set_observer(recordedBy = recordedBy), ".df is missing") }) -test_that("use_observer errors when no dwc columns are named or exist in the df", { +test_that("set_observer errors when no dwc columns are named or exist in the df", { df <- tibble::tibble(borp = c("Generic Name", "Generic Name")) - expect_warning(suppressMessages(use_observer(df)), + expect_warning(suppressMessages(set_observer(df)), "No Darwin Core terms detected") }) -test_that("use_observer returns 
tibble with updated dwc column names", { - quiet_use_observer <- purrr::quietly(use_observer) +test_that("set_observer returns tibble with updated dwc column names", { + quiet_set_observer <- purrr::quietly(set_observer) df <- tibble::tibble(user_col = "New South Wales") result <- df |> - quiet_use_observer(recordedBy = user_col) + quiet_set_observer(recordedBy = user_col) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_match(colnames(result$result), c("recordedBy")) }) -test_that("use_observer detects unnamed but existing dwc column names in df", { - quiet_use_observer <- purrr::quietly(use_observer) +test_that("set_observer detects unnamed but existing dwc column names in df", { + quiet_set_observer <- purrr::quietly(set_observer) df <- tibble::tibble(recordedBy = "Oceania", col2 = 1:2) df2 <- tibble::tibble(recordedBy = 1:2, col2 = 1:2) result <- df |> - quiet_use_observer() + quiet_set_observer() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("recordedBy", "col2")) expect_error( suppressMessages( - df2 |> use_observer() + df2 |> set_observer() ), "recordedBy must be a character vector, not integer") }) -test_that("use_observer has progress messages", { - quiet_use_observer <- purrr::quietly(use_observer) +test_that("set_observer has progress messages", { + quiet_set_observer <- purrr::quietly(set_observer) df <- tibble::tibble(recordedBy = "Victoria", col2 = 1:2) - result <- df |> quiet_use_observer() + result <- df |> quiet_set_observer() expect_false(is.null(result$messages)) }) -test_that("use_observer checks recordedBy format", { +test_that("set_observer checks recordedBy format", { df_chr <- tibble::tibble(recordedBy = c("Generic Name", "Generic Name 2")) df_dbl <- tibble::tibble(recordedBy = 1:3) expect_no_error(suppressMessages( - df_chr |> use_observer(recordedBy = recordedBy) + df_chr |> set_observer(recordedBy = recordedBy) )) expect_error(suppressMessages( - df_dbl |> 
use_observer(recordedBy = recordedBy)), + df_dbl |> set_observer(recordedBy = recordedBy)), "recordedBy must be a character vector, not integer" ) }) -test_that("use_observer checks recordedByID format", { +test_that("set_observer checks recordedByID format", { df_chr <- tibble::tibble(recordedByID = c("https://orcid.org/0000-0002-1825-0097", "https://orcid.org/0000-0002-1825-0098")) df_dbl <- tibble::tibble(recordedByID = 1:3) expect_no_error(suppressMessages( - df_chr |> use_observer(recordedByID = recordedByID) + df_chr |> set_observer(recordedByID = recordedByID) )) expect_error(suppressMessages( - df_dbl |> use_observer(recordedByID = recordedByID)), + df_dbl |> set_observer(recordedByID = recordedByID)), "recordedByID must be a character vector, not integer" ) }) diff --git a/tests/testthat/test-use_occurrences.R b/tests/testthat/test-use_occurrences.R index 585157b..9dd5c9d 100644 --- a/tests/testthat/test-use_occurrences.R +++ b/tests/testthat/test-use_occurrences.R @@ -1,35 +1,35 @@ library(tibble) -test_that("use_occurrences errors when missing .df", { - expect_error(use_occurrences(basisOfRecord = basisOfRecord), +test_that("set_occurrences errors when missing .df", { + expect_error(set_occurrences(basisOfRecord = basisOfRecord), ".df is missing") }) -test_that("use_occurrences errors when no dwc columns are named, or exist in the df", { +test_that("set_occurrences errors when no dwc columns are named, or exist in the df", { df <- tibble(borp = "humanObservation") - expect_warning(suppressMessages(use_occurrences(df)), + expect_warning(suppressMessages(set_occurrences(df)), "No Darwin Core terms detected") }) -test_that("use_occurrences returns tibble with updated dwc column names", { - quiet_use_occurrences <- purrr::quietly(use_occurrences) +test_that("set_occurrences returns tibble with updated dwc column names", { + quiet_set_occurrences <- purrr::quietly(set_occurrences) df <- tibble(user_col = "humanObservation") result <- df |> - 
quiet_use_occurrences(basisOfRecord = user_col) + quiet_set_occurrences(basisOfRecord = user_col) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_match(colnames(result$result), c("basisOfRecord")) }) -test_that("use_occurrences() keeps fields when composite_id() is called, but only for occurrenceID fields", { - quiet_use_occurrences <- purrr::quietly(use_occurrences) +test_that("set_occurrences() keeps fields when composite_id() is called, but only for occurrenceID fields", { + quiet_set_occurrences <- purrr::quietly(set_occurrences) df <- tibble(user_col = "humanObservation", site = seq_len(10), year = 2024) result <- df |> - quiet_use_occurrences(basisOfRecord = user_col, + quiet_set_occurrences(basisOfRecord = user_col, occurrenceID = composite_id(sequential_id(), site, year)) @@ -39,13 +39,13 @@ test_that("use_occurrences() keeps fields when composite_id() is called, but onl # i.e. components of `occurrenceID` are retained, but `user_col` is not }) -test_that("setting .keep_composite = 'unused' affects use_occurrences()", { - quiet_use_occurrences <- purrr::quietly(use_occurrences) +test_that("setting .keep_composite = 'unused' affects set_occurrences()", { + quiet_set_occurrences <- purrr::quietly(set_occurrences) df <- tibble(user_col = "humanObservation", site = seq_len(10), year = 2024) result <- df |> - quiet_use_occurrences(basisOfRecord = user_col, + quiet_set_occurrences(basisOfRecord = user_col, occurrenceID = composite_id(sequential_id(), site, year), @@ -55,37 +55,37 @@ test_that("setting .keep_composite = 'unused' affects use_occurrences()", { c("occurrenceID", "basisOfRecord")) }) -test_that("use_occurrences detects unnamed but existing dwc column names in df", { - quiet_use_occurrences <- purrr::quietly(use_occurrences) +test_that("set_occurrences detects unnamed but existing dwc column names in df", { + quiet_set_occurrences <- purrr::quietly(set_occurrences) df <- tibble(basisOfRecord = "humanObservation", col2 = 1:2) df2 
<- tibble(basisOfRecord = "borp", col2 = 1:2) result <- df |> - quiet_use_occurrences() + quiet_set_occurrences() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("basisOfRecord", "col2")) expect_error( suppressMessages( - df2 |> use_occurrences() + df2 |> set_occurrences() ), "Unexpected value in basisOfRecord") }) -test_that("use_occurrences has progress messages", { - quiet_use_occurrences <- purrr::quietly(use_occurrences) +test_that("set_occurrences has progress messages", { + quiet_set_occurrences <- purrr::quietly(set_occurrences) df <- tibble(basisOfRecord = "humanObservation", col2 = 1:2) - result <- df |> quiet_use_occurrences() + result <- df |> quiet_set_occurrences() expect_false(is.null(result$messages)) }) -test_that("use_occurrences only accepts valid values for basisOfRecord", { +test_that("set_occurrences only accepts valid values for basisOfRecord", { valid_values <- c("humanObservation", "machineObservation", "livingSpecimen", "preservedSpecimen", "fossilSpecimen", "materialCitation") @@ -93,27 +93,27 @@ test_that("use_occurrences only accepts valid values for basisOfRecord", { df_wrong <- tibble(basisOfRecord = c(valid_values, "blop")) expect_no_error(suppressMessages( - df_right |> use_occurrences(basisOfRecord = basisOfRecord) + df_right |> set_occurrences(basisOfRecord = basisOfRecord) )) expect_error(suppressMessages( - df_wrong |> use_occurrences(basisOfRecord = basisOfRecord)), + df_wrong |> set_occurrences(basisOfRecord = basisOfRecord)), "Unexpected value in basisOfRecord" ) expect_error(suppressMessages( - df_wrong |> use_occurrences(basisOfRecord = 3)), + df_wrong |> set_occurrences(basisOfRecord = 3)), "basisOfRecord must be a character vector, not numeric" ) }) -test_that("use_occurrences checks occurrenceStatus format", { - quiet_use_occurrences <- purrr::quietly(use_occurrences) +test_that("set_occurrences checks occurrenceStatus format", { + quiet_set_occurrences <- 
purrr::quietly(set_occurrences) valid_values <- c("present", "absent") df <- tibble(occurrenceStatus = valid_values) df_wrong_class <- tibble(occurrenceStatus = c(1, 2)) df_wrong_name <- tibble(occurrenceStatus = c(valid_values, "blop")) result <- df |> - quiet_use_occurrences(occurrenceStatus = occurrenceStatus) + quiet_set_occurrences(occurrenceStatus = occurrenceStatus) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("occurrenceStatus")) @@ -121,25 +121,25 @@ test_that("use_occurrences checks occurrenceStatus format", { expect_error( suppressMessages( - df_wrong_class |> use_occurrences(occurrenceStatus = occurrenceStatus) + df_wrong_class |> set_occurrences(occurrenceStatus = occurrenceStatus) ), "occurrenceStatus must be a character vector, not numeric" ) expect_error( suppressMessages( - df_wrong_name |> use_occurrences(occurrenceStatus = occurrenceStatus) + df_wrong_name |> set_occurrences(occurrenceStatus = occurrenceStatus) ), "Unexpected value in occurrenceStatus" ) }) -test_that("sequential_id() works with use_occurrences()", { +test_that("sequential_id() works with set_occurrences()", { input <- tibble(eventDate = paste0(rep(c(2020:2024), 3), "-01-01"), basisOfRecord = "humanObservation", site = rep(c("A01", "A02", "A03"), each = 5)) suppressMessages(result <- input |> - use_occurrences(occurrenceID = sequential_id()) + set_occurrences(occurrenceID = sequential_id()) ) expect_equal(colnames(result), c("eventDate", "basisOfRecord", "site", "occurrenceID")) @@ -148,22 +148,22 @@ test_that("sequential_id() works with use_occurrences()", { expect_true(all(nchar(result$occurrenceID) == 3)) }) -test_that("sequential_id() accepts `width` argument with use_occurrences()", { +test_that("sequential_id() accepts `width` argument with set_occurrences()", { input <- tibble(eventDate = paste0(rep(c(2020:2024), 3), "-01-01"), basisOfRecord = "humanObservation", site = rep(c("A01", "A02", "A03"), each = 5)) 
suppressMessages(result <- input |> - use_occurrences(occurrenceID = sequential_id(width = 10)) + set_occurrences(occurrenceID = sequential_id(width = 10)) ) expect_true(all(nchar(result$occurrenceID) == 10)) }) -test_that("random_id() works with use_occurrences()", { +test_that("random_id() works with set_occurrences()", { input <- tibble(eventDate = paste0(rep(c(2020:2024), 3), "-01-01"), basisOfRecord = "humanObservation", site = rep(c("A01", "A02", "A03"), each = 5)) suppressMessages(result <- input |> - use_occurrences(occurrenceID = random_id()) + set_occurrences(occurrenceID = random_id()) ) expect_equal(colnames(result), c("eventDate", "basisOfRecord", "site", "occurrenceID")) @@ -171,12 +171,12 @@ test_that("random_id() works with use_occurrences()", { nrow(result)) }) -test_that("composite_id() works with use_occurrences()", { +test_that("composite_id() works with set_occurrences()", { input <- tibble(eventDate = paste0(rep(c(2020:2024), 3), "-01-01"), basisOfRecord = "humanObservation", site = rep(c("A01", "A02", "A03"), each = 5)) suppressMessages(result <- input |> - use_occurrences(occurrenceID = composite_id(site, eventDate)) + set_occurrences(occurrenceID = composite_id(site, eventDate)) ) expect_equal(colnames(result), c("eventDate", "basisOfRecord", "site", "occurrenceID")) @@ -189,7 +189,7 @@ test_that("sequential_id() works within composite_id()", { basisOfRecord = "humanObservation", site = rep(c("A01", "A02", "A03"), each = 5)) suppressMessages(result <- input |> - use_occurrences(occurrenceID = composite_id(sequential_id(), + set_occurrences(occurrenceID = composite_id(sequential_id(), site, eventDate)) ) diff --git a/tests/testthat/test-use_scientific_name.R b/tests/testthat/test-use_scientific_name.R index 629e009..6579cb8 100644 --- a/tests/testthat/test-use_scientific_name.R +++ b/tests/testthat/test-use_scientific_name.R @@ -1,99 +1,99 @@ library(tibble) -test_that("use_scientific_name errors when missing .df", { - 
expect_error(use_scientific_name(scientificName = scientificName), +test_that("set_scientific_name errors when missing .df", { + expect_error(set_scientific_name(scientificName = scientificName), ".df is missing") }) -test_that("use_scientific_name errors when no dwc columns are named, or exist in the df", { +test_that("set_scientific_name errors when no dwc columns are named, or exist in the df", { df <- tibble(borp = c("Callocephalon fimbriatum", "Eolophus roseicapilla")) - expect_warning(suppressMessages(use_scientific_name(df)), + expect_warning(suppressMessages(set_scientific_name(df)), "No Darwin Core terms detected") }) -test_that("use_scientific_name returns tibble with updated dwc column names", { - quiet_use_scientific_name <- purrr::quietly(use_scientific_name) +test_that("set_scientific_name returns tibble with updated dwc column names", { + quiet_set_scientific_name <- purrr::quietly(set_scientific_name) df <- tibble(user_col = c("Callocephalon fimbriatum", "Eolophus roseicapilla")) result <- df |> - quiet_use_scientific_name(scientificName = user_col) + quiet_set_scientific_name(scientificName = user_col) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("scientificName")) }) -test_that("use_scientific_name detects unnamed but existing dwc column names in df", { - quiet_use_scientific_name <- purrr::quietly(use_scientific_name) +test_that("set_scientific_name detects unnamed but existing dwc column names in df", { + quiet_set_scientific_name <- purrr::quietly(set_scientific_name) df <- tibble(scientificName = c("Callocephalon fimbriatum", "Eolophus roseicapilla"), scientificNameAuthorship = c("Fred", "Mary"), col2 = 1:2) result <- df |> - quiet_use_scientific_name() + quiet_set_scientific_name() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("scientificName", "scientificNameAuthorship", "col2")) }) -test_that("use_scientific_name has 
progress messages", { - quiet_use_scientific_name <- purrr::quietly(use_scientific_name) +test_that("set_scientific_name has progress messages", { + quiet_set_scientific_name <- purrr::quietly(set_scientific_name) df <- tibble(scientificName = c("Callocephalon fimbriatum", "Eolophus roseicapilla"), col2 = 1:2) - result <- df |> quiet_use_scientific_name() + result <- df |> quiet_set_scientific_name() expect_false(is.null(result$messages)) }) -test_that("use_scientific_name checks scientificName format", { - quiet_use_scientific_name <- purrr::quietly(use_scientific_name) +test_that("set_scientific_name checks scientificName format", { + quiet_set_scientific_name <- purrr::quietly(set_scientific_name) df <- tibble(scientificName = c("Callocephalon fimbriatum", "Eolophus roseicapilla"), col2 = 1:2) - result <- df |> quiet_use_scientific_name() + result <- df |> quiet_set_scientific_name() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("scientificName", "col2")) expect_error(suppressMessages( - df |> use_scientific_name(scientificName = col2) + df |> set_scientific_name(scientificName = col2) ), "scientificName must be a character vector, not integer" ) }) -test_that("use_scientific_name checks taxonRank format", { - quiet_use_scientific_name <- purrr::quietly(use_scientific_name) +test_that("set_scientific_name checks taxonRank format", { + quiet_set_scientific_name <- purrr::quietly(set_scientific_name) df <- tibble(taxonRank = c("family", "species"), col2 = 1:2) - result <- df |> quiet_use_scientific_name() + result <- df |> quiet_set_scientific_name() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("taxonRank", "col2")) expect_error(suppressMessages( - df |> use_scientific_name(taxonRank = col2) + df |> set_scientific_name(taxonRank = col2) ), "taxonRank must be a character vector, not integer" ) }) -test_that("use_scientific_name checks 
scientificNameAuthorship format", { - quiet_use_scientific_name <- purrr::quietly(use_scientific_name) +test_that("set_scientific_name checks scientificNameAuthorship format", { + quiet_set_scientific_name <- purrr::quietly(set_scientific_name) df <- tibble(scientificNameAuthorship = c("(Györfi, 1952)", "R. A. Graham"), col2 = 1:2) - result <- df |> quiet_use_scientific_name() + result <- df |> quiet_set_scientific_name() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("scientificNameAuthorship", "col2")) expect_error(suppressMessages( - df |> use_scientific_name(scientificNameAuthorship = col2) + df |> set_scientific_name(scientificNameAuthorship = col2) ), "scientificNameAuthorship must be a character vector, not integer" ) diff --git a/tests/testthat/test-use_sf.R b/tests/testthat/test-use_sf.R index fe308d6..7ecb461 100644 --- a/tests/testthat/test-use_sf.R +++ b/tests/testthat/test-use_sf.R @@ -9,91 +9,91 @@ occs_clean <- occs |> sf::st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), crs = 4326) #--- -test_that("use_sf errors when missing .df", { - expect_error(use_sf(geometry = geometry), +test_that("set_coordinates_sf errors when missing .df", { + expect_error(set_coordinates_sf(geometry = geometry), ".df is missing") }) -test_that("use_sf errors when no dwc columns are named, or exist in the df", { +test_that("set_coordinates_sf errors when no dwc columns are named, or exist in the df", { df <- tibble(borp = c(149.125, 149.133)) expect_error( # extra error when sf object isn't found - expect_warning(suppressMessages(df |> use_coordinates_sf()), + expect_warning(suppressMessages(df |> set_coordinates_sf()), "No Darwin Core terms detected") ) }) -test_that("use_sf returns tibble with updated dwc column names", { - quiet_use_sf <- purrr::quietly(use_sf) +test_that("set_coordinates_sf returns tibble with updated dwc column names", { + quiet_set_coordinates_sf <- purrr::quietly(set_coordinates_sf) 
df <- occs_clean |> dplyr::select(recordID) result <- df |> - quiet_use_sf(geometry = geometry) + quiet_set_coordinates_sf(geometry = geometry) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("recordID", "decimalLongitude", "decimalLatitude", "geodeticDatum")) }) -test_that("use_sf detects unnamed but existing `geometry` in df", { - quiet_use_sf <- purrr::quietly(use_sf) +test_that("set_coordinates_sf detects unnamed but existing `geometry` in df", { + quiet_set_coordinates_sf <- purrr::quietly(set_coordinates_sf) df <- occs_clean |> dplyr::select(recordID) result <- df |> - quiet_use_sf() + quiet_set_coordinates_sf() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("recordID", "decimalLongitude", "decimalLatitude", "geodeticDatum")) }) -test_that("use_sf has progress messages", { - quiet_use_sf <- purrr::quietly(use_sf) +test_that("set_coordinates_sf has progress messages", { + quiet_set_coordinates_sf <- purrr::quietly(set_coordinates_sf) df <- occs_clean |> dplyr::select(recordID) - result <- df |> quiet_use_sf() + result <- df |> quiet_set_coordinates_sf() expect_false(is.null(result$messages)) }) -test_that("use_sf messages when successfully converted columns", { - quiet_use_sf <- purrr::quietly(use_sf) +test_that("set_coordinates_sf messages when successfully converted columns", { + quiet_set_coordinates_sf <- purrr::quietly(set_coordinates_sf) df <- occs_clean |> dplyr::select(recordID) - result <- df |> quiet_use_sf() + result <- df |> quiet_set_coordinates_sf() expect_contains(result$messages[5], "* Converted geometry > decimalLongitude, decimalLatitude, and geodeticDatum.") }) -test_that("use_sf accepts user-renamed `geometry` column if specified", { - quiet_use_sf <- purrr::quietly(use_sf) +test_that("set_coordinates_sf accepts user-renamed `geometry` column if specified", { + quiet_set_coordinates_sf <- purrr::quietly(set_coordinates_sf) df 
<- occs_clean |> dplyr::select(recordID) sf::st_geometry(df) <- "coords" result <- df |> - quiet_use_sf(geometry = coords) + quiet_set_coordinates_sf(geometry = coords) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), c("recordID", "decimalLongitude", "decimalLatitude", "geodeticDatum")) }) -test_that("use_sf errors when wrong `geometry` column is specified", { +test_that("set_coordinates_sf errors when wrong `geometry` column is specified", { df <- occs_clean |> dplyr::select(recordID) sf::st_geometry(df) <- "coords" expect_error(suppressMessages( df |> - use_sf(geometry = borp) + set_coordinates_sf(geometry = borp) ), "Must specify an existing 'geometry' column") }) -test_that("use_sf errors when df is not an sf object", { +test_that("set_coordinates_sf errors when df is not an sf object", { df <- tibble( latitude = c(-35.310, -35.273), longitude = c(149.125, 149.133) @@ -101,32 +101,32 @@ test_that("use_sf errors when df is not an sf object", { expect_error(suppressMessages( df |> - use_sf() + set_coordinates_sf() ), "No geometry detected") }) -test_that("use_sf errors when df is not an sf object", { +test_that("set_coordinates_sf errors when df is not an sf object", { sf_map <- ozmaps::ozmap_states # TODO: Replace with another sf file, or download to testthat expect_error(suppressMessages( sf_map |> - use_sf() + set_coordinates_sf() ), ".df geometry must be of type 'POINT', not 'MULTIPOLYGON'") }) -test_that("use_sf warns that geometry has been dropped from df", { - quiet_use_sf <- purrr::quietly(use_sf) +test_that("set_coordinates_sf warns that geometry has been dropped from df", { + quiet_set_coordinates_sf <- purrr::quietly(set_coordinates_sf) df <- occs_clean |> dplyr::select(recordID) result <- df |> - quiet_use_sf() + quiet_set_coordinates_sf() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_false(is.null(result$warnings)) expect_equal(result$warnings, "geometry dropped from 
data frame.") }) -# use_sf errors when missing crs +# set_coordinates_sf errors when missing crs diff --git a/tests/testthat/test-use_taxonomy.R b/tests/testthat/test-use_taxonomy.R index 727a519..e52ad0a 100644 --- a/tests/testthat/test-use_taxonomy.R +++ b/tests/testthat/test-use_taxonomy.R @@ -1,165 +1,165 @@ -test_that("use_taxonomy errors when missing .df", { - expect_error(use_taxonomy(kingdom = kingdom), +test_that("set_taxonomy errors when missing .df", { + expect_error(set_taxonomy(kingdom = kingdom), ".df is missing") }) -test_that("use_taxonomy errors when no dwc columns are named or exist in the df", { +test_that("set_taxonomy errors when no dwc columns are named or exist in the df", { df <- tibble::tibble(borp = "Oceania") - expect_warning(suppressMessages(use_taxonomy(df)), + expect_warning(suppressMessages(set_taxonomy(df)), "No Darwin Core terms detected") }) -test_that("use_taxonomy returns tibble with updated dwc column names", { - quiet_use_taxonomy <- purrr::quietly(use_taxonomy) +test_that("set_taxonomy returns tibble with updated dwc column names", { + quiet_set_taxonomy <- purrr::quietly(set_taxonomy) df <- tibble::tibble(user_col = "New South Wales") result <- df |> - quiet_use_taxonomy(kingdom = user_col) + quiet_set_taxonomy(kingdom = user_col) expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_match(colnames(result$result), c("kingdom")) }) -test_that("use_taxonomy detects unnamed but existing dwc column names in df", { - quiet_use_taxonomy <- purrr::quietly(use_taxonomy) +test_that("set_taxonomy detects unnamed but existing dwc column names in df", { + quiet_set_taxonomy <- purrr::quietly(set_taxonomy) df <- tibble::tibble(kingdom = "Plantae", col2 = 1:2) df2 <- tibble::tibble(kingdom = 1:2, # class check at the moment, but could be values col2 = 1:2) result <- df |> - quiet_use_taxonomy() + quiet_set_taxonomy() expect_s3_class(result$result, c("tbl_df", "tbl", "data.frame")) expect_equal(colnames(result$result), 
c("kingdom", "col2")) expect_error( suppressMessages( - df2 |> use_taxonomy() + df2 |> set_taxonomy() ), "kingdom must be a character vector, not integer") }) -test_that("use_taxonomy has progress messages", { - quiet_use_taxonomy <- purrr::quietly(use_taxonomy) +test_that("set_taxonomy has progress messages", { + quiet_set_taxonomy <- purrr::quietly(set_taxonomy) df <- tibble::tibble(kingdom = "Fungi", col2 = 1:2) - result <- df |> quiet_use_taxonomy() + result <- df |> quiet_set_taxonomy() expect_false(is.null(result$messages)) }) -test_that("use_taxonomy checks kingdom format", { +test_that("set_taxonomy checks kingdom format", { df_chr <- tibble::tibble(kingdom = c("plantae", "plantae")) df_dbl <- tibble::tibble(kingdom =1:3) expect_no_error(suppressMessages( - df_chr |> use_taxonomy(kingdom = kingdom) + df_chr |> set_taxonomy(kingdom = kingdom) )) expect_error(suppressMessages( - df_dbl |> use_taxonomy(kingdom = kingdom)), + df_dbl |> set_taxonomy(kingdom = kingdom)), "kingdom must be a character vector, not integer" ) }) -test_that("use_taxonomy checks phylum format", { +test_that("set_taxonomy checks phylum format", { df_chr <- tibble::tibble(phylum = c("Chordata", "Chordata")) df_dbl <- tibble::tibble(phylum =1:3) expect_no_error(suppressMessages( - df_chr |> use_taxonomy(phylum = phylum) + df_chr |> set_taxonomy(phylum = phylum) )) expect_error(suppressMessages( - df_dbl |> use_taxonomy(phylum = phylum)), + df_dbl |> set_taxonomy(phylum = phylum)), "phylum must be a character vector, not integer" ) }) -test_that("use_taxonomy checks class format", { +test_that("set_taxonomy checks class format", { df_chr <- tibble::tibble(class = c("Amphibia", "Amphibia")) df_dbl <- tibble::tibble(class =1:3) expect_no_error(suppressMessages( - df_chr |> use_taxonomy(class = class) + df_chr |> set_taxonomy(class = class) )) expect_error(suppressMessages( - df_dbl |> use_taxonomy(class = class)), + df_dbl |> set_taxonomy(class = class)), "class must be a character vector, 
not integer" ) }) -test_that("use_taxonomy checks order format", { +test_that("set_taxonomy checks order format", { df_chr <- tibble::tibble(order = c("Anura", "Anura")) df_dbl <- tibble::tibble(order =1:3) expect_no_error(suppressMessages( - df_chr |> use_taxonomy(order = order) + df_chr |> set_taxonomy(order = order) )) expect_error(suppressMessages( - df_dbl |> use_taxonomy(order = order)), + df_dbl |> set_taxonomy(order = order)), "order must be a character vector, not integer" ) }) -test_that("use_taxonomy checks family format", { +test_that("set_taxonomy checks family format", { df_chr <- tibble::tibble(family = c("Myobatrachidae", "Myobatrachidae")) df_dbl <- tibble::tibble(family =1:3) expect_no_error(suppressMessages( - df_chr |> use_taxonomy(family = family) + df_chr |> set_taxonomy(family = family) )) expect_error(suppressMessages( - df_dbl |> use_taxonomy(family = family)), + df_dbl |> set_taxonomy(family = family)), "family must be a character vector, not integer" ) }) -test_that("use_taxonomy checks genus format", { +test_that("set_taxonomy checks genus format", { df_chr <- tibble::tibble(genus = c("Pseudophryne", "Pseudophryne")) df_dbl <- tibble::tibble(genus =1:3) expect_no_error(suppressMessages( - df_chr |> use_taxonomy(genus = genus) + df_chr |> set_taxonomy(genus = genus) )) expect_error(suppressMessages( - df_dbl |> use_taxonomy(genus = genus)), + df_dbl |> set_taxonomy(genus = genus)), "genus must be a character vector, not integer" ) }) -test_that("use_taxonomy checks specificEpithet format", { +test_that("set_taxonomy checks specificEpithet format", { df_chr <- tibble::tibble(specificEpithet = c("corroboree", "corroboree")) df_dbl <- tibble::tibble(specificEpithet =1:3) expect_no_error(suppressMessages( - df_chr |> use_taxonomy(specificEpithet = specificEpithet) + df_chr |> set_taxonomy(specificEpithet = specificEpithet) )) expect_error(suppressMessages( - df_dbl |> use_taxonomy(specificEpithet = specificEpithet)), + df_dbl |> 
set_taxonomy(specificEpithet = specificEpithet)), "specificEpithet must be a character vector, not integer" ) }) -test_that("use_taxonomy checks vernacularName format", { +test_that("set_taxonomy checks vernacularName format", { df_chr <- tibble::tibble(vernacularName = c("corroboree frog", "corroboree frog")) df_dbl <- tibble::tibble(vernacularName =1:3) expect_no_error(suppressMessages( - df_chr |> use_taxonomy(vernacularName = vernacularName) + df_chr |> set_taxonomy(vernacularName = vernacularName) )) expect_error(suppressMessages( - df_dbl |> use_taxonomy(vernacularName = vernacularName)), + df_dbl |> set_taxonomy(vernacularName = vernacularName)), "vernacularName must be a character vector, not integer" ) }) diff --git a/vignettes/checking-your-dataset.Rmd b/vignettes/checking-your-dataset.Rmd index cea1c5c..23d5b4e 100644 --- a/vignettes/checking-your-dataset.Rmd +++ b/vignettes/checking-your-dataset.Rmd @@ -40,12 +40,12 @@ df <- tibble::tibble( df ``` -I can use the function `use_coordinates()` to specify which of my columns refer to the valid Darwin Core terms `decimalLatitude` and `decimalLongitude`. I have intentionally added the wrong column `place` as `decimalLatitude`. corella will return an error because `decimalLatitude` and `decimalLatitude` fields must be numeric in Darwin Core standard. This error comes from a small internal checking function called `check_decimalLatitude()`. +I can use the function `set_coordinates()` to specify which of my columns refer to the valid Darwin Core terms `decimalLatitude` and `decimalLongitude`. I have intentionally added the wrong column `place` as `decimalLatitude`. corella will return an error because `decimalLatitude` and `decimalLongitude` fields must be numeric in Darwin Core standard. This error comes from a small internal checking function called `check_decimalLatitude()`. 
```{r} #| error: true df |> - use_coordinates(decimalLatitude = place, # wrong column + set_coordinates(decimalLatitude = place, # wrong column decimalLongitude = longitude) ``` @@ -58,7 +58,7 @@ corella contains internal `check_` functions for all individual Darwin Core term # generate table # fn_to_term_table() |> # dplyr::bind_rows() |> -# write.csv("vignettes/supported-terms.csv") +# write.csv("supported-terms.csv") ``` @@ -80,7 +80,7 @@ object |> dplyr::rename( "Term" = dwc_term, "check function" = check_function, - "use function" = use_function + "set function" = set_function ) |> gt() |> cols_align( @@ -102,7 +102,7 @@ object |> cell_borders(sides = c("l"), color = "gray50", weight = px(3)), cell_text(font = c(google_font(name = "Fira Mono"))) ), - locations = cells_body(columns = c("check function", "use function")) + locations = cells_body(columns = c("check function", "set function")) ) |> tab_options( container.height = "450px" @@ -111,7 +111,7 @@ object |>
-When a user specifies a column to a matching Darwin Core term (or the column/term is detected by corella automatically) in a `use_` function, the `use_` function triggers that matching term's `check_` function. This process ensures that the data is correctly formatted prior to being saved in a Darwin Core Archive. +When a user specifies a column to a matching Darwin Core term (or the column/term is detected by corella automatically) in a `set_` function, the `set_` function triggers that matching term's `check_` function. This process ensures that the data is correctly formatted prior to being saved in a Darwin Core Archive. It's useful to know that these internal, individual `check_` functions exist because they are the building blocks of a full suite of checks, which users can run with `check_dataset()`. @@ -146,4 +146,4 @@ Note that `check_dataset()` currently only accepts occurrence-level datasets. Da ## Users have options -corella offers two options for checking a dataset, which we have detailed above: Running individual checks through `use_` functions, or running a "test suite" with `check_dataset()`. We hope that these alternative options provide users with different options for their workflow, allowing them to choose their favourite method or switch between methods as they standardise their data. +corella offers two options for checking a dataset, which we have detailed above: Running individual checks through `set_` functions, or running a "test suite" with `check_dataset()`. We hope that these alternative options provide users with different options for their workflow, allowing them to choose their favourite method or switch between methods as they standardise their data. diff --git a/vignettes/quick_start_guide.Rmd b/vignettes/quick_start_guide.Rmd index 02138e0..895ea58 100644 --- a/vignettes/quick_start_guide.Rmd +++ b/vignettes/quick_start_guide.Rmd @@ -85,55 +85,55 @@ df <- tibble( df ``` -We can standardise our data with `use_` functions. 
These functions a prefix of `use_`, and a suffix named to suggest what type of data they are used to standardise. For example, `use_coordinates` or `use_datetime`. The argument names in `use_` functions are valid Darwin Core terms. Our hope is that by grouping Darwin Core terms into groups based on their data type, corella makes it easier for users to know which Darwin Core terms to use as column names (which is one of the most confusing parts of Darwin Core). +We can standardise our data with `set_` functions. These functions have a prefix of `set_`, and a suffix named to suggest what type of data they are used to standardise. For example, `set_coordinates` or `set_datetime`. The argument names in `set_` functions are valid Darwin Core terms. Our hope is that by grouping Darwin Core terms into groups based on their data type, corella makes it easier for users to know which Darwin Core terms to use as column names (which is one of the most confusing parts of Darwin Core). -Let's specify that the scientific name (i.e. genus & species name) in our data is in the `species` column by using `use_scientific_name()` to update our dataframe. +Let's specify that the scientific name (i.e. genus & species name) in our data is in the `species` column by using `set_scientific_name()` to update our dataframe. ```{r} df |> - use_scientific_name(scientificName = species) + set_scientific_name(scientificName = species) ``` You'll notice 2 things happen: 1. The `species` column in our dataframe has been renamed to `scientificName`. - 2. `use_scientific_name()` ran a check on our `species` column to make sure it was formatted correctly. + 2. `set_scientific_name()` ran a check on our `species` column to make sure it was formatted correctly. -What happens when we add a column with an error in it? In our `df`, the `latitude` column is "accidentally" a class `character` column, when it should be a `numeric` column. 
When we try to add it using `use_coordinates()`, an error will tell us something is wrong. +What happens when we add a column with an error in it? In our `df`, the `latitude` column is "accidentally" a class `character` column, when it should be a `numeric` column. When we try to add it using `set_coordinates()`, an error will tell us something is wrong. ```{r} #| eval: true #| error: true df |> - use_scientific_name(scientificName = species) |> - use_coordinates(decimalLongitude = longitude, + set_scientific_name(scientificName = species) |> + set_coordinates(decimalLongitude = longitude, decimalLatitude = latitude) ``` ## Fix or update columns -How do we fix the column that caused an error? One way is to edit the column within the `use_` function containing our problematic column. +How do we fix the column that caused an error? One way is to edit the column within the `set_` function containing our problematic column. -Each `use_` function is essentially a specialised [`mutate()`]() function from [dplyr](). This means you can edit columns using the same process you would use with `mutate()`. Let's fix the `latitude` column so that it is class `numeric`, which will fix the error. +Each `set_` function is essentially a specialised [`mutate()`]() function from [dplyr](). This means you can edit columns using the same process you would use with `mutate()`. Let's fix the `latitude` column so that it is class `numeric`, which will fix the error. ```{r} df |> - use_scientific_name(scientificName = species) |> - use_coordinates(decimalLongitude = longitude, + set_scientific_name(scientificName = species) |> + set_coordinates(decimalLongitude = longitude, decimalLatitude = as.numeric(latitude)) ``` ## Auto-detect columns -corella is also able to detect when columns already contain valid Darwin Core terms as column names. For example, `df` contains columns with locality information. We can add `use_locality()` to our pipe to specify and check these columns. 
Some of our columns already have valid Darwin Core terms as column names (`country` and `continent`). If we add `use_locality()` to our pipe without any arguments, it will detect these valid Darwin Core columns in `df` and check them automatically. +corella is also able to detect when columns already contain valid Darwin Core terms as column names. For example, `df` contains columns with locality information. We can add `set_locality()` to our pipe to specify and check these columns. Some of our columns already have valid Darwin Core terms as column names (`country` and `continent`). If we add `set_locality()` to our pipe without any arguments, it will detect these valid Darwin Core columns in `df` and check them automatically. ```{r} df |> - use_scientific_name(scientificName = species) |> - use_coordinates(decimalLongitude = longitude, + set_scientific_name(scientificName = species) |> + set_coordinates(decimalLongitude = longitude, decimalLatitude = as.numeric(latitude)) |> - use_locality() + set_locality() ``` By auto-detecting columns, corella prevents users from having to specify every single column when they are already named correctly in their dataset. This functionality will definitely help reduce the workload on datasets with lots of columns! @@ -159,14 +159,14 @@ df |> `suggest_workflow()` will update the suggested function pipe to only suggest functions that are necessary to standardise your data correctly. -For example, after using one of the suggested functions `use_occurrences()`, if we run `suggest_workflow()` again, the output message no longer suggests `use_occurrences()`. +For example, after using one of the suggested functions `set_occurrences()`, if we run `suggest_workflow()` again, the output message no longer suggests `set_occurrences()`. 
```{r} #| message: false #| warning: false #| error: false df_edited <- df |> - use_occurrences( + set_occurrences( occurrenceID = seq_len(nrow(df)), basisOfRecord = "humanObservation" ) @@ -179,7 +179,7 @@ df_edited |> # Test your data -If your dataset already uses valid Darwin Core terms as column names, instead of working through each `use_` function, you might wish to run tests on your entire dataset. To run checks on your data like a test suite, use `check_dataset()`. Much like `devtools::test()` or `devtools::check()`, `check_dataset()` runs the relevant check on each matching Darwin Core column and returns a summary of the results, along with any error messages returned by those checks. +If your dataset already uses valid Darwin Core terms as column names, instead of working through each `set_` function, you might wish to run tests on your entire dataset. To run checks on your data like a test suite, use `check_dataset()`. Much like `devtools::test()` or `devtools::check()`, `check_dataset()` runs the relevant check on each matching Darwin Core column and returns a summary of the results, along with any error messages returned by those checks. ```{r} df <- tibble( @@ -197,7 +197,7 @@ df |> ``` -# `use_measurements`: a work in progress +# `set_measurements`: a work in progress Let's use a small sample of a real dataset of Australian native plant species, including Eucalypts and Acacias. The dataset contains lots of columns containing measurements that capture the traits or environment of each species occurrence. I have extracted 3 examples of these measurement columns: `LMA_g.m2`, `LeafN_area_g.m2`, `PNUE`. @@ -213,18 +213,18 @@ library(tidyr) df_filtered <- read.csv("./westerband_2022_wdate.csv") |> select(Site, Species, Latitude, Longitude, LMA_g.m2, LeafN_area_g.m2, PNUE) -df_filtered +df_filtered |> print(n = 10) # see first 10 rows ``` The way that Darwin Core handles measurement fields is slightly different to how they are organised in these data. 
In `df_filtered`, lots of information about what data the column contains is in the abbreviated column name. This includes the type of measure, the unit it is measured in, and the value itself. In Darwin Core, this information needs to be documented individually for each measurement, which requires multiple columns and multiple rows (ie a 'long' data format). -corella can convert measurement columns into Darwin Core standard with `use_measurements()`. This function requires users to specify the column, unit and measure type for each respective measurement. `use_measurements()` then assigns this information to the correct Darwin Core columns, and nests these columns into a nested column called `measurementOrFact`. +corella can convert measurement columns to the Darwin Core standard with `set_measurements()`. This function requires users to specify the column, unit and measurement type for each measurement. `set_measurements()` then assigns this information to the correct Darwin Core columns and nests them in a single column called `measurementOrFact`. 
```{r, message=FALSE, warning=FALSE} df_nested <- df_filtered |> slice(220:270) |> - use_measurements(cols = c(LMA_g.m2, + set_measurements(cols = c(LMA_g.m2, LeafN_area_g.m2, PNUE), unit = c("g/m2", diff --git a/vignettes/supported-terms.csv b/vignettes/supported-terms.csv index b0985cc..538d407 100644 --- a/vignettes/supported-terms.csv +++ b/vignettes/supported-terms.csv @@ -1,46 +1,46 @@ -"","use_function","dwc_term" -"1","use_occurrences()","basisOfRecord" -"2","use_occurrences()","occurrenceID" -"3","use_scientific_name()","scientificName" -"4","use_coordinates()","decimalLatitude" -"5","use_coordinates()","decimalLongitude" -"6","use_coordinates()","geodeticDatum" -"7","use_coordinates()","coordinateUncertaintyInMeters" -"8","use_datetime()","eventDate" -"9","use_locality()","continent" -"10","use_locality()","country" -"11","use_locality()","countryCode" -"12","use_locality()","stateProvince" -"13","use_locality()","locality" -"14","use_taxonomy()","kingdom" -"15","use_taxonomy()","phylum" -"16","use_taxonomy()","class" -"17","use_taxonomy()","order" -"18","use_taxonomy()","family" -"19","use_taxonomy()","genus" -"20","use_taxonomy()","specificEpithet" -"21","use_taxonomy()","vernacularName" -"22","use_abundance()","individualCount" -"23","use_abundance()","organismQuantity" -"24","use_abundance()","organismQuantityType" -"25","use_abundance()","organismQuantity" -"26","use_collection()","datasetID" -"27","use_collection()","datasetName" -"28","use_collection()","catalogNumber" -"29","use_coordinates()","coordinatePrecision" -"30","use_scientific_name()","taxonRank" -"31","use_scientific_name()","scientificNameAuthorship" -"32","use_datetime()","year" -"33","use_datetime()","month" -"34","use_datetime()","day" -"35","use_datetime()","eventTime" -"36","use_individual_traits()","individualID" -"37","use_individual_traits()","lifeStage" -"38","use_individual_traits()","sex" -"39","use_individual_traits()","vitality" 
-"40","use_individual_traits()","reproductiveCondition" -"41","use_observer()","recordedBy" -"42","use_observer()","recordedByID" -"43","use_events()","eventID" -"44","use_events()","eventType" -"45","use_events()","parentEventID" +"","set_function","dwc_term" +"1","set_occurrences()","basisOfRecord" +"2","set_occurrences()","occurrenceID" +"3","set_scientific_name()","scientificName" +"4","set_coordinates()","decimalLatitude" +"5","set_coordinates()","decimalLongitude" +"6","set_coordinates()","geodeticDatum" +"7","set_coordinates()","coordinateUncertaintyInMeters" +"8","set_datetime()","eventDate" +"9","set_locality()","continent" +"10","set_locality()","country" +"11","set_locality()","countryCode" +"12","set_locality()","stateProvince" +"13","set_locality()","locality" +"14","set_taxonomy()","kingdom" +"15","set_taxonomy()","phylum" +"16","set_taxonomy()","class" +"17","set_taxonomy()","order" +"18","set_taxonomy()","family" +"19","set_taxonomy()","genus" +"20","set_taxonomy()","specificEpithet" +"21","set_taxonomy()","vernacularName" +"22","set_abundance()","individualCount" +"23","set_abundance()","organismQuantity" +"24","set_abundance()","organismQuantityType" +"25","set_abundance()","organismQuantity" +"26","set_collection()","datasetID" +"27","set_collection()","datasetName" +"28","set_collection()","catalogNumber" +"29","set_coordinates()","coordinatePrecision" +"30","set_scientific_name()","taxonRank" +"31","set_scientific_name()","scientificNameAuthorship" +"32","set_datetime()","year" +"33","set_datetime()","month" +"34","set_datetime()","day" +"35","set_datetime()","eventTime" +"36","use_individual_traits()","individualID" +"37","use_individual_traits()","lifeStage" +"38","use_individual_traits()","sex" +"39","use_individual_traits()","vitality" +"40","use_individual_traits()","reproductiveCondition" +"41","set_observer()","recordedBy" +"42","set_observer()","recordedByID" +"43","set_events()","eventID" +"44","set_events()","eventType" 
+"45","set_events()","parentEventID"