StoXProject · arnejohannesholmin · Oct 29, 2024 · Oct 28, 2024 · Oct 29, 2024 · Oct 29, 2024
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: RstoxData
-Version: 2.0.1-9004
-Date: 2024-10-08
+Version: 2.0.1-9005
+Date: 2024-10-29
 Title: Tools to Read and Manipulate Fisheries Data
 Authors@R: c(
   person(given = "Edvin", 

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,10 @@
+# RstoxData v2.0.1-9005  (2024-10-22)
+* Fixed bug in as.numeric_IfPossible() used by setorderv_numeric() and orderRowsByKeys() where individual elements could be set to NA in a vector unless all of the values were NA after conversion to numeric. In the new version all of the values must be convertible to numeric for a numeric vector to be returned. In addition setorderv_numeric() has gained the parameter split, which is used in RstoxBase::formatOutput() as split = c("-", "/") to split both by the within StoX key separator and the between StoX kye separator used in IDs such as Sample and Individual. This bugfix may result in different sorting of StoxBiotic, particularly for NMDBiotic data with herring coded as catchcategory 161722.G03, 161722.G05 or 161722.G07.
+* Changed the drop down list of ConditionalVariableNames in Translate functions to only include the variables in the table of the VariableName (and also excluding the VariableName). Previously all variables of the entire data were listed, which was confusing since only those present in the relevant table could be used.
+* Changed the behavior of Translate functions when a variable that is not present in the table is used as a conditional variable. Before this conditional variable was effectively ignored, but in the new version the behavior is to give a warning and not perform any translation.
+* Added a warning if no values are translated in Translate functions.
+* Added the new GeneticPopulationCode to ICESBiotic(). 
+
 # RstoxData v2.0.1-9004  (2024-10-08)
 * Fixed a bug where certain values of BiologyLengthCode were shifted one integer value down in ICESBiotic(). The bug is related to floating point precision which causes some values to be slightly lower than the corresponding integer after calculations. In R one example is format(29 / 100 * 100, digits = 20) = "28.999999999999996447", which results in 28 when converted to integer. The following values are affected:
 	* 29, 57, 58, 113, 114, 115, 116 when BiologyLengthCode is "cm" (lengthresolution "3")

diff --git a/R/DefineAndUpdateVariables.R b/R/DefineAndUpdateVariables.R
@@ -438,8 +438,20 @@ translateOneTranslationOneTable <- function(translationListOne, table, translate
 		if(variableToTranslate %in% names(table)) {
 			varsToMatch <- setdiff(names(translationListOne), "NewValue")
 			matches <- lapply(varsToMatch, matchVariable, list = translationListOne, table = table)
+
+			# Any empty matches indicate that the variable of interest is not present, and triggers a warning and no match:
+			emptyMatches <- lengths(matches) == 0
+			if(any(emptyMatches)) {
+				warning("StoX: The following variables are used in the translation but are not present in the table. This results in no translation: ", paste(varsToMatch[emptyMatches], collapse = ", "), ".")
+				matches[emptyMatches] <- rep(list(FALSE), sum(emptyMatches))
+			}
+
 			matches <- apply(do.call(cbind, matches), 1, all)
 
+			if(!any(matches)) {
+				warning("StoX: No values were translated. Did you spell the values of the variable to translate correctly?")
+			}
+
 			### # Change the type after matching and before translating to avoid type conversion warninigs:
 			### if(!PreserveClass) {
 			### 	setColumnClasses(table, structure(list(class(translationListOne$NewValue)), names = variableToTranslate))

diff --git a/R/StoxBiotic.R b/R/StoxBiotic.R
@@ -72,6 +72,9 @@ GeneralSamplingHierarchy2StoxBiotic <- function(GeneralSamplingHierarchy, Number
 
 	# Rbind for each StoxBiotic table:
 	StoxBioticData <- rbindlist_StoxFormat(StoxBioticData)
+
+	# Temporarily remove the Prey tables:
+	StoxBioticData <- StoxBioticData[!startsWith(names(StoxBioticData), "Prey")]
 
 	return(StoxBioticData)
 }

diff --git a/R/StoxExport.R b/R/StoxExport.R
@@ -795,6 +795,7 @@ BioticData_NMDToICESBioticOne <- function(
 		SpeciesCode = aphia,
 		SpeciesCategory = catchpartnumber,
 		StockCode = NA_character_,
+		GeneticPopulationCode = NA_character_,
 		FishID = specimenid,
 		#LengthCode = "mm", 
 		LengthCode = getLengthCodeICES(lengthresolution, format = "ICESBiotic"), 

diff --git a/R/Utilities.R b/R/Utilities.R
@@ -639,19 +639,21 @@ findVariablesMathcinigVocabularyOne <- function(vocabularyOne, data) {
 #' @export
 #' 
 orderRowsByKeys <- function(data) {
-	lapply(data, setorderv_numeric, key = "Key")
+	# StoX keys are concatenated only by "/", whereas IDs are concatenated keys by "-":
+	lapply(data, setorderv_numeric, key = "Key", split = "/")
 }
 
 #' Order a data.table (by reference) by interpreting characters as numeric if possible
 #'
 #' @param dataOne A data.table.
 #' @param by Order by the given columns.
 #' @param key If given and \code{by} is empty, order by the columns with names ending with \code{key}.
-#' @param ... Passed on to \code{\link[data.table]{setorderv}}
+#' @param split Character: A vector of single character to split by. The default c("-", "/") splits between StoX keys and within StoX keys. 
 #'
 #' @export
 #' 
-setorderv_numeric <- function(dataOne, by = NULL, key = NULL, ...) {
+setorderv_numeric <- function(dataOne, by = NULL, key = NULL, split = "/") {
+#setorderv_numeric <- function(dataOne, by = NULL, key = NULL, ...) {
 
 	# Locate keys:
 	if(!length(by)) {
@@ -667,10 +669,11 @@ setorderv_numeric <- function(dataOne, by = NULL, key = NULL, ...) {
 		orderKeys <- paste0(by, "OrderedAfterSplitting")
 
 		# Create keys which are converted to ranks, splitting first and then treating individual elements as numbers if possible:
-		dataOne[, (orderKeys) := lapply(.SD, createOrderKey), .SDcols = by]
+		dataOne[, (orderKeys) := lapply(.SD, createOrderKey, split = split), .SDcols = by]
 
 		# Order the rows:
-		data.table::setorderv(dataOne, orderKeys, ...)
+		# 2024-10-28: Change to sort out sorting:
+		data.table::setorderv(dataOne, orderKeys, na.last = TRUE)
 
 		# Remove the orderKeys:
 		dataOne[, (orderKeys) := NULL]
@@ -689,7 +692,7 @@ addNAs <- function(x, areNAs) {
 	return(x)
 }
 
-#' Convert a vector to an integer vector where individual string elelents at interpreted as numeric if possible.
+#' Convert a vector to an integer vector where individual string elements at interpreted as numeric if possible.
 #'
 #' @param x A vector.
 #' @param split A character to split strings by.
@@ -727,7 +730,11 @@ createOrderKey <- function(x, split = "/") {
 	}
 
 	# Split the vector by the 'split' parameter:
-	splitted <- strsplit(x, split, fixed = TRUE)
+	splitted <- x
+	for(thisSplit in split) {
+		splitted <- lapply(splitted, function(x) unlist(strsplit(x, thisSplit, fixed = TRUE)))
+	}
+	#splitted <- strsplit(x, split, fixed = TRUE)
 
 	# Check that all have the same number of elements, that is the same number of splits:
 	if(!all(lengths(splitted) == length(splitted[[1]]))) {
@@ -755,7 +762,7 @@ createOrderKey <- function(x, split = "/") {
 	# This sorting (en_US_POSIX) is the same that data.table uses in setorder/setorderv(), which uses the ICU C locale (https://icu.unicode.org/design/locale/root):
 	splittedDT[, names(splittedDT) := lapply(.SD, function(y) match(y, stringi::stri_sort(unique(y), locale = "en_US_POSIX")))]
 
-	# Count the maximum number of digits for each column, and multiply by the cummulative number of digits:
+	# Count the maximum number of digits for each column, and multiply by the cumulative number of digits:
 	numberOfDigits <- splittedDT[, lapply(.SD, max)]
 	numberOfDigits <- nchar(numberOfDigits)
 	exponent <- rev(c(0, cumsum(rev(numberOfDigits))[ -length(numberOfDigits)]))
@@ -775,107 +782,13 @@ createOrderKey <- function(x, split = "/") {
 }
 
 
-createOrderKeyNewWithoutError <- function(x, split = "/") {
-
-	# Remove NAs and add at the end:
-	areNAs <- is.na(x)
-	# Use c() here to rid off attributes:
-	x <- c(stats::na.omit(x))
-
-	# Split the keys:
-	if(!is.character(x)) {
-		return(addNAs(x, areNAs))
-	}
-
-	# Find the first non-NA:
-	firstNonNA <- x[1]
-	if(is.na(firstNonNA)) {
-		firstNonNA <- x[min(which(!is.na(x)))]
-	}
-
-
-	# If the first non-NA element is coercable to numierc, try converting the entire vector to numeric, and check that no NAs were generated:
-	if(!is.na(suppressWarnings(as.numeric(firstNonNA)))) {
-		numberOfNAs <- sum(is.na(x))
-		xnumeric <- suppressWarnings(as.numeric(x))
-		# If there are NAs created, it signals that the vector is not coerable to numeric:
-		if(sum(is.na(xnumeric)) > numberOfNAs) {
-			return(addNAs(x, areNAs))
-		}
-		else {
-			return(addNAs(xnumeric, areNAs))
-		}
-	}
-
-	# Split by the 'split' argument:
-	containsSplit <- sapply(split, grepl, firstNonNA)
-	split <- split[containsSplit]
-	# If the vector does not contain any of the characters to split by, return unchanegd:
-	if(!any(containsSplit)) {
-		return(addNAs(x, areNAs))
-	}
-
-	# Split the vector by the 'split' parameter:
-	if(length(split) == 1) {
-		splitted <- strsplit(x, split, fixed = TRUE)
-	}
-	else if(length(split) > 1) {
-		splitted <- strsplit(x, split[1], fixed = TRUE)
-		for(ind in seq(2, length(split))) {
-			splitted <- lapply(splitted, function(x) unlist(strsplit(x, split[ind], fixed = TRUE)))
-		}
-	}
-	else {
-		stop("'split' must be given")
-	}
-
-	# Check that all have the same number of elements, that is the same number of splits:
-	if(!all(lengths(splitted) == length(splitted[[1]]))) {
-		return(addNAs(x, areNAs))
-	}
-
-	# Create a data.table of the splitted elements and get the order of these:
-	splittedDT <- data.table::rbindlist(lapply(splitted, as.list))
-	suppressWarnings(splittedDT[, names(splittedDT) := lapply(.SD, as.numeric_IfPossible)])
-
-	# Only accept if all elements can be converted to numeric:
-	#if(any(is.na(splittedDT))) {
-
-	# Acccpet if any of the values are not NA:
-	if(all(is.na(splittedDT))) {
-		return(addNAs(x, areNAs))
-	}
-
-	# Convert to integer ranks:
-	#splittedDT[, names(splittedDT) := lapply(.SD, function(y) match(y, sort(unique(y))))]
-	# Replicate data.table's soring which happend in C-locale (see ?data.table::setorderv):
-	#splittedDT[, names(splittedDT) := lapply(.SD, function(y) match(y, stringi::stri_sort(unique(y), locale = "C")))]
-	#splittedDT[, names(splittedDT) := lapply(.SD, function(y) match(y, stringr::str_sort(unique(y), locale = "C")))]
-	splittedDT[, names(splittedDT) := lapply(.SD, function(y) if(is.numeric(y)) rank(y) else match(y, stringi::stri_sort(unique(y), locale = "en_US_POSIX")))]
-
-	# Count the maximum number of digits for each column, and multiply by the cummulative number of digits:
-	numberOfDigits <- splittedDT[, lapply(.SD, max)]
-	numberOfDigits <- nchar(numberOfDigits)
-	exponent <- rev(c(0, cumsum(rev(numberOfDigits))[ -length(numberOfDigits)]))
-	names(exponent) <- names(splittedDT)
-	for(name in names(splittedDT)) {
-		splittedDT[, (name) := get(name) * 10^(exponent[[name]])]
-	}
-
-	orderKey <- rowSums(splittedDT)
-
-	#orderOfSplitted <- do.call(order, splittedDT)
-	## Match with a sequence to create integers used as order key:
-	#orderKey <- match(seq_along(x), orderOfSplitted)
-	#
-
-
-	return(addNAs(orderKey, areNAs))
-}
 
 as.numeric_IfPossible <- function(x) {
+	# If any missing values are introduced, keep the original:
 	num <- as.numeric(x)
-	if(all(is.na(num))) {
+	#if(all(is.na(num))) {
+	# 2024-10-28: Change to sort out sorting:
+	if(any(is.na(num) & !is.na(x))) {
 		return(x)
 	}
 	else {

diff --git a/R/data.R b/R/data.R
@@ -153,26 +153,6 @@ NULL
 #' IndividualSex \tab sex of an individual \tab F is female, M is male \tab Character \tab "F" \tab "F" if \code{sex = 1}, "M" if \code{sex = 2} \tab \code{IndividualSex} \cr
 #' }
 #' 
-#' 
-#' \bold{PreySpeciesCategory level}:
-#' \tabular{lllllll}{
-#' \bold{Variable} \tab \bold{Description} \tab \bold{Unit} \tab \bold{Data type} \tab \bold{Example} \tab \bold{NMDBiotic} \tab \bold{ICESBiotic} \cr
-#' PreySpeciesCategoryKey \tab Key of the PreySpeciesCategory level \tab None \tab Character \tab "85258" \tab \code{preycategory} (missing (NA) for NMDBiotic 1.1 and 1.4) \tab Missing (NA) \cr
-#' PreySpeciesCategory \tab The species category \tab None \tab Character \tab "Calanoida" \tab Same as PreySpeciesCategoryKey. Can be translated. \tab Missing (NA) \cr
-#' }
-#' 
-#' 
-#' \bold{PreySample level}:
-#' \tabular{lllllll}{
-#' \bold{Variable} \tab \bold{Description} \tab \bold{Unit} \tab \bold{Data type} \tab \bold{Example} \tab \bold{NMDBiotic} \tab \bold{ICESBiotic} \cr
-#' PreySampleKey \tab Key of the PreySample level \tab None \tab Character \tab "1" \tab \code{preysampleid} (missing (NA) for NMDBiotic 1.1 and 1.4) \tab Missing (NA) \cr          
-#' PreySample \tab Unique PreySample identifier \tab None \tab Character \tab "2021105-1-2-sild'G03/161722.G03/126417/Clupea harengus-1-2-85258-1" \tab \code{CruiseKey-StationKey-HaulKey-SpeciesCategoryKey-SampleKey-IndividualKey-PreySpeciesCategoryKey-PreySampleKey} (missing (NA) for NMDBiotic 1.1 and 1.4) \tab Missing (NA) \cr
-#' PreyCatchFractionWeight \tab Total weight of the catch for the PreySpeciesCategory and sub category (e.g., fractions for different digestion) \tab mg \tab Numeric \tab 12.3 \tab \code{totalweight} (taking \code{LengthCode.Biology} into account) (missing (NA) for NMDBiotic 1.1 and 1.4) \tab Missing (NA) \cr
-#' PreyCatchFractionWeightResolution \tab Resolution of PreyCatchFractionWeight \tab mg \tab Numeric \tab 0.001 \tab \code{weightresolution} converted to mg (missing (NA) for NMDBiotic 1.1 and 1.4) \tab Missing (NA) \cr
-#' PreyCatchFractionNumber \tab Total number of individual prey of the PreySpeciesCategory and sub category (e.g., fractions for different digestion) \tab individuals \tab Integer \tab 4 \tab \code{totalcount} (missing (NA) for NMDBiotic 1.1 and 1.4) \tab Missing (NA) \cr     
-#' }
-#' 
-#' 
 #' @name StoxBioticFormat
 #' 
 NULL

diff --git a/R/stoxFunctionAttributes.R b/R/stoxFunctionAttributes.R
@@ -1,34 +1,52 @@
-getVariableNamesStoxData <- function(BioticData, StoxBioticData, ICESBioticData, ICESDatrasData, ICESDatsuscData, AcousticData, StoxAcousticData, ICESAcousticData, LandingData, StoxLandingData) {
+getVariableNamesStoxData <- function(BioticData, StoxBioticData, ICESBioticData, ICESDatrasData, ICESDatsuscData, AcousticData, StoxAcousticData, ICESAcousticData, LandingData, StoxLandingData, VariableName) {
+
+	# Function to get the names of a table if that table contains certain variables:
+	getNamesIfVariableIsPresent <- function(table, requiredVariableNames) {
+		output <- names(table)
+		if(!requiredVariableNames %in% output) {
+			output <- NULL
+		}
+		return(output)
+	}
+
+	# Get the variables:
 	if(!missing(BioticData)) {
-		sort(unique(unlist(lapply(BioticData, function(x) lapply(x, names)))))
+		output <- lapply(BioticData, function(x) lapply(x, getNamesIfVariableIsPresent, requiredVariableNames = VariableName))
 	}
 	else if(!missing(StoxBioticData)) {
-		sort(unique(unlist(lapply(StoxBioticData, names))))
+		output <- lapply(StoxBioticData, getNamesIfVariableIsPresent, requiredVariableNames = VariableName)
 	}
 	else if(!missing(ICESBioticData)) {
-		sort(unique(unlist(lapply(ICESBioticData, names))))
+		output <- lapply(ICESBioticData, getNamesIfVariableIsPresent, requiredVariableNames = VariableName)
 	}
 	else if(!missing(ICESDatrasData)) {
-		sort(unique(unlist(lapply(ICESDatrasData, names))))
+		output <- lapply(ICESDatrasData, getNamesIfVariableIsPresent, requiredVariableNames = VariableName)
 	}
 	else if(!missing(ICESDatsuscData)) {
-		sort(unique(unlist(lapply(ICESDatsuscData, names))))
+		output <- lapply(ICESDatsuscData, getNamesIfVariableIsPresent, requiredVariableNames = VariableName)
 	}
 	else if(!missing(AcousticData)) {
-		sort(unique(unlist(lapply(AcousticData, function(x) lapply(x, names)))))
+		output <- lapply(AcousticData, function(x) lapply(x, getNamesIfVariableIsPresent, requiredVariableNames = VariableName))
 	}
 	else if(!missing(StoxAcousticData)) {
-		sort(unique(unlist(lapply(StoxAcousticData, names))))
+		output <- lapply(StoxAcousticData, getNamesIfVariableIsPresent, requiredVariableNames = VariableName)
 	}
 	else if(!missing(ICESAcousticData)) {
-		sort(unique(unlist(lapply(ICESAcousticData, names))))
+		output <- lapply(ICESAcousticData, getNamesIfVariableIsPresent, requiredVariableNames = VariableName)
 	}
 	else if(!missing(LandingData)) {
-		sort(unique(unlist(lapply(LandingData, function(x) lapply(x, names)))))
+		output <- lapply(LandingData, function(x) lapply(x, getNamesIfVariableIsPresent, requiredVariableNames = VariableName))
 	}
 	else if(!missing(StoxLandingData)) {
-		sort(unique(unlist(lapply(StoxLandingData, names))))
+		output <- lapply(StoxLandingData, getNamesIfVariableIsPresent, requiredVariableNames = VariableName)
 	}
+
+
+	output <- sort(unique(unlist(output)))
+	output <- setdiff(output, VariableName)
+
+	return(output)
+
 	#else {
 	#	stop("Any of BioticData, StoxBioticData, ICESBioticData, ICESDatrasData, AcousticData, StoxAcousticData, ICESAcousticData, Land#ingData and StoxLandingData must be given.")
 	#}
@@ -1166,12 +1184,12 @@ processPropertyFormats <- list(
 		possibleValues = getVariableNamesStoxData, 
 		variableTypes = "character"
 	), 
-	conditionalVariableNames_Copy = list(
-		class = "vector", 
-		title = "Select one variable to copy", 
-		possibleValues = getVariableNamesStoxData, 
-		variableTypes = "character"
-	),
+	#conditionalVariableNames_Copy = list(
+	#	class = "vector", 
+	#	title = "Select one variable to copy", 
+	#	possibleValues = getVariableNamesStoxData, 
+	#	variableTypes = "character"
+	#),
 
 	roundingTable = list(
 		class = "table", 

diff --git a/README.md b/README.md
@@ -30,18 +30,12 @@ Loaded data can also be filtered by using any supported R conditional syntax suc
 
 ## Installation
 
-1. Install from CRAN:
-
-    ```r
-    install.packages("RstoxData")
-    ```
-
-2. Install the latest release from our local repository:
+1. Install the latest release from our local repository:
     ```r
     install.packages("RstoxData", repos = c("https://stoxproject.github.io/repo/", "https://cloud.r-project.org/"))
     ```
 
-3. Install the latest version from GitHub:
+2. Install the latest version from GitHub:
     ```r
     devtools::install_github("https://github.com/StoXProject/RstoxData")
     ```

diff --git a/inst/extdata/functionArguments.rds b/inst/extdata/functionArguments.rds
diff --git a/inst/testresources/biotic_2020821.rds b/inst/testresources/biotic_2020821.rds
diff --git a/inst/tinytest/test-translate.R b/inst/tinytest/test-translate.R
@@ -14,7 +14,7 @@ Translation_SpeciesCategory <- RstoxData::DefineTranslation(
 )
 exampleDataTranslated_SpeciesCategory <- RstoxData::TranslateStoxBiotic(StoxBioticData = exampleData, TranslationDefinition = "FunctionInput", Translation = Translation_SpeciesCategory)
 
-expect_equal(which(exampleDataTranslated_SpeciesCategory$SpeciesCategory$SpeciesCategory == "HER"), c(1, 11))
+expect_equal(sum(exampleDataTranslated_SpeciesCategory$SpeciesCategory$SpeciesCategory == "HER"), 2)
 
 # Translate IndividualSex to Male if IndividualTotalLength > 20. All IndividualSex are NA, so we need to use a function both for IndividualSex and for IndividualTotalLength:
 TranslationTable_IndividualSex = data.table::data.table(