Merge pull request #407 from StoXProject/develop

Develop
StoXProject · Nov 29, 2024 · 175de88 · 175de88
2 parents 7d0f2c4 + e005ad4
commit 175de88
Show file tree

Hide file tree

Showing 7 changed files with 174 additions and 183 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: RstoxData
-Version: 2.1.0
-Date: 2024-11-06
+Version: 2.1.1-9001
+Date: 2024-11-29
 Title: Tools to Read and Manipulate Fisheries Data
 Authors@R: c(
   person(given = "Edvin", 

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,9 @@
+# RstoxData v2.1.1_9001  (2024-11-25)
+* Removed rscript_args from mapplyOnCores(), since this caused the Renviron to be ignored in the sub processes, resulting in errors with finding RstoxFramework in the case of multiple libraries on Linux and macOS.
+* Fixed the 29 cm bug again, as it was not properly fixed in RstoxData v2.1.0. Refactored how precision is set both when reading data and in the ICESBiotic() and ICESAcoustic().
+* Introduced the EchoType as a column in the Data table of ICESBiotic() (and WriteICESBiotic()).
+
+
 # RstoxData v2.1.0  (2024-11-04)
 * Final release for StoX 4.1.0.
 * Fixed bug in DefineTranslation, where the ConditionalVariableNames was showing as a single string and not a vector.

diff --git a/R/Definitions.R b/R/Definitions.R
@@ -257,6 +257,24 @@ initiateRstoxData <- function(){
 	lengthCode_unit_table[, rank := seq_len(.N)]
 
 
+	# Define conversion functions from xml types:
+	conversionFunctionName <- list(
+		"xsd:ID" = "as.character", 
+		"xsd:float" = "as.double", 
+		"xs:string" = "as.character",
+		"xsd:string" = "as.character", 
+		#"xsd:int" = "integer", 
+		"xsd:int" = "asIntegerAfterRound", 
+		"xs:long" = "asIntegerAfterRound", 
+		#"xs:integer" = "integer",
+		"xs:integer" = "asIntegerAfterRound",
+		"xs:decimal" = "as.double", 
+		"xs:date" = "as.character", 
+		"xs:time" = "as.character", 
+		"xs:double" = "as.double"
+	)
+
+
 	#### Assign to RstoxDataEnv and return the definitions: ####
 	definitionsNames <- ls()
 	definitions <- lapply(definitionsNames, get, pos = environment())

diff --git a/R/StoxExport.R b/R/StoxExport.R
@@ -274,7 +274,7 @@ checkAndCreateICESAcousticCSV <- function(ICESAcousticDataOne) {
 	checkICESAcousticDefinitions (ICESAcousticDataOne)
 
 	# Set classes of the variables, especially taking care of NAs. The class of the variables is used later to format the output from WriteICESAcoustic, where NA double type is stored as empty sting to support these beingg empty fields in the written file:
-	setClassICESAcoustic(ICESAcousticDataOne)
+	lapply(names(ICESAcousticDataOne), setClass_OneTable, ICESAcousticDataOne, RstoxData::xsdObjects$icesAcoustic.xsd)
 
 
 	#### Rename columns to start with the table name:
@@ -595,7 +595,7 @@ BioticDataToICESBioticOne <- function(
 	ICESBioticDataOne <- ICESBioticDataOne[hierarchicalTables]
 
 	# Set classes of the variables, especially taking care of NAs. The class of the variables is used later to format the output from WriteICESBiotic, where NA double type is stored as empty sting to support these being empty fields in the written file:
-	setClassICESBiotic(ICESBioticDataOne)
+	lapply(names(ICESBioticDataOne), setClass_OneTable, ICESBioticDataOne, RstoxData::xsdObjects$icesBiotic.xsd)
 
 
 	# Create a table of the original and new column names, but remove keys:
@@ -748,6 +748,8 @@ BioticData_NMDToICESBioticOne <- function(
 	# We must filter records with aphia == NA
 	catchRaw <- catchRaw[!is.na(aphia)]
 
+
+
 	Catch <- catchRaw[, .(
 		LocalID = cruise,
 		Gear = gear,
@@ -862,9 +864,6 @@ BioticData_NMDToICESBioticOne <- function(
 	}
 
 
-
-
-
 	ICESBioticCSV <- list(
 		Cruise = Cruise, 
 		Haul = Haul, 
@@ -878,117 +877,12 @@ BioticData_NMDToICESBioticOne <- function(
 
 
 
-setClassICESBiotic <- function(data, tables = names(data), xsd = RstoxData::xsdObjects$icesBiotic.xsd) {
-	setClassICES(data, xsd, tables = names(data))
-}
-
-setClassICESAcoustic <- function(data, tables = names(data), xsd = RstoxData::xsdObjects$icesAcoustic.xsd) {
-	setClassICES(data, xsd, tables = names(data))
-}
 
 
 
-setClassICES <- function(data, xsd, tables = names(data)) {
-	# Get the classes per table:
-	classes <- mapply(
-		structure, 
-		lapply(
-			xsd$tableTypes[tables], 
-			translateSimple, 
-			old = c(
-				"xsd:float", 
-				"xsd:int", 
-				"xsd:string", 
-				"xs:string", 
-				"xsd:ID"
-			), 
-			new = c(
-				"numeric", 
-				"integer", 
-				"character", 
-				"character", 
-				"character"
-			)
-		), 
-		names = xsd$tableHeaders[tables], 
-		SIMPLIFY = FALSE
-	)
-	classes <- lapply(classes, as.list)
-
-	for(table in tables) {
-		data[[table]] <- data[[table]][, lapply(names(.SD), changeClassOfNonNA, classes = classes[[table]], data = data[[table]])]
-	}
-}
 
-translateSimple <- function(x, old, new) {
-	if(length(old) != length(new)) {
-		stop("old and new need to be of equal length.")
-	}
-	for(ind in seq_along(old)) {
-		x <- replace(x, x == old[ind], new[ind])
-	}
-	return(x)
-}
 
 
-changeClassOfNonNA <- function(name, classes, data) {
-	if(name %in% names(data) && name %in% names(classes) && firstClass(data[[name]]) != classes[[name]]) {
-		thisClass <- classes[[name]]
-		if(all(is.na(data[[name]]))) {
-			NAToInsert <- getNAByType(thisClass)
-			data[, c(name) := ..NAToInsert]
-		}
-		else {
-			# Removed this by a special function that converts to integer after rounding to avoid problems like trunc(0.29 * 100) == 28:
-			# I.e., convertion from float to integer performs in the same way as trunc(), which has problems with floating numbers. For the fish lengths that are relevant we have the problem for the following values:
-			# int <- seq_len(150)
-			# d <- data.table::data.table(int = int, equalToFloat = trunc(int / 100 * 100) == int)
-			# subset(d, !equalToFloat)
-			#data[, c(name) := get(paste("as", thisClass, sep = "."))(get(name))]
-			# int equalToFloat
-			# <int>       <lgcl>
-			# 	1:    29        FALSE
-			# 2:    57        FALSE
-			# 3:    58        FALSE
-			# 4:   113        FALSE
-			# 5:   114        FALSE
-			# 6:   115        FALSE
-			# 7:   116        FALSE
-			# So the problem is particularly for fish of length 29 cm, which were truncated to 28 cm when submitting to ICES:
-			data[, c(name) := get(getConversionFunction(thisClass))(get(name))]
-		}
-	}
-}
-
-
-getConversionFunction <- function(class) {
-	atInteger <- class %in% "integer"
-	out <- paste("as", class, sep = ".")
-	out[atInteger] <- "asIntegerAfterRound"
-	return(out)
-}
-
-
-asIntegerAfterRound <- function(x, prec = .Machine$double.eps) {
-	# This operation requires that the input can be represented as numeric, so we test that first by observing whether the number of missing values increases:
-	x_numeric <- as.numeric(x)
-	numberOfNAs <- sum(is.na(x))
-	numberOfNAs_numeric <- sum(is.na(x_numeric))
-	if(numberOfNAs_numeric > numberOfNAs) {
-		warning("StoX: NAs introduced when trying to convert to integer.")
-		return(as.integer)
-	}
-
-	# Convert to integer:
-	x_integer <- as.integer(x)
-	# Find values which differ to the integer value by less than the input precision, and round these off before converting to integer to avoid occasional shifts in integer value due to floating point representation (e.g. as.integer(0.29 * 100) == 28):
-	atSmallDiff <- which(abs(x_numeric - x_integer) <= prec)
-
-	# Convert to integer, but for values that differ to the integer value by more than 
-	x_integer[atSmallDiff] <- as.integer(round(x_numeric[atSmallDiff]))
-
-	return(x_integer)
-}
 
 #' Write ICESBiotic to CSV fille
 #'

diff --git a/R/Utilities.R b/R/Utilities.R
@@ -319,7 +319,10 @@ mapplyOnCores <- function(FUN, NumberOfCores = 1L, ..., MoreArgs = NULL, SIMPLIF
 
 		# On Windows run special args to speed up:
 		#if(get_os() == "win") {
-			cl <- parallel::makeCluster(NumberOfCores, rscript_args = c("--no-init-file", "--no-site-file", "--no-environ"))
+
+			# Removed the rscript_args, because it changes the envoronment compared to the partent environment:
+			#cl <- parallel::makeCluster(NumberOfCores, rscript_args = c("--no-init-file", "--no-site-file", "--no-environ"))
+			cl <- parallel::makeCluster(NumberOfCores)
 			out <- parallel::clusterMap(cl, FUN, ..., MoreArgs = MoreArgs, SIMPLIFY = SIMPLIFY)
 			parallel::stopCluster(cl)
 		#} 
@@ -1052,3 +1055,122 @@ do.call_robust <- function(what, args, quote = FALSE, envir = parent.frame(), ke
 }
 
 
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+# Process column names and types
+setNames_OneTable <- function(tableName, data, xsd) {
+
+	# For convenience extract the tableHeader of the current table:
+	tableHeader <- xsd$tableHeaders[[tableName]]
+
+	# There are duplicated column names in NMDBiotic 1, 1.1 and 1.4. we suffix the table name to those fields:
+	if(anyDuplicated(tableHeader)) {
+		dup <- duplicated(tableHeader)
+		tableHeader[dup] <- paste(tableHeader[dup], tableName, sep = ".")
+	}
+
+	# Handle empty data. This is only relevant for NMDBiotic and NMDAcoustic, which both have levels with no data (i.e. "missions" and "distance_list", respectively):
+	if(!length(data[[tableName]])) {
+		data[[tableName]] <- matrix(data = "", nrow = 0, ncol = length(tableHeader))
+	}
+
+	# Convert to data.table
+	output <- data.table(data[[tableName]])
+
+	# Set column names
+	setnames(output, tableHeader)
+
+	return(output)
+}
+
+
+
+
+
+# Set types of the columns of the table named 'tableName' of 'data'. Note that this only considers the columns with names present in the xsd$tableHeader. For ICES formats the keys are not included in the tableHeader, but all keys are character
+setClass_OneTable <- function(tableName, data, xsd) {
+
+	# Known atomic data types
+	conversionFunctionName <- getRstoxDataDefinitions("conversionFunctionName")
+
+	# Set column types (only double and integer for now)
+	tableHeader <- xsd$tableHeader[[tableName]]
+	tableType <- xsd$tableTypes[[tableName]]
+	if(length(tableType) > 0) {
+		for(i in seq_along(tableHeader)) {
+			# Map the types
+			doConv <- eval(
+				parse(
+					text = conversionFunctionName[[tableType[i]]]
+				)
+			)
+
+			# Throw a proper warning when conversion fails:
+			tryCatch(
+				data[[tableName]][, tableHeader[i] := doConv(data [[tableName]] [[tableHeader[i]]] ) ], 
+				error = function(e) {
+					e
+				}, 
+				warning = function(w) {
+					modifiedWarning <- paste0("The following variable could not converted to numeric as per the format definition and were set to NA: ", names(data[[tableName]])[i])
+					warning(modifiedWarning)
+				}
+			)
+		}
+	}
+
+	invisible(tableName)
+}
+
+
+asIntegerAfterRound <- function(x, prec = sqrt(.Machine$double.eps)) {
+	# This operation requires that the input can be represented as numeric, so we test that first by observing whether the number of missing values increases:
+	x_numeric <- as.numeric(x)
+	x_integer <- as.integer(x)
+
+	# Detect whether the input is not fully convertible to integer, which we assume is the case if there are mote missing values in the x_numeric, in which case we simply return the x_integer: 
+	numberOfNAs <- sum(is.na(x))
+	numberOfNAs_numeric <- sum(is.na(x_numeric))
+	if(numberOfNAs_numeric > numberOfNAs) {
+		warning("StoX: NAs introduced when trying to convert to integer.")
+		return(x_integer)
+	}
+
+	# Convert to integer:
+	x_integer <- as.integer(x)
+	x_rounded <- round(x_numeric)
+	# Find values which differ to the integer value by less than the input precision, and round these off before converting to integer to avoid occasional shifts in integer value due to floating point representation (e.g. as.integer(0.29 * 100) == 28):
+	diff <- x_numeric - x_rounded
+	atSmallDiff <- which(diff < 0 & -diff <= prec)
+
+	# Convert to integer after rounding for values that differ to the integer value by less than the prec:
+	x_integer[atSmallDiff] <- as.integer(x_rounded[atSmallDiff])
+
+	return(x_integer)
+}
+
+
+