Skip to content

Commit

Permalink
Merge pull request #366 from StoXProject/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
arnejohannesholmin authored Oct 5, 2024
2 parents f359f8c + fd85898 commit 3469e53
Show file tree
Hide file tree
Showing 11 changed files with 196 additions and 74 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/check-full.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ jobs:
#### 4. Build the package source and binary files: ####
#######################################################

- name: Build package source archive from branches develop, testing and master
- name: Build package source archive from branches develop, testing and master (on Linux for some reason
if: runner.os == 'Linux' && matrix.config.r == 'release' && github.event_name == 'push' && (github.ref_name == 'master' || github.ref_name == 'testing' || github.ref_name == 'develop')
run: |
pkgbuild::build(".", dest_path = ".", binary = FALSE)
Expand Down
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: RstoxBase
Version: 2.0.1-9002
Date: 2024-09-17
Version: 2.0.1-9003
Date: 2024-10-05
Title: Base StoX Functions
Authors@R: c(
person(given = "Arne Johannes",
Expand Down Expand Up @@ -47,7 +47,7 @@ Imports:
jsonlite (>= 1.6),
lwgeom (>= 0.2-0),
maps (>= 0.2-0),
RstoxData (>= 2.0.1-9001),
RstoxData (>= 2.0.1-9003),
sf (>= 0.9.0),
stringi (>= 1.4.0),
units (>= 0.7),
Expand Down
7 changes: 7 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
# RstoxData v2.0.1-9003 (2024-10-17)
* Added warning if there are duplicated StratumLayerIndividual in Individuals(). There may however be duplicated StratumLayerIndividual in SuperIndividuals(), e.g. when multiple Beam are used. Added support in imputation to tackle this.
* Fixed bug when using DefinitionMethod = "PreDefined" in DefineAcousticPSU().
* Added support for numeric sorting of plus groups in plots, so that 9 comes before 10+.
* Fixed problem with selecting PointColor in PlotAcousticTrawlSurvey() in the GUI.


# RstoxData v2.0.1-9002 (2024-09-17)
* Renamed ReportVariable to TargetVariable and ReportVariableUnit to TargetVariableUnit in ReportSpeciesCategoryCatch() for consistency with other report functions.
* Added TargetVariable and TargetVariableUnit in ReportPreySpeciesCategoryCatch().
Expand Down
79 changes: 71 additions & 8 deletions R/Abundance.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,12 @@ Individuals <- function(
# Add unique ID of the rows:
IndividualsData <- IndividualsData[!is.na(Stratum) & !is.na(Layer) & !is.na(Individual), StratumLayerIndividual := paste(Stratum, Layer, Individual, sep = "-")]

# Give a warning if the StratumLayerIndividual is not unique, which must be a data error:
dupStratumLayerIndividual <- IndividualsData[, duplicated(StratumLayerIndividual)]
if(any(dupStratumLayerIndividual)) {
warning("There are duplicated combinations of Stratum, Layer and Individual, which is an indication of data errors.")
}


# Order the columns, but keep all columns. Also add the names of the MergeStoxBioticData as secondaryColumnOrder to tidy up by moving the Haul column (used as by in the merging) back into its original position:
areKeys <- endsWith(names(MergeStoxBioticData), "Key")
Expand Down Expand Up @@ -721,6 +727,63 @@ ImputeSuperIndividuals <- function(


ImputeDataByRandomSampling <- function(
data,
imputeAtMissing = "IndividualAge",
imputeByEqual = c("IndividualTotalLength", "SpeciesCategory"),
seed = 1,
columnNames = NULL,
lengthInterval = numeric(),
levels = c(
"Haul",
"Stratum",
"Survey"
)
) {

# Get the data and add the RowIndex for use when identifying which rows to impute from:
dataCopy <- data.table::copy(data)


# If there are duplicated StratumLayerIndividual we need to impute in a non-duplicated subset of the data, and then merge the imputed values into the full table:
if(dataCopy[, any(duplicated(stats::na.omit(StratumLayerIndividual)))]) {
dataCopy_nonDuplicatedStratumLayerIndividual <- subset(dataCopy, !duplicated(StratumLayerIndividual))

dataCopy_nonDuplicatedStratumLayerIndividual <- ImputeDataByRandomSampling_nonDuplicatedStratumLayerIndividual(
dataCopy_nonDuplicatedStratumLayerIndividual,
imputeAtMissing = imputeAtMissing,
imputeByEqual = imputeByEqual,
seed = seed,
columnNames = columnNames,
lengthInterval = lengthInterval,
levels = levels
)

dataCopy <- merge(dataCopy[, !..columnNames], subset(dataCopy_nonDuplicatedStratumLayerIndividual, select = c(
"StratumLayerIndividual",
"ReplaceLevel",
"ReplaceStratumLayerIndividual",
columnNames
)))
}
else {
dataCopy <- ImputeDataByRandomSampling_nonDuplicatedStratumLayerIndividual(
dataCopy,
imputeAtMissing = imputeAtMissing,
imputeByEqual = imputeByEqual,
seed = seed,
columnNames = columnNames,
lengthInterval = lengthInterval,
levels = levels
)
}


return(dataCopy)
}



ImputeDataByRandomSampling_nonDuplicatedStratumLayerIndividual <- function(
data,
imputeAtMissing = "IndividualAge",
imputeByEqual = c("IndividualTotalLength", "SpeciesCategory"),
Expand All @@ -739,14 +802,14 @@ ImputeDataByRandomSampling <- function(
#RowIndex <- seq_len(nrow(dataCopy))
#dataCopy[, RowIndex := ..RowIndex]

# If specified, regroup the length intervals:
if(length(lengthInterval) == 1L) {
dataCopy <- RegroupLengthData(
dataCopy,
lengthInterval = lengthInterval
)
}

# This was an arrempt to regroup the length intervals so that the imputation can be done on coarser intervals to avoid too much imputation over long distances (Stratum and Survey levels). However the implementation is not finished:
## If specified, regroup the length intervals:
#if(length(lengthInterval) == 1L) {
# dataCopy <- RegroupLengthData(
# dataCopy,
# lengthInterval = lengthInterval
# )
#}

# Introduce an Individual index for use in the sorted sampling, as a factor sorted as "en_US_POSIX":
dataCopy[, StratumLayerIndividualIndex := as.numeric(factor(StratumLayerIndividual, levels = stringi::stri_sort(StratumLayerIndividual, locale = "en_US_POSIX")))]
Expand Down
2 changes: 1 addition & 1 deletion R/Acoustic.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ NASC <- function(
# Check that the input StoxAcousticData has the same ChannelReferenceType throughout:
type <- getDataTypeDefinition(dataType = "NASCData", elements = "type", unlist = TRUE)
ChannelReferenceType <- NASCData[[type]]
if(!allEqual(ChannelReferenceType, na.rm = TRUE) && NROW(ChannelReferenceType)) {
if(!allEqual(ChannelReferenceType, na.rm = TRUE) && NROW(NASCData)) {
stop("The StoxAcousticData must have only one ", type, " in the NASC function. This can be obtained in FilterStoxAcoustic.")
}

Expand Down
71 changes: 51 additions & 20 deletions R/Define.R
Original file line number Diff line number Diff line change
Expand Up @@ -160,17 +160,21 @@ DefinePSU <- function(
Stratum_PSU <- getStratumOfSSUs_SF(SSU_PSU, MergedStoxDataStationLevel, StratumPolygon, SSULabel, StationLevel)#,
# msg = FALSE
#)

# Define the processData:
processData <- list(
Stratum_PSU = Stratum_PSU,
SSU_PSU = SSU_PSU
)
}
# If DefinitionMethod = "PreDefined" use times to interpret the PSUs (only used for acoustic PSUs):
else if(grepl("PreDefined", DefinitionMethod, ignore.case = TRUE)) {

processData <- getPSUProcessDataFromPSUByTime(
PSUByTime,
PSUProcessData$PSUByTime,
MergedStoxDataStationLevel = MergedStoxDataStationLevel,
PSUType = PSUType
)


}

#else if(grepl("Interval", DefinitionMethod, ignore.case = TRUE)) {
Expand Down Expand Up @@ -209,6 +213,12 @@ DefinePSU <- function(
PSU = NA_character_
)
Stratum_PSU <- data.table::data.table()

# Define the processData:
processData <- list(
Stratum_PSU = Stratum_PSU,
SSU_PSU = SSU_PSU
)
}
# Manual implies to use the existing process data, or create an empty set if not present:
else if(grepl("Manual", DefinitionMethod, ignore.case = TRUE)) {
Expand All @@ -225,16 +235,16 @@ DefinePSU <- function(
)
Stratum_PSU <- data.table::data.table()
}

# Define the processData:
processData <- list(
Stratum_PSU = Stratum_PSU,
SSU_PSU = SSU_PSU
)
}
else {
stop("Inavlid DefinitionMethod")
}

# Define the processData:
processData <- list(
Stratum_PSU = Stratum_PSU,
SSU_PSU = SSU_PSU
)
}


Expand Down Expand Up @@ -1175,17 +1185,8 @@ DefineBioticAssignment <- function(
return(processData$BioticAssignment)
}

# Check whether all PSUs are present in the processData, and issue a warning if there are new PSUs to be included:
newPSUs <- setdiff(AcousticPSU$Stratum_PSU$PSU, subset(processData$BioticAssignment, !is.na(Haul))$PSU)
if(length(newPSUs)) {
warning("StoX: The following acoustic PSUs are not present in the BioticAssignment processData or have no assigned biotic Hauls. This may not be a problem if there is no NASC in those PSUs. Otherwise, please add assignment to these acoustic PSUs, or if an automatic method was used in DefineBioticAssignment, rerun that process with UseProcecssData set to FALSE (unchecked):\n", paste("\t", newPSUs, collapse = "\n"))
}

# Also issue a warning for assignment to non-existing acoustic PSUs:
nonExistingPSUs <- setdiff(processData$BioticAssignment$PSU, AcousticPSU$Stratum_PSU$PSU)
if(length(nonExistingPSUs)) {
warning("StoX: There are assignments to the following non-existing acoustic PSUs. Please remove these assignments, or if an automatic method was used in DefineBioticAssignment, rerun that process with UseProcecssData set to FALSE (unchecked):\n", paste("\t", nonExistingPSUs, collapse = "\n"))
}
# Check the BioticAssignment:
processData$BioticAssignment <- DefineBioticAssignment_Warnings(processData$BioticAssignment, AcousticPSU)

# Special action since we have included Layer in the BioticAssignment but have not yet opened for the possibility to assign differently to different layers. Re-add the Layer column:
BioticAssignment <- addLayerToBioticAssignmentAndFormat(
Expand Down Expand Up @@ -1417,6 +1418,9 @@ DefineBioticAssignment <- function(
#Layer_PSU <- data.table::CJ(Layer = AcousticLayer$Layer, PSU = unique(BioticAssignment$PSU))
#BioticAssignment <- merge(BioticAssignment, Layer_PSU, all = TRUE, by = "PSU", allow.cartesian = TRUE)

# Check the BioticAssignment:
BioticAssignment <- DefineBioticAssignment_Warnings(BioticAssignment, AcousticPSU)

BioticAssignment <- addLayerToBioticAssignmentAndFormat(
BioticAssignment = BioticAssignment,
LayerDefinition = LayerDefinition,
Expand All @@ -1431,6 +1435,33 @@ DefineBioticAssignment <- function(
}



DefineBioticAssignment_Warnings <- function(BioticAssignment, AcousticPSU) {
# Check whether all PSUs are present in the processData, and issue a warning if there are new PSUs to be included:
newPSUs <- setdiff(AcousticPSU$Stratum_PSU$PSU, subset(BioticAssignment, !is.na(Haul))$PSU)
if(length(newPSUs)) {
warning("StoX: The following acoustic PSUs are not present in the BioticAssignment processData or have no assigned biotic Hauls. This may not be a problem if there is no NASC in those PSUs. Otherwise, please add assignment to these acoustic PSUs, or if an automatic method was used in DefineBioticAssignment, rerun that process with UseProcecssData set to FALSE (unchecked):\n", paste("\t", newPSUs, collapse = "\n"))
}

# Also issue a warning for assignment to non-existing acoustic PSUs:
nonExistingPSUs <- setdiff(BioticAssignment$PSU, AcousticPSU$Stratum_PSU$PSU)
if(length(nonExistingPSUs)) {
warning("StoX: There are assignments to the following non-existing acoustic PSUs. Please remove these assignments, or if an automatic method was used in DefineBioticAssignment, rerun that process with UseProcecssData set to FALSE (unchecked):\n", paste("\t", nonExistingPSUs, collapse = "\n"))
}

# Give a warning for and remove missing Haul:
missingHaul <- BioticAssignment[, is.na(Haul)]
if(any(missingHaul)) {
warning("StoX: There are assignments to missing Hauls. These were removed.")
BioticAssignment <- subset(BioticAssignment, !is.na(Haul))
}

return(BioticAssignment)
}




notAllStationsInStratum_Warning <- function(BioticAssignment, StoxBioticData) {


Expand Down
2 changes: 1 addition & 1 deletion R/ImputeSuperIndividuals_StoX3.R
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ ImputeDataByRandomSampling_StoX3 <- function(
# A test for duplicated Individual ID for the rows to be imputed. If there are duplicates, the new method of using Individual to identify rows to impute from may differ from the old method using Individual:
duplicatedIndividual <- dataCopy[is.na(get(imputeAtMissing)) & !is.na(Individual), duplicated(Individual)]
if(any(duplicatedIndividual)) {
warning("StoX: There are duplicated entries in the Individual column which imples that individuals were used in multiple Strata. Due to a bug in the function ImputeSuperIndividuals() in StoX <= 3.6.2, which has been renamed to the deprecated ImputeSuperIndividuals_StoX3(), this may result in non-imputed rows. When you see this warning it is advised to use the new ImputeSuperIndividuals() instead.")
warning("StoX: There are duplicated entries in the Individual column which imples that individuals were used in multiple Strata. Due to a bug in the function ImputeSuperIndividuals() in StoX <= 3.6.2, which has been renamed to the deprecated ImputeSuperIndividuals_StoX3(), this may result in non-imputed rows. When you see this warning it is advised to use the new ImputeSuperIndividuals() instead. To do this, please duplicate the existing process and change function in the duplicate, so that you do not loose track of the parameters.")
}


Expand Down
4 changes: 3 additions & 1 deletion R/Plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ PlotAcousticTrawlSurvey <- function(
BorderColor = character(),
OceanColor = character(),
GridColor = character(),
#PointTransparency = 1,

# Options for the point sizes and shapes:
UseDefaultSizeSettings = TRUE,
Expand Down Expand Up @@ -194,6 +195,7 @@ PlotAcousticTrawlSurvey <- function(
type = "lp",
size = "NASC",
shape = 1,
#alpha.point = if(length(PointTransparency)) PointTransparency else 1,
alpha.point = 1,
alpha.track = 1
)
Expand Down Expand Up @@ -511,7 +513,7 @@ plot_lon_lat <- function(
# Plot the points:
if(grepl("p", type)) {

# If the color variable is column in the data, but the user has specified a single color, use this color instead:
# If the color variable is a column in the data, but the user has specified a single color, use this color instead:
if(!isCategorical(x[[color]]) && length(color.scale) == 1 && !is.function(try(get(color.scale), silent = TRUE))) {
color <- color.scale
}
Expand Down
34 changes: 16 additions & 18 deletions R/RstoxBase-package.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,25 +13,23 @@ utils::globalVariables(c(
".", "..Cruise", "..DateTime", "..DensityType", "..Hauls", "..LengthDistributionType",
"..SpeciesCategoryCatchType", "..VerticalResolutionMax", "..VerticalResolutionMin",
"..WeightingFactors", "..acceptedColumns", "..atMissingLengthGroup", "..by", "..cols",
"..columnsToKeep", "..extract", "..extractFromDataCopy", "..haulGrouping",
"..horizontalResolution", "..keys", "..keysSansSample", "..lengthInterval",
"..lengthIntervalWidths", "..lengthVar", "..locatedStratum", "..meanBy", "..paddingVariables",
"..presentResolutionVariables", "..refvar", "..resolutionVar", "..sumBy", "..tomerge",
"..variablesToGetFromQuantityData", "..vars", "Abundance", "AcousticCategory",
"AcousticCategoryKey", "AcousticTargetStrength", "Area", "Beam", "BeamKey", "Biomass",
"CatchFractionNumber", "CatchFractionWeight", "Channel", "ChannelReferenceDepth",
"ChannelReferenceKey", "ChannelReferenceTilt", "ChannelReferenceType",
"ContainsAllSpeciesCategory", "ContainsAnySpeciesCategory", "Cruise", "CruiseKey",
"CruiseKey1", "DateTime", "Density", "DensityType", "DensityWeight", "Depth", "DepthExponent",
"EDSU", "EffectiveLogDistance", "EffectiveTowDistance", "EstimationMethod", "Haul", "HaulKey",
"ImputationMethod", "Individual", "IndividualIndex", "IndividualKey", "IndividualRoundWeight",
"IndividualTotalLength", "IndividualTotalLengthMiddle", "L1", "Latitude", "Layer",
"LengthDistributionType", "LengthExponent", "LengthResolution", "LogDuration", "LogKey",
"LogOrigin", "Longitude", "MaxChannelDepth", "MaxChannelRange", "MeanNASCWeight",
"MiddleDateTime", "MinChannelDepth", "MinChannelRange", "N", "NASCKey", "NASCWeight",
"NumberOfAssignedHauls", "PreySpeciesCategoryCatchWeight", "PSU", "PSUByTime",
"..columnNames", "..columnsToKeep", "..extract", "..extractFromDataCopy", "..haulGrouping",
"..keys", "..keysSansSample", "..lengthInterval", "..lengthIntervalWidths", "..lengthVar",
"..locatedStratum", "..meanBy", "..paddingVariables", "..presentResolutionVariables",
"..refvar", "..resolutionVar", "..sumBy", "..tomerge", "..variablesToGetFromQuantityData",
"..vars", "Abundance", "AcousticCategory", "AcousticCategoryKey", "AcousticTargetStrength",
"Area", "Beam", "BeamKey", "Biomass", "CatchFractionNumber", "CatchFractionWeight", "Channel",
"ChannelReferenceDepth", "ChannelReferenceKey", "ChannelReferenceTilt", "ChannelReferenceType",
"Cruise", "CruiseKey", "CruiseKey1", "DateTime", "Density", "DensityType", "DensityWeight",
"Depth", "DepthExponent", "EDSU", "EffectiveLogDistance", "EffectiveTowDistance",
"EstimationMethod", "Haul", "HaulKey", "ImputationMethod", "Individual", "IndividualIndex",
"IndividualKey", "IndividualRoundWeight", "IndividualTotalLength",
"IndividualTotalLengthMiddle", "L1", "Latitude", "Layer", "LengthDistributionType",
"LengthExponent", "LengthResolution", "LogDuration", "LogKey", "LogOrigin", "Longitude",
"MaxChannelDepth", "MaxChannelRange", "MeanNASCWeight", "MiddleDateTime", "MinChannelDepth",
"MinChannelRange", "NASCKey", "NASCWeight", "PSU", "PreySpeciesCategoryCatchWeightingFactor",
"ReplaceIndividual", "ReplaceIndividualIndex", "ReplaceLevel", "ReplaceStratumLayerIndividual",
"ReplaceStratumLayerIndividualIndex", "SSU", "SSUIndex", "SSULabel", "Sample", "SampleNumber",
"ReplaceStratumLayerIndividualIndex", "SSU", "SSUIndex", "Sample", "SampleNumber",
"SampleWeight", "SpeciesCategory", "SpeciesCategoryCatchWeight", "SplitAcousticCategory",
"StartDateTime", "Station", "StationLevel", "StopDateTime", "Stratum",
"StratumLayerIndividual", "StratumLayerIndividualIndex", "StratumPolygon", "SummedWeights",
Expand Down
23 changes: 22 additions & 1 deletion R/Utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -1759,9 +1759,30 @@ factorNAfirst <- function(x){
x <- rep("NA", length(x))
}

levels <- sort(levels, na.last = FALSE)
# If there are levels ending with "+", this is an indication that a plus group (e.g. age) is given, so we remove the "+" and try as numeric:
if(any(endsWith(levels, "+"))) {
levels_sans_plus <- convertToNumericIfPossible(sub("+", "", levels, fixed = TRUE))
levels = levels[order(levels_sans_plus)]
}
else {
levels <- sort(levels, na.last = FALSE)
}


factor(x, levels = levels, exclude = NULL)
}



convertToNumericIfPossible <- function(x) {
numberOfNAs <- sum(is.na(x))
xnumeric <- suppressWarnings(as.numeric(x))
if(sum(is.na(xnumeric)) > numberOfNAs) {
return(x)
}
else {
return(xnumeric)
}
}


Loading

0 comments on commit 3469e53

Please sign in to comment.