-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#201 basic ratio estimator based on BV and CL weight data
- Loading branch information
Showing
8 changed files
with
279 additions
and
134 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
##' Estimate Catch at Number (CANUM) for Biological Variables | ||
#' | ||
#' This function estimates catch at number (CANUM) for a specified biological variable, such as age or length. It aggregates data based on specified columns and generates a "plus group" for the highest value in the defined classes. The function supports grouping by various units (e.g., age, length, weight) and calculates required indices, totals, and proportions for the groups. | ||
#' | ||
#' @param bv A `data.table` containing biological data, with columns for the biological variable, class units (e.g., `Ageyear`, `Lengthmm`, `Weightg`), and other relevant variables. | ||
#' @param addColumns A character vector of additional column names used to group the data for aggregation (e.g., `BVfishId` and other identifiers). | ||
#' @param classUnits A character string specifying the class units of the biological variable to use for grouping (e.g., "Ageyear", "Lengthmm", "Weightg"). Default is "Ageyear". | ||
#' @param classBreaks A numeric vector specifying the breakpoints for classifying the biological variable. The last value defines the lower bound of the "plus group". Default is `1:8` for age groups. | ||
#' @param verbose Logical, if `TRUE`, prints detailed information about the process. Default is `FALSE`. | ||
#' | ||
#' @return A `data.table` containing the aggregated results, including groupings, calculated means, proportions, indices, and totals for the specified biological variable. | ||
#' | ||
#' @details The function performs the following steps: | ||
#' \itemize{ | ||
#' \item Validates the presence of the `classUnits` in the biological variable data. | ||
#' \item Reshapes the input data using `dcast` and groups the biological variable into classes using `cut()`. | ||
#' \item Aggregates mean weights and lengths by the defined classes, along with calculating proportions and indices based on the sample size. | ||
#' \item A "plus group" is created for values exceeding the highest `classBreaks` value. | ||
#' \item Calculates total weights, catch numbers, and performs a sanity check to ensure there are no rounding errors in the final results. | ||
#' } | ||
#' @export | ||
doBVestimCANUM <- function(bv, addColumns, | ||
classUnits = "Ageyear", | ||
classBreaks = 1:8, | ||
verbose = FALSE){ | ||
rightF <- "BVvalUnitScale" | ||
#the class unit must be one of "Sex" "Lengthmm" "Ageyear" "Weightg" "SMSF" | ||
if(!(classUnits %in% unique(bv[[rightF]]))){ | ||
stop("The class unit must be present in data column BVvalUnitScale ", | ||
"the available values are: ", paste0(unique(bv[[rightF]]), collapse = ", ")) | ||
} | ||
|
||
#extract raw values | ||
leftF <- paste0(c("BVfishId", addColumns), collapse = "+") | ||
|
||
bv_wide <- data.table::dcast(bv, formula(paste0(leftF, "~", rightF)), value.var = "BVvalueMeas") | ||
bv_wide$target <- as.numeric(bv_wide[[classUnits]]) | ||
|
||
classLabs <- switch(classUnits, | ||
Ageyear = c(classBreaks[-length(classBreaks)], paste0(max(classBreaks), "+")), | ||
c(paste0(classBreaks[-length(classBreaks)], "-", classBreaks[-1]), paste0(max(classBreaks), "+"))) | ||
|
||
# Create the 'plus group' by using cut() to assign groups based on classBreaks | ||
bv_wide$Group <- cut(bv_wide$target, breaks = c(classBreaks, Inf), | ||
include.lowest = TRUE, right = FALSE, | ||
labels =classLabs) | ||
|
||
bv_wide$Lengthmm <- as.numeric(bv_wide$Lengthmm) | ||
bv_wide$Weightg <- as.numeric(bv_wide$Weightg) | ||
|
||
#aggregate values | ||
a <- bv_wide[, .(WeightgMean = mean(Weightg, na.rm = TRUE), | ||
WeightgLen = sum(!is.na(Weightg)), | ||
LengthmmMean = mean(Lengthmm, na.rm = TRUE)), | ||
by = c("Group",addColumns)] | ||
|
||
b <- bv_wide[, .(lenMeas = sum(!is.na(Lengthmm)), | ||
targetMeas = sum(!is.na(Group))), | ||
by = addColumns] | ||
|
||
targetWeights <- merge(a, b, by = addColumns) | ||
|
||
#remove the NA row | ||
targetWeights <- targetWeights[!is.na(targetWeights$Group), ] | ||
|
||
#add extra columns | ||
#targetWeights$MeanLengthCm <- targetWeights$Lengthmm / 10 | ||
targetWeights$plusGroup <- classBreaks[length(classBreaks)] | ||
|
||
#calculate required values | ||
targetWeights$propSample <- targetWeights$WeightgLen / targetWeights$targetMeas | ||
targetWeights$WeightIndex <- targetWeights$propSample * (targetWeights$WeightgMean / 1000) | ||
|
||
# Calculate the sum of WeightIndex for each group defined by addColumns | ||
targetWeights[, WeightIndexSum := sum(WeightIndex), by = addColumns] | ||
|
||
targetWeights$TWCoef <- targetWeights$sumCLoffWeight / targetWeights$WeightIndexSum | ||
targetWeights$totWeight <- targetWeights$WeightIndex * targetWeights$TWCoef | ||
targetWeights$totNum <- targetWeights$totWeight / (targetWeights$WeightgMean / 1000) | ||
|
||
# Sanity check with tolerance to avoid rounding error | ||
weights <- targetWeights$totNum * (targetWeights$WeightgMean / 1000) | ||
expected_sum <- sum(unique(targetWeights$sumCLoffWeight)) | ||
|
||
|
||
# Use all.equal to compare with tolerance or manually check the difference | ||
if(!isTRUE(all.equal(sum(weights), expected_sum))) { | ||
stop("Strange problem: sums do not match within tolerance") | ||
} | ||
|
||
targetWeights | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
# Step 1: Generate independent random normal variables | ||
n <- 100 # Number of samples | ||
Lengthmm <- rnorm(n, mean = 100, sd = 1) # Lengthmm variable | ||
Ageyear <- rnorm(n, mean = 2, sd = 1) # Ageyear variable | ||
Weightg <- rnorm(n, mean = 10, sd = 1) # Weightg variable | ||
|
||
|
||
biolCLQ1 <- data.table( | ||
BVfishId = rep(1:100,each=3), | ||
BVvalueMeas = c(Lengthmm, Ageyear, Weightg), | ||
BVvalUnitScale = rep(c("Lengthmm", "Ageyear", "Weightg"), 100), | ||
sumCLoffWeight = 396210 | ||
) | ||
|
||
# Test 1: Check the output structure of BVestimCANUM | ||
test_that("doBVestimCANUM returns correct structure", { | ||
lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"), | ||
classUnits = "Lengthmm", | ||
classBreaks = seq(70, 130, 10), | ||
verbose = FALSE) | ||
|
||
# Check that the output is a data.table | ||
expect_true(is.data.table(lenCANUMQ1)) | ||
|
||
# Check that the expected columns are present | ||
expected_columns <- c("Group", "WeightgMean", "WeightgLen", "lenMeas", "targetMeas", | ||
"propSample", "WeightIndex", "totWeight", "totNum") | ||
expect_true(all(expected_columns %in% names(lenCANUMQ1))) | ||
}) | ||
|
||
# Test 2: Check that Group is correctly classified | ||
test_that("doBVestimCANUM correctly classifies Lengthmm groups", { | ||
lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"), | ||
classUnits = "Lengthmm", | ||
classBreaks = seq(70, 130, 10), | ||
verbose = FALSE) | ||
|
||
# Check that the Group column contains the correct labels | ||
expected_groups <- c("70-80", "80-90", "90-100", "100-110", "110-120", "120-130", "130+") | ||
actual_groups <- unique(lenCANUMQ1$Group) | ||
expect_true(all(actual_groups %in% expected_groups)) | ||
}) | ||
|
||
# Test 3: Check WeightgMean calculation | ||
test_that("doBVestimCANUM calculates WeightgMean correctly", { | ||
lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"), | ||
classUnits = "Lengthmm", | ||
classBreaks = seq(70, 130, 10), | ||
verbose = FALSE) | ||
|
||
# Ensure WeightgMean is numeric and positive | ||
expect_true(is.numeric(lenCANUMQ1$WeightgMean)) | ||
expect_true(all(lenCANUMQ1$WeightgMean > 0)) | ||
}) | ||
|
||
|
||
# Test 5: Sanity check for total weights | ||
test_that("doBVestimCANUM performs a sanity check for total weights", { | ||
lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"), | ||
classUnits = "Lengthmm", | ||
classBreaks = seq(70, 130, 10), | ||
verbose = FALSE) | ||
|
||
# Check that the sum of totWeight equals sumCLoffWeight (within tolerance) | ||
total_weight <- sum(lenCANUMQ1$totWeight) | ||
expected_sum <- sum(unique(lenCANUMQ1$sumCLoffWeight)) | ||
|
||
expect_true(isTRUE(all.equal(total_weight, expected_sum))) | ||
}) | ||
|
||
# Test 6: Check TWCoef calculation | ||
test_that("doBVestimCANUM calculates TWCoef correctly", { | ||
lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"), | ||
classUnits = "Lengthmm", | ||
classBreaks = seq(70, 130, 10), | ||
verbose = FALSE) | ||
|
||
# Recalculate TWCoef | ||
lenCANUMQ1[, expected_TWCoef := sumCLoffWeight / WeightIndexSum] | ||
|
||
# Ensure TWCoef is correctly calculated | ||
expect_equal(lenCANUMQ1$TWCoef, lenCANUMQ1$expected_TWCoef) | ||
}) | ||
|
Oops, something went wrong.