From 4ccb8f7d3c55beab848e6efbeb160d3ca961bfc9 Mon Sep 17 00:00:00 2001
From: Richard Meitern <richard.meitern@ut.ee>
Date: Thu, 17 Oct 2024 17:43:10 +0300
Subject: [PATCH] #201 basic ratio estimator based on BV and CL weight data

---
 R/addCLtoLowerCS.R                            |  17 +-
 R/doBVestimCACNUM.R                           |  92 +++++++++++
 R/getLowerTableSubsets.R                      |  42 +++--
 R/lowerTblData.R                              |  10 +-
 tests/testthat/test-addCLtoLowerCS.R          |  10 +-
 tests/testthat/test-doBVestimCACNUM.R         |  84 ++++++++++
 tests/testthat/test-getLowerTableSubsets.R    |   6 +-
 .../ratio-estimating-rdbesdataobjects.Rmd     | 152 +++++++-----------
 8 files changed, 279 insertions(+), 134 deletions(-)
 create mode 100644 R/doBVestimCACNUM.R
 create mode 100644 tests/testthat/test-doBVestimCACNUM.R

diff --git a/R/addCLtoLowerCS.R b/R/addCLtoLowerCS.R
index b756af0..1a206ac 100644
--- a/R/addCLtoLowerCS.R
+++ b/R/addCLtoLowerCS.R
@@ -10,11 +10,11 @@
 #'   The names of the list should match column names in any of the `CS` tables.
 #' @param strataListCL A named list of filter criteria for subsetting the `CL` data in the `RDBESDataObject`.
 #'   The names of the list should match column names in the `CL` table.
-#' @param samplingEventsTable A character string specifying the sampling events table name in the `RDBESDataObject`.
 #' @param combineStrata Logical, if `TRUE`, strata in the `strataListCS` are combined using a vertical bar (`|`).
 #' @param lowerHierarchy A character string specifying the level of the lower hierarchy table to which the CL data will be added.
 #'   Currently, only "C" is supported ie BV data only.
 #' @param CLfields A character vector of field names from the `CL` table that will be summed and added as new columns to the lower-level biological data.
+#' @param verbose Logical, if `TRUE`, the function prints informative text.
 #'
 #' @return A data.table containing the biological data from the lower hierarchy with added strata information from the `CL` table and
 #'   the sum of the specified fields from the `CL` data.
@@ -35,7 +35,6 @@
 #'                      CLmetier6 = "OTM_SPF_16-31_0_0",
 #'                      CLspecFAO = "SPR")
 #' biolCL <- addCLtoLowerCS(rdbesObject, strataListCS, strataListCL,
-#'                          samplingEventsTable = "VS",
 #'                          combineStrata = TRUE,
 #'                          lowerHierarchy = "C",
 #'                          CLfields = c("CLoffWeight"))
@@ -43,14 +42,7 @@
 #'
 #' @seealso \code{\link{getLowerTableSubsets}}, \code{\link{upperTblData}}
 #' @export
-addCLtoLowerCS <- function(rdbes, strataListCS, strataListCL, samplingEventsTable, combineStrata =T, lowerHierarchy = "C", CLfields = c("CLoffWeight")){
-  if(inherits(rdbes, "RDBESDataObject")) {
-    tableNames <- names(summary(rdbes)$data)
-    #select only the relevant hierarchy table names
-    rdbesTbl <- rdbes[tableNames]
-  } else {
-    stop("rdbes must be of class RDBESDataObject")
-  }
+addCLtoLowerCS <- function(rdbes, strataListCS, strataListCL, combineStrata =T, lowerHierarchy = "C", CLfields = c("CLoffWeight"), verbose = FALSE){
 
   # Function to subset data.table based on criteria list
   subset_dt <- function(dt, criteria) {
@@ -78,10 +70,6 @@ addCLtoLowerCS <- function(rdbes, strataListCS, strataListCL, samplingEventsTabl
   #get the sampling data
   biolData <- getLowerTableSubsets(strataListCS, tblName, rdbes, combineStrata = combineStrata)
 
-  #get the trip count
-  biolTrips <- upperTblData(tblId, biolData[[tblId]], rdbesTbl,samplingEventsTable)
-  biolTrips$samplingEvents <- nrow(biolTrips)
-
 
   #get the CL data
   CL <- subset_dt(rdbes$CL, strataListCL)
@@ -91,6 +79,7 @@ addCLtoLowerCS <- function(rdbes, strataListCS, strataListCL, samplingEventsTabl
     x_new <- ifelse(length(x) == 1, x, paste0(x, collapse = "|"))
     rep(x_new, nrow(biolData))
     })
+
   strataList <- as.data.table(strataList)
   biolData <- cbind(biolData, strataList)
 
diff --git a/R/doBVestimCACNUM.R b/R/doBVestimCACNUM.R
new file mode 100644
index 0000000..37df57d
--- /dev/null
+++ b/R/doBVestimCACNUM.R
@@ -0,0 +1,92 @@
+##' Estimate Catch at Number (CANUM) for Biological Variables
+#'
+#' This function estimates catch at number (CANUM) for a specified biological variable, such as age or length. It aggregates data based on specified columns and generates a "plus group" for the highest value in the defined classes. The function supports grouping by various units (e.g., age, length, weight) and calculates required indices, totals, and proportions for the groups.
+#'
+#' @param bv A `data.table` containing biological data, with columns for the biological variable, class units (e.g., `Ageyear`, `Lengthmm`, `Weightg`), and other relevant variables.
+#' @param addColumns A character vector of additional column names used to group the data for aggregation (e.g., `BVfishId` and other identifiers).
+#' @param classUnits A character string specifying the class units of the biological variable to use for grouping (e.g., "Ageyear", "Lengthmm", "Weightg"). Default is "Ageyear".
+#' @param classBreaks A numeric vector specifying the breakpoints for classifying the biological variable. The last value defines the lower bound of the "plus group". Default is `1:8` for age groups.
+#' @param verbose Logical, if `TRUE`, prints detailed information about the process. Default is `FALSE`.
+#'
+#' @return A `data.table` containing the aggregated results, including groupings, calculated means, proportions, indices, and totals for the specified biological variable.
+#'
+#' @details The function performs the following steps:
+#' \itemize{
+#'   \item Validates the presence of the `classUnits` in the biological variable data.
+#'   \item Reshapes the input data using `dcast` and groups the biological variable into classes using `cut()`.
+#'   \item Aggregates mean weights and lengths by the defined classes, along with calculating proportions and indices based on the sample size.
+#'   \item A "plus group" is created for values exceeding the highest `classBreaks` value.
+#'   \item Calculates total weights, catch numbers, and performs a sanity check to ensure there are no rounding errors in the final results.
+#' }
+#' @export
+doBVestimCANUM <- function(bv, addColumns,
+                         classUnits = "Ageyear",
+                         classBreaks = 1:8,
+                         verbose = FALSE){
+  rightF <- "BVvalUnitScale"
+  #the class unit must be one of "Sex" "Lengthmm" "Ageyear"  "Weightg"  "SMSF"
+  if(!(classUnits %in% unique(bv[[rightF]]))){
+  stop("The class unit must be present in data column BVvalUnitScale ",
+       "the available values are: ", paste0(unique(bv[[rightF]]), collapse = ", "))
+  }
+
+  #extract raw values
+  leftF <- paste0(c("BVfishId", addColumns), collapse = "+")
+
+  bv_wide <- data.table::dcast(bv, formula(paste0(leftF, "~", rightF)), value.var = "BVvalueMeas")
+  bv_wide$target <- as.numeric(bv_wide[[classUnits]])
+
+  classLabs <- switch(classUnits,
+                      Ageyear = c(classBreaks[-length(classBreaks)], paste0(max(classBreaks), "+")),
+                      c(paste0(classBreaks[-length(classBreaks)], "-", classBreaks[-1]),  paste0(max(classBreaks), "+")))
+
+  # Create the 'plus group' by using cut() to assign groups based on classBreaks
+  bv_wide$Group <- cut(bv_wide$target, breaks = c(classBreaks, Inf),
+                             include.lowest = TRUE, right = FALSE,
+                             labels =classLabs)
+
+  bv_wide$Lengthmm <- as.numeric(bv_wide$Lengthmm)
+  bv_wide$Weightg <- as.numeric(bv_wide$Weightg)
+
+  #aggregate values
+  a <- bv_wide[, .(WeightgMean = mean(Weightg, na.rm = TRUE),
+                   WeightgLen = sum(!is.na(Weightg)),
+                   LengthmmMean = mean(Lengthmm, na.rm = TRUE)),
+               by = c("Group",addColumns)]
+
+  b <- bv_wide[, .(lenMeas = sum(!is.na(Lengthmm)),
+                   targetMeas = sum(!is.na(Group))),
+               by = addColumns]
+
+  targetWeights <- merge(a, b, by = addColumns)
+
+  #remove the NA row
+  targetWeights <- targetWeights[!is.na(targetWeights$Group), ]
+
+  #add extra columns
+  #targetWeights$MeanLengthCm <- targetWeights$Lengthmm / 10
+  targetWeights$plusGroup <- classBreaks[length(classBreaks)]
+
+  #calculate required values
+  targetWeights$propSample <- targetWeights$WeightgLen / targetWeights$targetMeas
+  targetWeights$WeightIndex <- targetWeights$propSample * (targetWeights$WeightgMean / 1000)
+
+  # Calculate the sum of WeightIndex for each group defined by addColumns
+  targetWeights[, WeightIndexSum := sum(WeightIndex), by = addColumns]
+
+  targetWeights$TWCoef <- targetWeights$sumCLoffWeight / targetWeights$WeightIndexSum
+  targetWeights$totWeight <-  targetWeights$WeightIndex * targetWeights$TWCoef
+  targetWeights$totNum <- targetWeights$totWeight / (targetWeights$WeightgMean / 1000)
+
+  # Sanity check with tolerance to avoid rounding error
+  weights <- targetWeights$totNum * (targetWeights$WeightgMean / 1000)
+  expected_sum <- sum(unique(targetWeights$sumCLoffWeight))
+
+
+  # Use all.equal to compare with tolerance or manually check the difference
+  if(!isTRUE(all.equal(sum(weights), expected_sum))) {
+    stop("Strange problem: sums do not match within tolerance")
+  }
+
+  targetWeights
+}
diff --git a/R/getLowerTableSubsets.R b/R/getLowerTableSubsets.R
index 4933313..05db061 100644
--- a/R/getLowerTableSubsets.R
+++ b/R/getLowerTableSubsets.R
@@ -11,6 +11,7 @@
 #' @param combineStrata A logical value indicating whether to include the strata information in the result.
 #'   If `TRUE`, and if any strata has more than one value, those values are collapsed into a single string
 #'   and appended to the result with a warning.
+#' @param verbose A logical value indicating whether to print informative text.
 #'
 #' @return A unique data frame containing the rows of the target lower level table that are associated with
 #'   the given values of the upper table field in each subset. If `combineStrata = TRUE`, the result will also include
@@ -20,12 +21,11 @@
 #' the values from each subset in the upper tables. It then ensures that only unique rows are returned,
 #' based on the ID column of the target table.
 #'
-#' @export
-getLowerTableSubsets <- function(subsets, tblName, rdbesTables, combineStrata=F){
-  #check if tbls is of class "RDBESDataObject"
+getLowerTableSubsets <- function(subsets, tblName, rdbesTables, combineStrata = TRUE, verbose = FALSE) {
+  # Check if rdbesTables is of class "RDBESDataObject"
   if(inherits(rdbesTables, "RDBESDataObject")) {
     tableNames <- names(summary(rdbesTables)$data)
-    #select only the relevant hierarchy table names
+    # Select only the relevant hierarchy table names
     rdbesTables <- rdbesTables[tableNames]
   } else {
     stop("rdbesTables must be of class RDBESDataObject")
@@ -35,30 +35,40 @@ getLowerTableSubsets <- function(subsets, tblName, rdbesTables, combineStrata=F)
   }
 
   res <- list()
+  if(verbose) print("Getting lower table data")
+
+  # Process each subset
   for(ss in names(subsets)){
-    res[[ss]] <- lowerTblData(ss, subsets[[ss]], rdbesTables, tblName,F)
+    res[[ss]] <- lowerTblData(ss, subsets[[ss]], rdbesTables, tblName, verbose)
   }
+
+  # Function to get the intersection of IDs
   intesectIDs <- function(x, y, tblName){
     idCol <- paste0(tblName, "id")
-    if(is.data.frame(x)){ start <- x[[idCol]]} else{start <- x}
-
+    if(is.data.frame(x)){
+      start <- x[[idCol]]
+    } else {
+      start <- x
+    }
     intersect(start, y[[idCol]])
   }
+
+  # Get the IDs from the first subset if only one subset is present
   if(length(res) == 1) {
     ids <- res[[1]][[paste0(tblName, "id")]]
   } else{
-    ids <- Reduce(function(x, y) {intesectIDs(x, y, tblName) }, res)
+    ids <- Reduce(function(x, y) {intesectIDs(x, y, tblName)}, res)
   }
 
+  # Bind the data together and filter based on intersected IDs
   res <- data.table::rbindlist(res)
   res <- res[get(paste0(tblName, "id")) %in% ids]
   res <- unique(res, by = paste0(tblName, "id"))
 
-  if(combineStrata){
-    # Check the length of each element in the list
+  if(combineStrata) {
+    # Collapse multiple strata values into a single string if combineStrata = TRUE
     lengths <- sapply(subsets, length)
-    if(any(lengths>1)){
-      # If the length is more than 1, collapse it into a single string
+    if(any(lengths > 1)){
       subsets[lengths > 1] <- sapply(subsets[lengths > 1], paste0, collapse = "|")
       items <- subsets[lengths > 1]
       for(i in 1:length(items)){
@@ -66,11 +76,13 @@ getLowerTableSubsets <- function(subsets, tblName, rdbesTables, combineStrata=F)
                 " is collapsed in the result into: \"",items[i],"\"\n")
       }
     }
-
+  } else {
+   stop("combineStrata must be TRUE for now")
   }
 
-  res <- cbind(res,as.data.frame(subsets))
-
+  # Combine the data with the strata subsets
+  res <- cbind(res, as.data.frame(subsets))
 
   return(res)
 }
+
diff --git a/R/lowerTblData.R b/R/lowerTblData.R
index b3f59fd..8f19912 100644
--- a/R/lowerTblData.R
+++ b/R/lowerTblData.R
@@ -14,11 +14,11 @@
 #'   the given values of the upper table field.
 #'
 #' @examples
-#'DE <- data.table(DEid = c(1, 2, 3, 4), SDid = c(1, 2, 3, 4))
-#'SD <- data.table(SDid = c(1, 2, 3, 4), TEid = c(1, 2, 3, 4))
-#'TE <- data.table(SDid = c(1, 2, 3, 4), TEid = c(1, 2, 3, 4))
-#'VS <- data.table(TEid = c(1, 2, 3, 4), VSid = c(1, 2, 3, 4))
-#'LE <- data.table(VSid = 1:5, LEid = 1:5, value = c(10, 20, 3, 4, 6))
+#'DE <- data.table::data.table(DEid = c(1, 2, 3, 4), SDid = c(1, 2, 3, 4))
+#'SD <- data.table::data.table(SDid = c(1, 2, 3, 4), TEid = c(1, 2, 3, 4))
+#'TE <- data.table::data.table(SDid = c(1, 2, 3, 4), TEid = c(1, 2, 3, 4))
+#'VS <- data.table::data.table(TEid = c(1, 2, 3, 4), VSid = c(1, 2, 3, 4))
+#'LE <- data.table::data.table(VSid = 1:5, LEid = 1:5, value = c(10, 20, 3, 4, 6))
 #' tblsSprat <- list( DE = DE ,SD = SD, TE = TE, VS = VS, LE = LE )
 #'
 #' lowerTblData("TEid", c(4), tblsSprat, "LE", T)
diff --git a/tests/testthat/test-addCLtoLowerCS.R b/tests/testthat/test-addCLtoLowerCS.R
index 7a6a9c2..f206ecd 100644
--- a/tests/testthat/test-addCLtoLowerCS.R
+++ b/tests/testthat/test-addCLtoLowerCS.R
@@ -10,8 +10,8 @@ strataListCL <- list(CLarea="27.3.d.28.1", CLquar = 1, CLmetier6 = "OTM_SPF_16-3
 # Test 1: Expect error if rdbes is not of class RDBESDataObject
 test_that("addCLtoLowerCS throws an error if rdbes is not of class RDBESDataObject", {
   expect_error(
-    addCLtoLowerCS(list(), strataListCS, strataListCL, samplingEventsTable = "VS", combineStrata = TRUE, lowerHierarchy = "C", CLfields = c("CLoffWeight")),
-    "rdbes must be of class RDBESDataObject"
+    addCLtoLowerCS(list(), strataListCS, strataListCL, combineStrata = TRUE, lowerHierarchy = "C", CLfields = c("CLoffWeight")),
+    "No CL data in the input RDBESDataObject"
   )
 })
 
@@ -20,14 +20,14 @@ test_that("addCLtoLowerCS throws an error if CL data is missing in rdbes", {
   mock_rdbes_no_CL <- mock_rdbes
   mock_rdbes_no_CL$CL <- NULL
   expect_error(
-    addCLtoLowerCS(mock_rdbes_no_CL, strataListCS, strataListCL, samplingEventsTable = "VS", combineStrata = TRUE, lowerHierarchy = "C", CLfields = c("CLoffWeight")),
+    addCLtoLowerCS(mock_rdbes_no_CL, strataListCS, strataListCL, combineStrata = TRUE, lowerHierarchy = "C", CLfields = c("CLoffWeight")),
     "No CL data in the input RDBESDataObject"
   )
 })
 
 # Test 3: Check correct output with valid input
 test_that("addCLtoLowerCS adds the correct sumCLoffWeight column with valid input", {
-  result <- expect_warning(addCLtoLowerCS(mock_rdbes, strataListCS, strataListCL, samplingEventsTable = "VS", combineStrata = TRUE, lowerHierarchy = "C", CLfields = c("CLoffWeight")))
+  result <- expect_warning(addCLtoLowerCS(mock_rdbes, strataListCS, strataListCL, combineStrata = TRUE, lowerHierarchy = "C", CLfields = c("CLoffWeight")))
 
   # Check that the result has the correct columns
   expect_true("sumCLoffWeight" %in% names(result))
@@ -37,7 +37,7 @@ test_that("addCLtoLowerCS adds the correct sumCLoffWeight column with valid inpu
 # Test 4: Check that the function handles unsupported lowerHierarchy correctly
 test_that("addCLtoLowerCS throws an error if lowerHierarchy is unsupported", {
   expect_error(
-    addCLtoLowerCS(mock_rdbes, strataListCS, strataListCL, samplingEventsTable = "VS", combineStrata = TRUE, lowerHierarchy = "A", CLfields = c("CLoffWeight")),
+    addCLtoLowerCS(mock_rdbes, strataListCS, strataListCL, combineStrata = TRUE, lowerHierarchy = "A", CLfields = c("CLoffWeight")),
     "Lower hierarchy A not yet supported"
   )
 })
diff --git a/tests/testthat/test-doBVestimCACNUM.R b/tests/testthat/test-doBVestimCACNUM.R
new file mode 100644
index 0000000..82fde0b
--- /dev/null
+++ b/tests/testthat/test-doBVestimCACNUM.R
@@ -0,0 +1,84 @@
+# Step 1: Generate independent random normal variables
+n <- 100  # Number of samples
+Lengthmm <- rnorm(n, mean = 100, sd = 1)  # Lengthmm variable
+Ageyear <- rnorm(n, mean = 2, sd = 1)     # Ageyear variable
+Weightg <- rnorm(n, mean = 10, sd = 1)    # Weightg variable
+
+
+biolCLQ1 <- data.table(
+  BVfishId = rep(1:100,each=3),
+  BVvalueMeas = c(Lengthmm, Ageyear, Weightg),
+  BVvalUnitScale = rep(c("Lengthmm", "Ageyear", "Weightg"), 100),
+  sumCLoffWeight = 396210
+)
+
+# Test 1: Check the output structure of BVestimCANUM
+test_that("doBVestimCANUM returns correct structure", {
+  lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"),
+                             classUnits = "Lengthmm",
+                             classBreaks = seq(70, 130, 10),
+                             verbose = FALSE)
+
+  # Check that the output is a data.table
+  expect_true(is.data.table(lenCANUMQ1))
+
+  # Check that the expected columns are present
+  expected_columns <- c("Group", "WeightgMean", "WeightgLen", "lenMeas", "targetMeas",
+                        "propSample", "WeightIndex", "totWeight", "totNum")
+  expect_true(all(expected_columns %in% names(lenCANUMQ1)))
+})
+
+# Test 2: Check that Group is correctly classified
+test_that("doBVestimCANUM correctly classifies Lengthmm groups", {
+  lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"),
+                             classUnits = "Lengthmm",
+                             classBreaks = seq(70, 130, 10),
+                             verbose = FALSE)
+
+  # Check that the Group column contains the correct labels
+  expected_groups <- c("70-80", "80-90", "90-100", "100-110", "110-120", "120-130", "130+")
+  actual_groups <- unique(lenCANUMQ1$Group)
+  expect_true(all(actual_groups %in% expected_groups))
+})
+
+# Test 3: Check WeightgMean calculation
+test_that("doBVestimCANUM calculates WeightgMean correctly", {
+  lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"),
+                             classUnits = "Lengthmm",
+                             classBreaks = seq(70, 130, 10),
+                             verbose = FALSE)
+
+  # Ensure WeightgMean is numeric and positive
+  expect_true(is.numeric(lenCANUMQ1$WeightgMean))
+  expect_true(all(lenCANUMQ1$WeightgMean > 0))
+})
+
+
+# Test 5: Sanity check for total weights
+test_that("doBVestimCANUM performs a sanity check for total weights", {
+  lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"),
+                             classUnits = "Lengthmm",
+                             classBreaks = seq(70, 130, 10),
+                             verbose = FALSE)
+
+  # Check that the sum of totWeight equals sumCLoffWeight (within tolerance)
+  total_weight <- sum(lenCANUMQ1$totWeight)
+  expected_sum <- sum(unique(lenCANUMQ1$sumCLoffWeight))
+
+  expect_true(isTRUE(all.equal(total_weight, expected_sum)))
+})
+
+# Test 6: Check TWCoef calculation
+test_that("doBVestimCANUM calculates TWCoef correctly", {
+  lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"),
+                             classUnits = "Lengthmm",
+                             classBreaks = seq(70, 130, 10),
+                             verbose = FALSE)
+
+  # Recalculate TWCoef
+  lenCANUMQ1[, expected_TWCoef := sumCLoffWeight / WeightIndexSum]
+
+  # Ensure TWCoef is correctly calculated
+  expect_equal(lenCANUMQ1$TWCoef, lenCANUMQ1$expected_TWCoef)
+})
+
diff --git a/tests/testthat/test-getLowerTableSubsets.R b/tests/testthat/test-getLowerTableSubsets.R
index ae2899a..76783c6 100644
--- a/tests/testthat/test-getLowerTableSubsets.R
+++ b/tests/testthat/test-getLowerTableSubsets.R
@@ -25,7 +25,7 @@ test_that("Function throws error for missing table name", {
 })
 
 test_that("Function returns correct data with valid inputs", {
-  res <- getLowerTableSubsets(list(TEstratumName = "January"), "SA", H8ExampleEE1, F)
+  res <- getLowerTableSubsets(list(TEstratumName = "January"), "SA", H8ExampleEE1, T)
 
   expect_equal(nrow(res), 3)
   #expect last column to be "TEstratumName"
@@ -45,14 +45,14 @@ test_that("Function works with combineStrata = TRUE and collapsing strata", {
 })
 
 test_that("Function works with empty subsets", {
-  res <- getLowerTableSubsets(list(TEstratumName = character(0)), "SA", H8ExampleEE1, F)
+  res <- getLowerTableSubsets(list(TEstratumName = character(0)), "SA", H8ExampleEE1, T)
 
   expect_equal(nrow(res), 0)
 
 })
 
 test_that("Function handles intersection correctly", {
-  res <- getLowerTableSubsets(list(TEid = 4, TEstratumName = "January"), "SA", H8ExampleEE1, F)
+  res <- getLowerTableSubsets(list(TEid = 4, TEstratumName = "January"), "SA", H8ExampleEE1, T)
 
   expect_equal(nrow(res), 2)
   #expect last column to be "TEstratumName"
diff --git a/vignettes/ratio-estimating-rdbesdataobjects.Rmd b/vignettes/ratio-estimating-rdbesdataobjects.Rmd
index 09efc4c..e7ef0f4 100644
--- a/vignettes/ratio-estimating-rdbesdataobjects.Rmd
+++ b/vignettes/ratio-estimating-rdbesdataobjects.Rmd
@@ -42,134 +42,102 @@ The data has not gone through RDBES upload and download procedure and  contains
 validateRDBESDataObject(H8ExampleEE1, checkDataTypes = TRUE, strict = FALSE)
 ```
 
-```{r}
-#create the estimation object to estimate values on the SA table
-SampleWeightEstData <- createRDBESEstObject(H8ExampleEE1, 8, "SA" )
 
-#the SAsampWtMes is in grammes let's estimate in tonnes
-SampleWeightEstData$SAsampWtMes <- SampleWeightEstData$SAsampWtMes / (1000 * 1000)
 
-results <- doEstimationForAllStrata(SampleWeightEstData, "SAsampWtMes", verbose = T, estimMethod = "ratio")
-```
 
+### Simple Estimation Example
 
 
-```{r}
-View(H8ExampleEE1$CL)
-```
+For the simplest case of estimation we need the RDBESDataObject with CS tables and a CL table. In the following example we will estimate the total number of sprat caught in the area 27.3.d.28.1 with the gear OTM_SPF_16-31_0_0 for the first and last quarter of the year.
 
 ```{r}
-#create the estimation object to estimate values on the BV table
-estData <- createRDBESEstObject(H8ExampleEE1, 8, "BV" )
-#for length we want the LengthTotal
-estData <- estData[estData$BVtypeMeas == "LengthTotal",]
-
-#the BV valuemeasured is character
-head(estData$BVvalueMeas)
-```
-```{r}
-estData$BVfishLen <- as.numeric(estData$BVvalueMeas)
+#From the commertial landings table we need to get the total weight of the catches
+CLfieldstoSum <- c("CLoffWeight")
 ```
 
+The most important thing in this estimation is to get the same strata for the CS and CL tables. This means we want to take the samples from the same area, with the same gear and the same species. Exacctly how this is done depends on the upper and lower hierarchy used and how the sampling is stratified. In the following example we are using the lower hierarchy C meaning that we are extracting the BV data as the biological data. 
 
 ```{r}
-for(SAid in c(1:15)){
-  bvt <- as.data.frame(H8ExampleEE1$BV)[as.data.frame(H8ExampleEE1$BV)$SAid== SAid,]
-  bvt <- bvt[bvt$BVtypeMeas == "LengthTotal",]
-  bvt$BVfishLen <- as.numeric(bvt$BVvalueMeas)
-  
-  print(paste("SAid:", SAid, "Number of fish:", nrow(bvt)))
-}
-```
+#get the first quarter data from CS 
+strataListCS <- list(LEarea="27.3.d.28.1",
+                     LEmetier6 = "OTM_SPF_16-31_0_0",
+                     TEstratumName = month.name[1:3],
+                    SAspeCodeFAO = "SPR")
 
+#get the first quarter data from CL table
+strataListCL <- list(CLarea="27.3.d.28.1",
+                     CLquar = 1,
+                     CLmetier6 = "OTM_SPF_16-31_0_0",
+                     CLspecFAO = "SPR")
 
-```{r}
-resultsFishLen <- doEstimationForAllStrata(estData, "BVfishLen", verbose = T, estimMethod = "ratio")
+#we are using the lower hierarchy C meaning that we are extracting the BV data
+#as the biological data
+biolCLQ1 <- addCLtoLowerCS(H8ExampleEE1, strataListCS, strataListCL,
+                         combineStrata =T, lowerHierarchy = "C",
+                         CLfields = CLfieldstoSum)
 ```
 
-```{r}
-bvw <- bvt <- as.data.frame(H8ExampleEE1$BV)
-bvw <- bvw[bvw$BVtypeMeas == "WeightMeasured",]
-bvt <- bvt[bvt$BVtypeMeas == "LengthTotal",]
-bv <- merge(bvw, bvt, by = c("BVfishId", "SAid"), suffixes = c("Wt", "Len"))
-bv$BVvalueMeasWt <- as.numeric(bv$BVvalueMeasWt)
-bv$BVvalueMeasLen <- as.numeric(bv$BVvalueMeasLen)
-#group by lenght class
-
-bv$BVfishLenClass <- cut(bv$BVvalueMeasLen, breaks = seq(10, 300, 10), include.lowest = TRUE)
-#aggregate by lenght class
-bvta <- aggregate(cbind(countAtLen= BVvalueMeasLen) ~ BVfishLenClass + SAid, data = bv, FUN = length)
-bvtb <- aggregate( cbind(totalWeightAtLen=  BVvalueMeasWt) ~ BVfishLenClass + SAid, data = bv, FUN = sum)
-bvtc <- aggregate( cbind(totCount=  BVvalueMeasLen) ~  SAid, data = bv, FUN = length)
-
-bvta <- merge(bvta, bvtb, by = c("BVfishLenClass", "SAid"))
-bvta <- merge(bvta, bvtc, by = "SAid")
-bvta$prop <- bvta$countAtLen / bvta$totCount
-
-SA <- as.data.frame(H8ExampleEE1$SA)
-SA$aux <- c(
-  1659, 
-3377, 
-8110, 
-20118, 
-1138, 
-4323, 
-2049, 
-4412, 
-1165, 
-3031, 
-1207, 
-11698, 
-2623, 
-3025, 
-12635
-)
-sam <- merge(SA, bvta, by = "SAid")
-
-#get length class proportions
-sam$ratio <- (sam$SAtotalWtMes * sam$prop) / sam$totalWeightAtLen
+To estimate the total number of sprat caught in the area 27.3.d.28.1 with the gear OTM_SPF_16-31_0_0 for the first quarter of the year we need to use the function **doBVestimCANUM(...)**. The function takes the biological data table, the columns to be added to the final result and the class breaks for the estimation. The function returns a table with the estimates for the given class breaks.
+The allowed class units are "Ageyears", "Lenghtmm" and "Weightg". The class breaks are the number of classes to break the data into. The function will return the estimates for each class.
+The addColumns parameter is a vector of column names that will be added to the final result. The columns should be present in the biological data table. A minimum of one column (the sum column that in our case is "sumCLoffWeight") is required. The function will return the estimates for each unique value in the columns.
 
-sam$numAtLen <- sam$aux * sam$countAtLen
-#the auxiliary variable how to use it? instead
+```{r}
+lenCANUMQ1 <- doBVestimCANUM(biolCLQ1, c("sumCLoffWeight"),
+             classUnits = "Lengthmm",
+             classBreaks = seq(70,130,10),
+             verbose = FALSE)
 
-sam$countAtLen <- sam$ratio * sam$countAtLen
-sam[c("SAid", "BVfishLenClass", "countAtLen",  "SAtotalWtMes", "ratio","numAtLen", "prop")]
+lenCANUMQ1
 ```
 
 
+To have the estimates for several Quarters in the result same biolCL table creation is done for the last quarter of the year. 
 
 ```{r}
-sampling_ratio <- estData$SAtotalWtMes/ estData$SAsampWtMes
-unique(sampling_ratio)
-```
+strataListCS <- list(LEarea="27.3.d.28.1",
+                     LEmetier6 = "OTM_SPF_16-31_0_0",
+                     TEstratumName = month.name[10:12], #the last 3 months of the year
+                    SAspeCodeFAO = "SPR")
 
-```{r}
-sampling_ratio <- estData$su5numTotal / estData$su5numSamp
-unique(sampling_ratio)
-```
-```{r}
+strataListCL <- list(CLarea="27.3.d.28.1",
+                     CLquar = 4,
+                     CLmetier6 = "OTM_SPF_16-31_0_0",
+                     CLspecFAO = "SPR")
 
+biolCLQ4 <- addCLtoLowerCS(H8ExampleEE1, strataListCS, strataListCL,
+                         combineStrata =T, lowerHierarchy = "C",
+                         CLfields = CLfieldstoSum)
 ```
 
-
 ```{r}
-#how many uniqe fish are there in the BV
-length(unique(estData$BVfishId))
-unique(estData$BVtypeMeas)
-  
+#lets combine the two quarters into one table
+biolCL <- rbind(biolCLQ1, biolCLQ4)
 ```
 
+More columns from the above table can be added to the final result. The output is broken down by unique values in these columns.
 
 ```{r}
-colnames(estData)
-```
+#this alllows to add extra columns into the final result 
+addCols <- c(names(strataListCS),
+             names(strataListCL),
+             paste0('sum', CLfieldstoSum))
 
+ageCANUM <- doBVestimCANUM(biolCL, addCols,
+             classUnits = "Ageyear",
+             classBreaks = 1:8,
+             verbose = FALSE)
 
-```{r}
-View(estData)
+ageCANUM
+             
 ```
 
 
+The output table should be enough to populate a classic Intercatch data call table. 
+
+
+
+
+
 
 
 ```{r}