Skip to content

Commit

Permalink
First version of function that takes the output from doEstimationForA…
Browse files Browse the repository at this point in the history
…llStrata() and converts it into the InterCatch exchange format (#209)
  • Loading branch information
davidcurrie2001 committed Oct 17, 2024
1 parent a55ccc1 commit ab89650
Show file tree
Hide file tree
Showing 3 changed files with 339 additions and 0 deletions.
123 changes: 123 additions & 0 deletions R/exportEstimationResultsToInterCatchFormat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#' exportEstimationResultsToInterCatchFormat
#'
#'This function transforms the estimation results into the InterCatch format.
#'
#' @param dataToExport A data frame containing the estimation results -
#' this should include the output from the doEstimationForAllStrata function
#' and already have the the InterCatch columns present.
#'
#' @return
#' @export
#'
exportEstimationResultsToInterCatchFormat <- function(dataToExport){

# HI
# create a data frame called HIdefinitions with 2 columns: "Name" and "Type"
HIdefinitions <- data.frame(Name = c("Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange","UnitEffort","Effort","AreaQualifier"),
Type = c("character","character","character","integer","character","character","character","character","character","integer","character"))
HIcols <- HIdefinitions$Name

# SI
# create a data frame called SIdefinitions with 2 columns: "Name" and "Type"
SIdefinitions <- data.frame(Name = c("Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange","Species","Stock","CatchCategory","ReportingCategory","DataToFrom","Usage","SamplesOrigin","QualityFlag","UnitCATON","CATON","OffLandings","varCATON","InfoFleet","InfoStockCoordinator","InfoGeneral"),
Type = c("character","character","character","integer","character","character","character","character","character","character","character","character","character","character","character","character","character","numeric","integer","numeric","character","character","character"))
SIcols <- SIdefinitions$Name

# SD
# create a data frame called SDdefinitions with 2 columns: "Name" and "Type"
SDdefinitions <- data.frame(Name = c('Country','Year','SeasonType','Season','Fleet','AreaType','FishingArea','DepthRange','Species','Stock','CatchCategory','ReportingCategory','Sex','CANUMtype','AgeLength','PlusGroup','SampledCatch','NumSamplesLngt','NumLngtMeas','NumSamplesAge','NumAgeMeas','unitMeanWeight','unitCANUM','UnitAgeOrLength','UnitMeanLength','Maturity','NumberCaught','MeanWeight','MeanLength','varNumLanded','varWgtLanded','varLgtLanded'),
Type = c('character','character','character','integer','character','character','character','character','character','character','character','character','character','character','integer','integer','integer','integer','integer','integer','integer','character','character','character','character','character','numeric','numeric','numeric','numeric','numeric','numeric'))
SDcols <- SDdefinitions$Name

# TODO- create SD data (if required)

# find the values of SIcols that are in the column names of dataToExport
SIdata <- dataToExport[,intersect(SIcols,names(dataToExport))]
# remove any duplicates from SIdata
SIdata <- unique(SIdata)

SI <- createIC_SI(SIdata, SIdefinitions)

# find the values of HIcols that are in the column names of dataToExport
HIdata <- dataToExport[,intersect(HIcols,names(dataToExport))]
# remove any duplicates from HIdata
HIdata <- unique(HIdata)

HI <- createIC_HI(HIdata, HIdefinitions)

IC <- rbind(HI[,c("Key","Value")], SI[,c("Key","Value")])
# sort IC by the Key column
IC <- IC[order(IC$Key),]
# remove the Key column from IC
IC <- IC$Value

return(IC)

}

# Function to create HI format data
createIC_HI <- function(HIdata, HIdefinitions){

HI <- createIC_subtype(HIdata, HIdefinitions,"HI")

# create a new column in HI called "Key" - its value will be the following columns concatenated together (seperated by "_") "Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange"
HI$Key <- paste(HI$Country,HI$Year,HI$SeasonType,HI$Season,HI$Fleet,HI$AreaType,HI$FishingArea,HI$DepthRange, sep = "_")

return(HI)
}

# Function to create SI format data
createIC_SI <- function(SIdata, SIdefinitions){

SI <- createIC_subtype(SIdata, SIdefinitions,"SI")

# create a new column in SI called "Key" - its value will be the following columns concatenated together (seperated by "_") "Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange"
SI$Key <- paste(SI$Country,SI$Year,SI$SeasonType,SI$Season,SI$Fleet,SI$AreaType,SI$FishingArea,SI$DepthRange, sep = "_")

return(SI)
}


# Function to create SD format data
createIC_SD <- function(SDdata, SDdefinitions){

SD <- createIC_subtype(SDdata, SDdefinitions,"SD")

# create a new column in SD called "Key" - its value will be the following columns concatenated together (seperated by "_") "Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange"
SD$Key <- paste(SD$Country,SD$Year,SD$SeasonType,SD$Season,SD$Fleet,SD$AreaType,SD$FishingArea,SD$DepthRange, sep = "_")

return(SD)
}

# Base function used by HI, SI, and SD functions to create the relevent data
createIC_subtype <- function(subtypeData, subtypeDefinitions, subtypeName){

numRows <- nrow(subtypeData)
recordType <- rep(subtypeName, numRows)
st <- data.table::data.table(recordType)

# loop through subtypeDefinitions and add parameter values to SD
for(i in 1:nrow(subtypeDefinitions)){
# check if the column name is in the column names of SDdata
if(subtypeDefinitions$Name[i] %in% names(subtypeData)){
myParam <- data.frame(subtypeData[[subtypeDefinitions$Name[i]]])
} else {
# if it isn't, create a column of NAs or -9s
if(subtypeDefinitions$Type[i] == "character"){
myParam <- data.frame(rep(NA, numRows))
} else {
myParam <- data.frame(rep(-9, numRows))
}
}
names(myParam) <- subtypeDefinitions$Name[i]
# change any NAs in myParam to "NA"
myParam[is.na(myParam)] <- "NA"
st <- cbind(st, myParam)
}

# create a new column in st called "Value" - it will have the values from all other columns concatenated together with a comma seperator
st$Value <- apply(st, 1, function(x) paste(x, collapse = ","))

return(st)

}
96 changes: 96 additions & 0 deletions tests/testthat/test-exportEstimationResultsToInterCatchFormat.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#capture.output({ ## suppresses printing of console output when running test()

# H1 directory
dirH1 <- "./h1_v_1_19_26/"
myCountry <- "ZW"
myYear <- 1965
myCatchFraction <- "Lan"
mySpecies <- 1019159

generateTestData <- function(){

## Step 1) load and prepare some test data
myH1RawObject <- importRDBESDataCSV(rdbesExtractPath = dirH1)

# Edit our data so that we have SRSWOR on each level and calculate the probs
myH1RawObject[["VS"]]$VSselectMeth <- "SRSWOR"
myH1RawObject[["VS"]]$VSincProb <- myH1RawObject[["VS"]]$VSnumSamp / myH1RawObject[["VS"]]$VSnumTotal
myH1RawObject[["VS"]]$VSselProb <- 1/myH1RawObject[["VS"]]$VSnumTotal
myH1RawObject[["FT"]]$FTselectMeth <- "SRSWOR"
myH1RawObject[["FT"]]$FTincProb <- myH1RawObject[["FT"]]$FTnumSamp / myH1RawObject[["FT"]]$FTnumTotal
myH1RawObject[["FT"]]$FTselProb <- 1/myH1RawObject[["FT"]]$FTnumTotal
myH1RawObject[["FO"]]$FOselectMeth <- "SRSWOR"
myH1RawObject[["FO"]]$FOincProb <- myH1RawObject[["FO"]]$FOnumSamp / myH1RawObject[["FO"]]$FOnumTotal
myH1RawObject[["FO"]]$FOselProb <- 1/myH1RawObject[["FO"]]$FOnumTotal
myH1RawObject[["SS"]]$SSselectMeth <- "SRSWOR"
myH1RawObject[["SS"]]$SSincProb <- myH1RawObject[["SS"]]$SSnumSamp / myH1RawObject[["SS"]]$SSnumTotal
myH1RawObject[["SS"]]$SSselProb <- 1/myH1RawObject[["SS"]]$SSnumTotal
myH1RawObject[["SA"]]$SAselectMeth <- "SRSWOR"
myH1RawObject[["SA"]]$SAincProb <- myH1RawObject[["SA"]]$SAnumSamp / myH1RawObject[["SA"]]$SAnumTotal
myH1RawObject[["SA"]]$SAselProb <- 1/myH1RawObject[["SA"]]$SAnumTotal

# Update our test data with some random sample measurements (it didn't include these)
# set the random seed
set.seed(1234)
myH1RawObject[['SA']]$SAsampWtLive <- round(runif(n=nrow(myH1RawObject[['SA']]),min = 1, max = 100))
myH1RawObject[['SA']]$SAsampWtMes <- round(runif(n=nrow(myH1RawObject[['SA']]),min = 1, max = 100))

#Filter our data for WGRDBES-EST TEST 1, 1965, H1
myFields <- c("DEyear","SDctry","DEhierarchy","DEsampScheme","DEstratumName","SAspeCode","SScatchFra")
myValues <- c(myYear,myCountry,1,"National Routine","DE_stratum1_H1",mySpecies,myCatchFraction)
myH1RawObject <- filterRDBESDataObject(myH1RawObject,
fieldsToFilter = myFields,
valuesToFilter = myValues )
myH1RawObject <- findAndKillOrphans(myH1RawObject)
myH1RawObject

}

test_that("exportEstimationResultsToInterCatchFormat runs without errors or warnings", {

# get some test data
myH1RawObject <- generateTestData()

## Create an estimation object, but stop at SA

myTestData <- createRDBESEstObject(myH1RawObject, 1, stopTable = "SA")
# Get rid of rows that don't have an SA row
myTestData <- myTestData[!is.na(myTestData$SAid),]
# Estimate using the data
myStrataResults <- doEstimationForAllStrata(myTestData, "SAsampWtLive")
myStrataResults

# Get our estimated values for the PSU
psuEstimates <- myStrataResults[myStrataResults$recType == "VS",]

# This is the data we will export to IC format
dataToOutput <- data.frame(Country = myCountry,
Year = myYear,
SeasonType = NA,
Season = NA,
Fleet= NA,
AreaType = "Stratum",
FishingArea = psuEstimates$stratumName,
Species = mySpecies ,
Stock = mySpecies,
CatchCategory = substr(myCatchFraction, 1, 1),
ReportingCategory = "A",
Usage = "H",
SamplesOrigin = "O",
UnitCATON = "kg",
CATON = psuEstimates$est.total/1000.0,
varCATON = psuEstimates$var.total/1000000.0)


expect_error(exportEstimationResultsToInterCatchFormat(dataToOutput),NA )
expect_warning(exportEstimationResultsToInterCatchFormat(dataToOutput),NA )

icOutput <- exportEstimationResultsToInterCatchFormat(dataToOutput)
expect_equal(length(icOutput), 6)
expect_equal(icOutput[1], "HI,ZW,1965,NA,NA,NA,Stratum,VS_stratum1,NA,NA,-9,NA")
expect_equal(icOutput[2], "SI,ZW,1965,NA,NA,NA,Stratum,VS_stratum1,NA,1019159,1019159,L,A,NA,H,O,NA,kg, 3342.222,-9, 1053266,NA,NA,NA")

})


#}) ## end capture.output
120 changes: 120 additions & 0 deletions vignettes/create-IC-output-from-estimation-results.Rmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
---
title: "Create IC format from doEstimationForAllStrata() output"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{Create IC format from doEstimationForAllStrata}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
```

## Introduction
This document shows how to take the output from doEstimationForAllStrata() and format it into the InterCatch format. This is done by using the function exportEstimationResultsToInterCatchFormat() in the RDBEScore package.

## Load the package

```{r setup}
library(RDBEScore)
```

## Step 1) load and prepare some test data

```{r}
# load an example dataset
# H1 directory
dirH1 <- "../tests/testthat/h1_v_1_19_26/"
fixProbs <- function(myH1RawObject){
# Edit our data so that we have SRSWOR on each level and calculate the probs
myH1RawObject[["VS"]]$VSselectMeth <- "SRSWOR"
myH1RawObject[["VS"]]$VSincProb <- myH1RawObject[["VS"]]$VSnumSamp / myH1RawObject[["VS"]]$VSnumTotal
myH1RawObject[["VS"]]$VSselProb <- 1/myH1RawObject[["VS"]]$VSnumTotal
myH1RawObject[["FT"]]$FTselectMeth <- "SRSWOR"
myH1RawObject[["FT"]]$FTincProb <- myH1RawObject[["FT"]]$FTnumSamp / myH1RawObject[["FT"]]$FTnumTotal
myH1RawObject[["FT"]]$FTselProb <- 1/myH1RawObject[["FT"]]$FTnumTotal
myH1RawObject[["FO"]]$FOselectMeth <- "SRSWOR"
myH1RawObject[["FO"]]$FOincProb <- myH1RawObject[["FO"]]$FOnumSamp / myH1RawObject[["FO"]]$FOnumTotal
myH1RawObject[["FO"]]$FOselProb <- 1/myH1RawObject[["FO"]]$FOnumTotal
myH1RawObject[["SS"]]$SSselectMeth <- "SRSWOR"
myH1RawObject[["SS"]]$SSincProb <- myH1RawObject[["SS"]]$SSnumSamp / myH1RawObject[["SS"]]$SSnumTotal
myH1RawObject[["SS"]]$SSselProb <- 1/myH1RawObject[["SS"]]$SSnumTotal
myH1RawObject[["SA"]]$SAselectMeth <- "SRSWOR"
myH1RawObject[["SA"]]$SAincProb <- myH1RawObject[["SA"]]$SAnumSamp / myH1RawObject[["SA"]]$SAnumTotal
myH1RawObject[["SA"]]$SAselProb <- 1/myH1RawObject[["SA"]]$SAnumTotal
# Update our test data with some random sample measurements (it didn't include these)
# set the random seed
set.seed(1234)
myH1RawObject[['SA']]$SAsampWtLive <- round(runif(n=nrow(myH1RawObject[['SA']]),min = 1, max = 100))
myH1RawObject[['SA']]$SAsampWtMes <- round(runif(n=nrow(myH1RawObject[['SA']]),min = 1, max = 100))
myH1RawObject
}
## Step 1) load and prepare some test data
myH1RawObject <- importRDBESDataCSV(rdbesExtractPath = dirH1)
#Filter our data for WGRDBES-EST TEST 1, 1965, H1
myCatchFraction <- "Lan"
mySpecies <- 1019159
myFields <- c("DEyear","DEhierarchy","DEsampScheme","DEstratumName","SAspeCode","SScatchFra")
myValues <- c(1965,1,"National Routine","DE_stratum1_H1",mySpecies,myCatchFraction)
myH1RawObject <- filterRDBESDataObject(myH1RawObject,
fieldsToFilter = myFields,
valuesToFilter = myValues )
myH1RawObject <- findAndKillOrphans(myH1RawObject)
myH1 <- fixProbs(myH1RawObject)
```

## Step 2) Create an estimation object, but stop at SA


```{r}
myTestData <- createRDBESEstObject(myH1, 1, stopTable = "SA")
# Get rid of rows that don't have an SA row
myTestData <- myTestData[!is.na(myTestData$SAid),]
# Estimate using the data
myStrataResults <- doEstimationForAllStrata(myTestData, "SAsampWtLive")
```

## Step 3) Export the results to InterCatch format (just HI and SI for this data)

```{r}
# Get our estimated values for the PSU
psuEstimates <- myStrataResults[myStrataResults$recType == "VS",]
# This is the data we will export to IC format
dataToOutput <- data.frame(Country = myH1[["SD"]]$SDctry,
Year = myH1[["DE"]]$DEyear,
SeasonType = NA,
Season = NA,
Fleet= NA,
AreaType = "Stratum",
FishingArea = psuEstimates$stratumName,
Species = mySpecies ,
Stock = mySpecies,
CatchCategory = substr(myCatchFraction, 1, 1),
ReportingCategory = "A",
Usage = "H",
SamplesOrigin = "O",
UnitCATON = "kg",
CATON = psuEstimates$est.total/1000.0,
varCATON = psuEstimates$var.total/1000000.0)
tempIC <- exportEstimationResultsToInterCatchFormat(dataToOutput)
print(tempIC)
```

0 comments on commit ab89650

Please sign in to comment.