Skip to content

Commit

Permalink
Updated IC output function (#209)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidcurrie2001 committed Oct 18, 2024
1 parent 6357826 commit 9a84ebc
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 22 deletions.
61 changes: 43 additions & 18 deletions R/exportEstimationResultsToInterCatchFormat.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,47 +5,56 @@
#' @param dataToExport A data frame containing the estimation results -
#' this should include the output from the doEstimationForAllStrata function
#' and already have the the InterCatch columns present.
#' @param verbose (Optional) Set to TRUE if you want informative text printed
#' out, or FALSE if you don't. The default is FALSE.
#'
#' @return
#' @export
#'
exportEstimationResultsToInterCatchFormat <- function(dataToExport){
exportEstimationResultsToInterCatchFormat <- function(dataToExport,
verbose = FALSE){

# HI
# create a data frame called HIdefinitions with 2 columns: "Name" and "Type"
HIdefinitions <- data.frame(Name = c("Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange","UnitEffort","Effort","AreaQualifier"),
Type = c("character","character","character","integer","character","character","character","character","character","integer","character"))
Type = c("character","character","character","integer","character","character","character","character","character","integer","character"),
Mandatory = c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE))
HIcols <- HIdefinitions$Name

# SI
# create a data frame called SIdefinitions with 2 columns: "Name" and "Type"
SIdefinitions <- data.frame(Name = c("Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange","Species","Stock","CatchCategory","ReportingCategory","DataToFrom","Usage","SamplesOrigin","QualityFlag","UnitCATON","CATON","OffLandings","varCATON","InfoFleet","InfoStockCoordinator","InfoGeneral"),
Type = c("character","character","character","integer","character","character","character","character","character","character","character","character","character","character","character","character","character","numeric","integer","numeric","character","character","character"))
Type = c("character","character","character","integer","character","character","character","character","character","character","character","character","character","character","character","character","character","numeric","integer","numeric","character","character","character"),
Mandatory = c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE))
SIcols <- SIdefinitions$Name

# SD
# create a data frame called SDdefinitions with 2 columns: "Name" and "Type"
SDdefinitions <- data.frame(Name = c('Country','Year','SeasonType','Season','Fleet','AreaType','FishingArea','DepthRange','Species','Stock','CatchCategory','ReportingCategory','Sex','CANUMtype','AgeLength','PlusGroup','SampledCatch','NumSamplesLngt','NumLngtMeas','NumSamplesAge','NumAgeMeas','unitMeanWeight','unitCANUM','UnitAgeOrLength','UnitMeanLength','Maturity','NumberCaught','MeanWeight','MeanLength','varNumLanded','varWgtLanded','varLgtLanded'),
Type = c('character','character','character','integer','character','character','character','character','character','character','character','character','character','character','integer','integer','integer','integer','integer','integer','integer','character','character','character','character','character','numeric','numeric','numeric','numeric','numeric','numeric'))
Type = c('character','character','character','integer','character','character','character','character','character','character','character','character','character','character','integer','integer','integer','integer','integer','integer','integer','character','character','character','character','character','numeric','numeric','numeric','numeric','numeric','numeric'),
Mandatory = c(TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,TRUE,FALSE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE,FALSE,TRUE,TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,FALSE,FALSE,FALSE,FALSE))
SDcols <- SDdefinitions$Name

# TODO- create SD data (if required)
# find the values of SIcols that are in the column names of dataToExport
SDdata <- dataToExport[,intersect(SDcols,names(dataToExport))]
SD <- createIC_SD(SDdata, SDdefinitions, verbose)

# find the values of SIcols that are in the column names of dataToExport
SIdata <- dataToExport[,intersect(SIcols,names(dataToExport))]
# remove any duplicates from SIdata
SIdata <- unique(SIdata)

SI <- createIC_SI(SIdata, SIdefinitions)
SI <- createIC_SI(SIdata, SIdefinitions, verbose)

# find the values of HIcols that are in the column names of dataToExport
HIdata <- dataToExport[,intersect(HIcols,names(dataToExport))]
# remove any duplicates from HIdata
HIdata <- unique(HIdata)

HI <- createIC_HI(HIdata, HIdefinitions)
HI <- createIC_HI(HIdata, HIdefinitions, verbose)

IC <- rbind(HI[,c("Key","Value")], SI[,c("Key","Value")])
IC <- rbind(HI[,c("Key","Value")], SI[,c("Key","Value")], SD[,c("Key","Value")])
# sort IC by the Key column
IC <- IC[order(IC$Key),]
# remove the Key column from IC
Expand All @@ -56,9 +65,11 @@ exportEstimationResultsToInterCatchFormat <- function(dataToExport){
}

# Function to create HI format data
createIC_HI <- function(HIdata, HIdefinitions){
createIC_HI <- function(HIdata,
HIdefinitions,
verbose = FALSE){

HI <- createIC_subtype(HIdata, HIdefinitions,"HI")
HI <- createIC_subtype(HIdata, HIdefinitions,"HI", verbose)

# create a new column in HI called "Key" - its value will be the following columns concatenated together (seperated by "_") "Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange"
HI$Key <- paste(HI$Country,HI$Year,HI$SeasonType,HI$Season,HI$Fleet,HI$AreaType,HI$FishingArea,HI$DepthRange, sep = "_")
Expand All @@ -67,41 +78,55 @@ createIC_HI <- function(HIdata, HIdefinitions){
}

# Function to create SI format data
createIC_SI <- function(SIdata, SIdefinitions){
createIC_SI <- function(SIdata,
SIdefinitions,
verbose = FALSE){

SI <- createIC_subtype(SIdata, SIdefinitions,"SI")
SI <- createIC_subtype(SIdata, SIdefinitions,"SI", verbose)

# create a new column in SI called "Key" - its value will be the following columns concatenated together (seperated by "_") "Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange"
SI$Key <- paste(SI$Country,SI$Year,SI$SeasonType,SI$Season,SI$Fleet,SI$AreaType,SI$FishingArea,SI$DepthRange, sep = "_")
SI$Key <- paste(SI$Country,SI$Year,SI$SeasonType,SI$Season,SI$Fleet,SI$AreaType,SI$FishingArea,SI$DepthRange,SI$Species,SI$Stock,SI$CatchCategory,SI$ReportingCategory, sep = "_")

return(SI)
}


# Function to create SD format data
createIC_SD <- function(SDdata, SDdefinitions){
createIC_SD <- function(SDdata,
SDdefinitions,
verbose = FALSE){

SD <- createIC_subtype(SDdata, SDdefinitions,"SD")
SD <- createIC_subtype(SDdata, SDdefinitions,"SD", verbose)

# create a new column in SD called "Key" - its value will be the following columns concatenated together (seperated by "_") "Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange"
SD$Key <- paste(SD$Country,SD$Year,SD$SeasonType,SD$Season,SD$Fleet,SD$AreaType,SD$FishingArea,SD$DepthRange, sep = "_")
SD$Key <- paste(SD$Country,SD$Year,SD$SeasonType,SD$Season,SD$Fleet,SD$AreaType,SD$FishingArea,SD$DepthRange,SD$Species,SD$Stock,SD$CatchCategory,SD$ReportingCategory,SD$Sex, SD$CANUMType, SD$AgeLength, sep = "_")

return(SD)
}

# Base function used by HI, SI, and SD functions to create the relevent data
createIC_subtype <- function(subtypeData, subtypeDefinitions, subtypeName){
createIC_subtype <- function(subtypeData,
subtypeDefinitions,
subtypeName,
verbose = FALSE){

numRows <- nrow(subtypeData)
recordType <- rep(subtypeName, numRows)
st <- data.table::data.table(recordType)

# loop through subtypeDefinitions and add parameter values to SD
if (verbose) print(paste0("Trying to create ",numRows," rows of ",subtypeName," data"))

# loop through subtypeDefinitions and add parameter values to st
for(i in 1:nrow(subtypeDefinitions)){
# check if the column name is in the column names of SDdata
# check if the column name is in the column names of subtypeData
if(subtypeDefinitions$Name[i] %in% names(subtypeData)){
myParam <- data.frame(subtypeData[[subtypeDefinitions$Name[i]]])
} else {
if (subtypeDefinitions$Mandatory[i]){
warning(paste0("The column ", subtypeDefinitions$Name[i], " is mandatory but is not present in the ",subtypeName," data\n"))
} else if (verbose){
print(paste0("The column ", subtypeDefinitions$Name[i], " is not present in the ",subtypeName," data"))
}
# if it isn't, create a column of NAs or -9s
if(subtypeDefinitions$Type[i] == "character"){
myParam <- data.frame(rep(NA, numRows))
Expand Down
69 changes: 67 additions & 2 deletions tests/testthat/test-exportEstimationResultsToInterCatchFormat.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ test_that("exportEstimationResultsToInterCatchFormat runs without errors or warn
Fleet= NA,
AreaType = "Stratum",
FishingArea = psuEstimates$stratumName,
DepthRange = "NA",
Species = mySpecies ,
Stock = mySpecies,
CatchCategory = substr(myCatchFraction, 1, 1),
Expand All @@ -79,18 +80,82 @@ test_that("exportEstimationResultsToInterCatchFormat runs without errors or warn
SamplesOrigin = "O",
UnitCATON = "kg",
CATON = psuEstimates$est.total/1000.0,
varCATON = psuEstimates$var.total/1000000.0)
varCATON = psuEstimates$var.total/1000000.0,
Sex = "NA",
PlusGroup = "NA",
MeanWeight = -9,
unitMeanWeight = "kg",
unitCANUM = "n",
UnitAgeOrLength = "year",
NumberCaught = -9,
MeanWeight = -9,
CANUMtype = "Age",
AgeLength = c(1,1,1,2,2,2,3,3,3),
NumSamplesAge = c(300,300,300,200,200,200,100,100,100))


expect_error(exportEstimationResultsToInterCatchFormat(dataToOutput),NA )
expect_warning(exportEstimationResultsToInterCatchFormat(dataToOutput),NA )

icOutput <- exportEstimationResultsToInterCatchFormat(dataToOutput)
expect_equal(length(icOutput), 6)
expect_equal(length(icOutput), 15)
expect_equal(icOutput[1], "HI,ZW,1965,NA,NA,NA,Stratum,VS_stratum1,NA,NA,-9,NA")
expect_equal(icOutput[2], "SI,ZW,1965,NA,NA,NA,Stratum,VS_stratum1,NA,1019159,1019159,L,A,NA,H,O,NA,kg, 3342.222,-9, 1053266,NA,NA,NA")

})

test_that("exportEstimationResultsToInterCatchFormat gikves a warning when mandatory column AgeLength is missing", {

# get some test data
myH1RawObject <- generateTestData()

## Create an estimation object, but stop at SA

myTestData <- createRDBESEstObject(myH1RawObject, 1, stopTable = "SA")
# Get rid of rows that don't have an SA row
myTestData <- myTestData[!is.na(myTestData$SAid),]
# Estimate using the data
myStrataResults <- doEstimationForAllStrata(myTestData, "SAsampWtLive")
myStrataResults

# Get our estimated values for the PSU
psuEstimates <- myStrataResults[myStrataResults$recType == "VS",]

# This is the data we will export to IC format
dataToOutput <- data.frame(Country = myCountry,
Year = myYear,
SeasonType = NA,
Season = NA,
Fleet= NA,
AreaType = "Stratum",
FishingArea = psuEstimates$stratumName,
DepthRange = "NA",
Species = mySpecies ,
Stock = mySpecies,
CatchCategory = substr(myCatchFraction, 1, 1),
ReportingCategory = "A",
Usage = "H",
SamplesOrigin = "O",
UnitCATON = "kg",
CATON = psuEstimates$est.total/1000.0,
varCATON = psuEstimates$var.total/1000000.0,
Sex = "NA",
PlusGroup = "NA",
MeanWeight = -9,
unitMeanWeight = "kg",
unitCANUM = "n",
UnitAgeOrLength = "year",
NumberCaught = -9,
MeanWeight = -9,
CANUMtype = "Age",
#AgeLength = c(1,1,1,2,2,2,3,3,3),
NumSamplesAge = c(300,300,300,200,200,200,100,100,100))


expect_warning(exportEstimationResultsToInterCatchFormat(dataToOutput),"The column AgeLength is mandatory but is not present in the SD data\n" )


})


#}) ## end capture.output
16 changes: 14 additions & 2 deletions vignettes/create-IC-output-from-estimation-results.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ myH1 <- fixProbs(myH1RawObject)
```

## Step 2) Create an estimation object, but stop at SA
## Step 2) Estimate for live weight for the data


```{r}
Expand All @@ -104,6 +104,7 @@ dataToOutput <- data.frame(Country = myH1[["SD"]]$SDctry,
Fleet= NA,
AreaType = "Stratum",
FishingArea = psuEstimates$stratumName,
DepthRange = "NA",
Species = mySpecies ,
Stock = mySpecies,
CatchCategory = substr(myCatchFraction, 1, 1),
Expand All @@ -112,7 +113,18 @@ dataToOutput <- data.frame(Country = myH1[["SD"]]$SDctry,
SamplesOrigin = "O",
UnitCATON = "kg",
CATON = psuEstimates$est.total/1000.0,
varCATON = psuEstimates$var.total/1000000.0)
varCATON = psuEstimates$var.total/1000000.0,
Sex = "NA",
PlusGroup = "NA",
MeanWeight = -9,
unitMeanWeight = "kg",
unitCANUM = "n",
UnitAgeOrLength = "year",
NumberCaught = -9,
MeanWeight = -9,
CANUMtype = "Age",
AgeLength = c(1,1,1,2,2,2,3,3,3),
NumSamplesAge = c(300,300,300,200,200,200,100,100,100))
tempIC <- exportEstimationResultsToInterCatchFormat(dataToOutput)
print(tempIC)
Expand Down

0 comments on commit 9a84ebc

Please sign in to comment.