-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
First version of function that takes the output from doEstimationForA…
…llStrata() and converts it into the InterCatch exchange format (#209)
- Loading branch information
1 parent
a55ccc1
commit ab89650
Showing
3 changed files
with
339 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
#' exportEstimationResultsToInterCatchFormat | ||
#' | ||
#'This function transforms the estimation results into the InterCatch format. | ||
#' | ||
#' @param dataToExport A data frame containing the estimation results - | ||
#' this should include the output from the doEstimationForAllStrata function | ||
#' and already have the the InterCatch columns present. | ||
#' | ||
#' @return | ||
#' @export | ||
#' | ||
exportEstimationResultsToInterCatchFormat <- function(dataToExport){ | ||
|
||
# HI | ||
# create a data frame called HIdefinitions with 2 columns: "Name" and "Type" | ||
HIdefinitions <- data.frame(Name = c("Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange","UnitEffort","Effort","AreaQualifier"), | ||
Type = c("character","character","character","integer","character","character","character","character","character","integer","character")) | ||
HIcols <- HIdefinitions$Name | ||
|
||
# SI | ||
# create a data frame called SIdefinitions with 2 columns: "Name" and "Type" | ||
SIdefinitions <- data.frame(Name = c("Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange","Species","Stock","CatchCategory","ReportingCategory","DataToFrom","Usage","SamplesOrigin","QualityFlag","UnitCATON","CATON","OffLandings","varCATON","InfoFleet","InfoStockCoordinator","InfoGeneral"), | ||
Type = c("character","character","character","integer","character","character","character","character","character","character","character","character","character","character","character","character","character","numeric","integer","numeric","character","character","character")) | ||
SIcols <- SIdefinitions$Name | ||
|
||
# SD | ||
# create a data frame called SDdefinitions with 2 columns: "Name" and "Type" | ||
SDdefinitions <- data.frame(Name = c('Country','Year','SeasonType','Season','Fleet','AreaType','FishingArea','DepthRange','Species','Stock','CatchCategory','ReportingCategory','Sex','CANUMtype','AgeLength','PlusGroup','SampledCatch','NumSamplesLngt','NumLngtMeas','NumSamplesAge','NumAgeMeas','unitMeanWeight','unitCANUM','UnitAgeOrLength','UnitMeanLength','Maturity','NumberCaught','MeanWeight','MeanLength','varNumLanded','varWgtLanded','varLgtLanded'), | ||
Type = c('character','character','character','integer','character','character','character','character','character','character','character','character','character','character','integer','integer','integer','integer','integer','integer','integer','character','character','character','character','character','numeric','numeric','numeric','numeric','numeric','numeric')) | ||
SDcols <- SDdefinitions$Name | ||
|
||
# TODO- create SD data (if required) | ||
|
||
# find the values of SIcols that are in the column names of dataToExport | ||
SIdata <- dataToExport[,intersect(SIcols,names(dataToExport))] | ||
# remove any duplicates from SIdata | ||
SIdata <- unique(SIdata) | ||
|
||
SI <- createIC_SI(SIdata, SIdefinitions) | ||
|
||
# find the values of HIcols that are in the column names of dataToExport | ||
HIdata <- dataToExport[,intersect(HIcols,names(dataToExport))] | ||
# remove any duplicates from HIdata | ||
HIdata <- unique(HIdata) | ||
|
||
HI <- createIC_HI(HIdata, HIdefinitions) | ||
|
||
IC <- rbind(HI[,c("Key","Value")], SI[,c("Key","Value")]) | ||
# sort IC by the Key column | ||
IC <- IC[order(IC$Key),] | ||
# remove the Key column from IC | ||
IC <- IC$Value | ||
|
||
return(IC) | ||
|
||
} | ||
|
||
# Function to create HI format data | ||
createIC_HI <- function(HIdata, HIdefinitions){ | ||
|
||
HI <- createIC_subtype(HIdata, HIdefinitions,"HI") | ||
|
||
# create a new column in HI called "Key" - its value will be the following columns concatenated together (seperated by "_") "Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange" | ||
HI$Key <- paste(HI$Country,HI$Year,HI$SeasonType,HI$Season,HI$Fleet,HI$AreaType,HI$FishingArea,HI$DepthRange, sep = "_") | ||
|
||
return(HI) | ||
} | ||
|
||
# Function to create SI format data | ||
createIC_SI <- function(SIdata, SIdefinitions){ | ||
|
||
SI <- createIC_subtype(SIdata, SIdefinitions,"SI") | ||
|
||
# create a new column in SI called "Key" - its value will be the following columns concatenated together (seperated by "_") "Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange" | ||
SI$Key <- paste(SI$Country,SI$Year,SI$SeasonType,SI$Season,SI$Fleet,SI$AreaType,SI$FishingArea,SI$DepthRange, sep = "_") | ||
|
||
return(SI) | ||
} | ||
|
||
|
||
# Function to create SD format data | ||
createIC_SD <- function(SDdata, SDdefinitions){ | ||
|
||
SD <- createIC_subtype(SDdata, SDdefinitions,"SD") | ||
|
||
# create a new column in SD called "Key" - its value will be the following columns concatenated together (seperated by "_") "Country","Year","SeasonType","Season","Fleet","AreaType","FishingArea","DepthRange" | ||
SD$Key <- paste(SD$Country,SD$Year,SD$SeasonType,SD$Season,SD$Fleet,SD$AreaType,SD$FishingArea,SD$DepthRange, sep = "_") | ||
|
||
return(SD) | ||
} | ||
|
||
# Base function used by HI, SI, and SD functions to create the relevent data | ||
createIC_subtype <- function(subtypeData, subtypeDefinitions, subtypeName){ | ||
|
||
numRows <- nrow(subtypeData) | ||
recordType <- rep(subtypeName, numRows) | ||
st <- data.table::data.table(recordType) | ||
|
||
# loop through subtypeDefinitions and add parameter values to SD | ||
for(i in 1:nrow(subtypeDefinitions)){ | ||
# check if the column name is in the column names of SDdata | ||
if(subtypeDefinitions$Name[i] %in% names(subtypeData)){ | ||
myParam <- data.frame(subtypeData[[subtypeDefinitions$Name[i]]]) | ||
} else { | ||
# if it isn't, create a column of NAs or -9s | ||
if(subtypeDefinitions$Type[i] == "character"){ | ||
myParam <- data.frame(rep(NA, numRows)) | ||
} else { | ||
myParam <- data.frame(rep(-9, numRows)) | ||
} | ||
} | ||
names(myParam) <- subtypeDefinitions$Name[i] | ||
# change any NAs in myParam to "NA" | ||
myParam[is.na(myParam)] <- "NA" | ||
st <- cbind(st, myParam) | ||
} | ||
|
||
# create a new column in st called "Value" - it will have the values from all other columns concatenated together with a comma seperator | ||
st$Value <- apply(st, 1, function(x) paste(x, collapse = ",")) | ||
|
||
return(st) | ||
|
||
} |
96 changes: 96 additions & 0 deletions
96
tests/testthat/test-exportEstimationResultsToInterCatchFormat.R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
#capture.output({ ## suppresses printing of console output when running test() | ||
|
||
# H1 directory | ||
dirH1 <- "./h1_v_1_19_26/" | ||
myCountry <- "ZW" | ||
myYear <- 1965 | ||
myCatchFraction <- "Lan" | ||
mySpecies <- 1019159 | ||
|
||
generateTestData <- function(){ | ||
|
||
## Step 1) load and prepare some test data | ||
myH1RawObject <- importRDBESDataCSV(rdbesExtractPath = dirH1) | ||
|
||
# Edit our data so that we have SRSWOR on each level and calculate the probs | ||
myH1RawObject[["VS"]]$VSselectMeth <- "SRSWOR" | ||
myH1RawObject[["VS"]]$VSincProb <- myH1RawObject[["VS"]]$VSnumSamp / myH1RawObject[["VS"]]$VSnumTotal | ||
myH1RawObject[["VS"]]$VSselProb <- 1/myH1RawObject[["VS"]]$VSnumTotal | ||
myH1RawObject[["FT"]]$FTselectMeth <- "SRSWOR" | ||
myH1RawObject[["FT"]]$FTincProb <- myH1RawObject[["FT"]]$FTnumSamp / myH1RawObject[["FT"]]$FTnumTotal | ||
myH1RawObject[["FT"]]$FTselProb <- 1/myH1RawObject[["FT"]]$FTnumTotal | ||
myH1RawObject[["FO"]]$FOselectMeth <- "SRSWOR" | ||
myH1RawObject[["FO"]]$FOincProb <- myH1RawObject[["FO"]]$FOnumSamp / myH1RawObject[["FO"]]$FOnumTotal | ||
myH1RawObject[["FO"]]$FOselProb <- 1/myH1RawObject[["FO"]]$FOnumTotal | ||
myH1RawObject[["SS"]]$SSselectMeth <- "SRSWOR" | ||
myH1RawObject[["SS"]]$SSincProb <- myH1RawObject[["SS"]]$SSnumSamp / myH1RawObject[["SS"]]$SSnumTotal | ||
myH1RawObject[["SS"]]$SSselProb <- 1/myH1RawObject[["SS"]]$SSnumTotal | ||
myH1RawObject[["SA"]]$SAselectMeth <- "SRSWOR" | ||
myH1RawObject[["SA"]]$SAincProb <- myH1RawObject[["SA"]]$SAnumSamp / myH1RawObject[["SA"]]$SAnumTotal | ||
myH1RawObject[["SA"]]$SAselProb <- 1/myH1RawObject[["SA"]]$SAnumTotal | ||
|
||
# Update our test data with some random sample measurements (it didn't include these) | ||
# set the random seed | ||
set.seed(1234) | ||
myH1RawObject[['SA']]$SAsampWtLive <- round(runif(n=nrow(myH1RawObject[['SA']]),min = 1, max = 100)) | ||
myH1RawObject[['SA']]$SAsampWtMes <- round(runif(n=nrow(myH1RawObject[['SA']]),min = 1, max = 100)) | ||
|
||
#Filter our data for WGRDBES-EST TEST 1, 1965, H1 | ||
myFields <- c("DEyear","SDctry","DEhierarchy","DEsampScheme","DEstratumName","SAspeCode","SScatchFra") | ||
myValues <- c(myYear,myCountry,1,"National Routine","DE_stratum1_H1",mySpecies,myCatchFraction) | ||
myH1RawObject <- filterRDBESDataObject(myH1RawObject, | ||
fieldsToFilter = myFields, | ||
valuesToFilter = myValues ) | ||
myH1RawObject <- findAndKillOrphans(myH1RawObject) | ||
myH1RawObject | ||
|
||
} | ||
|
||
test_that("exportEstimationResultsToInterCatchFormat runs without errors or warnings", { | ||
|
||
# get some test data | ||
myH1RawObject <- generateTestData() | ||
|
||
## Create an estimation object, but stop at SA | ||
|
||
myTestData <- createRDBESEstObject(myH1RawObject, 1, stopTable = "SA") | ||
# Get rid of rows that don't have an SA row | ||
myTestData <- myTestData[!is.na(myTestData$SAid),] | ||
# Estimate using the data | ||
myStrataResults <- doEstimationForAllStrata(myTestData, "SAsampWtLive") | ||
myStrataResults | ||
|
||
# Get our estimated values for the PSU | ||
psuEstimates <- myStrataResults[myStrataResults$recType == "VS",] | ||
|
||
# This is the data we will export to IC format | ||
dataToOutput <- data.frame(Country = myCountry, | ||
Year = myYear, | ||
SeasonType = NA, | ||
Season = NA, | ||
Fleet= NA, | ||
AreaType = "Stratum", | ||
FishingArea = psuEstimates$stratumName, | ||
Species = mySpecies , | ||
Stock = mySpecies, | ||
CatchCategory = substr(myCatchFraction, 1, 1), | ||
ReportingCategory = "A", | ||
Usage = "H", | ||
SamplesOrigin = "O", | ||
UnitCATON = "kg", | ||
CATON = psuEstimates$est.total/1000.0, | ||
varCATON = psuEstimates$var.total/1000000.0) | ||
|
||
|
||
expect_error(exportEstimationResultsToInterCatchFormat(dataToOutput),NA ) | ||
expect_warning(exportEstimationResultsToInterCatchFormat(dataToOutput),NA ) | ||
|
||
icOutput <- exportEstimationResultsToInterCatchFormat(dataToOutput) | ||
expect_equal(length(icOutput), 6) | ||
expect_equal(icOutput[1], "HI,ZW,1965,NA,NA,NA,Stratum,VS_stratum1,NA,NA,-9,NA") | ||
expect_equal(icOutput[2], "SI,ZW,1965,NA,NA,NA,Stratum,VS_stratum1,NA,1019159,1019159,L,A,NA,H,O,NA,kg, 3342.222,-9, 1053266,NA,NA,NA") | ||
|
||
}) | ||
|
||
|
||
#}) ## end capture.output |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
--- | ||
title: "Create IC format from doEstimationForAllStrata() output" | ||
output: rmarkdown::html_vignette | ||
vignette: > | ||
%\VignetteIndexEntry{Create IC format from doEstimationForAllStrata} | ||
%\VignetteEngine{knitr::rmarkdown} | ||
%\VignetteEncoding{UTF-8} | ||
--- | ||
|
||
```{r, include = FALSE} | ||
knitr::opts_chunk$set( | ||
collapse = TRUE, | ||
comment = "#>" | ||
) | ||
``` | ||
|
||
## Introduction | ||
This document shows how to take the output from doEstimationForAllStrata() and format it into the InterCatch format. This is done by using the function exportEstimationResultsToInterCatchFormat() in the RDBEScore package. | ||
|
||
## Load the package | ||
|
||
```{r setup} | ||
library(RDBEScore) | ||
``` | ||
|
||
## Step 1) load and prepare some test data | ||
|
||
```{r} | ||
# load an example dataset | ||
# H1 directory | ||
dirH1 <- "../tests/testthat/h1_v_1_19_26/" | ||
fixProbs <- function(myH1RawObject){ | ||
# Edit our data so that we have SRSWOR on each level and calculate the probs | ||
myH1RawObject[["VS"]]$VSselectMeth <- "SRSWOR" | ||
myH1RawObject[["VS"]]$VSincProb <- myH1RawObject[["VS"]]$VSnumSamp / myH1RawObject[["VS"]]$VSnumTotal | ||
myH1RawObject[["VS"]]$VSselProb <- 1/myH1RawObject[["VS"]]$VSnumTotal | ||
myH1RawObject[["FT"]]$FTselectMeth <- "SRSWOR" | ||
myH1RawObject[["FT"]]$FTincProb <- myH1RawObject[["FT"]]$FTnumSamp / myH1RawObject[["FT"]]$FTnumTotal | ||
myH1RawObject[["FT"]]$FTselProb <- 1/myH1RawObject[["FT"]]$FTnumTotal | ||
myH1RawObject[["FO"]]$FOselectMeth <- "SRSWOR" | ||
myH1RawObject[["FO"]]$FOincProb <- myH1RawObject[["FO"]]$FOnumSamp / myH1RawObject[["FO"]]$FOnumTotal | ||
myH1RawObject[["FO"]]$FOselProb <- 1/myH1RawObject[["FO"]]$FOnumTotal | ||
myH1RawObject[["SS"]]$SSselectMeth <- "SRSWOR" | ||
myH1RawObject[["SS"]]$SSincProb <- myH1RawObject[["SS"]]$SSnumSamp / myH1RawObject[["SS"]]$SSnumTotal | ||
myH1RawObject[["SS"]]$SSselProb <- 1/myH1RawObject[["SS"]]$SSnumTotal | ||
myH1RawObject[["SA"]]$SAselectMeth <- "SRSWOR" | ||
myH1RawObject[["SA"]]$SAincProb <- myH1RawObject[["SA"]]$SAnumSamp / myH1RawObject[["SA"]]$SAnumTotal | ||
myH1RawObject[["SA"]]$SAselProb <- 1/myH1RawObject[["SA"]]$SAnumTotal | ||
# Update our test data with some random sample measurements (it didn't include these) | ||
# set the random seed | ||
set.seed(1234) | ||
myH1RawObject[['SA']]$SAsampWtLive <- round(runif(n=nrow(myH1RawObject[['SA']]),min = 1, max = 100)) | ||
myH1RawObject[['SA']]$SAsampWtMes <- round(runif(n=nrow(myH1RawObject[['SA']]),min = 1, max = 100)) | ||
myH1RawObject | ||
} | ||
## Step 1) load and prepare some test data | ||
myH1RawObject <- importRDBESDataCSV(rdbesExtractPath = dirH1) | ||
#Filter our data for WGRDBES-EST TEST 1, 1965, H1 | ||
myCatchFraction <- "Lan" | ||
mySpecies <- 1019159 | ||
myFields <- c("DEyear","DEhierarchy","DEsampScheme","DEstratumName","SAspeCode","SScatchFra") | ||
myValues <- c(1965,1,"National Routine","DE_stratum1_H1",mySpecies,myCatchFraction) | ||
myH1RawObject <- filterRDBESDataObject(myH1RawObject, | ||
fieldsToFilter = myFields, | ||
valuesToFilter = myValues ) | ||
myH1RawObject <- findAndKillOrphans(myH1RawObject) | ||
myH1 <- fixProbs(myH1RawObject) | ||
``` | ||
|
||
## Step 2) Create an estimation object, but stop at SA | ||
|
||
|
||
```{r} | ||
myTestData <- createRDBESEstObject(myH1, 1, stopTable = "SA") | ||
# Get rid of rows that don't have an SA row | ||
myTestData <- myTestData[!is.na(myTestData$SAid),] | ||
# Estimate using the data | ||
myStrataResults <- doEstimationForAllStrata(myTestData, "SAsampWtLive") | ||
``` | ||
|
||
## Step 3) Export the results to InterCatch format (just HI and SI for this data) | ||
|
||
```{r} | ||
# Get our estimated values for the PSU | ||
psuEstimates <- myStrataResults[myStrataResults$recType == "VS",] | ||
# This is the data we will export to IC format | ||
dataToOutput <- data.frame(Country = myH1[["SD"]]$SDctry, | ||
Year = myH1[["DE"]]$DEyear, | ||
SeasonType = NA, | ||
Season = NA, | ||
Fleet= NA, | ||
AreaType = "Stratum", | ||
FishingArea = psuEstimates$stratumName, | ||
Species = mySpecies , | ||
Stock = mySpecies, | ||
CatchCategory = substr(myCatchFraction, 1, 1), | ||
ReportingCategory = "A", | ||
Usage = "H", | ||
SamplesOrigin = "O", | ||
UnitCATON = "kg", | ||
CATON = psuEstimates$est.total/1000.0, | ||
varCATON = psuEstimates$var.total/1000000.0) | ||
tempIC <- exportEstimationResultsToInterCatchFormat(dataToOutput) | ||
print(tempIC) | ||
``` | ||
|