Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

adding function generateNAsUsingSL and tests #177

Merged
merged 2 commits into from
Oct 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 87 additions & 0 deletions R/generateNAsUsingSL.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
#' Generate NAs in samples using Species List information
#'
#' @param RDBESDataObject An RDBESDataObject.
#' @param targetAphiaId a vector of aphiaId.
#' @param overwriteSampled (Optional) should SAtotalWtMes and SAsampWtMes be set to 0 if
#' spp recorded but absent from SL? The default is TRUE.
#' @param validate (Optional) Set to TRUE if you want validation to be carried out. The
#' default if TRUE.
#' @param verbose (Optional) Set to TRUE if you want informative text on validation printed
#' out, or FALSE if you don't. The default is FALSE.
#' @param strict (Optional) This function can validate its input data - should
#' the validation be strict? The default is TRUE.
#'
#' @return RDBES data object where SA was complemented with NAs for species not looked for
#' (sensu in SL)
#' @export
#'
#' @examples
#' # To be added

generateNAsUsingSL<-function(RDBESDataObject, targetAphiaId, overwriteSampled=TRUE, validate = TRUE, verbose=FALSE, strict=TRUE){

if(validate){
validateRDBESDataObject(RDBESDataObject,
verbose = verbose,
strict = strict
)
}

# Take a copy of SA and SL since we'll change some column data types and
# we don't want to update the original version
tmpSS <- data.table::copy(RDBESDataObject[["SS"]])
tmpSL <- data.table::copy(fixSLids(RDBESDataObject)$SL)
tmpSA <- data.table::copy(RDBESDataObject[["SA"]])

tmpSSwithSL<-merge(tmpSS, tmpSL, by="SLid", all.x=T)

ls1 <- split(tmpSSwithSL, tmpSSwithSL$SSid)
ls2 <- lapply(ls1, function(tmpSSwithSLrow, targetAphiaId1=targetAphiaId, rdbesSA=tmpSA){
# Now convert some columns from int to numeric
colsToConvertToNumeric <- c("SAid", "SAseqNum")
rdbesSA[, (colsToConvertToNumeric) := lapply(.SD, as.double),
.SDcols = colsToConvertToNumeric]

# determines aphias that need generation
aphiaNeedingGenerateNAs <- targetAphiaId1[!targetAphiaId1 %in% tmpSSwithSLrow$SLcommTaxon]

if(length(aphiaNeedingGenerateNAs)>0)
{
# checks if they already exist in SA [case of the exceptional observer]
inSA <- aphiaNeedingGenerateNAs %in% rdbesSA[SSid == tmpSSwithSLrow$SSid,]$SAspeCode
if(any(!inSA))
# creates a new row
{
draftNewRows <- do.call("rbind", replicate(n=length(aphiaNeedingGenerateNAs[!inSA]),
rdbesSA[SSid == tmpSSwithSLrow$SSid,][1,], simplify = FALSE))

draftNewRows$SAspeCode <- aphiaNeedingGenerateNAs[!inSA]
draftNewRows$SAspeCodeFAO <- ""
draftNewRows$SAsamp <- 'N'
draftNewRows[,c("SAtotalWtLive","SAsampWtLive","SAtotalWtMes","SAsampWtMes")] <- NA
draftNewRows$SAid <- max(rdbesSA$SAid) + (1:length(inSA[!inSA]))*0.001 # maintain a count
draftNewRows$SAseqNum <- max(rdbesSA$SAseqNum) + (1:length(inSA[!inSA]))*0.001 # maintain a count
draftNewRows$SAunitName <- paste0("NAgen_", max(rdbesSA$SAid) + (1:length(inSA[!inSA]))*0.001) # maintain a count
# updates the table
rdbesSA <- rbind(rdbesSA, draftNewRows)
# checks if spp were added ok
#browser()
test_fail <- length(aphiaNeedingGenerateNAs <- targetAphiaId1[!targetAphiaId1 %in% rdbesSA$SAspeCode])>0
if(test_fail) stop()
}

if(any(inSA)){
# if they do and overwrite==T give him/her bonus points and but overwrite their data with NAs
# if they do and overwrite==F give him/her bonus points and and keep their data
if(overwriteSampled==T) {
rdbesSA[SSid == tmpSSwithSLrow$SSid & SAspeCode %in% aphiaNeedingGenerateNAs,c("SAtotalWtLive",
"SAsampWtLive","SAtotalWtMes","SAsampWtMes")] <- NA
} else {"do nothing"}}
}
#browser()
rdbesSA}
)
RDBESDataObject[["SA"]] <- data.table::setDT(do.call("rbind", ls2))
setkey(RDBESDataObject[["SA"]],"SAid")
RDBESDataObject
}
206 changes: 206 additions & 0 deletions tests/testthat/test-generateNAsUsingSL.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
capture.output({ ## suppresses printing of console output when running test()

# download and subset original data

myH1DataObject <- RDBEScore:::importRDBESDataZIP("./h1_v_1_19_18/ZW_1965_WGRDBES-EST_TEST_1.zip")

# Subset data
myH1DataObject <- filterRDBESDataObject(myH1DataObject,c("DEstratumName","SLspeclistName"),
c("Pckg_survey_apistrat_H1","WGRDBES-EST_TEST_1_Pckg_survey_apistrat_H1"),
killOrphans=TRUE, strict=TRUE)

# adds a species to SL
rowToAdd <- data.frame('31831','SL','ZW','4484',myH1DataObject[["SL"]]$SLspeclistName,'1965','Dis','107254','107254')
colnames(rowToAdd) <- names(myH1DataObject[["SL"]])

# myH1DataObject[["SL"]] <- rbind(myH1DataObject[["SL"]],rowToAdd)
# myH1DataObject[["SL"]]$SLid <- as.integer(myH1DataObject[["SL"]]$SLid)
# myH1DataObject[["SL"]]$SLyear <- as.integer(myH1DataObject[["SL"]]$SLyear)
# myH1DataObject[["SL"]]$SLcommTaxon <- as.integer(myH1DataObject[["SL"]]$SLcommTaxon)
# myH1DataObject[["SL"]]$SLsppCode <- as.integer(myH1DataObject[["SL"]]$SLsppCode)
# ensure key is set on SL
setkey(myH1DataObject[["SL"]], SLid)

# adds a row to SS
myH1DataObject[["SS"]]<-rbind(myH1DataObject[["SS"]][1,],myH1DataObject[["SS"]][1,])
myH1DataObject[["SS"]]$SScatchFra[2]<-"Dis"
myH1DataObject[["SS"]]$SSid[2]<-myH1DataObject[["SS"]]$SSid[1]+1
myH1DataObject[["SS"]]$SSid<-as.integer(myH1DataObject[["SS"]]$SSid)
# ensure key is set on SS
setkey(myH1DataObject[["SS"]], SSid)
setkey(myH1DataObject[["SA"]], SAid)

validateRDBESDataObject(myH1DataObject, checkDataTypes = TRUE)

# prepare myH1DataObject1: test data 1 species

myH1DataObject1 <- filterRDBESDataObject(myH1DataObject, c("SSid"), c(227694),
killOrphans = TRUE, strict=TRUE)

setkey(myH1DataObject1[["SS"]], SSid)
setkey(myH1DataObject1[["SA"]], SAid)

validateRDBESDataObject(myH1DataObject1, checkDataTypes = TRUE)

# prepare myH1DataObject2: test data >1 species
myH1DataObject2 <- myH1DataObject1
myH1DataObject2$SL<-rbind(myH1DataObject2$SL,myH1DataObject2$SL)
myH1DataObject2$SL[,c("SLcommTaxon","SLsppCode")]<-as.integer(c(107254, 107253))
myH1DataObject2$SL$SLid[2]<-as.integer(47892)

myH1DataObject2$SA<-rbind(myH1DataObject2$SA,myH1DataObject2$SA)
myH1DataObject2$SA$SAspeCode[2] <- "107253"
myH1DataObject2$SA$SAid[2] <- as.integer(572814)

setkey(myH1DataObject2[["SL"]], SLid)
setkey(myH1DataObject2[["SS"]], SSid)
setkey(myH1DataObject2[["SA"]], SAid)

validateRDBESDataObject(myH1DataObject2, checkDataTypes = TRUE)

# object demo
myH1DataObject[c("SL","SS","SA")]
myH1DataObject1[c("SL","SS","SA")]
myH1DataObject2[c("SL","SS","SA")]


# ------------------
# tests: 1 spp call
# ------------------

test_that("simpleSA: generateNAsUsingSL does not add any NA rows if none are missing (1 targetAphiaId, SS present)", {

expect_equal(myH1DataObject1,generateNAsUsingSL(myH1DataObject1, targetAphiaId = c("107254")))

})

test_that("simpleSA: generateNAsUsingSL adds one NA row if spp not in list (case: 1 targetAphiaId, SS present)", {

# expect 1 row to be added
expect_equal(nrow(generateNAsUsingSL(myH1DataObject1, targetAphiaId = c("107253"))$SA),2)
# expect 1 spp ("107253") in the 2nd row
expect_equal(generateNAsUsingSL(myH1DataObject1, targetAphiaId = c("107253"))$SA$SAspeCode,c("107254","107253"))
# expect SAtotalWtMes of 2nd row to be NA
expect_equal(generateNAsUsingSL(myH1DataObject1, targetAphiaId = c("107253"))$SA$SAtotalWtMes[2],as.integer(NA))
# expect SAsampWtMes of 2nd row to be NA
expect_equal(generateNAsUsingSL(myH1DataObject1, targetAphiaId = c("107253"))$SA$SAsampWtMes[2],as.integer(NA))

})

test_that("simpleSA: generateNAsUsingSL makes spp weights NA if spp not in list and overwriteSampled = TRUE [default] (case: 1 targetAphiaId, SS present)", {

myH1DataObject11<-myH1DataObject1
myH1DataObject11$SL$SLid<-1; setkey(myH1DataObject11$SL, "SLid")
# check: should yield TRUE
!myH1DataObject11$SS$SLid %in% myH1DataObject11$SL$SLid
myH1DataObject11[c("SL","SS","SA")]

# expect 0 row to be added
expect_equal(nrow(generateNAsUsingSL(myH1DataObject11, targetAphiaId = c("107254"))$SA),1)
# expect SAtotalWtMes & SAsampWtMes of spp to be NA
expect_equal(generateNAsUsingSL(myH1DataObject11, targetAphiaId = c("107254"))$SA$SAtotalWtMes,as.integer(NA))
expect_equal(generateNAsUsingSL(myH1DataObject11, targetAphiaId = c("107254"))$SA$SAsampWtMes,as.integer(NA))
# expect all other vars to have remained unchanged
expect_equal(apply(generateNAsUsingSL(myH1DataObject11, targetAphiaId = c("107254"))$SA[,!c("SAtotalWtMes","SAsampWtMes")],1, paste0, collapse=""),apply(myH1DataObject11$SA[,!c("SAtotalWtMes","SAsampWtMes")],1, paste0, collapse=""))

})

test_that("simpleSA: generateNAsUsingSL does makes spp weights NA if spp not in list and overwriteSampled = FALSE (case: 1 targetAphiaId, SS present)", {

myH1DataObject11<-myH1DataObject1
myH1DataObject11$SL$SLid<-1; setkey(myH1DataObject11$SL, "SLid")
# check: should yield TRUE
!myH1DataObject11$SS$SLid %in% myH1DataObject11$SL$SLid
myH1DataObject11[c("SL","SS","SA")]

# expect 0 row to be added
expect_equal(nrow(generateNAsUsingSL(myH1DataObject11, targetAphiaId = c("107254"), overwriteSampled=FALSE)$SA),1)
# expect all vars to have remained unchanged
expect_equal(apply(generateNAsUsingSL(myH1DataObject11, targetAphiaId = c("107254"), overwriteSampled=FALSE)$SA,1, paste0, collapse=""),apply(myH1DataObject11$SA,1, paste0, collapse=""))

})

# ------------------
# tests: >1 spp call
# ------------------

test_that("simpleSA: generateNAsUsingSL does not add any NA rows or change data if none are missing (2 targetAphiaId, SS present)", {



# expect 0 row to be added
expect_equal(generateNAsUsingSL(myH1DataObject2, targetAphiaId = c("107254", "107253")), myH1DataObject2)

})


test_that("simpleSA: generateNAsUsingSL adds an NA row if one of target_spp (spp2) not in SL (2 targetAphiaId, SS present)", {

# prepare test data
myH1DataObject21 <- myH1DataObject2
myH1DataObject21$SL <- myH1DataObject21$SL[1,]
myH1DataObject21$SA <- myH1DataObject21$SA[1,]
validateRDBESDataObject(myH1DataObject21, checkDataTypes = TRUE)

myH1DataObject21[c("SL","SS","SA")]

# expect 0 row to be added
expect_equal(nrow(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107254", "107253"))$SA),2)
# expect both spp to be present in result
expect_equal(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107254", "107253"))$SA$SAspeCode,c("107254", "107253"))
# expect all columns in spp1 to remain the same
expect_equal(apply(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107254", "107253"))$SA[1,],1,paste0, collapse=""),apply(myH1DataObject2$SA[1,],1, paste0, collapse=""))
# expect SAtotalWtMes of spp2 (not in list) to be NA
expect_equal(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107254", "107253"))$SA$SAtotalWtMes[2],as.integer(NA))
# expect SAsampWtMes of spp2 (not in list) to be NA
expect_equal(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107254", "107253"))$SA$SAsampWtMes[2],as.integer(NA))
})


test_that("simpleSA: generateNAsUsingSL makes spp weights NA in spp not in list if overwriteSampled = TRUE [default] (case: 2 targetAphiaId, SS present)", {

# prepare test data
myH1DataObject21 <- myH1DataObject2
myH1DataObject21$SL <- myH1DataObject21$SL[1,]
validateRDBESDataObject(myH1DataObject21, checkDataTypes = TRUE)

myH1DataObject21[c("SL","SS","SA")]

# expect 0 row to be added
expect_equal(nrow(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"))$SA),2)
# expect SAtotalWtMes & SAsampWtMes of spp1 (in list) to remain the same
expect_equal(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"))$SA$SAtotalWtMes[1],myH1DataObject21$SA$SAtotalWtMes[1])
expect_equal(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"))$SA$SAsampWtMes[1],myH1DataObject21$SA$SAsampWtMes[1])
# expect SAtotalWtMes & SAsampWtMes of spp2 (not in list) to be set to NA (because overwriteSampled== TRUE by default)
expect_equal(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"))$SA$SAtotalWtMes[2],as.integer(NA))
expect_equal(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"))$SA$SAsampWtMes[2],as.integer(NA))
# expect all other vars to have remained unchanged
expect_equal(apply(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"))$SA[,!c("SAtotalWtMes","SAsampWtMes")],1, paste0, collapse=""),apply(myH1DataObject21$SA[,!c("SAtotalWtMes","SAsampWtMes")],1, paste0, collapse=""))

})

test_that("simpleSA: generateNAsUsingSL does not makes spp weights NA in spp not in list if overwriteSampled = FALSE (case: 2 targetAphiaId, SS present)", {

# prepare test data
myH1DataObject21 <- myH1DataObject2
myH1DataObject21$SL <- myH1DataObject21$SL[1,]
validateRDBESDataObject(myH1DataObject21, checkDataTypes = TRUE)

myH1DataObject21[c("SL","SS","SA")]

# expect 0 row to be added
expect_equal(nrow(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"), overwriteSampled=FALSE)$SA),2)
# expect SAtotalWtMes & SAsampWtMes of spp1 (in list) to remain the same
expect_equal(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"), overwriteSampled=FALSE)$SA$SAtotalWtMes[1],myH1DataObject21$SA$SAtotalWtMes[1])
expect_equal(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"), overwriteSampled=FALSE)$SA$SAsampWtMes[1],myH1DataObject21$SA$SAsampWtMes[1])
# expect SAtotalWtMes & SAsampWtMes of spp2 (not in list) to remain the same (because overwriteSampled== FALSE)
expect_equal(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"), overwriteSampled=FALSE)$SA$SAtotalWtMes[2],myH1DataObject21$SA$SAtotalWtMes[2])
expect_equal(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"), overwriteSampled=FALSE)$SA$SAsampWtMes[2],myH1DataObject21$SA$SAsampWtMes[2])
# expect all other vars to have remained unchanged
expect_equal(apply(generateNAsUsingSL(myH1DataObject21, targetAphiaId = c("107253"), overwriteSampled=FALSE)$SA[,!c("SAtotalWtMes","SAsampWtMes")],1, paste0, collapse=""),apply(myH1DataObject21$SA[,!c("SAtotalWtMes","SAsampWtMes")],1, paste0, collapse=""))

})

}) ## end capture.output