Skip to content

Commit

Permalink
Merge pull request #479 from RobFryer/auxiliary_variables
Browse files Browse the repository at this point in the history
update treatment of auxiliary variables
  • Loading branch information
annelaerkes authored Sep 2, 2024
2 parents a385b68 + 910c347 commit 744a2af
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 42 deletions.
64 changes: 41 additions & 23 deletions R/import_check_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ ctsm.check.basis.biota <- function(data, info) {
new[!ok] <- "W"
})

id <- data$determinand %in% c("EXLIP%", "FATWT%", "LIPIDWT%")
id <- data$determinand %in% c("EXLIP%", "FATWT%", "LIPIDWT%", "WTMEA")
if (any(id))
data[id,] <- within(data[id,], {
ok <- basis %in% c("W", "D")
Expand All @@ -164,8 +164,8 @@ ctsm.check.basis.biota <- function(data, info) {
})

id <- data$determinand %in% c(
"VDS", "IMPS", "INTS", "VDSI", "PCI", "INTSI", "%FEMALEPOP", "SURVT", "NRR", "LP", "%DNATAIL",
"MNC", "CMT-QC-NR", "MNC-QC-NR")
"VDS", "IMPS", "INTS", "VDSI", "PCI", "INTSI", "%FEMALEPOP", "SURVT", "NRR",
"LP", "%DNATAIL", "MNC", "CMT-QC-NR", "MNC-QC-NR")
if (any(id))
data[id,] <- within(data[id,], {
ok <- is.na(basis)
Expand Down Expand Up @@ -250,7 +250,7 @@ ctsm.check.matrix.biota <- function(data, info) {
# actually LNMEA could be feather length (but have not allowed for this at present)
# NB procedures for merging with LNMEA are similary complicated for birds

id <- data$determinand %in% "LNMEA"
id <- data$determinand %in% c("LNMEA", "WTMEA")
if (any(id))
data[id,] <- within(data[id,], {
ok <- (species_group %in% c("Fish", "Mammal") & matrix %in% "WO") |
Expand Down Expand Up @@ -450,33 +450,33 @@ ctsm.check.species_group.biota <- function(data, info) {

ctsm.check.sex.biota <- function(data, info) {

# check sex has valid ICES codes
# extra checks for imposex determinands and EROD

# silence non-standard evaluation warnings
sex <- NULL

# NB any changes should really be made at the sub-sample level

id <- ctsm_is_contaminant(data$pargroup) |
data$group %in% "Metabolites" |
data$determinand %in% c("AGMEA", "LNMEA", "DRYWT%", "EXLIP%", "FATWT%", "LIPIDWT%") |
data$determinand %in% c("ALAD", "SFG", "ACHE", "GST", "SURVT", "NRR", "LP", "%DNATAIL",
"MNC", "CMT-QC-NR", "MNC-QC-NR")

if (any(id))
data[id,] <- within(data[id,], {
ok <- sex %in% c("F", "I", "M", "U", "X", NA)
action <- ifelse(ok, "none", "warning")
new[!ok] <- NA
})

# global check of ICES codes

data <- dplyr::mutate(
data,
ok = .data$sex %in% c("F", "H", "I", "M", "T", "U", "X", NA),
action = dplyr::if_else(ok, "none", "warning"),
new[!ok] <- NA
)


# imposex

id <- data$determinand %in% c("VDS", "IMPS", "INTS")
if (any(id))
data[id,] <- within(data[id,], {
ok <- sex %in% "F"
action <- ifelse(ok, "none", ifelse(sex %in% NA, "warning", "error"))
new[sex %in% NA] <- "F"
})

id <- data$determinand %in% c("VDSI", "PCI", "INTSI", "%FEMALEPOP")
if (any(id))
data[id,] <- within(data[id,], {
Expand All @@ -485,25 +485,36 @@ ctsm.check.sex.biota <- function(data, info) {
new[sex %in% NA] <- "X"
})


# EROD

id <- data$determinand %in% "EROD"
if (any(id))
data[id,] <- within(data[id,], {
ok <- sex %in% c("F", "M")
ok.delete <- sex %in% c("U", "I", "X")
ok.delete <- sex %in% c("H", "I", "T", "U", "X")
action <- ifelse(ok, "none", ifelse(ok.delete, "delete", "error"))
if (any(ok.delete))
cat(" Dropping EROD data with immature or unidentifiable sex\n")
})

# id <- is.na(data$ok)
# if (any(id))
# data[id,] <- within(data[id,], {
# ok <- sex %in% c("F", "I", "M", "T", "U", "X", NA)
# action <- ifelse(ok, "none", "warning")
# new[!ok] <- NA
# })

data
}


ctsm.check.unit.biota <- function(data, info) {

standard_unit <- c(
"g/g", "mg/mg", "ug/ug", "ng/ng", "pg/pg", "mg/g", "ug/g", "ng/g", "pg/g", "g/kg", "mg/kg",
"ug/kg", "ng/kg", "pg/kg")
"g/g", "mg/mg", "ug/ug", "ng/ng", "pg/pg", "mg/g", "ug/g", "ng/g", "pg/g",
"g/kg", "mg/kg", "ug/kg", "ng/kg", "pg/kg")

id <- ctsm_is_contaminant(data$pargroup, exclude = "I-RNC") & data$determinand != "TEQDFP"
if (any(id))
Expand Down Expand Up @@ -551,6 +562,13 @@ ctsm.check.unit.biota <- function(data, info) {
action <- ifelse(ok, "none", "error")
})

id <- data$determinand %in% c("WTMEA")
if (any(id))
data[id,] <- within(data[id,], {
ok <- unit %in% c("kg", "g", "mg")
action <- ifelse(ok, "none", "error")
})

id <- data$determinand %in% c("DRYWT%", "EXLIP%", "FATWT%", "LIPIDWT%")
if (any(id))
data[id,] <- within(data[id,], {
Expand Down Expand Up @@ -775,7 +793,7 @@ ctsm.check.value.biota <- function(data, info) {

id <- ctsm_is_contaminant(data$pargroup, exclude = "I-MTC") |
data$group %in% "Metabolites" |
data$determinand %in% c("EROD", "ALAD", "ACHE", "GST", "AGMEA", "LNMEA")
data$determinand %in% c("EROD", "ALAD", "ACHE", "GST", "AGMEA", "LNMEA", "WTMEA")

if (any(id))
data[id,] <- within(data[id,], {
Expand Down
79 changes: 64 additions & 15 deletions R/import_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,36 @@ library(readxl)
#'
#' ## Control parameters
#'
#' Many aspects of the assessment process can be controlled through the
#' parameters stored in `info$control`. This is a list populated with default
#' values which can then be overwritten, if required, using the `control`
#' argument.
#' Many aspects of the assessment process can be controlled using parameters
#' which are stored in the `info` component of the harsat data object. The
#' default control values can be overwritten using the `control` argument.
#'
#' * `reporting_window` A scalar (default 6) which determines whether
#' timeseries are excluded because they have no 'recent' data. Formally,
#' timeseries are excluded if they have no data in the period
#' `max_year - reporting_window + 1` and `max_year`, so the default approach
#' is to exclude timeseries if they have no dat in the most recent six
#' monitoring years. The value of 6 is chosen to match with Marine Strategy
#' Framework Directive reporting periods.
#' * `region`
#' * `add_stations`
#' * `bivalve_spawning_season`
#' * `use_stage`
#' * `relative_uncertainty`
#' * `auxiliary` A list which allows flexibility in the treatment of auxiliary
#' variables. At present, there is just one component `by_matrix`, a
#' character vector that determines which auxiliary variables are matched to
#' the contaminant data by `sample` and `matrix` as opposed to just `sample`.
#' For sediment and water, the default is `all`; i.e. all variables are
#' matched by `sample` and `matrix`. This ensures, for example, that
#' sediment normalisers such as aluminium and organic carbon content are
#' matched to chemical measurements in the same grain fraction. For biota, the
#' default is `c("DRYWT%", "LIPIDWT%)`, so these variables are matched by
#' `sample` and `matrix` and all other variables (e.g. LNMEA or %FEMALEPOP)
#' are matched by `sample`. Thus, dry weight and lipid weight contents are
#' matched to chemical measurements in the same tissue. However, mean length
#' (which is usually the lenght of the whole organism) is matched to all
#' tissue types.
#'
#' ## External data
#'
Expand Down Expand Up @@ -272,7 +298,7 @@ safe_read_file <- function(file, header=TRUE, sep=",", quote="\"", dec=".", fill

control_default <- function(purpose, compartment) {

# import functions
# import_functions.R
# sets up default values that control the assessment

# reporting_window is set to 6 to match the MSFD reporting cycle
Expand All @@ -295,6 +321,12 @@ control_default <- function(purpose, compartment) {
# relative uncertainties for log-normally distributed data; the default is
# to accept relative uncertainties greater than (but not equal) to 1% and
# less than (but not equal to) 100%

# auxiliary is a list (to be extended) that allows flexibility in the
# treatment of auxiliary variables
# by_matrix determines which variables are merged by matrix in addition to
# by sample; default is DRYWT% and LIPIDWT% for biota and everything for
# sediment and water

region <- list()

Expand Down Expand Up @@ -373,14 +405,22 @@ control_default <- function(purpose, compartment) {
)
)


auxiliary = list(
by_matrix = switch(
compartment,
biota = c("DRYWT%", "LIPIDWT%"),
"all"
)
)

list(
reporting_window = 6L,
region = region,
add_stations = add_stations,
bivalve_spawning_season = bivalve_spawning_season,
use_stage = use_stage,
relative_uncertainty = relative_uncertainty
relative_uncertainty = relative_uncertainty,
auxiliary = auxiliary
)
}

Expand Down Expand Up @@ -2227,7 +2267,7 @@ tidy_contaminants <- function(data, info) {




# create timeseries ----

#' Create a time series
#'
Expand Down Expand Up @@ -2600,7 +2640,7 @@ create_timeseries <- function(

# merge auxiliary data with determinand data

data <- ctsm_merge_auxiliary(data, info)
data <- merge_auxiliary(data, info)


# impute %femalepop when missing and sex = 1 - write out remaining
Expand Down Expand Up @@ -4006,15 +4046,18 @@ check_subseries <- function(data, info) {
}


ctsm_merge_auxiliary <- function(data, info) {
merge_auxiliary <- function(data, info) {

# import_functions.R
# merge auxiliary variables with data

control <- info$auxiliary


# identify auxiliary variables and split data set accordingly

auxiliary_var <- ctsm_get_auxiliary(data$determinand, info)

id <- data$determinand %in% auxiliary_var

auxiliary <- data[id, ]
Expand All @@ -4032,9 +4075,14 @@ ctsm_merge_auxiliary <- function(data, info) {
auxiliary <- split(auxiliary, auxiliary$determinand)


# catch for LNMEA measured in WO and ES for birds - probably shouldn't happen because the
# sample will differ? need to check
# update control

if (length(control$by_matrix) == 1L && control$by_matrix == "all") {
control$by_matrix <- auxiliary_var
}


# merge auxiliary variables one at a time

for (aux_id in names(auxiliary)) {

Expand All @@ -4049,7 +4097,7 @@ ctsm_merge_auxiliary <- function(data, info) {
# additional variables for some auxiliaries
# need to make this more flexible - issue raised

if (aux_id %in% c("DRYWT%", "LIPIDWT%", "CORG", "LOIGN", "AL", "LI")) {
if (aux_id %in% control$by_matrix) {

merge_id <- c(merge_id, "matrix")

Expand Down Expand Up @@ -4110,6 +4158,7 @@ ctsm_merge_auxiliary <- function(data, info) {
data <- droplevels(data)
}


ctsm_convert_to_target_basis <- function(data, info, get_basis) {

# location: import_functions.R
Expand Down
35 changes: 31 additions & 4 deletions man/read_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 744a2af

Please sign in to comment.