Merge pull request #372 from RobFryer/teq_information

TEQ DFP construction
osparcomm · Nov 26, 2023 · 0fa3e49 · 0fa3e49
2 parents 26f6a3f + 63024c6
commit 0fa3e49
Show file tree

Hide file tree

Showing 8 changed files with 103 additions and 86 deletions.
diff --git a/NAMESPACE b/NAMESPACE
@@ -34,6 +34,7 @@ export(get_basis_default)
 export(get_basis_most_common)
 export(get_station_code)
 export(get_timeseries)
+export(info_TEF)
 export(normalise_biota_HELCOM)
 export(normalise_sediment_HELCOM)
 export(normalise_sediment_OSPAR)

diff --git a/R/import_functions.R b/R/import_functions.R
@@ -2486,10 +2486,12 @@ create_timeseries <- function(
       bespoke = get(paste("determinand.link", i, sep = "."), mode = "function")
     )
 
-    data <- do.call(
-      linkFunction, 
-      list(data = data, keep = i, drop = wk$det, info = info)
-    )
+    args = list(data = data, keep = i, drop = wk$det)
+    if ("weights" %in% names(wk)) {
+      args = c(args, list(weights = wk$weights))
+    }
+
+    data <- do.call(linkFunction, args)
   }  
 
   # drop any remaining unwanted determinands (from sum and perhaps bespoke functions);
@@ -3253,7 +3255,7 @@ determinand.link.imposex <- function(data, keep, drop, ...) {
   visitID <- with(data, paste(station_code, year))
 
 
-  # find visits when both indivuduals and stages reported and check consistent
+  # find visits when both individuals and stages reported and check consistent
 
   ok <- by(data, visitID, function(x) {
     with(x, {
@@ -3465,7 +3467,7 @@ determinand.link.sum <- function(data, keep, drop, ...) {
 
 
 
-determinand.link.TEQDFP <- function(data, keep, drop, ...) {
+determinand.link.TEQDFP <- function(data, keep, drop, weights) {
 
   stopifnot(length(keep) == 1, length(drop) > 1)
 
@@ -3499,18 +3501,6 @@ determinand.link.TEQDFP <- function(data, keep, drop, ...) {
 
   summed_data <- by(data[["TRUE"]], ID, function(x) {
 
-    # only some of the determinands are mandatory - otherwise we woudld lose everything 
-    # mandatory determinands contribute at least 1% to the total TEQ based on a quick look-see!
-    # the order below is based on % contribution
-
-    # mandatory <- c(
-    #   "CB126", "CDF2N", "CDD1N", "CDF2T", "TCDD", "CB169", "CB118", "CDFP2", "CDD6X", "CDF4X", 
-    #   "CDF6X")
-    # 
-    # if (!all(mandatory %in% x$determinand)) 
-    #   return(NULL)
-
-
     # check all bases are the same 
 
     if (!all(drop %in% x$determinand)) return(NULL)
@@ -3524,7 +3514,7 @@ determinand.link.TEQDFP <- function(data, keep, drop, ...) {
 
     x[id] <- lapply(x[id], convert_units, from = x$unit, to = "ug/kg")
 
-    TEQ <- info_TEQ[as.character(x$determinand)]
+    TEQ <- weights[as.character(x$determinand)]
 
     x[id] <- lapply(x[id], "*", TEQ)
 

diff --git a/R/information_functions.R b/R/information_functions.R
@@ -2539,6 +2539,40 @@ get.info.imposex <- function(
 
 
 
+# TEF values ----
+
+#' TEF values for selected groups of compounds
+#' 
+#' A list of named vectors which currently provide the TEFs for the WHO TEQ for 
+#' dioxins, furans and dioxin-like (planar) polychlorinated biphenyls.
+#' DFP_environmental and DFP_human_health provide the TEFs appropriate for 
+#' testing against the environmental and human health standards respectively.
+#' 
+#' Adding further TEFs will require more development to the relevant 
+#' determinand_link function 
+#' 
+#' @export 
+info_TEF <- list(
+  DFP_environmental = c(
+    "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
+    "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
+    "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
+    "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
+    "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
+    "CDF9X" = 0.1, "CDFO" = 0.00003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
+  ), 
+  DFP_human_health = c(
+    "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
+    "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
+    "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
+    "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
+    "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
+    "CDF9X" = 0.1, "CDFO" = 0.0003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
+  )
+)
+
+
+
 # ICES RECO codes ----
 
 # reads in data from csv files exported from ICES RECO

diff --git a/example_HELCOM.r b/example_HELCOM.r
@@ -274,20 +274,6 @@ biota_data <- read_data(
 biota_data <- tidy_data(biota_data)
 
 
-# The construction of the time series has a few more features. However, first we
-# need to provide the individual TEQs to allow the construction of the WHO TEQ
-# for dioxins, furans and planar PCBS (labelled TEQDFP). these are the values
-# for the human health QS. This stage won't be necessary in later releases.
-
-info_TEQ <- c(
-  "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
-  "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
-  "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
-  "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
-  "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
-  "CDF9X" = 0.1, "CDFO" = 0.0003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
-)
-
 # The determinands.control argument does rather more here. There are four summed
 # variables: PFOS, SBDE6, HBCD and SCB6. There is also one variable CB138+163
 # which needs to be relabeled as (replaced by) CB138. For the purposes of the
@@ -320,7 +306,11 @@ biota_timeseries <- create_timeseries(
       det = c("CB28", "CB52", "CB101", "CB138", "CB153", "CB180"), 
       action = "sum"
     ),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_human_health), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_human_health
+    ),
     "LIPIDWT%" = list(det = c("EXLIP%", "FATWT%"), action = "bespoke")
   ),
   normalise = normalise_biota_HELCOM,

diff --git a/example_OSPAR.r b/example_OSPAR.r
@@ -80,16 +80,6 @@ sediment_data <- read_data(
 sediment_data <- tidy_data(sediment_data)
 
 
-info_TEQ <- c(
-  "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
-  "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
-  "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
-  "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
-  "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
-  "CDF9X" = 0.1, "CDFO" = 0.00003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
-)
-
-
 sediment_timeseries <- create_timeseries(
   sediment_data,
   determinands.control = list(
@@ -100,7 +90,11 @@ sediment_timeseries <- create_timeseries(
     HBCD = list(det = c("HBCDA", "HBCDB", "HBCDG"), action = "sum"),
     CB138 = list(det = c("CB138+163"), action = "replace"),
     CB156 = list(det = c("CB156+172"), action = "replace"),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_environmental), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_environmental
+    ),
     HCEPX = list(det = c("HCEPC", "HCEPT"), action = "sum")
   ),
   normalise = normalise_sediment_OSPAR,
@@ -174,15 +168,6 @@ biota_data <- read_data(
 
 biota_data <- tidy_data(biota_data)
 
-info_TEQ <- c(
-  "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
-  "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
-  "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
-  "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
-  "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
-  "CDF9X" = 0.1, "CDFO" = 0.00003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
-)
-
 biota_timeseries <- create_timeseries(
   biota_data,
   determinands.control = list(
@@ -207,7 +192,11 @@ biota_timeseries <- create_timeseries(
       det = c("CB28", "CB52", "CB101", "CB118", "CB138", "CB153", "CB180"), 
       action = "sum"
     ),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_environmental), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_environmental
+    ),
     HCEPX = list(det = c("HCEPC", "HCEPT"), action = "sum"),
     HCH = list(det = c("HCHA", "HCHB", "HCHG"), action = "sum"),
     "LIPIDWT%" = list(det = c("EXLIP%", "FATWT%"), action = "bespoke")

diff --git a/man/info_TEF.Rd b/man/info_TEF.Rd
diff --git a/vignettes/example_HELCOM.Rmd.orig b/vignettes/example_HELCOM.Rmd.orig
@@ -361,17 +361,6 @@ need to provide the individual TEQs to allow the construction of the WHO TEQ
 for dioxins, furans and planar PCBS (labelled TEQDFP). These are the values
 for the human health QS. This stage won't be necessary in later releases.
 
-```{r helcom-teqs}
-info_TEQ <- c(
-  "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
-  "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
-  "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
-  "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
-  "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
-  "CDF9X" = 0.1, "CDFO" = 0.0003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
-)
-```
-
 The biota data are grouped into time series which consist of the 
 measurements of a single determinand in a single matrix (tissue type; e.g. 
 'EH', `LI' or `SB`) in a single species at a single monitoring station. PAH
@@ -386,8 +375,9 @@ before the six PCBs are summed to give SCB6 in order for them to be included
 in the sum.
 
 There are also two 'bespoke' actions in `determinands.control`. These are
-customised functions that do more complicated things. One of
-them computes the TEQDFP. The other deals with the three different ways in
+customised functions that do more complicated things. One of them computes the 
+WHO TEQ for dioxins, furans and planar PCBS (labelled TEQDFP) using the TEFs 
+for the human health QS. The other deals with the three different ways in
 which lipid weight measurements can be submitted.
 
 Finally, `normalise_biota_HELCOM()` is a customised function that determines which
@@ -416,7 +406,11 @@ biota_timeseries <- create_timeseries(
       det = c("CB28", "CB52", "CB101", "CB138", "CB153", "CB180"), 
       action = "sum"
     ),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_human_health), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_human_health
+    ),
     "LIPIDWT%" = list(det = c("EXLIP%", "FATWT%"), action = "bespoke")
   ),
   normalise = normalise_biota_HELCOM,

diff --git a/vignettes/example_OSPAR.Rmd.orig b/vignettes/example_OSPAR.Rmd.orig
@@ -154,21 +154,6 @@ sediment_data <- read_data(
 sediment_data <- tidy_data(sediment_data)
 ```
 
-The TEQs for the WHO TEQ for dioxins, furans and planar PCBs (labelled TEQDFP) 
-are the values for the secondary poisoning QS. This stage won't be necessary in
-later releases.
-
-```{r ospar-TEQ}
-info_TEQ <- c(
-  "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
-  "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
-  "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
-  "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
-  "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
-  "CDF9X" = 0.1, "CDFO" = 0.00003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
-)
-```
-
 A customised function `normalise_sediment_OSPAR` has been written to implement 
 the OSPAR normalisation procedure. This is necessary because the pivot values 
 used for normalising metals are region-specific. That aside, the procedure is
@@ -177,6 +162,9 @@ normalised to 2.5% organic carbon, unless the samples were taken in the Iberian
 Coast or Gulf of Cadiz OSPAR subregions, in which case the concentrations are 
 not normalised.
 
+The TEFs for the WHO TEQ for dioxins, furans and planar PCBs (labelled TEQDFP) 
+are the values for the secondary poisoning QS. 
+
 ```{r ospar-sediment-timeseries}
 sediment_timeseries <- create_timeseries(
   sediment_data,
@@ -188,7 +176,11 @@ sediment_timeseries <- create_timeseries(
     HBCD = list(det = c("HBCDA", "HBCDB", "HBCDG"), action = "sum"),
     CB138 = list(det = c("CB138+163"), action = "replace"),
     CB156 = list(det = c("CB156+172"), action = "replace"),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_environmental), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_environmental
+    ),
     HCEPX = list(det = c("HCEPC", "HCEPT"), action = "sum")
   ),
   normalise = normalise_sediment_OSPAR,
@@ -319,7 +311,11 @@ biota_timeseries <- create_timeseries(
       det = c("CB28", "CB52", "CB101", "CB118", "CB138", "CB153", "CB180"), 
       action = "sum"
     ),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_environmental), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_environmental
+    ),
     HCEPX = list(det = c("HCEPC", "HCEPT"), action = "sum"),
     HCH = list(det = c("HCHA", "HCHB", "HCHG"), action = "sum"),
     "LIPIDWT%" = list(det = c("EXLIP%", "FATWT%"), action = "bespoke")