From 63024c6e95900a726902759629bfe0ad0c5bb408 Mon Sep 17 00:00:00 2001
From: RobFryer <Rob.Fryer@gov.scot>
Date: Sun, 26 Nov 2023 18:49:38 +0000
Subject: [PATCH] TEFs for WHO TEQ

Resolves

The TEFs for the environmental and human health WHO TEQ for dioxins, furans and planar PCBs are now exported, so do not need to be created by the user.

An extra weights argument is passed in determinand.control. This is rather clunky at this stage, but it will ultimately allow the action = "sum" function and the TEQ action = "bespoke" function to be combined in a generalised "sum" function.  Post-release though.

Tested on HELCOM and OSPAR examples; vignetted updated; documentation updated.
---
 NAMESPACE                         |  1 +
 R/import_functions.R              | 28 ++++++++-----------------
 R/information_functions.R         | 34 +++++++++++++++++++++++++++++++
 example_HELCOM.r                  | 20 +++++-------------
 example_OSPAR.r                   | 31 +++++++++-------------------
 man/info_TEF.Rd                   | 23 +++++++++++++++++++++
 vignettes/example_HELCOM.Rmd.orig | 22 ++++++++------------
 vignettes/example_OSPAR.Rmd.orig  | 30 ++++++++++++---------------
 8 files changed, 103 insertions(+), 86 deletions(-)
 create mode 100644 man/info_TEF.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 6e66f42..43d6870 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -34,6 +34,7 @@ export(get_basis_default)
 export(get_basis_most_common)
 export(get_station_code)
 export(get_timeseries)
+export(info_TEF)
 export(normalise_biota_HELCOM)
 export(normalise_sediment_HELCOM)
 export(normalise_sediment_OSPAR)
diff --git a/R/import_functions.R b/R/import_functions.R
index ebd9734..e8fd9f6 100644
--- a/R/import_functions.R
+++ b/R/import_functions.R
@@ -2486,10 +2486,12 @@ create_timeseries <- function(
       bespoke = get(paste("determinand.link", i, sep = "."), mode = "function")
     )
     
-    data <- do.call(
-      linkFunction, 
-      list(data = data, keep = i, drop = wk$det, info = info)
-    )
+    args = list(data = data, keep = i, drop = wk$det)
+    if ("weights" %in% names(wk)) {
+      args = c(args, list(weights = wk$weights))
+    }
+    
+    data <- do.call(linkFunction, args)
   }  
 
   # drop any remaining unwanted determinands (from sum and perhaps bespoke functions);
@@ -3253,7 +3255,7 @@ determinand.link.imposex <- function(data, keep, drop, ...) {
   visitID <- with(data, paste(station_code, year))
   
   
-  # find visits when both indivuduals and stages reported and check consistent
+  # find visits when both individuals and stages reported and check consistent
   
   ok <- by(data, visitID, function(x) {
     with(x, {
@@ -3465,7 +3467,7 @@ determinand.link.sum <- function(data, keep, drop, ...) {
 
 
 
-determinand.link.TEQDFP <- function(data, keep, drop, ...) {
+determinand.link.TEQDFP <- function(data, keep, drop, weights) {
 
   stopifnot(length(keep) == 1, length(drop) > 1)
   
@@ -3499,18 +3501,6 @@ determinand.link.TEQDFP <- function(data, keep, drop, ...) {
   
   summed_data <- by(data[["TRUE"]], ID, function(x) {
     
-    # only some of the determinands are mandatory - otherwise we woudld lose everything 
-    # mandatory determinands contribute at least 1% to the total TEQ based on a quick look-see!
-    # the order below is based on % contribution
-
-    # mandatory <- c(
-    #   "CB126", "CDF2N", "CDD1N", "CDF2T", "TCDD", "CB169", "CB118", "CDFP2", "CDD6X", "CDF4X", 
-    #   "CDF6X")
-    # 
-    # if (!all(mandatory %in% x$determinand)) 
-    #   return(NULL)
-    
-
     # check all bases are the same 
     
     if (!all(drop %in% x$determinand)) return(NULL)
@@ -3524,7 +3514,7 @@ determinand.link.TEQDFP <- function(data, keep, drop, ...) {
     
     x[id] <- lapply(x[id], convert_units, from = x$unit, to = "ug/kg")
     
-    TEQ <- info_TEQ[as.character(x$determinand)]
+    TEQ <- weights[as.character(x$determinand)]
     
     x[id] <- lapply(x[id], "*", TEQ)
     
diff --git a/R/information_functions.R b/R/information_functions.R
index 6f07af7..4a76194 100644
--- a/R/information_functions.R
+++ b/R/information_functions.R
@@ -2539,6 +2539,40 @@ get.info.imposex <- function(
 
 
 
+# TEF values ----
+
+#' TEF values for selected groups of compounds
+#' 
+#' A list of named vectors which currently provide the TEFs for the WHO TEQ for 
+#' dioxins, furans and dioxin-like (planar) polychlorinated biphenyls.
+#' DFP_environmental and DFP_human_health provide the TEFs appropriate for 
+#' testing against the environmental and human health standards respectively.
+#' 
+#' Adding further TEFs will require more development to the relevant 
+#' determinand_link function 
+#' 
+#' @export 
+info_TEF <- list(
+  DFP_environmental = c(
+    "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
+    "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
+    "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
+    "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
+    "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
+    "CDF9X" = 0.1, "CDFO" = 0.00003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
+  ), 
+  DFP_human_health = c(
+    "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
+    "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
+    "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
+    "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
+    "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
+    "CDF9X" = 0.1, "CDFO" = 0.0003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
+  )
+)
+
+
+
 # ICES RECO codes ----
 
 # reads in data from csv files exported from ICES RECO
diff --git a/example_HELCOM.r b/example_HELCOM.r
index da596da..1fa69eb 100644
--- a/example_HELCOM.r
+++ b/example_HELCOM.r
@@ -274,20 +274,6 @@ biota_data <- read_data(
 biota_data <- tidy_data(biota_data)
 
 
-# The construction of the time series has a few more features. However, first we
-# need to provide the individual TEQs to allow the construction of the WHO TEQ
-# for dioxins, furans and planar PCBS (labelled TEQDFP). these are the values
-# for the human health QS. This stage won't be necessary in later releases.
-
-info_TEQ <- c(
-  "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
-  "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
-  "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
-  "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
-  "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
-  "CDF9X" = 0.1, "CDFO" = 0.0003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
-)
-
 # The determinands.control argument does rather more here. There are four summed
 # variables: PFOS, SBDE6, HBCD and SCB6. There is also one variable CB138+163
 # which needs to be relabeled as (replaced by) CB138. For the purposes of the
@@ -320,7 +306,11 @@ biota_timeseries <- create_timeseries(
       det = c("CB28", "CB52", "CB101", "CB138", "CB153", "CB180"), 
       action = "sum"
     ),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_human_health), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_human_health
+    ),
     "LIPIDWT%" = list(det = c("EXLIP%", "FATWT%"), action = "bespoke")
   ),
   normalise = normalise_biota_HELCOM,
diff --git a/example_OSPAR.r b/example_OSPAR.r
index 0b00b09..f611244 100644
--- a/example_OSPAR.r
+++ b/example_OSPAR.r
@@ -80,16 +80,6 @@ sediment_data <- read_data(
 sediment_data <- tidy_data(sediment_data)
 
 
-info_TEQ <- c(
-  "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
-  "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
-  "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
-  "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
-  "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
-  "CDF9X" = 0.1, "CDFO" = 0.00003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
-)
-
-
 sediment_timeseries <- create_timeseries(
   sediment_data,
   determinands.control = list(
@@ -100,7 +90,11 @@ sediment_timeseries <- create_timeseries(
     HBCD = list(det = c("HBCDA", "HBCDB", "HBCDG"), action = "sum"),
     CB138 = list(det = c("CB138+163"), action = "replace"),
     CB156 = list(det = c("CB156+172"), action = "replace"),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_environmental), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_environmental
+    ),
     HCEPX = list(det = c("HCEPC", "HCEPT"), action = "sum")
   ),
   normalise = normalise_sediment_OSPAR,
@@ -174,15 +168,6 @@ biota_data <- read_data(
 
 biota_data <- tidy_data(biota_data)
 
-info_TEQ <- c(
-  "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
-  "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
-  "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
-  "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
-  "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
-  "CDF9X" = 0.1, "CDFO" = 0.00003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
-)
-
 biota_timeseries <- create_timeseries(
   biota_data,
   determinands.control = list(
@@ -207,7 +192,11 @@ biota_timeseries <- create_timeseries(
       det = c("CB28", "CB52", "CB101", "CB118", "CB138", "CB153", "CB180"), 
       action = "sum"
     ),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_environmental), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_environmental
+    ),
     HCEPX = list(det = c("HCEPC", "HCEPT"), action = "sum"),
     HCH = list(det = c("HCHA", "HCHB", "HCHG"), action = "sum"),
     "LIPIDWT%" = list(det = c("EXLIP%", "FATWT%"), action = "bespoke")
diff --git a/man/info_TEF.Rd b/man/info_TEF.Rd
new file mode 100644
index 0000000..b590387
--- /dev/null
+++ b/man/info_TEF.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/information_functions.R
+\docType{data}
+\name{info_TEF}
+\alias{info_TEF}
+\title{TEF values for selected groups of compounds}
+\format{
+An object of class \code{list} of length 2.
+}
+\usage{
+info_TEF
+}
+\description{
+A list of named vectors which currently provide the TEFs for the WHO TEQ for
+dioxins, furans and dioxin-like (planar) polychlorinated biphenyls.
+DFP_environmental and DFP_human_health provide the TEFs appropriate for
+testing against the environmental and human health standards respectively.
+}
+\details{
+Adding further TEFs will require more development to the relevant
+determinand_link function
+}
+\keyword{datasets}
diff --git a/vignettes/example_HELCOM.Rmd.orig b/vignettes/example_HELCOM.Rmd.orig
index 566f6b9..76139f2 100644
--- a/vignettes/example_HELCOM.Rmd.orig
+++ b/vignettes/example_HELCOM.Rmd.orig
@@ -361,17 +361,6 @@ need to provide the individual TEQs to allow the construction of the WHO TEQ
 for dioxins, furans and planar PCBS (labelled TEQDFP). These are the values
 for the human health QS. This stage won't be necessary in later releases.
 
-```{r helcom-teqs}
-info_TEQ <- c(
-  "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
-  "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
-  "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
-  "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
-  "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
-  "CDF9X" = 0.1, "CDFO" = 0.0003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
-)
-```
-
 The biota data are grouped into time series which consist of the 
 measurements of a single determinand in a single matrix (tissue type; e.g. 
 'EH', `LI' or `SB`) in a single species at a single monitoring station. PAH
@@ -386,8 +375,9 @@ before the six PCBs are summed to give SCB6 in order for them to be included
 in the sum.
 
 There are also two 'bespoke' actions in `determinands.control`. These are
-customised functions that do more complicated things. One of
-them computes the TEQDFP. The other deals with the three different ways in
+customised functions that do more complicated things. One of them computes the 
+WHO TEQ for dioxins, furans and planar PCBS (labelled TEQDFP) using the TEFs 
+for the human health QS. The other deals with the three different ways in
 which lipid weight measurements can be submitted.
 
 Finally, `normalise_biota_HELCOM()` is a customised function that determines which
@@ -416,7 +406,11 @@ biota_timeseries <- create_timeseries(
       det = c("CB28", "CB52", "CB101", "CB138", "CB153", "CB180"), 
       action = "sum"
     ),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_human_health), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_human_health
+    ),
     "LIPIDWT%" = list(det = c("EXLIP%", "FATWT%"), action = "bespoke")
   ),
   normalise = normalise_biota_HELCOM,
diff --git a/vignettes/example_OSPAR.Rmd.orig b/vignettes/example_OSPAR.Rmd.orig
index 8998cf3..6a39e87 100644
--- a/vignettes/example_OSPAR.Rmd.orig
+++ b/vignettes/example_OSPAR.Rmd.orig
@@ -154,21 +154,6 @@ sediment_data <- read_data(
 sediment_data <- tidy_data(sediment_data)
 ```
 
-The TEQs for the WHO TEQ for dioxins, furans and planar PCBs (labelled TEQDFP) 
-are the values for the secondary poisoning QS. This stage won't be necessary in
-later releases.
-
-```{r ospar-TEQ}
-info_TEQ <- c(
-  "CB77" = 0.0001, "CB81" = 0.0003, "CB105" = 0.00003, "CB118" = 0.00003, 
-  "CB126" = 0.1, "CB156" = 0.00003, "CB157" = 0.00003, "CB167" = 0.00003, 
-  "CB169" = 0.03, "CDD1N" = 1, "CDD4X" = 0.1, "CDD6P" = 0.01, "CDD6X" = 0.1, 
-  "CDD9X" = 0.1, "CDDO" = 0.0003, "CDF2N" = 0.3, "CDF2T" = 0.1, "CDF4X" = 0.1, 
-  "CDF6P" = 0.01, "CDF6X" = 0.1, "CDF9P" = 0.01,
-  "CDF9X" = 0.1, "CDFO" = 0.00003, "CDFP2" = 0.03, "CDFX1" = 0.1, "TCDD" = 1
-)
-```
-
 A customised function `normalise_sediment_OSPAR` has been written to implement 
 the OSPAR normalisation procedure. This is necessary because the pivot values 
 used for normalising metals are region-specific. That aside, the procedure is
@@ -177,6 +162,9 @@ normalised to 2.5% organic carbon, unless the samples were taken in the Iberian
 Coast or Gulf of Cadiz OSPAR subregions, in which case the concentrations are 
 not normalised.
 
+The TEFs for the WHO TEQ for dioxins, furans and planar PCBs (labelled TEQDFP) 
+are the values for the secondary poisoning QS. 
+
 ```{r ospar-sediment-timeseries}
 sediment_timeseries <- create_timeseries(
   sediment_data,
@@ -188,7 +176,11 @@ sediment_timeseries <- create_timeseries(
     HBCD = list(det = c("HBCDA", "HBCDB", "HBCDG"), action = "sum"),
     CB138 = list(det = c("CB138+163"), action = "replace"),
     CB156 = list(det = c("CB156+172"), action = "replace"),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_environmental), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_environmental
+    ),
     HCEPX = list(det = c("HCEPC", "HCEPT"), action = "sum")
   ),
   normalise = normalise_sediment_OSPAR,
@@ -319,7 +311,11 @@ biota_timeseries <- create_timeseries(
       det = c("CB28", "CB52", "CB101", "CB118", "CB138", "CB153", "CB180"), 
       action = "sum"
     ),
-    TEQDFP = list(det = names(info_TEQ), action = "bespoke"),
+    TEQDFP = list(
+      det = names(info_TEF$DFP_environmental), 
+      action = "bespoke", 
+      weights = info_TEF$DFP_environmental
+    ),
     HCEPX = list(det = c("HCEPC", "HCEPT"), action = "sum"),
     HCH = list(det = c("HCHA", "HCHB", "HCHG"), action = "sum"),
     "LIPIDWT%" = list(det = c("EXLIP%", "FATWT%"), action = "bespoke")