Commit

Merge pull request #15 from tscnlab/development_dosimetry-metrics
Development dosimetry metrics
JZauner authored Jun 10, 2024
2 parents d9a5e4f + c9250bd commit 3589beb
Showing 52 changed files with 2,559 additions and 232 deletions.
Binary file modified .DS_Store
15 changes: 0 additions & 15 deletions .gitignore
@@ -1,55 +1,40 @@
# History files
.Rhistory
.Rapp.history

# Session Data files
.RData
.RDataTmp

# User-specific files
.Ruserdata

# Example code in package build process
*-Ex.R

# Output files from R CMD build
/*.tar.gz

# Output files from R CMD check
/*.Rcheck/

# RStudio files
.Rproj.user/

# produced vignettes
vignettes/*.html
vignettes/*.pdf

# OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
.httr-oauth

# knitr and R markdown default cache directories
*_cache/
/cache/

# Temporary files created by R markdown
*.utf8.md
*.knit.md

# R Environment Variables
.Renviron

# pkgdown site
docs/

# translation temp files
po/*~

# RStudio Connect folder
rsconnect/
.Rproj.user
docs
inst/doc

# DS_Store file
.DS_Store
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: LightLogR
Title: Work With Data from Wearable Light Loggers and Optical Radiation Dosimeters
Version: 0.3.2
Version: 0.3.3
Authors@R: c(
person("Johannes", "Zauner",
email = "[email protected]", role = c("aut", "cre"),
6 changes: 6 additions & 0 deletions NAMESPACE
@@ -7,6 +7,7 @@ export(Brown_rec)
export(Datetime_breaks)
export(Datetime_limits)
export(aggregate_Datetime)
export(barroso_lighting_metrics)
export(bright_dark_period)
export(centroidLE)
export(count_difftime)
@@ -18,10 +19,12 @@ export(dominant_epoch)
export(dst_change_handler)
export(dst_change_summary)
export(duration_above_threshold)
export(exponential_moving_average)
export(filter_Date)
export(filter_Datetime)
export(filter_Datetime_multiple)
export(filter_Time)
export(frequency_crossing_threshold)
export(gap_finder)
export(gap_handler)
export(gapless_Datetimes)
@@ -43,9 +46,12 @@ export(nvRC_circadianDisturbance)
export(nvRC_relativeAmplitudeError)
export(nvRD)
export(nvRD_cumulative_response)
export(period_above_threshold)
export(pulses_above_threshold)
export(sc2interval)
export(sleep_int2Brown)
export(symlog_trans)
export(threshold_for_duration)
export(timing_above_threshold)
importFrom(magrittr,"%>%")
importFrom(rlang,":=")
4 changes: 4 additions & 0 deletions NEWS.md
@@ -1,3 +1,7 @@
# LightLogR 0.3.3

* New and updated metric functions. LightLogR now contains 16 metric families with 60 sub-metrics.

# LightLogR 0.3.2

* added import functions for `nanoLambda` and `LightWatcher` devices
124 changes: 111 additions & 13 deletions R/helper.R
@@ -16,9 +16,9 @@ test.Time.regex <- function(input) {

if(!test){
stop(paste("input:",
rlang::as_string(
input.defused),
"needs to be in the format 'hh:mm:ss'"))
rlang::as_string(
input.defused),
"needs to be in the format 'hh:mm:ss'"))
}
}
}
@@ -45,17 +45,17 @@ compare_difftime <- function(dataset1, dataset2, Datetime.colname = Datetime, n
#do a full join with every column but Quantile
group_variables <- setdiff(names(Quant2), "Quant")
dplyr::full_join(Quant1, Quant2, by = group_variables) %>%
dplyr::mutate(
comparison = Quant.x <= Quant.y
)
dplyr::mutate(
comparison = Quant.x <= Quant.y
)
}

#calculate whether any of the comparisons in compare_difftime is FALSE
compare_difftime.any <- function(...) {
comparison <- compare_difftime(...) %>%
dplyr::filter(comparison == FALSE) %>%
dplyr::rename(Dataset.Interval = Quant.x,
Reference.Interval = Quant.y) %>%
Reference.Interval = Quant.y) %>%
dplyr::select(-comparison)

if(nrow(comparison) > 0) comparison else TRUE
@@ -74,10 +74,10 @@ create.Reference.label <- function(dataset,
dplyr::mutate(!!Reference.label.column.str :=
dplyr::if_else(
!is.na({{ Reference.column }}), Reference.label, NA
)
)
)
dataset
} else dataset
} else dataset
}

#helper to pick the columns that are used for grouping
@@ -89,9 +89,9 @@ pick.grouping.columns <- function(dataset) {

# Compare with threshold
compare_threshold <- function(Light.vector,
threshold,
comparison = c("above", "below"),
na.replace = FALSE){
threshold,
comparison = c("above", "below"),
na.replace = FALSE){

comparison = match.arg(comparison)

@@ -119,6 +119,105 @@ compare_threshold <- function(Light.vector,
return(out)
}

# Find clusters of consecutive `TRUE` values in a logical vector.
# Periods of consecutive `TRUE` values that are interrupted by short periods of
# `FALSE` values can be concatenated into a single cluster (see the usage
# sketch after the function body).
find_clusters <- function(x,
min.length,
max.interrupt = 0,
prop.interrupt = 1,
cluster_name = "cluster") {

stopifnot(
"`x` must be logical" = is.logical(x),
"`min.length` must be larger than 0" = min.length > 0,
"`max.interrupt` must be larger than or equal to 0" = max.interrupt >= 0,
"`prop.interrupt` must be between 0 and 1" =
prop.interrupt >= 0 & prop.interrupt <= 1
)

# Replace NA with FALSE
x <- tidyr::replace_na(x, FALSE)

# Find the start and end indices of each cluster of consecutive values
start_indices <- which(x & !dplyr::lag(x, default = FALSE))
end_indices <- which(x & !dplyr::lead(x, default = FALSE))
ranges <- as.numeric(matrix(rbind(start_indices, end_indices), nrow = 2))

# Remove ranges < min.length
intra_diff <- diff(ranges)[1:(length(ranges) - 1) %% 2 != 0] + 1
exclude_ranges <- c(
which(intra_diff < min.length) * 2,
which(intra_diff < min.length) * 2 - 1
)
if (length(exclude_ranges) > 0) {
ranges <- ranges[-exclude_ranges]
}

# Calculate cumulative ranges
intra_diff <- diff(ranges)[1:(length(ranges) - 1) %% 2 != 0] + 1
intra_cumsum <- intra_diff[1:length(intra_diff)-1] + intra_diff[2:length(intra_diff)]

# Inter-range differences
inter_diff <- diff(ranges)[1:(length(ranges) - 1) %% 2 == 0] - 1

# Proportion inter-range difference and cumulative range sums
interrupt_ratio <- inter_diff / intra_cumsum

# Combine ranges with inter-range difference <= max.interrupt &
# interrupt ratio <= prop.interrupt
exclude_ranges <- c(
which(inter_diff <= max.interrupt & interrupt_ratio <= prop.interrupt) * 2,
which(inter_diff <= max.interrupt & interrupt_ratio <= prop.interrupt) * 2 + 1
)
if (length(exclude_ranges) > 0) {
ranges <- ranges[-exclude_ranges]
}

# Make data frame with intervals
if (length(ranges) > 0) {
intervals <-
matrix(ranges, ncol = 2, byrow = TRUE) %>%
as.data.frame() %>%
magrittr::set_names(c("cluster_start", "cluster_end")) %>%
dplyr::mutate(cluster_idx = 1:length(cluster_start)) %>%
dplyr::rowwise() %>%
dplyr::mutate(row_idx = list(seq(cluster_start, cluster_end))) %>%
tidyr::unnest(cols = c(row_idx)) %>%
dplyr::ungroup() %>%
dplyr::mutate(is_cluster = TRUE) %>%
dplyr::relocate(row_idx, is_cluster, cluster_idx, cluster_start, cluster_end)
} else {
intervals <-
tibble::tibble(
row_idx = 1, is_cluster = NA, cluster_idx = NA,
cluster_start = NA, cluster_end = NA
)
}
# Replace "cluster" with custom name
intervals <- intervals %>%
dplyr::rename_with(~gsub("cluster", cluster_name, .x))

return(intervals)
}
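
# A minimal usage sketch (illustration only, not part of the package code),
# assuming the function is called internally on a logical vector such as
# `MEDI > 250`. Runs of `TRUE` shorter than `min.length` are dropped; runs
# separated by at most `max.interrupt` `FALSE` values (subject to
# `prop.interrupt`) are merged into one cluster:
#   x <- c(FALSE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, FALSE, FALSE, TRUE)
#   find_clusters(x, min.length = 2, max.interrupt = 1, cluster_name = "pulse")
#   # The lone TRUE at index 10 is shorter than `min.length` and is dropped;
#   # the runs at indices 2-4 and 6-7 are merged across the one-sample gap,
#   # giving a tibble with row_idx 2:7, pulse_idx = 1, pulse_start = 2, and
#   # pulse_end = 7.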

# Convert `x` to time scale of `t`
convert_to_timescale <- function(x, t){
if(lubridate::is.POSIXct(t)){
x <- lubridate::as_datetime(x, tz = lubridate::tz(t))
}
if(hms::is_hms(t)){
x <- hms::as_hms(x)
}
if(lubridate::is.duration(t)){
x <- lubridate::as.duration(x)
}
if(lubridate::is.difftime(t) & !hms::is_hms(t)){
x <- lubridate::as.difftime(x, unit = units(t))
}
return(x)
}
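
# A brief sketch of the intent (illustration only, not part of the package
# code): values computed on a plain numeric scale are mapped back onto the
# scale of the original time vector, e.g.
#   convert_to_timescale(43200, hms::as_hms("12:30:00"))  # 12:00:00 (hms)
#   convert_to_timescale(3600, lubridate::dhours(1))      # duration: 3600s (~1 hours)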

# Create the list of epochs, which are either dominant or not
epoch_list <- function(dataset = dataset,
Datetime.colname = Datetime,
@@ -135,4 +234,3 @@ epoch_list <- function(dataset = dataset,

epochs
}

101 changes: 101 additions & 0 deletions R/metric_EMA.R
@@ -0,0 +1,101 @@
#' Exponential moving average filter (EMA)
#'
#' This function smooths the data using an exponential moving average filter
#' with a specified decay half-life.
#'
#' @param Light.vector Numeric vector containing the light data. Missing values are
#' replaced by 0.
#' @param Time.vector Vector containing the time data. Can be \link[base]{POSIXct}, \link[hms]{hms},
#' \link[lubridate]{duration}, or \link[base]{difftime}.
#' @param decay The decay half-life controlling the exponential smoothing.
#' Can be either a \link[lubridate]{duration} or a string. If it is a string, it
#' needs to be a valid \link[lubridate]{duration} string, e.g., `"1 day"` or `"10 sec"`.
#' The default is set to `"90 min"` for a biologically relevant estimate (see
#' the reference paper).
#' @param epoch The epoch at which the data was sampled. Can be either a
#' \link[lubridate]{duration} or a string. If it is a string, it needs to be
#' either `"dominant.epoch"` (the default) for a guess based on the data, or a valid
#' \link[lubridate]{duration} string, e.g., `"1 day"` or `"10 sec"`.
#'
#' @return A numeric vector containing the smoothed light data. The output has the same
#' length as `Light.vector`.
#'
#' @export
#'
#' @family metrics
#'
#' @details The timeseries is assumed to be regular. Missing values in the
#' light data will be replaced by 0.
#'
#' @references
#' Price, L. L. A. (2014). On the Role of Exponential Smoothing in Circadian
#' Dosimetry. \emph{Photochemistry and Photobiology}, 90(5), 1184-1192.
#' \url{https://doi.org/10.1111/php.12282}
#'
#' Hartmeyer, S.L., Andersen, M. (2023). Towards a framework for light-dosimetry studies:
#' Quantification metrics. \emph{Lighting Research & Technology}.
#' \url{https://doi.org/10.1177/14771535231170500}
#'
#' @examples
#' sample.data.environment.EMA = sample.data.environment %>%
#' dplyr::filter(Id == "Participant") %>%
#' filter_Datetime(length = lubridate::days(2)) %>%
#' dplyr::mutate(MEDI.EMA = exponential_moving_average(MEDI, Datetime))
#'
#' # Plot to compare results
#' sample.data.environment.EMA %>%
#' ggplot2::ggplot(ggplot2::aes(x = Datetime)) +
#' ggplot2::geom_line(ggplot2::aes(y = MEDI), colour = "black") +
#' ggplot2::geom_line(ggplot2::aes(y = MEDI.EMA), colour = "red")
#'
exponential_moving_average <- function(Light.vector,
Time.vector,
decay = "90 min",
epoch = "dominant.epoch") {

# Perform argument checks
stopifnot(
"`Light.vector` must be numeric!" = is.numeric(Light.vector),
"`Time.vector` must be POSIXct, hms, duration, or difftime!" =
lubridate::is.POSIXct(Time.vector) | hms::is_hms(Time.vector) |
lubridate::is.duration(Time.vector) | lubridate::is.difftime(Time.vector),
"`Light.vector` and `Time.vector` must be same length!" =
length(Light.vector) == length(Time.vector),
"`decay` must either be a duration or a string" =
lubridate::is.duration(decay) | is.character(decay),
"`epoch` must either be a duration or a string" =
lubridate::is.duration(epoch) | is.character(epoch)
)

# Get the epochs based on the data
if (is.character(epoch) && epoch == "dominant.epoch") {
epoch <- count_difftime(tibble::tibble(Datetime = Time.vector))$difftime[1]
}
# If the user specified an epoch, use that instead
else {
epoch <- lubridate::as.duration(epoch)
}

# Replace missing values
if (any(is.na(Light.vector))) {
warning("Light data contains missing values! They are replaced by 0.")
Light.vector[is.na(Light.vector)] <- 0
}

# Calculate smoothing factor beta
decay <- lubridate::as.duration(decay)
beta <- log(2) / (as.numeric(decay) / as.numeric(epoch))
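# With this parameterisation, the filter's response to a step change decays to
# half after approximately `decay / epoch` samples, i.e. after roughly `decay`
# in clock time (e.g. ~180 samples for a 90-min decay at a 30-s epoch).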

# EMA filter
D <- double(length(Light.vector))
for (idx in 1:length(Light.vector)) {
if (idx == 1) {
D[idx] <- beta * (Light.vector[idx])
} else {
D[idx] <- D[idx - 1] + beta * (Light.vector[idx] - D[idx - 1])
}
}

# Return numeric vector of EMA light values
return(D)
}
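
# Quick numerical check (illustration only, not part of the package code):
# a constant 100 lx input sampled every 60 s with the default decay of
# "90 min" should reach roughly half of its steady-state value after about
# 90 samples, i.e. after about 90 minutes:
#   ema <- exponential_moving_average(
#     Light.vector = rep(100, 200),
#     Time.vector = hms::hms(seconds = (0:199) * 60),
#     epoch = "60 sec"
#   )
#   round(ema[90])  # ~50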