Added documentation, examples, and tests

tscnlab · Oct 15, 2023 · 9481e7c · 9481e7c
1 parent ec83fcf
commit 9481e7c
Show file tree

Hide file tree

Showing 25 changed files with 706 additions and 119 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,5 +1,11 @@
 # LightLogR 0.2.0.9000
 
+* Added Unit tests and documentation for all new functions.
+
+* To `filter_Datetime()` and `filter_Date()` added the option to filter for group specific dates.
+
+* Added the family of functions around `States` and `Reference` to import, process, and add states to light logger data, like sleep/wake times, wear times, or other data. This family includes `import.Statechanges()`, `sc2interval()`, `ìnterval2state()`, `data2reference()`, `sleep.int2Brown()`, `Brown.check()`, `Brown.rec()`, and `Brown2reference()`. 
+
 * Added the Article/Vignette "What´s in a Day" to demonstrate the LightLogR workflow.
 
 * Added the convenience function `create_Time.data()` to create a Time-of-Day column in datasets.

diff --git a/R/Brown.R b/R/Brown.R
@@ -24,13 +24,15 @@
 #'   a `character` scalar.
 #' @param Brown.check.colname The name of the column that will contain the check
 #'   if the illuminance is within the recommended levels.
+#' @param overwrite.Reference If `TRUE` (defaults to `FALSE`), the function will
+#'   overwrite the `Brown.rec.colname` columns if it already exists.
 #' @param ... Additional arguments that will be passed to [Brown.rec()] and
 #'   [Brown.check()]. This is only relevant to correct the names of the daytime
 #'   states or the thresholds used within these states. See the documentation of
 #'   these functions for more information.
 #'
 #' @references
-#'   https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3001571
+#' https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3001571
 #'
 #' @return A dataframe on the basis of the `dataset` that contains the added
 #'   columns.
@@ -51,19 +53,21 @@ Brown2reference <- function(dataset,
                             Reference.label.colname = Reference.label,
                             Reference.label = "Brown et al. (2022)",
                             Brown.check.colname = Reference.check,
+                            overwrite.Reference = FALSE,
                             ...) {
 
 
   # Initial Checks ----------------------------------------------------------
 
   MEDI.colname.defused <- colname.defused({{ MEDI.colname }})
   Brown.state.colname.defused <- colname.defused({{ Brown.state.colname }})
-
   Brown.rec.colname.str <- colname.defused({{ Brown.rec.colname }})
 
-  #give an error if the reference column is present
-  if(Brown.rec.colname.str %in% names(dataset)) 
+  #give an error or warning if the reference column is present
+  if(Brown.rec.colname.str %in% names(dataset) & !overwrite.Reference) 
     stop("A Reference column with the given (or default) name is already part of the dataset. Please remove the column or choose a different name")
+  if(Brown.rec.colname.str %in% names(dataset)) 
+    warning("A Reference column with the given (or default) name is already part of the dataset. It is overwritten, because `overwrite.Reference = TRUE ` was set.")
 
   stopifnot(
     "dataset is not a dataframe" = is.data.frame(dataset),
@@ -72,7 +76,9 @@ Brown2reference <- function(dataset,
     "Brown.state.colname must be part of the dataset" = 
       Brown.state.colname.defused %in% names(dataset),
     "MEDI.colname must be a numeric column" = 
-      is.numeric(dataset[[MEDI.colname.defused]])
+      is.numeric(dataset[[MEDI.colname.defused]]),
+    "overwrite.Reference must be a logical" = 
+      is.logical(overwrite.Reference)
   )
 
   #check whether the dataset has the right labels
@@ -101,23 +107,38 @@ Brown2reference <- function(dataset,
   dataset
 }
 
-#' Check whether a value is within the recommended illuminance/MEDI levels by Brown et al. (2022)
-#' 
-#' This is a lower level function. It checks a given value against a threshold for the states given by Brown et al. (2022). The function is vectorized. For `day` the threshold is a lower limit, for `evening` and `night` the threshold is an upper limit.
+#' Check whether a value is within the recommended illuminance/MEDI levels by
+#' Brown et al. (2022)
+#'
+#' This is a lower level function. It checks a given value against a threshold
+#' for the states given by Brown et al. (2022). The function is vectorized. For
+#' `day` the threshold is a lower limit, for `evening` and `night` the threshold
+#' is an upper limit.
 #'
-#' @param value Illuminance value to check against the recommendation. needs to be numeric, can be a vector.
-#' @param state The state from Brown et al. (2022). Needs to be a character vector with the same length as `value`.
-#' @param Brown.day,Brown.evening,Brown.night The names of the states from Brown et al. (2022). These are the default values (`"day"`, `"evening"`, `"night"`), but can be changed if the names in `state` are different. Needs to be a character scalar.
-#' @param Brown.day.th,Brown.evening.th,Brown.night.th The thresholds for the states from Brown et al. (2022). These are the default values (`250`, `10`, `1`), but can be changed if the thresholds should be different. Needs to be a numeric scalar.
+#' @param value Illuminance value to check against the recommendation. needs to
+#'   be numeric, can be a vector.
+#' @param state The state from Brown et al. (2022). Needs to be a character
+#'   vector with the same length as `value`.
+#' @param Brown.day,Brown.evening,Brown.night The names of the states from Brown
+#'   et al. (2022). These are the default values (`"day"`, `"evening"`,
+#'   `"night"`), but can be changed if the names in `state` are different. Needs
+#'   to be a character scalar.
+#' @param Brown.day.th,Brown.evening.th,Brown.night.th The thresholds for the
+#'   states from Brown et al. (2022). These are the default values (`250`, `10`,
+#'   `1`), but can be changed if the thresholds should be different. Needs to be
+#'   a numeric scalar.
 #'
-#' @return A logical vector with the same length as `value` that indicates whether the value is within the recommended illuminance levels.
+#' @return A logical vector with the same length as `value` that indicates
+#'   whether the value is within the recommended illuminance levels.
 #' @export
-#' @references https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3001571
+#' @references
+#'   https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3001571
 #'
 #' @family Brown
 #' @examples
 #' states <- c("day", "evening", "night", "day")
 #' values <- c(100, 10, 1, 300)
+#' Brown.check(values, states)
 #' Brown.check(values, states, Brown.day.th = 100)
 #' 
 Brown.check <- function(value,
@@ -130,13 +151,12 @@ Brown.check <- function(value,
                         Brown.night.th = 1) {
 
   stopifnot("Thresholds need to be numeric" = 
-              is.numeric(
-                c(Brown.day.th, Brown.evening.th, Brown.night.th
-                )
-              )
+              is.numeric(c(Brown.day.th, Brown.evening.th, Brown.night.th)),
+            "States need to be scalars" =
+              is.all.scalar(Brown.day, Brown.evening, Brown.night)
   )
 
-  #check wheter state has the same length as value, give an error if not
+  #check whether state has the same length as value, give an error if not
   stopifnot(
     "state needs to be a character vector with the same length as value" = 
               is.character(state) & length(state) == length(value)
@@ -155,15 +175,20 @@ Brown.check <- function(value,
 
 #' Calculate the recommended illuminance/MEDI levels by Brown et al. (2022)
 #'
-#' This is a lower level function. It calculates the recommended illuminance/MEDI levels by Brown et al. (2022) for a given state. The function is vectorized.
+#' This is a lower level function. It calculates the recommended
+#' illuminance/MEDI levels by Brown et al. (2022) for a given state. The
+#' function is vectorized.
 #'
 #' @inheritParams Brown.check
-#' @param state The state from Brown et al. (2022). Needs to be a character vector.
+#' @param state The state from Brown et al. (2022). Needs to be a character
+#'   vector.
 #'
-#' @return df A dataframe with the same length as `state` that contains the recommended illuminance/MEDI levels.
+#' @return df A dataframe with the same length as `state` that contains the
+#'   recommended illuminance/MEDI levels.
 #' @export
-#' 
-#' @references https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3001571
+#'
+#' @references
+#'   https://journals.plos.org/plosbiology/article?id=10.1371/journal.pbio.3001571
 #'
 #' @family Brown
 #' @examples
@@ -180,15 +205,15 @@ Brown.rec <- function(state,
                       Brown.night.th = 1){
 
   stopifnot("Thresholds need to be numeric" = 
-              is.numeric(
-                c(Brown.day.th, Brown.evening.th, Brown.night.th
-                )
-              )
+              is.numeric(c(Brown.day.th, Brown.evening.th, Brown.night.th)),
+            "States need to be scalars" =
+              is.all.scalar(Brown.day, Brown.evening, Brown.night)
   )
 
   dplyr::case_when(
     state == Brown.day ~ Brown.day.th,
     state == Brown.evening ~ Brown.evening.th,
-    state == Brown.night ~ Brown.night.th
+    state == Brown.night ~ Brown.night.th,
+    .default = NA
   )
 }
diff --git a/R/filter_Datetime.R b/R/filter_Datetime.R
@@ -24,6 +24,7 @@
 #'   at the documentation from [lubridate]. Basically, periods model clocktimes,
 #'   whereas durations model physical processes. This matters on several
 #'   occasions, like leap years, or daylight savings.
+#' @param filter.expr Advanced filtering conditions. If not `NULL` (default) and given an `expression`, this is used to [dplyr::filter()] the results. This can be useful to filter, e.g. for group-specific conditions, like starting after the first two days of measurement (see examples).
 #' @param tz Timezone of the start/end times. If `NULL` (the default), it will
 #'   take the timezone from the `Datetime.colname` column.
 #' @param full.day A `logical` indicating wether the `start` param should be
@@ -63,6 +64,19 @@
 #' sample.data.environment %>%
 #' filter_Datetime(length = days(2)) %>%
 #' pull(Datetime) %>% range()
+#' 
+#' #advanced filtering based on grouping (second day of each group)
+#' sample.data.environment %>%
+#' group_by(Source) %>% 
+#' #shift the "Environment" group by one day
+#' mutate(
+#' Datetime = ifelse(Source == "Environment", Datetime + ddays(1), Datetime) %>% 
+#' as_datetime()) -> sample
+#' sample %>% summarize(Daterange = paste(min(Datetime), max(Datetime), sep = " - "))
+#' #now we can use the `filter.expr` argument to filter from the second day of each group
+#' sample %>% 
+#' filter_Datetime(filter.expr = Datetime > Datetime[1] + days(1)) %>% 
+#' summarize(Daterange = paste(min(Datetime), max(Datetime), sep = " - "))
 
 
 filter_Datetime <- function(dataset, 
@@ -71,10 +85,13 @@ filter_Datetime <- function(dataset,
                             end = NULL, 
                             length = NULL,
                             full.day = FALSE,
-                            tz = NULL) {
+                            tz = NULL,
+                            filter.expr = NULL) {
 
   # Initial Checks ----------------------------------------------------------
 
+  filter.expr <- rlang::enexpr(filter.expr)
+
   Datetime.colname.defused <- 
     rlang::enexpr(Datetime.colname) %>% rlang::as_string()
   #timezone
@@ -93,15 +110,13 @@ filter_Datetime <- function(dataset,
       Datetime.colname.defused %in% names(dataset),
     "Datetime.colname must be a Datetime" = 
       lubridate::is.POSIXct(dataset[[Datetime.colname.defused]]),
-    "At least one parameter from `start`, `end` or `length` must be specified" =
-      !all(is.null(start), is.null(end), is.null(length)),
+    "At least one parameter from `start`, `end`, `length` or `filter.expr` must be specified" =
+      !all(is.null(start), is.null(end), is.null(length), is.null(filter.expr)),
     "tz needs to be a character" = is.character(tz),
     "tz needs to be a valid time zone, see `OlsonNames()`" = tz %in% OlsonNames(),
     "full.day needs to be a `logical`" = is.logical(full.day)
     )
 
-  # if(!is.null(c(end, start, length))) message("length will be ignored")
-
   # Manipulation ----------------------------------------------------------
 
   #calculate starting time if length and end are given
@@ -134,6 +149,10 @@ filter_Datetime <- function(dataset,
         {{ Datetime.colname }} >= lubridate::as_datetime(start, tz = tz),
         {{ Datetime.colname }} <= lubridate::as_datetime(end, tz = tz)
         )
+    #possible extra filter step
+    if(!is.null(filter.expr)) {
+      dataset <- dataset %>% dplyr::filter(!!filter.expr)
+    }
 
   # Return --------------------------------------------------------------
   dataset

diff --git a/R/helper.R b/R/helper.R
@@ -31,3 +31,54 @@ colname.defused <- function(Colname, as_string = TRUE) {
   }
   else Colname
 }
+
+#tests whether the inputs are all scalar
+is.all.scalar <- function(...) {
+  list(...) %>% 
+    purrr::every(\(x) length(x) == 1)
+}
+
+#counts the different time differences per group (in a grouped dataset)
+count.difftime <- function(dataset, Datetime.column = Datetime) {
+  dataset %>% 
+    dplyr::mutate(
+      difftime = c(NA, diff({{Datetime.column}}) %>% lubridate::as.duration())
+      ) %>% 
+    tidyr::drop_na(difftime) %>% 
+    dplyr::count(difftime, sort = TRUE)
+}
+
+#calculate the nth percentile of time differences per group (in a grouped dataset)
+nth.difftime <- function(dataset, Datetime.column = Datetime, n = 0.95) {
+  dataset %>% 
+    dplyr::mutate(
+      difftime = c(NA, diff({{Datetime.column}}) %>% lubridate::as.duration())
+      ) %>% 
+    tidyr::drop_na(difftime) %>% 
+    dplyr::summarise(
+      percentile = quantile(difftime, probs = n, na.rm = TRUE)
+    )
+}
+
+#calculate the whether the nth percentile of time differences in one dataset is smaller or equal to the nth percentile of time differences in another dataset
+compare.difftime <- function(dataset1, dataset2, Datetime.column = Datetime, n = 0.95) {
+  percentile1 <- nth.difftime(dataset1, {{ Datetime.column }}, n = n)
+  percentile2 <- nth.difftime(dataset2, {{ Datetime.column }}, n = n)
+  #do a full join with every column but percentile
+  group_variables <- setdiff(names(percentile2), "percentile")
+  dplyr::full_join(percentile1, percentile2, by = group_variables) %>% 
+  dplyr::mutate(
+    comparison = percentile.x <= percentile.y
+  )
+}
+
+#calculate whether any of the comparisons in compare.difftime is FALSE
+compare.difftime.any <- function(...) {
+  comparison <- compare.difftime(...) %>% 
+    dplyr::filter(comparison == FALSE) %>% 
+    dplyr::rename(Dataset.Interval = percentile.x,
+                 Reference.Interval = percentile.y) %>% 
+    dplyr::select(-comparison)
+
+  if(nrow(comparison) > 0) comparison else TRUE
+}