diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index dfabc07..0000000
Binary files a/.DS_Store and /dev/null differ
diff --git a/NEWS.md b/NEWS.md
index dc69d6d..e4fd144 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,12 @@
 * `bright_dark_period()` now maintains the date when looping the data.
 
+* Added articles on `Import & Cleaning`, `Metrics`, and `Visualizations` to the website.
+
+* Added the option to print more rows of the observation intervals during `import`.
+
+* Added the option to set a length for the dataset that counts from the end of the data in `filter_Datetime()` and family.
+
 # LightLogR 0.3.5
 
 * Added the function `aggregate_Date()` to aggregate long datasets to one day per group.
diff --git a/R/aaa.r b/R/aaa.r
index ddebc49..d70ef61 100644
--- a/R/aaa.r
+++ b/R/aaa.r
@@ -1,4 +1,4 @@
-Time <- mEDI <- Time.data <- Datetime <- timestamp <- tz <- Day.data <- `DATE/TIME` <- n <- Datetime.rounded <- id <- sleep.colname.string <- file.name <- Interval <- original.datapoints.fleeting <- MEDI <- State.Brown <- Reference <- Reference.check <- Id <- Start.date.shift <- data <- Shift <- `MELANOPIC EDI` <- State <- group <- End <- Start <- Quant.x <- Quant.y <- is.implicit <- group.indices <- Id2 <- gap.id <- start <- end <- path <- auto.id <- n_max <- manual.id <- silent <- Light <- Day <- N <- is_missing <- Hour <- .change <- dst_start <- .dst <- .dst2 <- dst_adjustment <- auto.plot <- group.1 <- group.2 <- group.indices2 <- cluster_start <- cluster_end <- row_idx <- is_cluster <- cluster_idx <- is_pulse <- pulse_idx <- light <- time <- level <- duration <- mean_duration <- onset <- midpoint <- offset <- mean_onset <- mean_midpoint <- mean_offset <- Date.data <- NULL
+Time <- mEDI <- Time.data <- Datetime <- timestamp <- tz <- Day.data <- `DATE/TIME` <- n <- Datetime.rounded <- id <- sleep.colname.string <- file.name <- Interval <- original.datapoints.fleeting <- MEDI <- State.Brown <- Reference <- Reference.check <- Id <- Start.date.shift <- data <- Shift <- `MELANOPIC EDI` <- State <- group <- End <- Start <- Quant.x <- Quant.y <- is.implicit <- group.indices <- Id2 <- gap.id <- start <- end <- path <- auto.id <- n_max <- manual.id <- silent <- Light <- Day <- N <- is_missing <- Hour <- .change <- dst_start <- .dst <- .dst2 <- dst_adjustment <- auto.plot <- group.1 <- group.2 <- group.indices2 <- cluster_start <- cluster_end <- row_idx <- is_cluster <- cluster_idx <- is_pulse <- pulse_idx <- light <- time <- level <- duration <- mean_duration <- onset <- midpoint <- offset <- mean_onset <- mean_midpoint <- mean_offset <- Date.data <- print_n <- NULL
 
 empty_function <- function() {
   rsconnect::accountInfo()
diff --git a/R/filter_Datetime.R b/R/filter_Datetime.R
index 4918cdb..4ce59e5 100644
--- a/R/filter_Datetime.R
+++ b/R/filter_Datetime.R
@@ -37,6 +37,10 @@
 #' is FALSE). This is useful, e.g., when the first observation in the dataset
 #' is slightly after midnight. If TRUE, it will count the length from midnight
 #' on to avoid empty days in plotting with [gg_day()].
+#' @param length_from_start A `logical` indicating whether the `length` argument
+#' should be applied to the start (default, TRUE) or the end of the data
+#' (FALSE). Only relevant if neither the `start` nor the `end` arguments are
+#' provided.
 #' @param only_Id An expression of `ids` where the filtering should be applied
 #' to. If `NULL` (the default), the filtering will be applied to all `ids`.
 #' Based on this expression, the dataset will be split in two and only
@@ -93,6 +97,7 @@ filter_Datetime <- function(dataset,
                             start = NULL,
                             end = NULL,
                             length = NULL,
+                            length_from_start = TRUE,
                             full.day = FALSE,
                             tz = NULL,
                             only_Id = NULL,
@@ -156,12 +161,22 @@ filter_Datetime <- function(dataset,
       start <- dataset[[Datetime.colname.defused]] %>% min()
   }
 
-  #calculate end time if length is given
+
+
+  #calculate end time if length is given & length_from_start is TRUE
   if(is.null(end) & !is.null(length)) {
+    if(length_from_start) {
     end <- switch (full.day %>% as.character(),
                    "TRUE" = lubridate::as_date(start, tz = tz),
                    "FALSE" = lubridate::as_datetime(start, tz = tz)
                    ) + length
+    } else {
+      end <- dataset[[Datetime.colname.defused]] %>% max()
+      start <- switch (full.day %>% as.character(),
+                       "TRUE" = lubridate::as_date(end, tz = tz),
+                       "FALSE" = lubridate::as_datetime(end, tz = tz)
+                       ) - length
+    }
   }
 
   #calculate end time if NULL
@@ -233,7 +248,10 @@ filter_Date <- function(...,
 #' to be quoted with [quote()] or [rlang::expr()].
 #' @param filter_function The function to be used for filtering, either
 #' `filter_Datetime` (the default) or `filter_Date`
-#' @param ... Additional arguments passed to the filter function
+#' @param ... Additional arguments passed to the filter function. If the
+#' `length` argument is provided here instead of in `arguments`, it has to be
+#' written as a string, e.g., `length = "1 day"`, instead of `length =
+#' lubridate::days(1)`.
 #'
 #' @return A dataframe with the filtered data
 #' @export
@@ -245,7 +263,7 @@ filter_Date <- function(...,
 #' #compare the unfiltered dataset
 #' sample.data.environment %>% gg_overview(Id.colname = Id)
 #' #compare the filtered dataset
-#' sample.data.environment %>% 
+#' sample.data.environment %>%
 #'   filter_Datetime_multiple(arguments = arguments, filter_Date) %>%
 #'   gg_overview(Id.colname = Id)
 filter_Datetime_multiple <- function(dataset,
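To make the new `length_from_start` switch concrete, here is a minimal usage sketch against the package's bundled `sample.data.environment` dataset; the two-day length is an arbitrary illustration, not taken from this changeset:

```r
library(LightLogR)
library(lubridate)

# default behavior: keep the first two days,
# counted forward from the first observation
sample.data.environment %>%
  filter_Datetime(length = days(2))

# new behavior: keep the last two days,
# counted backwards from the final observation
sample.data.environment %>%
  filter_Datetime(length = days(2), length_from_start = FALSE)
```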
diff --git a/R/import_LL.R b/R/import_LL.R
index 25e07e2..a0b0d21 100644
--- a/R/import_LL.R
+++ b/R/import_LL.R
@@ -43,6 +43,7 @@
 #' If the `Id` column is already part of the `dataset` it will just use
 #' this column. If the column is not present it will add this column and fill
 #' it with the filename of the importfile (see param `auto.id`).
+#' `print_n` can be used if you want to see more rows of the observation intervals.
 #'
 #' @param ... Parameters that get handed down to the specific import functions
 #' @param device From what device do you want to import? For a few devices,
@@ -153,6 +154,7 @@ imports <- function(device,
                     auto.plot = TRUE,
                     locale = readr::default_locale(),
                     silent = FALSE,
+                    print_n = 10,
                     ...
                     = ),
   #function expression
@@ -237,7 +239,8 @@ imports <- function(device,
       Id.colname = Id, #the id column name
       dst_adjustment = dst_adjustment, #whether there is a dst adjustment
       filename = filename, #what the filename(s) is/are
-      na.count = na.count #how many NA values were dropped
+      na.count = na.count, #how many NA values were dropped
+      print_n = print_n #how many rows to print for observation intervals
     )
 
     #if autoplot is TRUE, make a plot
diff --git a/R/import_helper.r b/R/import_helper.r
index 184e757..10fc50f 100644
--- a/R/import_helper.r
+++ b/R/import_helper.r
@@ -6,7 +6,8 @@ import.info <- function(data,
                         dst_adjustment,
                         dst_info = TRUE,
                         filename,
-                        na.count) {
+                        na.count,
+                        print_n = 10) {
   #give info about the file
   min.time <- min(data$Datetime)
   max.time <- max(data$Datetime)
@@ -72,7 +73,8 @@ import.info <- function(data,
     "Timespan: " , diff(c(min.time, max.time)) %>% format(digits = 2), "\n\n",
     "Observation intervals: \n", sep = "")
-  utils::capture.output(interval.time)[c(-1,-2,-4)] %>% cat(sep = "\n")
+  utils::capture.output(interval.time %>% print(n=print_n))[c(-1,-2,-4)] %>%
+    cat(sep = "\n")
 }
 
 #This internal helper function looks for the starting row of an import file based on a vector of column names in order.
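A sketch of the new `print_n` option in use — the file path and time zone below are placeholders, not files from this changeset. By default, the import summary prints 10 rows of the observation-interval table; a larger `print_n` shows more:

```r
library(LightLogR)

# placeholder path; substitute your own ActLumus export
file <- "data/participant_205.txt"

# show up to 33 rows of observation intervals in the import summary
data <- import$ActLumus(file, tz = "Europe/Berlin", print_n = 33)
```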
diff --git a/inst/.DS_Store b/inst/.DS_Store
deleted file mode 100644
index 1c7e6b6..0000000
Binary files a/inst/.DS_Store and /dev/null differ
diff --git a/inst/extdata/.DS_Store b/inst/extdata/.DS_Store
deleted file mode 100644
index 5008ddf..0000000
Binary files a/inst/extdata/.DS_Store and /dev/null differ
diff --git a/man/.DS_Store b/man/.DS_Store
deleted file mode 100644
index 82c0810..0000000
Binary files a/man/.DS_Store and /dev/null differ
diff --git a/man/figures/.DS_Store b/man/figures/.DS_Store
deleted file mode 100644
index 00dc5b8..0000000
Binary files a/man/figures/.DS_Store and /dev/null differ
diff --git a/man/filter_Datetime.Rd b/man/filter_Datetime.Rd
index d0819ac..a9ce121 100644
--- a/man/filter_Datetime.Rd
+++ b/man/filter_Datetime.Rd
@@ -11,6 +11,7 @@ filter_Datetime(
   start = NULL,
   end = NULL,
   length = NULL,
+  length_from_start = TRUE,
   full.day = FALSE,
   tz = NULL,
   only_Id = NULL,
@@ -48,6 +49,11 @@ occasions, like leap years, or daylight savings. You can also provide a
 \code{character} scalar in the form of e.g. "1 day", which will be converted
 into a period.}
 
+\item{length_from_start}{A \code{logical} indicating whether the \code{length} argument
+should be applied to the start (default, TRUE) or the end of the data
+(FALSE). Only relevant if neither the \code{start} nor the \code{end} arguments are
+provided.}
+
 \item{full.day}{A \code{logical} indicating whether the \code{start} param should be
 rounded to a full day, when only the \code{length} argument is provided (Default
 is FALSE). This is useful, e.g., when the first observation in the dataset
diff --git a/man/filter_Datetime_multiple.Rd b/man/filter_Datetime_multiple.Rd
index 86e80a3..00f866c 100644
--- a/man/filter_Datetime_multiple.Rd
+++ b/man/filter_Datetime_multiple.Rd
@@ -22,7 +22,9 @@ to be quoted with \code{\link[=quote]{quote()}} or \code{\link[rlang:expr]{rlang::expr()}}.}
 
 \item{filter_function}{The function to be used for filtering, either
 \code{filter_Datetime} (the default) or \code{filter_Date}}
 
-\item{...}{Additional arguments passed to the filter function}
+\item{...}{Additional arguments passed to the filter function. If the
+\code{length} argument is provided here instead of in \code{arguments}, it has to be
+written as a string, e.g., \code{length = "1 day"}, instead of \code{length = lubridate::days(1)}.}
 }
 \value{
 A dataframe with the filtered data
@@ -41,7 +43,7 @@ arguments <- list(
 #compare the unfiltered dataset
 sample.data.environment \%>\% gg_overview(Id.colname = Id)
 #compare the filtered dataset
-sample.data.environment \%>\% 
+sample.data.environment \%>\%
   filter_Datetime_multiple(arguments = arguments, filter_Date) \%>\%
   gg_overview(Id.colname = Id)
 }
diff --git a/man/import_Dataset.Rd b/man/import_Dataset.Rd
index 204e75d..f3d4c5e 100644
--- a/man/import_Dataset.Rd
+++ b/man/import_Dataset.Rd
@@ -64,6 +64,7 @@ use of this arguments when importing more than one file}
 If the \code{Id} column is already part of the \code{dataset} it will just use
 this column. If the column is not present it will add this column and fill
 it with the filename of the importfile (see param \code{auto.id}).
+\code{print_n} can be used if you want to see more rows of the observation intervals.
 }
 }
 \section{Devices}{
diff --git a/vignettes/.DS_Store b/vignettes/.DS_Store
deleted file mode 100644
index 5008ddf..0000000
Binary files a/vignettes/.DS_Store and /dev/null differ
diff --git a/vignettes/articles/.DS_Store b/vignettes/articles/.DS_Store
deleted file mode 100644
index 752ff5d..0000000
Binary files a/vignettes/articles/.DS_Store and /dev/null differ
diff --git a/vignettes/articles/Day.Rmd b/vignettes/articles/Day.Rmd
index 70b3259..df57456 100644
--- a/vignettes/articles/Day.Rmd
+++ b/vignettes/articles/Day.Rmd
@@ -1,5 +1,5 @@
 ---
-title: "What's in a Day?"
+title: "The whole game"
 ---
 
 ```{r, include = FALSE}
diff --git a/vignettes/articles/Import.Rmd b/vignettes/articles/Import.Rmd
new file mode 100644
index 0000000..3c08a6a
--- /dev/null
+++ b/vignettes/articles/Import.Rmd
@@ -0,0 +1,142 @@
+---
+title: "Import & cleaning"
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+This article focuses on the import of data from multiple files and participants, as well as the cleaning of these data. We need the following packages:
+
+```{r setup, message = FALSE}
+library(LightLogR)
+library(tidyverse)
+library(gghighlight)
+```
+
+# Importing Data
+
+The first step in every analysis is data import. We will work with data collected as part of the Master's thesis *Insights into real-world human light exposure: relating self-report with eye-level light logging* by Carolina Guidolin (2023). The data is stored in 17 text files in the *data/* folder. You can access the data yourself through the [LightLogR GitHub repository](https://github.com/tscnlab/LightLogR/tree/main/vignettes/articles/data).
+
+```{r, files}
+path <- "data"
+files <- list.files(path, full.names = TRUE)
+#show how many files are listed
+length(files)
+```
+
+Next, we require the time zone of data collection. If you are uncertain which time zone names are valid, use the `OlsonNames()` function. Our data were collected in the "Europe/Berlin" time zone.
+
+```{r, tz}
+#first six time zones from OlsonNames()
+head(OlsonNames())
+
+#our time zone
+tz <- "Europe/Berlin"
+```
+
+Lastly, the participant Ids are stored in the file names. We will extract them and store them in a column called `Id`. The following code defines the pattern as a *regular expression*, which will extract the first three digits from the file name.
+
+```{r, Id pattern}
+pattern <- "^(\\d{3})"
+```
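As a quick check of what the pattern matches — applied here to a made-up file name (not one of the study files), `stringr::str_extract()` from the already-loaded `tidyverse` returns the leading three digits:

```r
#made-up file name, for illustration only
str_extract("201_sample_export.txt", pattern)
#> [1] "201"
```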
+
+Now we can import the data. Data were collected with the ActLumus device by Condor Instruments. The right way to specify the device is through the `import` function.
+
+```{r, import}
+data <- import$ActLumus(files, tz = tz, auto.id = pattern, print_n = 33)
+```
+
+# Data cleaning #1
+
+Before we can dive into the analysis part, we need to make sure we have a clean dataset. The import summary shows us two problems with the data:
+
+- Two files have data that cross daylight saving time (DST) changes. Because the ActLumus device does not adjust for DST, we need to correct for this.
+- Multiple Ids have single data points at the beginning of the dataset, with gaps before actual data collection starts. These are test measurements to check the equipment, but they must be removed from the dataset.
+
+Let us first deal with the DST change. LightLogR has a built-in function to correct for this during import. We will thus re-import the data, but make the import silent so as not to clutter the output.
+
+```{r, dst change}
+data <- 
+  import$ActLumus(files, tz = tz, auto.id = pattern, dst_adjustment = TRUE, 
+                  auto.plot = FALSE, silent = TRUE)
+```
+
+The second problem requires the filtering of certain Ids. The `filter_Datetime_multiple()` function is ideal for this. We can provide a length (1 week), starting from the end of data collection and counting backwards. The `arguments` parameter provides variable arguments to the filter function; they have to be provided in list form, and expressions have to be quoted through `quote()`. Fixed arguments, like `length` and `length_from_start`, are provided as named arguments and only have to be specified once, as they are the same for all Ids.
+
+```{r, start shift}
+data <- 
+  data %>% 
+  filter_Datetime_multiple(
+    arguments = list(
+      list(only_Id = quote(Id == 216)),
+      list(only_Id = quote(Id == 219)),
+      list(only_Id = quote(Id == 214)),
+      list(only_Id = quote(Id == 206))
+    ), length = "1 week", length_from_start = FALSE)
+
+```
+
+Let's have a look at the data again with the `gg_overview()` function.
+
+```{r, overview}
+data %>% gg_overview()
+```
+
+This looks much better now. Also, because there is no longer a hint about gaps in the lower right corner, we can be sure that all gaps have been removed. The function `gap_finder()` shows us, however, that there are still irregularities in the data, and the function `count_difftime()` reveals where they are.
+
+```{r, irregularities}
+data %>% gap_finder()
+data %>% count_difftime() %>% print(n=22)
+```
+
+This means we have to look at and take care of the irregularities for the Ids 215, 218, and 221.
+
+# Data cleaning #2
+
+Let us first visualize where the irregularities are. We can use `gg_days()` for that.
+
+```{r}
+#create two columns to show the irregularities and gaps for relevant ids
+difftimes <- 
+  data %>% 
+  filter(Id %in% c(215, 218, 221)) %>% 
+  mutate(difftime = difftime(lead(Datetime), Datetime, units = "secs"),
+         end = Datetime + seconds(difftime))
+
+#visualize where those points are
+difftimes %>% 
+  gg_days(geom = "point", 
+          x.axis.breaks = ~Datetime_breaks(.x, by = "2 days" )
+          ) +
+  geom_rect(data = difftimes %>% filter(difftime !=10),
+            aes(xmin = Datetime, xmax = end, ymin = -Inf, ymax = Inf), 
+            fill = "red", col = "red", linewidth = 0.2, alpha = 0.2) +
+  gghighlight(difftime != 10 | lag(difftime !=10))
+
+```
+
+All irregular data appear at the very beginning of the data collection. As we are interested in one whole week of data, we can similarly apply a one-week filter on these Ids and see if that removes the irregular data points.
+
+```{r}
+data <- 
+  data %>% 
+  filter_Datetime_multiple(
+    arguments = list(
+      list(only_Id = quote(Id == 215)),
+      list(only_Id = quote(Id == 218)),
+      list(only_Id = quote(Id == 221))
+    ), length = "1 week", length_from_start = FALSE)
+
+data %>% gap_finder()
+data %>% count_difftime() %>% print(n=17)
+```
+
+The data is now clean and we can proceed with the analysis. This dataset will be needed in other articles, so we will save it as an RDS file.
+
+```{r}
+# saveRDS(data, "cleaned_data/ll_data.rds")
+```
diff --git a/vignettes/articles/Metrics.Rmd b/vignettes/articles/Metrics.Rmd
new file mode 100644
index 0000000..b4f5709
--- /dev/null
+++ b/vignettes/articles/Metrics.Rmd
@@ -0,0 +1,300 @@
+---
+title: "Metrics"
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+This article focuses on two important aspects of light logger analysis: structuring data into relevant groups and calculating personal light exposure metrics for them. LightLogR contains a large set of over 60 metrics and sub-metrics across multiple functions, where each function constitutes a family of light exposure metrics. The following packages are needed for the analysis:
+
+```{r setup, message = FALSE}
+library(LightLogR)
+library(tidyverse)
+library(gt)
+library(gtsummary)
+```
+
+# Importing Data
+
+We will use the data already imported and cleaned in the article [Import & Cleaning](https://tscnlab.github.io/LightLogR/articles/Import.html).
+
+```{r, import}
+data <- readRDS("cleaned_data/ll_data.rds")
+```
+
+As can be seen by using `gg_overview()`, the dataset contains 17 Ids with one week's worth of data each, and one to three participants per week.
+
+```{r, overview}
+data %>% gg_overview()
+```
+
+# Metric principles
+
+There are a lot of metrics associated with personal light exposure. You can find the function reference to all of them in the appropriate [reference section](https://tscnlab.github.io/LightLogR/reference/index.html#metrics). There are a few distinctions between metrics that are important to understand:
+
+* Some metrics require or work best with a specific time frame, usually one day, while others are calculated over an arbitrary length of time. For example, the function `interdaily_stability()` calculates a metric over multiple days, while a function like `midpointCE()` calculates the midpoint of the cumulative light exposure within the given time series - this is less useful for multiple days, where the midpoint is just a time point during these days. E.g., for two similar light exposure patterns across two days, the midpoint of the cumulative light exposure across those two days will be around midnight, which is not particularly informative. Much more sensible is the midpoint of the light exposure for each day. To enable this, data has to be grouped within days (or other relevant time frames, like sleep/wake phases).
+
+* Some metrics are submetrics within a family and have to be actively chosen through the arguments of the function. An example is `duration_above_threshold()`, which, despite its name, also provides the metrics `duration below threshold` and `duration within threshold`. Depending on its `comparison` argument, and on whether one or two `threshold`s are provided, the function will calculate different metrics.
+
+* Some metric functions calculate multiple submetrics at once, like `bright_dark_period()`.
+As stated above, this type of function contains metrics accessible through a function argument, `period` in this case, which allows specifying whether the brightest or darkest period of the day is required. Independent of this, the function will calculate multiple submetrics at once: the `onset`, `midpoint`, and `offset` of the respective period, and also the `mean` light level during that period.
+
+We will cover the practical considerations that follow from these aspects in the following sections. Further, every function documentation explicitly states whether different metrics are accessible through parameters, and which metrics are calculated by default.
+
+# Metric calculation: basics
+
+All metric functions are by default agnostic to the type of data. They require vectors of light data and commonly also of datetimes. This means that the functions can be used outside of the LightLogR framework, if applied correctly. Let us try this with a simple example: a day's worth of light data for one participant, across two functions.
+
+```{r, Id 201}
+data_Id201 <- data %>% filter(Id == 201 & date(Datetime) == "2023-08-15")
+data_Id201 %>% gg_day()
+```
+
+## Time above threshold (TAT)
+
+The first example metric we will calculate is the *time above threshold* (or TAT) for a threshold of 250 lx mel EDI. TAT is calculated by the function `duration_above_threshold()`.
+
+```{r, TAT}
+duration_above_threshold(
+  Light.vector = data_Id201$MEDI,
+  Time.vector = data_Id201$Datetime,
+  threshold = 250
+)
+```
+
+Specifying the argument `comparison = "below"` will calculate the time below the threshold.
+
+```{r, TBT}
+duration_above_threshold(
+  Light.vector = data_Id201$MEDI,
+  Time.vector = data_Id201$Datetime,
+  threshold = 250,
+  comparison = "below"
+)
+```
+
+And specifying two thresholds will calculate the time within the thresholds.
+
+```{r, TWT}
+duration_above_threshold(
+  Light.vector = data_Id201$MEDI,
+  Time.vector = data_Id201$Datetime,
+  threshold = c(10,250)
+)
+```
+
+## Brightest 10 hours of the day (L10)
+
+The second example metric yields multiple submetrics at once. The function `bright_dark_period()` calculates the brightest and darkest periods of the day. By default, it calculates the brightest 10-hour period of the day. By setting `as.df = TRUE`, the function will return a data frame we can pipe to `gt()` for a nice output.
+
+```{r, L10}
+bright_dark_period(
+  Light.vector = data_Id201$MEDI,
+  Time.vector = data_Id201$Datetime,
+  as.df = TRUE
+) %>% gt()
+```
+
+### Looping
+
+Calculating the darkest period of the day is tricky, as it likely traverses midnight. In the following code we can see that the darkest 10-hour period of the day appears to begin at midnight and end at 10 am, which would be very coincidental. (Note that, commonly, the darkest 5-hour period is calculated. We deviate from this to make this point.)
+
+```{r, M10_wrong}
+M10_wrong <- 
+bright_dark_period(
+  Light.vector = data_Id201$MEDI,
+  Time.vector = data_Id201$Datetime,
+  as.df = TRUE,
+  period = "darkest",
+  timespan = "10 hours"
+)
+M10_wrong %>% gt()
+
+```
+
+We can also see that this makes little sense if we visualize this portion. The yellow color indicates the darkest 10-hour period of the day.
+
+```{r, M10_wrong_plot}
+Onset <- M10_wrong$darkest_10h_onset
+Offset <- M10_wrong$darkest_10h_offset
+
+data_Id201 %>% 
+  gg_day(aes_col = Datetime >= Onset & Datetime <= Offset) + 
+  guides(color = "none")
+
+```
+
+To solve this, `bright_dark_period()` and some other functions have the option to `loop` the day.
+
+```{r, M10}
+M10 <- 
+bright_dark_period(
+  Light.vector = data_Id201$MEDI,
+  Time.vector = data_Id201$Datetime,
+  as.df = TRUE,
+  period = "darkest",
+  timespan = "10 hours",
+  loop = TRUE
+)
+M10 %>% gt()
+```
+
+This is more plausible, and can also be visualized easily.
+
+```{r, M10_plot}
+Onset <- M10$darkest_10h_onset
+Offset <- M10$darkest_10h_offset
+
+data_Id201 %>% 
+  gg_day(aes_col = Datetime >= Onset | Datetime <= (Offset - days())) + 
+  guides(color = "none")
+```
+
+# Metric calculation: advanced
+
+More often than not, metrics are calculated for many participants over prolonged periods of time. In this case, the singular calculation shown above is inefficient. The `dplyr` functions `dplyr::summarize()` and `dplyr::reframe()` make this much easier.
+
+## Preparation
+
+As we only want to calculate metrics for days with full data, we will exclude Mondays from the data set.
+
+```{r}
+data <- data %>% dplyr::filter(weekdays(Datetime) != "Monday")
+```
+
+## Summarize
+
+The `dplyr::summarize()` function is used to calculate metrics for each group of data. In the following example, we will calculate interdaily stability (IS) for all participants in the data set, giving us the variability of the 24-hour light exposure patterns across the full 6 days of data compared to their average, ranging between 0 (Gaussian noise) and 1 (perfect stability). For brevity, only the first 6 Ids will be shown.
+
+```{r}
+data %>% 
+  summarize(
+    IS = interdaily_stability(
+      Light.vector = MEDI,
+      Datetime.vector = Datetime
+    )
+  ) %>% 
+  head() %>% gt()
+```
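For orientation, interdaily stability is commonly defined (following Van Someren and colleagues) as the ratio of the variance of the average 24-hour profile to the overall variance; the textbook formula is sketched below, and LightLogR's exact implementation (e.g., its binning of the time series) may differ in detail. Here $p$ is the number of time points per day (24 for hourly bins), $N$ the total number of data points, $\bar{x}_h$ the mean over all days at time-of-day $h$, and $\bar{x}$ the grand mean:

$$
\mathrm{IS} = \frac{N \sum_{h=1}^{p} \left(\bar{x}_h - \bar{x}\right)^2}{p \sum_{i=1}^{N} \left(x_i - \bar{x}\right)^2}
$$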
+
+## Grouping
+
+By default, data imported with `LightLogR` is grouped by `Id`, which represents individual participants. When using the `dplyr` family of functions, grouping is essential, as it specifies the subgroups of data for which the metrics are calculated. In the following example, we will calculate the TAT 250 lx mel EDI for all participants in the data set. We only show the first 6 participants, as it becomes readily apparent that time above threshold for 6 days might not be the most informative parametrization of the metric.
+
+```{r}
+data %>% 
+  summarize(
+    TAT_250 = duration_above_threshold(
+      Light.vector = MEDI,
+      Time.vector = Datetime,
+      threshold = 250
+    )
+  ) %>% head() %>% gt()
+```
+
+Instead, we can calculate the TAT 250 lx mel EDI for each participant **and day** of data. This is more informative, as it allows us to see how the metric changes over time. The final output is for the first two Ids.
+
+```{r}
+#create a new column in the data set with the weekday
+data$wDay <- wday(data$Datetime, label = TRUE, week_start = 1)
+
+#group the data and calculate the metrics
+TAT_250 <- 
+data %>% 
+  group_by(wDay, .add = TRUE) %>% 
+  summarize(
+    TAT_250 = duration_above_threshold(
+      Light.vector = MEDI,
+      Time.vector = Datetime,
+      threshold = 250
+    ), .groups = "drop_last"
+  )
+
+TAT_250 %>% head(12) %>% gt()
+
+```
+
+## Metric statistics
+
+With the dataframe `TAT_250`, we can easily calculate statistics for each participant. This can be done manually, e.g., with another call to `dplyr::summarize()`, or semi-automatically, e.g., with packages like `gtsummary`. In the following example, we will calculate the mean and standard deviation of the TAT 250 lx mel EDI for each participant, formatted as `HH:MM` through a styling function.
+
+```{r}
+#styling formula for time
+style_time <- function(x, format = "%H:%M"){
+  x %>% as.numeric() %>% hms::as_hms() %>% as.POSIXlt() %>% format(format)
+}
+
+#Table output
+TAT_250 %>% 
+  tbl_summary(by = Id, include = -wDay,
+              statistic = list(TAT_250 ~ "{mean} ({sd})"), 
+              digits = list(TAT_250 ~ style_time),
+              label = list(TAT_250 = "Time above 250 lx mel EDI")
+              )
+```
+
+# Metric calculation: batch
+
+In the final section, we will add more metrics to the analysis, including ones with multiple submetrics. Further, let us imagine we want to know how these metrics change from the first half of the experiment (August/September) to the second half (October/November). Finally, we will add a column `Time.data` to the data set, which will be used to calculate the metrics. This avoids tying the timing metrics to a particular date, which makes them straightforward to summarize across days.
+
+```{r}
+data <- data %>% 
+  mutate(
+    Month = case_when(month(Datetime) %in% 8:9 ~ "Aug/Sep",
+                      month(Datetime) %in% 10:11 ~ "Oct/Nov")
+    ) %>% 
+  create_Timedata()
+
+metrics <- 
+  data %>% 
+  group_by(Month, Id, wDay) %>% 
+  summarize(
+    MLIT250 = 
+      timing_above_threshold(MEDI, Time.data, threshold = 250, as.df = TRUE),
+    TAT250 = 
+      duration_above_threshold(MEDI, Time.data, threshold = 250, as.df = TRUE),
+    average_MEDI = 
+      mean(MEDI),
+    light_exposure = 
+      sum(MEDI)/360, #10-second epochs mean 360 epochs per hour; dividing by 360 gives the light exposure in lx·h
+    .groups = "drop_last"
+  ) %>% 
+  unnest(-Id)
+
+#first 6 rows
+metrics %>% head() %>% gt()
+
+```
+
+The operation above yields a dataframe with 6 metrics across 102 participant days (6 days for each of the 17 participants). The grouping by `Month` did not add additional groups, as each participant day falls solely into either the `"Aug/Sep"` or the `"Oct/Nov"` group. Next, we will regroup the data by `Month` and look at a summary table similar to the one above, but for more metrics.
+
+```{r}
+metrics <- metrics %>% group_by(Month) %>% select(-Id, -wDay)
+
+#Table output
+metrics %>% 
+  tbl_summary(by = Month,
+              statistic = list(all_continuous() ~ "{mean} (±{sd})"),
+              digits = list(
+                c(
+                  mean_timing_above_250, first_timing_above_250, 
+                  last_timing_above_250, duration_above_250
+                  ) ~ style_time),
+              label = list(
+                mean_timing_above_250 = 
+                  "mean timing above 250 lx mel EDI (HH:MM)",
+                first_timing_above_250 = 
+                  "first time above 250 lx mel EDI (HH:MM)",
+                last_timing_above_250 = 
+                  "last time above 250 lx mel EDI (HH:MM)",
+                duration_above_250 = "duration above 250 lx mel EDI (HH:MM)",
+                average_MEDI = "average mel EDI (lx)",
+                light_exposure = "light exposure (lx·h)"
+                )
+              )
+```
+
+And that is all you need to work with metrics in `LightLogR`. Be sure to look at the documentation for each function to understand the parameters and outputs, and at the [reference section](https://tscnlab.github.io/LightLogR/reference/index.html#metrics) to get an overview of all available metrics.
\ No newline at end of file
diff --git a/vignettes/articles/Visualizations.Rmd b/vignettes/articles/Visualizations.Rmd
new file mode 100644
index 0000000..fa6af98
--- /dev/null
+++ b/vignettes/articles/Visualizations.Rmd
@@ -0,0 +1,34 @@
+---
+title: "Visualizations"
+---
+
+```{r, include = FALSE}
+knitr::opts_chunk$set(
+  collapse = TRUE,
+  comment = "#>"
+)
+```
+
+This article focuses on various ways to visualize personal light exposure data with `LightLogR`. It is important to note that `LightLogR` uses the `ggplot2` package for visualizations. This means that all `ggplot2` functions can be used to further customize the plots.
+The following packages are needed for the analysis:
+
+```{r setup, message = FALSE}
+library(LightLogR)
+library(tidyverse)
+```
+
+# Importing Data
+
+We will use the data already imported and cleaned in the article [Import & Cleaning](https://tscnlab.github.io/LightLogR/articles/Import.html).
+
+```{r, import}
+data <- readRDS("cleaned_data/ll_data.rds")
+```
+
+# gg_overview()
+
+As can be seen by using `gg_overview()`, the dataset contains 17 Ids with one week's worth of data each, and one to three participants per week.
+
+```{r, overview}
+data %>% gg_overview()
+```
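Because `gg_overview()` (like the other `gg_` functions) returns a regular `ggplot` object, standard `ggplot2` layers can be added on top. A small sketch — the title and subtitle text are made up for illustration:

```r
library(LightLogR)
library(ggplot2)

# 'data' as loaded in the chunk above; the labels are illustrative only
data %>%
  gg_overview() +
  labs(title = "Data coverage by participant",
       subtitle = "17 participants, one week of data each")
```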
diff --git a/vignettes/articles/cleaned_data/ll_data.rds b/vignettes/articles/cleaned_data/ll_data.rds
new file mode 100644
index 0000000..4521ad9
Binary files /dev/null and b/vignettes/articles/cleaned_data/ll_data.rds differ
diff --git a/vignettes/articles/images/.DS_Store b/vignettes/articles/images/.DS_Store
deleted file mode 100644
index 5008ddf..0000000
Binary files a/vignettes/articles/images/.DS_Store and /dev/null differ
diff --git a/vignettes/articles/images/Day.png b/vignettes/articles/images/Day.png
index af7c796..d3a3451 100644
Binary files a/vignettes/articles/images/Day.png and b/vignettes/articles/images/Day.png differ