Merge pull request #21 from tscnlab/dev_general

tscnlab · Jun 20, 2024 · 62689a4 · 62689a4
2 parents c8282c4 + dfafb6d
commit 62689a4
Show file tree

Hide file tree

Showing 18 changed files with 233 additions and 16 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -58,7 +58,9 @@ Depends:
 LazyData: true
 Suggests: 
     covr,
+    gghighlight,
     gt,
+    gtsummary,
     knitr,
     patchwork,
     rmarkdown,

diff --git a/R/aggregate_Date.R b/R/aggregate_Date.R
@@ -77,7 +77,7 @@ aggregate_Date <- function(dataset,
                            date.handler = stats::median,
                            numeric.handler = 
                                  mean,
-                               character.handler = 
+                           character.handler = 
                                  \(x) names(which.max(table(x, useNA = "ifany"))),
                                logical.handler = 
                                  \(x) mean(x) >= 0.5,
@@ -117,7 +117,7 @@ aggregate_Date <- function(dataset,
     dataset %>% 
     create_Timedata(Datetime.colname = !!Datetime.colname.defused) %>% 
     dplyr::mutate(Date.data = lubridate::date(!!Datetime.colname.defused),
-                  Date.data = (!!date.handler)(unique(Date.data))) #set the date according to the date handler
+                  Date.data = (!!date.handler)(Date.data)) #set the date according to the date handler
 
   #group by Time.data
   dataset <- 

diff --git a/R/gg_day.r b/R/gg_day.r
@@ -238,7 +238,9 @@ gg_day <- function(dataset,
     # Scales --------------------------------------------------------------
     jco_color_scheme+
     ggplot2::scale_x_time(breaks = x.axis.breaks, 
-                          labels = scales::label_time(format = "%H:%M")) + 
+                          labels = scales::label_time(format = "%H:%M"),
+                          expand = c(0,0),
+                          limits = c(0,24*3600)) + 
     ggplot2::scale_y_continuous(
         trans = y.scale,
         breaks = y.axis.breaks,

diff --git a/R/gg_doubleplot.R b/R/gg_doubleplot.R
@@ -58,7 +58,7 @@ gg_doubleplot <- function(dataset,
                           type = c("auto", "repeat", "next"),
                           geom = "ribbon",
                           alpha = 0.5,
-                          col = "#EFC000FF",
+                          col = "grey40",
                           fill = "#EFC000FF",
                           linewidth = 0.4,
                           x.axis.breaks.next = Datetime_breaks,
@@ -120,7 +120,10 @@ gg_doubleplot <- function(dataset,
     linewidth = linewidth, 
     x.axis.breaks = x.axis.breaks,
     x.axis.format = x.axis.format,
-    x.axis.limits = \(x) Datetime_limits(x, length = lubridate::ddays(1), doubleplot = TRUE),
+    x.axis.limits = 
+      \(x) Datetime_limits(
+        x, length = lubridate::ddays(1), midnight.rollover = TRUE
+        ),
     ...
   ))
 }
diff --git a/R/visualize_helper.R b/R/visualize_helper.R
@@ -45,7 +45,7 @@ Datetime_breaks <- function(x,
 #'   desired length to get the correct axis-scaling if you start at midnight.
 #' @param unit a `character` scalar giving the unit of rounding in
 #'   [lubridate::floor_date()] and [lubridate::ceiling_date()]
-#' @param doubleplot a `logical` scalar indicating if used in [gg_doubleplot()]
+#' @param midnight.rollover a `logical` scalar indicating whether to roll over in cases of exact matches of rounded values and input values. Helpful if some cases fall exactly on the rounded values and others don't.
 #' @param ... other arguments passed to [lubridate::floor_date()] and
 #'   [lubridate::ceiling_date()]
 #'
@@ -63,7 +63,7 @@ Datetime_limits <- function(x,
                             start = NULL,
                             length = NULL,
                             unit = "1 day",
-                            doubleplot = FALSE,
+                            midnight.rollover = FALSE,
                             ...) {
 
   min_date <- x %>% lubridate::as_datetime() %>% min()
@@ -79,7 +79,7 @@ Datetime_limits <- function(x,
       y %>% lubridate::as_datetime()
     }
 
-  if(doubleplot) {
+  if(midnight.rollover) {
   if(identical(max_date, lubridate::ceiling_date(max_date, unit = unit, ...))) {
     max_date <- max_date + lubridate::duration(unit)
   }

diff --git a/README.Rmd b/README.Rmd
@@ -20,7 +20,7 @@ knitr::opts_chunk$set(
 [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.11562600.svg)](https://doi.org/10.5281/zenodo.11562600)
 <!-- badges: end -->
 
-Personalized luminous exposure data is progressively gaining importance in various sectors, including research, occupational affairs, and fitness tracking. Data are collected through a proliferating selection of wearable loggers and dosimeters, varying in size, shape, functionality, and output format. Despite or maybe because of numerous use cases, the field lacks a unified framework for collecting, validating, and analyzing the accumulated data. This issue increases the time and expertise necessary to handle such data and also compromises the FAIRness (Findability, Accessibility, Interoperability, Reusability) of the results, especially in meta-analyses.
+Personalized luminous exposure data is progressively gaining importance in various sectors, including research, occupational affairs, and fitness tracking. Data are collected through a proliferating selection of wearable loggers and dosimeters, varying in size, shape, functionality, and output format. Despite or maybe because of numerous use cases, the field lacks a unified framework for collecting, validating, and analyzing the accumulated data. This issue increases the time and expertise necessary to handle such data and also compromises the FAIRness (Findability, Accessibility, Interoperability, Reusability) of the results, especially in meta-analyses. 
 
 ::: {style="text-align:center"}
 ![Light logger data can powerfully convey insights into personal light exposure](man/figures/Day.png){width="90%"}

diff --git a/README.md b/README.md
@@ -10,7 +10,7 @@
 <!-- badges: end -->
 
 Personalized luminous exposure data is progressively gaining importance
-in various sectors, including research, occupational health, and
+in various sectors, including research, occupational affairs, and
 fitness tracking. Data are collected through a proliferating selection
 of wearable loggers and dosimeters, varying in size, shape,
 functionality, and output format. Despite or maybe because of numerous

diff --git a/man/Datetime_limits.Rd b/man/Datetime_limits.Rd
diff --git a/man/bright_dark_period.Rd b/man/bright_dark_period.Rd
diff --git a/man/figures/README-unnamed-chunk-3-2.png b/man/figures/README-unnamed-chunk-3-2.png
diff --git a/man/figures/README-unnamed-chunk-4-1.png b/man/figures/README-unnamed-chunk-4-1.png
diff --git a/man/figures/README-unnamed-chunk-5-1.png b/man/figures/README-unnamed-chunk-5-1.png
diff --git a/man/figures/README-unnamed-chunk-6-1.png b/man/figures/README-unnamed-chunk-6-1.png
diff --git a/man/figures/README-unnamed-chunk-7-1.png b/man/figures/README-unnamed-chunk-7-1.png
diff --git a/man/figures/README-unnamed-chunk-8-1.png b/man/figures/README-unnamed-chunk-8-1.png
diff --git a/man/gg_doubleplot.Rd b/man/gg_doubleplot.Rd
diff --git a/vignettes/articles/Metrics.Rmd b/vignettes/articles/Metrics.Rmd
@@ -149,7 +149,7 @@ Onset <- M10$darkest_10h_onset
 Offset <- M10$darkest_10h_offset
 
 data_Id201 %>% 
-  gg_day(aes_col = Datetime >= Onset | Datetime <= (Offset -days())) +
+  gg_day(aes_col = Datetime >= Onset | Datetime <= Offset) +
   guides(color = "none")
 ```
 

diff --git a/vignettes/articles/Visualizations.Rmd b/vignettes/articles/Visualizations.Rmd
@@ -27,8 +27,218 @@ data <- readRDS("cleaned_data/ll_data.rds")
 
 # gg_overview()
 
-As can be seen by using `gg_overview()`, the dataset contains 17 ids with one weeks worth of data each, and one to three participants per week.
+`gg_overview()` provides a glance at *when* data is available for each Id. Let's call it on our dataset.
 
 ```{r, overview}
 data %>% gg_overview()
 ```
+
+As can be seen, the dataset contains 17 ids with one week's worth of data each, and one to three participants per week. `gg_overview()` will by default test whether there are gaps in the data and will show them as grey bars, as well as a message in the lower right corner. Let us force this behavior in our dataset by removing two days.
+
+```{r}
+data %>%
+  filter(!(date(Datetime) %in% c("2023-08-16", "2023-08-17"))) %>% 
+  gg_overview()
+```
+
+Calculating gaps in the data can be computationally expensive for large datasets with small epochs. If you just require an overview of the data without being concerned about gaps, you can provide an empty `tibble::tibble()` to the `gap.data` argument. This will skip the gap calculation and speed up the graph generation.
+
+```{r}
+data %>%
+  filter(!(date(Datetime) %in% c("2023-08-16", "2023-08-17"))) %>% 
+  gg_overview(gap.data = tibble())
+```
+
+*Hint: `gg_overview()` is automatically called by import functions in `LightLogR`, unless the argument `auto.plot = FALSE` is set. If your import is slow, this can also help in speeding up the process.*
+
+# gg_day()
+
+## Basics
+`gg_day()` compares days within a dataset. By default it will use the `date`. Let's call it on a subset of our data. To distinguish between different Ids, we can set the `aes_col` argument to `Id`.
+
+```{r, fig.width=7, fig.height = 10}
+data %>% 
+  filter(Id %in% c(205, 206)) %>% 
+  gg_day(aes_col = Id, size = 0.5)
+```
+
+## Facetting
+
+Note that each day is represented by its own facet, which is named after the date. We can give each Id its own facet by using the `ggplot2::facet_grid()` function. The `Day.data` column is produced by `gg_day()` and contains the structure of the daily facets. It has to be passed to the faceting function (here `facet_grid()`) to ensure that the facets are shown correctly. We also reduce the breaks on the x-axis to avoid overlap at 00:00.
+
+```{r, fig.width=7, fig.height = 10}
+data %>% 
+  filter(Id %in% c(205, 206)) %>% 
+  gg_day(aes_col = Id, size = 0.5, 
+         x.axis.breaks = hms::hms(hours = c(0, 6, 12, 18))) + 
+  guides(color = "none") +
+  facet_grid(rows = vars(Day.data), cols = vars(Id), switch = "y")
+```
+
+## Date-grouping
+
+Showing the days by date is the default behavior of `gg_day()`. It can also be grouped by any other formatting of `base::strptime()`. Using `format.day = "%A"` in the function call will group all output by the weekday. Putting so many participants in each facet makes the plot unreadable, but it demonstrates how `gg_day()` can be configured to combine observations from different dates. We have to provide a different color scale compared to the default one, as the default has only 10 colors compared to the 17 we need here.
+
+```{r, fig.width=7, fig.height = 10}
+data %>% 
+  gg_day(aes_col = Id, size = 0.5, format.day = "%A") + scale_color_viridis_d()
+```
+
+## Customizing geoms and miscellanea
+
+`gg_day()` uses `geom_point()` by default. This can be changed by providing a different `geom` to the function. Here we use `geom_line()` to connect the points. To make this more readable, let us first create a simpler version of the above dataset by filtering and aggregating.
+
+```{r}
+data_subset <- 
+  data %>% 
+  filter(Id %in% c(205, 206)) %>% #choosing 2 ids
+  aggregate_Datetime(unit = "15 mins") %>% #aggregating to 15 min intervals
+  filter_Datetime(length = "3 days", full.day = TRUE) #restricting their length to 3 days
+
+data_subset %>%  gg_day(aes_col = Id)
+```
+
+Now we can use a different geom.
+```{r}
+data_subset %>%  gg_day(aes_col = Id, geom = "line")
+```
+
+Also a ribbon is possible.
+```{r}
+data_subset %>%  
+  gg_day(aes_col = Id, aes_fill = Id, geom = "ribbon", alpha = 0.5)
+```
+
+# gg_days()
+
+This is the companion function to `gg_day()`. Instead of using individual days, it will create a timeline of days across all Ids.
+
+```{r, fig.width=10}
+data_subset2 <- 
+data %>% 
+  filter(Id %in% c(205, 216, 219)) %>% #choosing 3 ids
+  aggregate_Datetime(unit = "15 mins") #aggregating to 15 min intervals
+
+data_subset2 %>%  gg_days()
+```
+
+By default, `gg_days()` will always plot full days. Let us strip one participant of data for one day.
+
+```{r, fig.width=10}
+data_subset3 <- 
+data_subset2 %>% 
+  filter(!(Id == 205 & date(Datetime) == "2023-08-28"))
+
+data_subset3 %>%  gg_days()
+```
+
+You can see the plots are misaligned in their facets. We can correct for that by providing an exact number of days to the `x.axis.limits` argument. `Datetime_limits()` is a helper function from `LightLogR` and the documentation reveals more about its arguments.
+
+```{r, fig.width=10}
+data_subset3 %>% 
+  gg_days(
+    x.axis.limits = 
+      \(x) Datetime_limits(x, length = ddays(7), midnight.rollover = TRUE)
+    )
+
+```
+
+`gg_days()` has all of the customization options of `gg_day()`. Here we will customize the plot for a ribbon, different naming and breaks on the datetime axis.
+
+```{r, fig.width=10}
+data_subset3 %>% 
+  gg_days(
+    geom = "ribbon", aes_col = Id, aes_fill = Id, alpha = 0.5, jco_color = TRUE,
+    x.axis.limits = 
+      \(x) Datetime_limits(x, length = ddays(7), midnight.rollover = TRUE),
+    x.axis.breaks = 
+      \(x) Datetime_breaks(x, by = "6 hours", shift = 0),
+    x.axis.format = "%H",
+    ) +
+  guides(color = "none", fill = "none")
+
+```
+
+# gg_doubleplot()
+
+`gg_doubleplot()` repeats days within a plot, either horizontally or vertically. Doubleplots are generally useful to visualize patterns that center around midnight (horizontally) or that deviate from 24-hour rhythms (vertically). 
+
+## Preparation
+
+We will use a subset from the data used in `gg_day()` above: two Ids, aggregated to 15-minute intervals, and restricted to three days. Because the first day is only partly present for both Ids, we will use the `gap_handler()` function to fill in the implicitly missing data with NA. If we ignore this step, the doubleplot will be incorrect, as it will connect the last point of the first day (around midnight) with the first point of the second day (sometime before noon), which is incorrect and also looks bad.
+
+```{r}
+data_subset <- data_subset %>% gap_handler(full.days = TRUE)
+```
+
+## Horizontal doubleplot
+
+The horizontal doubleplot is activated by default, if only one day is present within all provided groups, or it can be set explicitly by `type = "repeat"`. 
+
+```{r, fig.height=8}
+ data_subset %>% 
+   gg_doubleplot(aes_fill = Id, jco_color = TRUE, type = "repeat")
+#identical:
+# data_subset %>% group_by(Date = date(Datetime), .add = TRUE) %>%
+ # gg_doubleplot(aes_fill = Id, jco_color = TRUE)
+```
+
+Each plot line thus is the same day, plotted twice.
+
+## Vertical doubleplot
+
+The vertical doubleplot is activated by default if any group has more than one day. It can be set explicitly by `type = "next"`. 
+
+```{r,fig.height=8}
+data_subset %>% 
+  gg_doubleplot(aes_fill = Id, jco_color = TRUE)
+#identical:
+# data_subset %>% 
+#  gg_doubleplot(aes_fill = Id, jco_color = TRUE, type = "next")
+```
+Note that the second day in each row is the first day of the next row. This allows visualizing non-24-hour rhythms, such as when entrainment is lost due to pathologies or experimental conditions. Note that the x-axis labels change automatically depending on whether the doubleplot has type `"next"` or `"repeat"`.
+
+In both cases (horizontally and vertically) it is easy to condense the plots to a single line per day, by ungrouping the data structure (this only makes sense if the datetimes are identical):
+
+```{r, fig.height=5}
+data_subset %>% ungroup() %>% 
+  gg_doubleplot(aes_fill = Id, jco_color = TRUE)
+```
+
+## Aggregated doubleplot
+
+Independent of `gg_doubleplot()`, but great in concert with it is `aggregate_Date()`, which allows aggregating groups of data to a single day each. This way, one can easily calculate the average day of a participant or a group of participants and then perform a doubleplot (by default with `type = "next"`).
+
+Let us first group our data by whether participants were in the first or last two months of the experiment.
+
+```{r}
+data_two_groups <- data %>% 
+  mutate(
+    Month = case_when(month(Datetime) %in% 8:9 ~ "Aug/Sep",
+                      month(Datetime) %in% 10:11 ~ "Oct/Nov")
+  ) %>% group_by(Month)
+```
+
+Now we can aggregate the data to a single day per group and make a doubleplot from it.
+
+```{r}
+data_two_groups %>% 
+  aggregate_Date(unit = "15 mins") %>% 
+  gg_doubleplot(aes_fill = Month, jco_color = TRUE) +
+  guides(fill = "none")
+
+```
+
+With `aggregate_Date()` we condensed a large dataset with 10-second intervals to a single day for two groups with a 15-minute interval. The day that is assigned by default is the median measurement day of the group; it is shown as part of the strip label on the left.
+
+# Interactivity
+
+All plotting functions have the inbuilt option to be displayed interactively. This is great for exploring the data. The `plotly` package is used for this. All `LightLogR` plotting functions have the `interactive` argument set to `FALSE` by default. Setting it to `TRUE` will create an interactive plot. 
+
+```{r}
+data_subset %>%  
+  gg_day(aes_col = Id, geom = "line",
+         interactive = TRUE
+         )
+```
+