Added Documentation
JZauner committed Sep 27, 2023
1 parent 1b28a16 commit 59e0c1a
Showing 7 changed files with 81 additions and 40 deletions.
22 changes: 12 additions & 10 deletions R/import_LL.R
@@ -7,6 +7,10 @@
#' timezone of the data to `UTC`. It will also enforce an `id` to separate
#' different datasets and will order/arrange the dataset within each `id`.
#'
#' If the `Id` column is already part of the `dataset`, that column is used as
#' is. If the column is not present, it will be added and filled with the name
#' of the import file (see the `auto.id` parameter).
#'
#' @param filename Filename(s) for the Dataset. Can also contain the filepath,
#' but `path` must then be `NULL`. Expects a `character`. If the vector is
#'   longer than `1`, multiple files will be read into one tibble.
@@ -16,16 +20,14 @@
#' @param tz Timezone of the data. `"UTC"` is the default. Expects a
#' `character`. You can look up the supported timezones with [OlsonNames()].
#' @param ID.colname Lets you specify a column for the participant id. Expects a
#' symbol (Default is `Id`). If the column is already part of the `dataset` it
#' will just use this column. If the column is not present it will add this
#' column and fill it with the filename of the importfile (see param
#' `auto.id`). This column will be used for grouping ([dplyr::group_by()]).
#' @param auto.id If the `Id.colname` column is added to the `dataset`, the
#' `Id` can be automatically extracted from the filename. The argument expects
#' a regular expression [regex] and will by default just give the whole
#' filename without file extension.
#' @param manual.id If this argument is not `NULL`, and no `ID` column is part
#' of the `dataset`, this `character` scalar will be used.
#' symbol (Default is `Id`). This column will be used for grouping
#' ([dplyr::group_by()]).
#' @param auto.id If the `Id.colname` column is added to the `dataset`, the `Id`
#' can be automatically extracted from the filename. The argument expects a
#' regular expression [regex] and will by default just give the whole filename
#' without file extension.
#' @param manual.id If this argument is not `NULL`, and no `ID` column is part
#' of the `dataset`, this `character` scalar will be used.
#' **Don't use this argument if multiple files from different participants are used!**
#' @param ... Parameters that get handed down to the specific import functions
#' @param device From what device do you want to import? For every supported
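For orientation, a minimal usage sketch of the importer documented above — the file names, path, and regex below are made up for illustration and are not part of this commit:

```r
# Hypothetical example: import two device files into one tibble and derive the
# Id from the leading three digits of each file name via the auto.id regex.
files <- c("201_LightLog.csv", "202_LightLog.csv")  # made-up file names

dataset <- import.ActLumus(
  filename = files,
  path     = "data/",       # made-up path
  tz       = "UTC",
  auto.id  = "^(\\d{3})"    # take the first three digits of the file name as Id
)
```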
35 changes: 23 additions & 12 deletions R/import_States.R
@@ -8,18 +8,28 @@
# dec = ",")


#' Title
#' Import data that contain `Datetimes` of `Statechanges`
#'
#' Auxiliary data greatly enhances data analysis. This function allows the import of files that contain `Statechanges`, i.e., specific time points of when a `State` (like `sleep` or `wake`) starts.
#'
#' Data can be present in the long or wide format.
#' * In the `wide` format, multiple `Datetime` columns indicate the state through the column name. These get pivoted to the `long` format and can be recoded through the `State.encoding` argument.
#' * In the `long` format, one column indicates the `State`, while the other gives the `Datetime`.
#'
#' @param filePath
#' @param sep
#' @param dec
#' @param Datetime.format
#' @param tz
#' @param State.colnames
#' @param State.encoding
#' @inheritParams import.Dataset
#' @param sep String that separates columns in the import file. Defaults to `","`.
#' @param dec String that indicates a decimal separator in the import file. Defaults to `"."`.
#' @param structure String that specifies whether the import file is in the `long` or `wide` format. Defaults to `"wide"`.
#' @param Datetime.format String that specifies the format of the `Datetimes` in the file. The default `"ymdHMS"` specifies a format like "2023-07-10 10:00:00". In the function, [lubridate::parse_date_time()] does the actual conversion - the documentation can be searched for valid inputs.
#' @param State.colnames Column name or vector of column names (the latter only in the `wide` format). Expects a `character`.
#' * In the `wide` format, the column names indicate the `State` and must contain `Datetimes`. The columns will be pivoted to the columns specified in `Datetime.column` and `State.newname`.
#' * In the `long` format, the column contains the `State`.
#' @param State.encoding In the `wide` format, this enables recoding the column names to state names, if there are any differences. The default uses the `State.colnames` argument. Expects a `character` (vector) with the same length as `State.colnames`.
#' @param Datetime.column Symbol of the `Datetime` column (which is also the default).
#' * In the `wide` format, this is the newly created column from the `Datetimes` in the `State.colnames`.
#' * In the `long` format, this is the existing column that contains the `Datetimes`.
#' @param ID.colname
#' @param State.newname
#' @param State.valueName
#' @param ID.newname
#' @param keepAllColumns
#'
@@ -28,16 +38,17 @@
#'
#' @examples
#' #example
import.Statechanges <- function(filePath,
import.Statechanges <- function(filename, path,
sep = ",",
dec = ".",
structure = "wide",
Datetime.format = "ymdHMS",
tz = "UTC",
State.colnames, # a vector
State.encoding = State.colnames,
Datetime.column = Datetime,
ID.colname,
State.newname = State,
State.valueName = Datetime,
ID.newname = Id,
keepAllColumns = FALSE) {

@@ -100,5 +111,5 @@ import.Statechanges <- function(filePath,

import.Sleep <- function(filePath, State.newname = Sleep,
...) {
import.Statechanges(filePath = filePath, ..., State.newname = Sleep)
import.Statechanges(filePath = filePath, ..., State.newname = State.newname)
}
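As a complement to the wide-format default, here is a hedged sketch of a long-format call — the file name, path, and column names are invented, and `structure = "long"` is assumed to be the selector for that format:

```r
# Hypothetical example: a long-format file where one column already holds the
# state and the existing Datetime column holds the time of each state change.
states_long <- import.Statechanges(
  filename        = "states_long.csv",  # made-up file name
  path            = "data/",            # made-up path
  structure       = "long",             # assumed value for the long format
  Datetime.format = "ymdHMS",
  tz              = "UTC",
  State.colnames  = "condition",        # existing column that contains the State
  ID.colname      = Id
)
```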
Binary file removed man/figures/Day.png
Binary file not shown.
Binary file modified vignettes/Day.png
Binary file added vignettes/Day2.png
64 changes: 46 additions & 18 deletions vignettes/Styling.Rmd
@@ -65,7 +65,7 @@ dataset.LL <- import.ActLumus(file.LL, path, auto.id = "^(\\d{3})", tz = tz)

As you can see, the import is accompanied by a (hopefully) helpful message about the imported data. It contains the number of measurements, the timezone, the start and end date, the timespan, and all observation intervals. In this case, the measurements all follow a *10 second* epoch.

Because we have no missing values that we would have to deal with first, this dataset is already good to go. If you, e.g., want to know the median value of melanopic EDI (a measure of stimulus strength for the nonvisual system) for every day in the dataset, you can do that:
Because we have no missing values that we would have to deal with first, this dataset is already good to go. If you, e.g., want to know the range of melanopic EDI (a measure of stimulus strength for the nonvisual system) for every day in the dataset, you can do that:

```{r}
dataset.LL %>% group_by(Date = as_date(Datetime)) %>%
@@ -94,7 +94,7 @@ Here you can see that we follow roughly the same time span, but the measurement

### Participant Sleep Data

Our last dataset is a sleep diary that contains, among other things, a column for `Id` and a column for *sleep* and for *wake* (called *offset*). Because sleep diaries and other event datasets can vary widely in their structure, we must manually set a few arguments. Importantly, we need to specify how the Datetimes are structured. In this case, we have values like *2023-08-28 23:20*, which give a structure of `dmyHM`.
Our last dataset is a sleep diary that contains, among other things, a column for `Id` and a column for *sleep* and for *wake* (called *offset*). Because sleep diaries and other event datasets can vary widely in their structure, we must manually set a few arguments. Importantly, we need to specify how the Datetimes are structured. In this case, we have values like *28-08-2023 23:20*, which give a structure of `dmyHM`.

What we need after import is a coherent table that contains a column with a `Datetime` alongside a column with the `State` that starts at that point in time. `import.Statechanges` facilitates this, because we can provide a `vector` of column names that form a continuous indicator of a given state - in this case `Sleep`.
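To make that concrete, here is an illustrative sketch of how such a wide-format sleep diary might be imported. It is not the chunk used in the vignette (which is not shown in this diff); the file name and the encodings are assumptions:

```r
# Illustrative sketch only: a wide sleep diary with one Datetime column per state.
dataset.sleep <- import.Statechanges(
  filename        = "sleep_diary.csv",     # made-up file name
  path            = path,
  Datetime.format = "dmyHM",               # matches values like 28-08-2023 23:20
  tz              = tz,
  State.colnames  = c("sleep", "offset"),  # the two Datetime columns
  State.encoding  = c("sleep", "wake"),    # recode "offset" to "wake"
  ID.colname      = Id
)
```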

Expand Down Expand Up @@ -309,9 +309,10 @@ dataset.LL.partial %>%
across(where(is.character), \(x) names(which.max(table(x)))), #choose the dominant string
across(where(is.logical), \(x) sum(x) > (length(x) / 2)), # average a binary outcome
.groups = "drop_last") %>% #remove the rounded Datetime group
mutate(Reference.check = Brown.check(MEDI, State.Brown)) %>% #recalculate conformity to Brown recs.
rename(Datetime = Datetime.rounded) #remove the rounded Datetime column
}
rename(Datetime = Datetime.rounded) %>% #remove the rounded Datetime column
select(-Reference.Brown) %>% #remove the rounded
Brown2reference(Brown.rec.colname = Reference.Brown) #recalculate the brown times
}
```

### Data aggregation {.tabset .tabset-pills}
@@ -482,13 +483,15 @@ This plot requires a bit of preparation, but it focuses nicely on the unrealized
**Conclusion:** While the `second plot` option is nice, it focuses on one aspect - the missed or unrealized potential. The `geom_ribbon` variant still includes this information, but is more general, which is exactly what we want here.

```{r, warning=FALSE}
Day.end <- as_datetime("2023-09-01 23:59:59", tz = tz)
Plot <-
dataset.LL.aggregate("5 mins") %>%
dataset.LL.aggregate("5 mins") %>% filter_Datetime(end = Day.end) %>%
gg_day(facetting = FALSE, geom = "blank", y.axis.breaks = c(10^(0:5), 250)) + #base plot
geom_ribbon(aes(ymin = 0, ymax = MEDI), alpha = 0.25, fill = "#EFC000",
geom_ribbon(aes(ymin = MEDI, ymax=Reference),
alpha = 0.25, fill = "#0073C2FF",
outline.type = "upper", col = "#0073C2FF", size = 0.15) + #solar reference
geom_ribbon(aes(ymin = 0, ymax = MEDI), alpha = 0.30, fill = "#EFC000",
outline.type = "upper", col = "#EFC000", size = 0.4) + #ribbon geom
geom_ribbon(aes(ymin = MEDI, ymax=Reference),
alpha = 0.25, fill = "#0073C2FF") + #solar reference
scale.correction
```

@@ -568,23 +571,48 @@ This approach uses a conditional coloration of points, depending on whether or n
Plot <-
Plot +
geom_point(aes(col = Reference.check), size = 0.5)+
geom_line(aes(y=Reference.Brown), lty = 2, size = 0.4, col = "grey60") + #Brown reference
geom_line(aes(y=Reference.Brown,
# group = consecutive_id(State.Brown)
),
col = "grey40",
lty = 2, size = 0.4) + #Brown reference
scale_color_manual(values = c("grey50", "#EFC000"))+
guides(color = "none")
```

### Final Touches

Our figure needs some final touches before we can use it, namely labels. We disabled automatic guides, and they would not provide nice legends. Instead we will solve this trough `annotations`.
Our figure needs some final touches before we can use it, namely labels. Automatic guides and labels work well when we use color palettes, but here we mostly specified the coloring ourselves, so we disabled automatic guides. Instead, we will solve this through `annotations`.

```{r, fig.retina=2, fig.width=7, fig.height = 4, warning=FALSE}
x <- 900
Plot +
annotate("rect", fill = "white", xmin = 0, xmax = 7.5*60*60, ymin = 2500, ymax = 60000)+
annotate("text", x=x, y = 1.5, label = "Brown et al. (2022)", hjust = 0, col = "grey25")+
annotate("text", x=x, y = 40000, label = "- Exposure within", hjust = 0, col = "#EFC000")+
annotate("text", x=x, y = 19500, label = " recommended levels or", hjust = 0, col = "black")+
annotate("text", x=x, y = 10000, label = " outside", hjust = 0, col = "grey50")+
annotate("text", x=x, y = 4000, label = "- Daylight Potential", hjust = 0, col = "#0073C2DD")
Brown.times <-
Brown.times %>%
mutate(xmean = (xmax - xmin)/2 + xmin,
label.Brown = case_match(State.Brown,
"night" ~ "sleep",
"evening" ~ "pre-bed",
.default = State.Brown))
Plot +
geom_vline(data = Brown.times[-1,],
aes(xintercept = xmin), lty = 2, col = "grey40", size = 0.4) + #adding vertical lines
geom_label(data = Brown.times[-4,],
aes(x = xmean, y = 0.2, label = label.Brown),
col = "grey40", alpha = 0.75) + #adding labels
annotate("rect", fill = "white", xmin = 0, xmax = 7.5*60*60,
ymin = 2500, ymax = 60000)+
annotate("text", x=x, y = 1.7, label = "Brown et al. (2022)",
hjust = 0, col = "grey25")+
annotate("text", x=x, y = 40000, label = "- Exposure within",
hjust = 0, col = "#EFC000")+
annotate("text", x=x, y = 19500, label = " recommended levels or",
hjust = 0, col = "black")+
annotate("text", x=x, y = 10000, label = " outside",
hjust = 0, col = "grey50")+
annotate("text", x=x, y = 4000, label = "- Daylight Potential",
hjust = 0, col = "#0073C2DD")
ggsave("Day2.png", dpi = 600, height = 4, width = 7)
```
Binary file removed vignettes/images/Day.png
Binary file not shown.
