From be4cd48c07b7c0ce41d85763cda8ce41754f3d97 Mon Sep 17 00:00:00 2001 From: Johannes Zauner <112665672+JZauner@users.noreply.github.com> Date: Wed, 9 Oct 2024 13:51:24 +0200 Subject: [PATCH] Increment version number to 0.4.2 --- DESCRIPTION | 3 ++- NEWS.md | 2 ++ R/import_LL.R | 18 ++++++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 37dbd85..e589d28 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: LightLogR Title: Process Data from Wearable Light Loggers and Optical Radiation Dosimeters -Version: 0.4.1 +Version: 0.4.2 Authors@R: c( person("Johannes", "Zauner", email = "johannes.zauner@tum.de", role = c("aut", "cre"), @@ -32,6 +32,7 @@ Imports: ggsci, ggtext, hms, + janitor, lubridate, magrittr, pkgload, diff --git a/NEWS.md b/NEWS.md index 76f5f95..79399b6 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,5 @@ +# LightLogR 0.4.2 + # LightLogR 0.4.1 * added support for OcuWEAR devices diff --git a/R/import_LL.R b/R/import_LL.R index 790bf1b..dd9c18b 100644 --- a/R/import_LL.R +++ b/R/import_LL.R @@ -44,6 +44,7 @@ #' column. If the column is not present it will add this column and fill it #' with the filename of the importfile (see param `auto.id`). #' * `print_n` can be used if you want to see more rows from the observation intervals +#' * `remove_duplicates` can be used if identical observations are present within or across multiple files. The default is `FALSE`. The function keeps only unique observations (=rows) if set to `TRUE`. This is a convenience implementation of [dplyr::distinct()]. #' #' @param ... Parameters that get handed down to the specific import functions #' @param device From what device do you want to import? For a few devices, @@ -282,6 +283,7 @@ imports <- function(device, locale = readr::default_locale(), silent = FALSE, print_n = 10, + remove_duplicates = FALSE, ... 
= ), #function expression @@ -353,6 +355,22 @@ imports <- function(device, ) } + #if there are duplicate rows, remove them and print an info message + duplicates <- suppressMessages(janitor::get_dupes(data, -file.name) %>% nrow()) + orig_rows <- data %>% nrow() + + if(duplicates > 0 & remove_duplicates) { + data <- data %>% dplyr::distinct(dplyr::pick(-file.name),.keep_all = TRUE) + cat(paste0(format(orig_rows - nrow(data), big.mark = "'"), " duplicate rows were removed during import.\n")) + } + + #if there are untreated duplicate rows, give a warning + if(duplicates > 0 & !remove_duplicates) { + messages <- paste0(format(duplicates, big.mark = "'"), " rows in your dataset(s) are identical to at least one other row. This causes problems during analysis. Please set `remove_duplicates = TRUE` during import. \nIf you still want to import the data as is and it failed with an error, try setting `auto.plot = FALSE`. You may want to do this to find out which entries are duplicates. Use `{replace_with_data_object} %>% janitor::get_dupes(-file.name) on your imported dataset.\n") + cat(messages) + warning(messages) + } + #if dst_adjustment is TRUE, adjust the datetime column if(dst_adjustment) { data <- data %>% dst_change_handler(filename.colname = file.name)