From efaa09832b4a0556bfb85dcfef37af3e397bb6de Mon Sep 17 00:00:00 2001 From: Johannes Zauner <112665672+JZauner@users.noreply.github.com> Date: Wed, 9 Oct 2024 14:00:51 +0200 Subject: [PATCH] import update to deal with identical observations --- NEWS.md | 2 ++ R/aaa.r | 2 +- R/import_LL.R | 4 ++-- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/NEWS.md b/NEWS.md index 79399b6..5fe2546 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,7 @@ # LightLogR 0.4.2 +* `import` functions will now give a warning message about identical observations in the provided data files, stop the import process and return a tibble with the duplicate rows. Through the `remove_duplicates` parameter, the user can decide to automatically remove these duplicates during import. **Note: identical observations refer to identical rows when disregarding the filename.** + # LightLogR 0.4.1 * added support for OcuWEAR devices diff --git a/R/aaa.r b/R/aaa.r index 31a27f1..73dc65d 100644 --- a/R/aaa.r +++ b/R/aaa.r @@ -1,4 +1,4 @@ -Time <- mEDI <- Time.data <- Datetime <- timestamp <- tz <- Day.data <- `DATE/TIME` <- n <- Datetime.rounded <- id <- sleep.colname.string <- file.name <- Interval <- original.datapoints.fleeting <- MEDI <- State.Brown <- Reference <- Reference.check <- Id <- Start.date.shift <- data <- Shift <- `MELANOPIC EDI` <- State <- group <- End <- Start <- Quant.x <- Quant.y <- is.implicit <- group.indices <- Id2 <- gap.id <- start <- end <- path <- auto.id <- n_max <- manual.id <- silent <- Light <- Day <- N <- is_missing <- Hour <- .change <- dst_start <- .dst <- .dst2 <- dst_adjustment <- auto.plot <- group.1 <- group.2 <- group.indices2 <- cluster_start <- cluster_end <- row_idx <- is_cluster <- cluster_idx <- is_pulse <- pulse_idx <- light <- time <- level <- duration <- mean_duration <- onset <- midpoint <- offset <- mean_onset <- mean_midpoint <- mean_offset <- Date.data <- print_n <- NULL +Time <- mEDI <- Time.data <- Datetime <- timestamp <- tz <- Day.data <- 
`DATE/TIME` <- n <- Datetime.rounded <- id <- sleep.colname.string <- file.name <- Interval <- original.datapoints.fleeting <- MEDI <- State.Brown <- Reference <- Reference.check <- Id <- Start.date.shift <- data <- Shift <- `MELANOPIC EDI` <- State <- group <- End <- Start <- Quant.x <- Quant.y <- is.implicit <- group.indices <- Id2 <- gap.id <- start <- end <- path <- auto.id <- n_max <- manual.id <- silent <- Light <- Day <- N <- is_missing <- Hour <- .change <- dst_start <- .dst <- .dst2 <- dst_adjustment <- auto.plot <- group.1 <- group.2 <- group.indices2 <- cluster_start <- cluster_end <- row_idx <- is_cluster <- cluster_idx <- is_pulse <- pulse_idx <- light <- time <- level <- duration <- mean_duration <- onset <- midpoint <- offset <- mean_onset <- mean_midpoint <- mean_offset <- Date.data <- print_n <- remove_duplicates <- NULL empty_function <- function() { rsconnect::accountInfo() diff --git a/R/import_LL.R b/R/import_LL.R index dd9c18b..a911d93 100644 --- a/R/import_LL.R +++ b/R/import_LL.R @@ -366,9 +366,9 @@ imports <- function(device, #if there are untreated duplicate rows, give a warning if(duplicates > 0 & !remove_duplicates) { - messages <- paste0(format(duplicates, big.mark = "'"), " rows in your dataset(s) are identical to at least one other row. This causes problems during analysis. Please set `remove_duplicates = TRUE` during import. \nIf you still want to import the data as is and it failed with an error, try setting `auto.plot = FALSE`. You may want to do this to find out which entries are duplicates. Use `{replace_with_data_object} %>% janitor::get_dupes(-file.name) on your imported dataset.\n") - cat(messages) + messages <- paste0(format(duplicates, big.mark = "'"), " rows in your dataset(s) are identical to at least one other row. This causes problems during analysis. Please set `remove_duplicates = TRUE` during import. 
Import will be stopped now and a dataframe with the duplicate rows returned \nIf you want to find out which entries are duplicates. Use `{replace_with_data_object} %>% janitor::get_dupes(-file.name) on your imported dataset.\n") warning(messages) + return(janitor::get_dupes(data, -file.name)) } #if dst_adjustment is TRUE, adjust the datetime column