From d9d51b978b6ccbe92380d8bf2614e8faa1c592dc Mon Sep 17 00:00:00 2001 From: Trevor Riley <89118428+TNRiley@users.noreply.github.com> Date: Thu, 23 May 2024 23:12:23 -0400 Subject: [PATCH] Update compare.R Replaced base::unique() with dplyr::distinct() for improved performance in the count_unique function. The diff also inserts an include_references if/else block and a closing brace directly after the pipeline, ahead of the existing include_references block that remains as trailing context. --- R/compare.R | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/R/compare.R b/R/compare.R index 87b26869..c1a6dd20 100644 --- a/R/compare.R +++ b/R/compare.R @@ -27,7 +27,14 @@ count_unique <- function(unique_data, include_references = FALSE) { type = ifelse(.data$unique, "unique", "duplicated") %>% factor(levels = c("unique", "duplicated")) ) %>% dplyr::ungroup() %>% - unique() + dplyr::distinct() + + if (include_references == TRUE) { + out %>% dplyr::left_join(unique_data %>% dplyr::select(-dplyr::all_of(setdiff(intersect(names(.), names(out)), "duplicate_id"))), by = "duplicate_id") + } else { + out + } +} if (include_references == TRUE) { out %>% dplyr::left_join(unique_data %>% dplyr::select(-dplyr::all_of(setdiff(intersect(names(.), names(out)), "duplicate_id"))), by = "duplicate_id")