From 0971400b090b74e838fa9ffd02d952b2fc41dff4 Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Mon, 9 Sep 2024 16:15:35 -0400
Subject: [PATCH 01/17] Draft join_data_tibbles

- no documentation yet
- no support for mixed structure insturments yet
---
 R/join_data_tibbles.R | 49 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
 create mode 100644 R/join_data_tibbles.R

diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R
new file mode 100644
index 00000000..4b7b56cd
--- /dev/null
+++ b/R/join_data_tibbles.R
@@ -0,0 +1,49 @@
+join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "new_tibble", suffix = c(".x", ".y")) {
+  record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]])
+
+  # Append the supertibble with the primary keys column
+  suprtbl <- suprtbl |>
+    mutate(pks = purrr::map_chr(redcap_data, ~extract_keys(., record_id_field = record_id_field))) %>%
+    select(redcap_form_name, redcap_form_label, redcap_data, redcap_metadata, structure, pks)
+
+  # Extract the user defined tibbles and assign the structure and pks to attributes
+  tbl_x <- extract_tibble(suprtbl, tbl = x)
+  attributes(tbl_x)$structure <- suprtbl$structure[suprtbl$redcap_form_name == x]
+  attributes(tbl_x)$pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>% stringr::str_split(", ", simplify = TRUE)
+
+  tbl_y <- extract_tibble(suprtbl, tbl = y)
+  attributes(tbl_y)$structure <- suprtbl$structure[suprtbl$redcap_form_name == y]
+  attributes(tbl_y)$pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>% stringr::str_split(", ", simplify = TRUE)
+
+  # Define a named list of join functions corresponding to the join types
+  join_functions <- list(
+    left = dplyr::left_join,
+    right = dplyr::right_join,
+    inner = dplyr::inner_join,
+    full = dplyr::full_join
+  )
+
+  # Check if the specified type is valid
+  # TODO: Make a standard check function with cli
+  if (!type %in% names(join_functions)) {
+    stop("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.")
+  }
+
+  # Mixed structure requires special handling
+  is_mixed <- any(c(attr(tbl_x, "structure"), attr(tbl_y, "structure")) == "mixed")
+
+  if (is_mixed) {
+    return() # TODO: this is the complicated part
+  } else {
+    rlang::exec(
+      join_functions[[type]], tbl_x, tbl_y, by = intersect(attr(tbl_x, "pks"), attr(tbl_y, "pks")), suffix
+    )
+  }
+}
+
+extract_keys <- function(suprtbl, record_id_field) {
+  suprtbl |>
+    colnames() |>
+    intersect(c(record_id_field, "redcap_event", "redcap_form_instance", "redcap_event_instance", "redcap_arm")) |>
+    paste(collapse = ", ")
+}

From e544edb007cd7d74fea9d4bf062a23124004693d Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Mon, 9 Sep 2024 16:32:59 -0400
Subject: [PATCH 02/17] Consolidate code

---
 R/join_data_tibbles.R | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R
index 4b7b56cd..cb3480da 100644
--- a/R/join_data_tibbles.R
+++ b/R/join_data_tibbles.R
@@ -6,14 +6,8 @@ join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "n
     mutate(pks = purrr::map_chr(redcap_data, ~extract_keys(., record_id_field = record_id_field))) %>%
     select(redcap_form_name, redcap_form_label, redcap_data, redcap_metadata, structure, pks)
 
-  # Extract the user defined tibbles and assign the structure and pks to attributes
-  tbl_x <- extract_tibble(suprtbl, tbl = x)
-  attributes(tbl_x)$structure <- suprtbl$structure[suprtbl$redcap_form_name == x]
-  attributes(tbl_x)$pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>% stringr::str_split(", ", simplify = TRUE)
-
-  tbl_y <- extract_tibble(suprtbl, tbl = y)
-  attributes(tbl_y)$structure <- suprtbl$structure[suprtbl$redcap_form_name == y]
-  attributes(tbl_y)$pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>% stringr::str_split(", ", simplify = TRUE)
+  tbl_x <- prepare_tibble(suprtbl, x)
+  tbl_y <- prepare_tibble(suprtbl, y)
 
   # Define a named list of join functions corresponding to the join types
   join_functions <- list(
@@ -33,7 +27,7 @@ join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "n
   is_mixed <- any(c(attr(tbl_x, "structure"), attr(tbl_y, "structure")) == "mixed")
 
   if (is_mixed) {
-    return() # TODO: this is the complicated part
+    stop("Mixed structure table detected, this feature is not currently supported.") # TODO: Fix, this is the complicated part
   } else {
     rlang::exec(
       join_functions[[type]], tbl_x, tbl_y, by = intersect(attr(tbl_x, "pks"), attr(tbl_y, "pks")), suffix
@@ -42,8 +36,19 @@ join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "n
 }
 
 extract_keys <- function(suprtbl, record_id_field) {
+  redcap_keys <- c(record_id_field, "redcap_event", "redcap_form_instance",
+                   "redcap_event_instance", "redcap_arm")
+
   suprtbl |>
     colnames() |>
-    intersect(c(record_id_field, "redcap_event", "redcap_form_instance", "redcap_event_instance", "redcap_arm")) |>
+    intersect(redcap_keys) |>
     paste(collapse = ", ")
 }
+
+prepare_tibble <- function(suprtbl, tbl_name) {
+  tbl <- extract_tibble(suprtbl, tbl = tbl_name)
+  attributes(tbl)$structure <- suprtbl$structure[suprtbl$redcap_form_name == tbl_name]
+  attributes(tbl)$pks <- suprtbl$pks[suprtbl$redcap_form_name == tbl_name] %>%
+    stringr::str_split(", ", simplify = TRUE)
+  return(tbl)
+}

From 7dd592b53eb9cbd9b48a7c4fc9366032df2787c1 Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Tue, 10 Sep 2024 16:35:02 -0400
Subject: [PATCH 03/17] Some updates

---
 R/join_data_tibbles.R | 61 +++++++++++++++++++++++++++----------------
 1 file changed, 38 insertions(+), 23 deletions(-)

diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R
index cb3480da..7ba38714 100644
--- a/R/join_data_tibbles.R
+++ b/R/join_data_tibbles.R
@@ -1,37 +1,26 @@
 join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "new_tibble", suffix = c(".x", ".y")) {
   record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]])
+  join_fn <- get_join_fn(type)
 
   # Append the supertibble with the primary keys column
   suprtbl <- suprtbl |>
     mutate(pks = purrr::map_chr(redcap_data, ~extract_keys(., record_id_field = record_id_field))) %>%
     select(redcap_form_name, redcap_form_label, redcap_data, redcap_metadata, structure, pks)
 
-  tbl_x <- prepare_tibble(suprtbl, x)
-  tbl_y <- prepare_tibble(suprtbl, y)
+  tbl_x <- extract_tibble(suprtbl, x)
+  tbl_x_type <- get_structure(suprtbl, x)
+  tbl_y <- extract_tibble(suprtbl, y)
+  tbl_y_type <- get_structure(suprtbl, y)
 
-  # Define a named list of join functions corresponding to the join types
-  join_functions <- list(
-    left = dplyr::left_join,
-    right = dplyr::right_join,
-    inner = dplyr::inner_join,
-    full = dplyr::full_join
-  )
-
-  # Check if the specified type is valid
-  # TODO: Make a standard check function with cli
-  if (!type %in% names(join_functions)) {
-    stop("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.")
-  }
+  by <- build_by(suprtbl, x, y)
 
   # Mixed structure requires special handling
-  is_mixed <- any(c(attr(tbl_x, "structure"), attr(tbl_y, "structure")) == "mixed")
+  is_mixed <- any(c(tbl_x_type, tbl_y_type) == "mixed")
 
   if (is_mixed) {
     stop("Mixed structure table detected, this feature is not currently supported.") # TODO: Fix, this is the complicated part
   } else {
-    rlang::exec(
-      join_functions[[type]], tbl_x, tbl_y, by = intersect(attr(tbl_x, "pks"), attr(tbl_y, "pks")), suffix
-    )
+    join_fn(tbl_x, tbl_y, by = by, suffix = suffix)
   }
 }
 
@@ -45,10 +34,36 @@ extract_keys <- function(suprtbl, record_id_field) {
     paste(collapse = ", ")
 }
 
-prepare_tibble <- function(suprtbl, tbl_name) {
+get_structure <- function(suprtbl, tbl_name) {
   tbl <- extract_tibble(suprtbl, tbl = tbl_name)
-  attributes(tbl)$structure <- suprtbl$structure[suprtbl$redcap_form_name == tbl_name]
-  attributes(tbl)$pks <- suprtbl$pks[suprtbl$redcap_form_name == tbl_name] %>%
+  suprtbl$structure[suprtbl$redcap_form_name == tbl_name]
+}
+
+get_join_fn <- function(type) {
+
+  join_functions <- list(
+    left = dplyr::left_join,
+    right = dplyr::right_join,
+    inner = dplyr::inner_join,
+    full = dplyr::full_join
+  )
+
+  # Check if the specified type is valid
+  # TODO: Make a standard check function with cli
+  if (!type %in% names(join_functions)) {
+    stop("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.")
+  }
+
+  join_functions[[type]]
+}
+
+build_by <- function(suprtbl, x, y) {
+
+  x_pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>%
+    stringr::str_split(", ", simplify = TRUE)
+  y_pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>%
     stringr::str_split(", ", simplify = TRUE)
-  return(tbl)
+
+  intersect(x_pks, y_pks)
+
 }

From e80733ebcb05bf5c14155602738523717c08bfc3 Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Mon, 16 Sep 2024 13:29:32 -0400
Subject: [PATCH 04/17] Update to include mixed_structure handling

---
 R/join_data_tibbles.R | 39 +++++++++++++++++++++++++++++++--------
 1 file changed, 31 insertions(+), 8 deletions(-)

diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R
index 7ba38714..d77975a4 100644
--- a/R/join_data_tibbles.R
+++ b/R/join_data_tibbles.R
@@ -1,4 +1,9 @@
-join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "new_tibble", suffix = c(".x", ".y")) {
+join_data_tibbles <- function(suprtbl,
+                              x,
+                              y,
+                              by = NULL,
+                              type = "left",
+                              suffix = c(".x", ".y")) {
   record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]])
   join_fn <- get_join_fn(type)
 
@@ -12,16 +17,20 @@ join_data_tibbles <- function(suprtbl, x, y, by = NULL, type = "left", name = "n
   tbl_y <- extract_tibble(suprtbl, y)
   tbl_y_type <- get_structure(suprtbl, y)
 
-  by <- build_by(suprtbl, x, y)
-
   # Mixed structure requires special handling
   is_mixed <- any(c(tbl_x_type, tbl_y_type) == "mixed")
 
   if (is_mixed) {
-    stop("Mixed structure table detected, this feature is not currently supported.") # TODO: Fix, this is the complicated part
-  } else {
-    join_fn(tbl_x, tbl_y, by = by, suffix = suffix)
+    required_columns <- c("redcap_event_instance", "redcap_form_instance")
+    tbl_x <- add_missing_columns(tbl_x, required_columns)
+    tbl_y <- add_missing_columns(tbl_y, required_columns)
   }
+
+  join_fn <- get_join_fn(type)
+  by <- build_by(suprtbl, x, y, is_mixed)
+
+  join_fn(tbl_x, tbl_y, by = by, suffix = suffix) %>%
+    relocate(starts_with("form_status_complete"), .after = everything())
 }
 
 extract_keys <- function(suprtbl, record_id_field) {
@@ -57,13 +66,27 @@ get_join_fn <- function(type) {
   join_functions[[type]]
 }
 
-build_by <- function(suprtbl, x, y) {
+build_by <- function(suprtbl, x, y, is_mixed) {
 
   x_pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>%
     stringr::str_split(", ", simplify = TRUE)
   y_pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>%
     stringr::str_split(", ", simplify = TRUE)
 
-  intersect(x_pks, y_pks)
+  out <- intersect(x_pks, y_pks)
+
+  if (is_mixed) {
+    out <- c(out, "redcap_event_instance", "redcap_form_instance") %>%
+      # TODO: Make standard, currently needed for repeat/mixed joins
+      unique()
+  }
 
+  out
 }
+
+add_missing_columns <- function(tbl, columns) {
+  missing_cols <- setdiff(columns, names(tbl))
+  tbl[missing_cols] <- NA
+  return(tbl)
+}
+

From 9eee96e7f16782e5a9cf46f03a89071d16d14c48 Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Thu, 19 Sep 2024 16:52:31 -0400
Subject: [PATCH 05/17] Update how read_redcap processes mixed RS records

---
 R/clean_redcap_long.R | 31 +++++++++++++++++++++++----
 R/utils.R             | 50 +++++++++++++++++++++++++------------------
 2 files changed, 56 insertions(+), 25 deletions(-)

diff --git a/R/clean_redcap_long.R b/R/clean_redcap_long.R
index ec5941ea..e33cfad7 100644
--- a/R/clean_redcap_long.R
+++ b/R/clean_redcap_long.R
@@ -318,7 +318,7 @@ distill_repeat_table_long <- function(form_name,
   db_data_long <- db_data_long %>%
     add_partial_keys(var = .data$redcap_event_name) %>%
     filter(
-      !is.na(.data$redcap_form_instance) &
+      (!is.na(.data$redcap_form_instance) | !is.na(redcap_event_instance)) &
         .data$redcap_repeat_instrument == my_form
     )
 
@@ -416,14 +416,37 @@ convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) {
         )
       )
 
+    repeat_together_present <- any(is.na(db_data_long$redcap_repeat_instrument) & !is.na(db_data_long$redcap_repeat_instance))
+
+    if (!"redcap_event_instance" %in% names(db_data_long) & repeat_together_present) {
+      db_data_long <- db_data_long %>%
+        mutate(
+          redcap_event_instance = NA
+        ) %>%
+        relocate(redcap_event_instance, .after = redcap_repeat_instance)
+    }
+
+    if (repeat_together_present) {
+      db_data_long <- db_data_long %>%
+        mutate(
+          redcap_event_instance = case_when(
+            # Shift form instances to even instances for repeat-together types
+            update_mask & is.na(redcap_repeat_instrument) ~ redcap_repeat_instance,
+            # Otherwise
+            TRUE ~ redcap_event_instance
+          )
+        )
+    }
+
     # Assign update data based on rules below
     db_data_long <- db_data_long %>%
       mutate(
         redcap_repeat_instance = case_when(
           # Add single instance repeat event instance vals when none exist
-          update_mask & is.na(redcap_repeat_instance) ~ 1,
-          # Keep repeat event instance vals when they already exist
-          update_mask & !is.na(redcap_repeat_instance) ~ redcap_repeat_instance,
+          # update_mask & is.na(redcap_repeat_instance) ~ 1,
+          # update_mask & !is.na(redcap_repeat_instance) ~ redcap_repeat_instance, # TODO: Remove
+          # If repeat-together type, remove values from redcap_repeat_instance (shifted and captured in redcap_event_instance)
+          update_mask & is.na(redcap_repeat_instrument) ~ NA,
           TRUE ~ .data$redcap_repeat_instance
         ),
         redcap_repeat_instrument = case_when(
diff --git a/R/utils.R b/R/utils.R
index 44045c6d..203b75d0 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -64,35 +64,43 @@ create_repeat_instance_vars <- function(db_data) {
   }
 
   # Detect if repeat events exist
-  #   Determined by non-NA vals in new "redcap_form_instance" alongside
-  #   NA vals in "redcap_repeat_instrument"
-  #   `has_repeat_forms` will always be TRUE for events to exist
-  if (has_repeat_forms) {
+  # First determined if redcap_event_instance added during mixed structure handling
+  # See: convert_mixed_instrument()
+  has_repeat_events <- "redcap_event_instance" %in% names(out)
+
+  # Next determined by non-NA vals in new "redcap_form_instance" alongside
+  # NA vals in "redcap_repeat_instrument"
+  # `has_repeat_forms` will always be TRUE for events to exist
+  if (has_repeat_forms & !has_repeat_events) {
     has_repeat_events <- any(
       is.na(out$redcap_repeat_instrument) & !is.na(out$redcap_form_instance)
     )
-  } else {
-    has_repeat_events <- FALSE
   }
 
   if (has_repeat_events) {
-    out$redcap_event_instance <- ifelse(
-      is.na(out$redcap_repeat_instrument) &
-        !is.na(out$redcap_form_instance),
-      out$redcap_form_instance,
-      NA
-    )
 
-    out$redcap_form_instance <- ifelse(
-      is.na(out$redcap_repeat_instrument) &
-        !is.na(out$redcap_form_instance),
-      NA,
-      out$redcap_form_instance
-    )
+    # In cases where there are repeating events but they were not added by
+    # convert_mixed_instrument(), add an empty redcap_event_instance column
+    if (!"redcap_event_instance" %in% names(out)) {
+      out$redcap_event_instance <- NA
+    }
+
+    out <- out %>%
+      mutate(
+        redcap_event_instance = case_when(
+          is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ redcap_form_instance,
+          # Else leave NA or the value given by conver_mixed_instrument()
+          TRUE ~ redcap_event_instance
+        ),
+        redcap_form_instance = case_when(
+          is.na(redcap_repeat_instrument) & !is.na(redcap_form_instance) ~ NA,
+          TRUE ~ redcap_form_instance
+        )
+      )
 
     out <- relocate(out,
-      "redcap_event_instance",
-      .after = "redcap_form_instance"
+                    "redcap_event_instance",
+                    .after = "redcap_form_instance"
     )
   }
 
@@ -185,7 +193,7 @@ parse_labels <- function(string, return_vector = FALSE, return_stripped_text_fla
   # If string is empty/NA, throw a warning
   if (is.na(string)) {
     cli_warn("Empty string detected for a given multiple choice label.",
-      class = c("empty_parse_warning", "REDCapTidieR_cond")
+             class = c("empty_parse_warning", "REDCapTidieR_cond")
     )
   }
 

From 0e755f0f70dfd483e871a910edb4f7ad52013d47 Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Fri, 20 Sep 2024 14:15:47 -0400
Subject: [PATCH 06/17] Add support to retrieve repeat event types, fix
 add_partial_keys bug

---
 R/clean_redcap_long.R             | 24 +++++++-----
 R/read_redcap.R                   | 63 ++++++++++++++++++++++++++++---
 R/utils.R                         | 16 ++++----
 man/add_event_mapping.Rd          |  5 ++-
 man/get_repeat_event_types.Rd     | 29 ++++++++++++++
 tests/testthat/test-read_redcap.R | 20 ++++++++++
 tests/testthat/test-utils.R       | 12 ++++--
 7 files changed, 143 insertions(+), 26 deletions(-)
 create mode 100644 man/get_repeat_event_types.Rd

diff --git a/R/clean_redcap_long.R b/R/clean_redcap_long.R
index e33cfad7..750578cc 100644
--- a/R/clean_redcap_long.R
+++ b/R/clean_redcap_long.R
@@ -84,7 +84,9 @@ clean_redcap_long <- function(db_data_long,
       # Retrieve mixed structure fields and forms in reference df
       mixed_structure_ref <- get_mixed_structure_fields(db_data_long) %>%
         filter(.data$rep_and_nonrep & !str_ends(.data$field_name, "_form_complete")) %>%
-        left_join(db_metadata_long %>% select("field_name", "form_name"),
+        left_join(
+          db_metadata_long %>%
+            select("field_name", "form_name"),
           by = "field_name"
         )
 
@@ -318,7 +320,10 @@ distill_repeat_table_long <- function(form_name,
   db_data_long <- db_data_long %>%
     add_partial_keys(var = .data$redcap_event_name) %>%
     filter(
-      (!is.na(.data$redcap_form_instance) | !is.na(redcap_event_instance)) &
+      (
+        !is.na(.data$redcap_form_instance) |
+          if_any(starts_with("redcap_event_instance"), ~ !is.na(.))
+      ) &
         .data$redcap_repeat_instrument == my_form
     )
 
@@ -416,14 +421,17 @@ convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) {
         )
       )
 
-    repeat_together_present <- any(is.na(db_data_long$redcap_repeat_instrument) & !is.na(db_data_long$redcap_repeat_instance))
+    repeat_together_present <- any(
+      is.na(db_data_long$redcap_repeat_instrument) &
+        !is.na(db_data_long$redcap_repeat_instance)
+    )
 
-    if (!"redcap_event_instance" %in% names(db_data_long) & repeat_together_present) {
+    if (!"redcap_event_instance" %in% names(db_data_long) && repeat_together_present) {
       db_data_long <- db_data_long %>%
         mutate(
           redcap_event_instance = NA
         ) %>%
-        relocate(redcap_event_instance, .after = redcap_repeat_instance)
+        relocate(.data$redcap_event_instance, .after = .data$redcap_repeat_instance)
     }
 
     if (repeat_together_present) {
@@ -442,10 +450,8 @@ convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) {
     db_data_long <- db_data_long %>%
       mutate(
         redcap_repeat_instance = case_when(
-          # Add single instance repeat event instance vals when none exist
-          # update_mask & is.na(redcap_repeat_instance) ~ 1,
-          # update_mask & !is.na(redcap_repeat_instance) ~ redcap_repeat_instance, # TODO: Remove
-          # If repeat-together type, remove values from redcap_repeat_instance (shifted and captured in redcap_event_instance)
+          # If repeat-together type, remove values from redcap_repeat_instance
+          # (shifted and captured in redcap_event_instance)
           update_mask & is.na(redcap_repeat_instrument) ~ NA,
           TRUE ~ .data$redcap_repeat_instance
         ),
diff --git a/R/read_redcap.R b/R/read_redcap.R
index 874540ce..f20ff34d 100644
--- a/R/read_redcap.R
+++ b/R/read_redcap.R
@@ -270,8 +270,11 @@ read_redcap <- function(redcap_uri,
   }
 
   if (is_longitudinal) {
+    repeat_event_types <- get_repeat_event_types(db_data)
+
     linked_arms <- link_arms(
-      redcap_uri = redcap_uri, token = token,
+      redcap_uri = redcap_uri,
+      token = token,
       suppress_redcapr_messages = suppress_redcapr_messages
     )
 
@@ -292,7 +295,7 @@ read_redcap <- function(redcap_uri,
   out <- add_metadata(out, db_metadata, redcap_uri, token, suppress_redcapr_messages)
 
   if (is_longitudinal) {
-    out <- add_event_mapping(out, linked_arms)
+    out <- add_event_mapping(out, linked_arms, repeat_event_types)
   }
 
   out <- out %>%
@@ -440,18 +443,21 @@ add_metadata <- function(supertbl, db_metadata, redcap_uri, token, suppress_redc
 #' @param supertbl a supertibble object to supplement with metadata
 #' @param linked_arms the tibble with event mappings created by
 #' \code{link_arms()}
+#' @param repeat_event_types a dataframe output from [repeat_event_types()] which
+#' specifies NR, RS, and RT types for events
 #'
 #' @return
 #' The original supertibble with an events \code{redcap_events} list column
 #' containing arms and events associated with each instrument
 #'
 #' @keywords internal
-#'
-add_event_mapping <- function(supertbl, linked_arms) {
+
+add_event_mapping <- function(supertbl, linked_arms, repeat_event_types) {
   event_info <- linked_arms %>%
+    left_join(repeat_event_types, by = c("unique_event_name" = "redcap_event_name")) %>%
     add_partial_keys(.data$unique_event_name) %>%
     select(
-      redcap_form_name = "form", "redcap_event", "event_name", "redcap_arm", "arm_name"
+      redcap_form_name = "form", "redcap_event", "event_name", "redcap_arm", "arm_name", "repeat_type"
     ) %>%
     nest(redcap_events = !"redcap_form_name")
 
@@ -498,3 +504,50 @@ calc_metadata_stats <- function(data) {
     form_complete_pct = percent(form_complete_pct, digits = 2, format = "fg")
   )
 }
+
+#' @title
+#' Add identification for repeat event types
+#'
+#' @description
+#' To correctly assign repeat event types a few assumptions must be made:
+#'
+#' - There are only 3 behaviors: nonrepeating, repeat_separately, and repeat_together
+#' - If an event only shows `redcap_repeat_instance` and `redcap_repeat_instrument`
+#' as `NA`, it can be considered a nonrepeat event.
+#' - If an event is always `NA` for `redcap_repeat_instrument` and filled for `redcap_repeat_instance`
+#' it can be assumed to be a repeat_together event
+#' - repeat_separate and nonrepeating event types exhibit the same behavior along the
+#' primary keys of the data. nonrepeating event types can have data display with
+#' `redcap_repeat_instance`values both filled and as `NA`. If this is the case,
+#' it can be assumed the event is a repeating separate event.
+#'
+#' @param data the REDCap data
+#'
+#' @return
+#' A dataframe with unique event names mapped to their corresponding repeat types
+#'
+#' @keywords internal
+
+get_repeat_event_types <- function(data) {
+  out <- data %>%
+    dplyr::distinct(.data$redcap_event_name, .data$redcap_repeat_instrument, .data$redcap_repeat_instance) %>%
+    mutate(
+      repeat_type = case_when(
+        !is.na(redcap_event_name) & !is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~
+          "repeat_separate",
+        !is.na(redcap_event_name) & is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~
+          "repeat_together",
+        TRUE ~ "nonrepeating"
+      )
+    ) %>%
+    dplyr::distinct(.data$redcap_event_name, .data$repeat_type)
+
+  # Check for instances where the same event is labelled as nonrepeating & repeating separate
+  # If this is the case, it must be repeating separate (there is just data that qualifies as both)
+  out %>%
+    mutate(
+      is_duplicated = duplicated(.data$repeat_type) | duplicated(.data$repeat_type, fromLast = TRUE)
+    ) %>%
+    filter(.data$is_duplicated == FALSE | (.data$is_duplicated == TRUE & .data$ repeat_type == "repeat_separate")) %>%
+    select(-.data$is_duplicated)
+}
diff --git a/R/utils.R b/R/utils.R
index 203b75d0..e660031b 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -18,12 +18,13 @@
 add_partial_keys <- function(db_data,
                              var = NULL) {
   if (!is.null(enexpr(var))) {
-    pattern <- "^(\\w+?)_arm_(\\d)$"
+    # Include handling for instances where REDCap appends with "_1b" or similar
+    pattern <- "^(\\w+?)_arm_(\\d+\\w?)$"
 
     db_data <- db_data %>%
       mutate(
         redcap_event = sub(pattern, "\\1", {{ var }}),
-        redcap_arm   = as.integer(sub(pattern, "\\2", {{ var }}))
+        redcap_arm   = as.factor(sub(pattern, "\\2", {{ var }}))
       )
   }
 
@@ -65,20 +66,19 @@ create_repeat_instance_vars <- function(db_data) {
 
   # Detect if repeat events exist
   # First determined if redcap_event_instance added during mixed structure handling
-  # See: convert_mixed_instrument()
+  # See: convert_mixed_instrument
   has_repeat_events <- "redcap_event_instance" %in% names(out)
 
   # Next determined by non-NA vals in new "redcap_form_instance" alongside
   # NA vals in "redcap_repeat_instrument"
   # `has_repeat_forms` will always be TRUE for events to exist
-  if (has_repeat_forms & !has_repeat_events) {
+  if (has_repeat_forms && !has_repeat_events) {
     has_repeat_events <- any(
       is.na(out$redcap_repeat_instrument) & !is.na(out$redcap_form_instance)
     )
   }
 
   if (has_repeat_events) {
-
     # In cases where there are repeating events but they were not added by
     # convert_mixed_instrument(), add an empty redcap_event_instance column
     if (!"redcap_event_instance" %in% names(out)) {
@@ -99,8 +99,8 @@ create_repeat_instance_vars <- function(db_data) {
       )
 
     out <- relocate(out,
-                    "redcap_event_instance",
-                    .after = "redcap_form_instance"
+      "redcap_event_instance",
+      .after = "redcap_form_instance"
     )
   }
 
@@ -193,7 +193,7 @@ parse_labels <- function(string, return_vector = FALSE, return_stripped_text_fla
   # If string is empty/NA, throw a warning
   if (is.na(string)) {
     cli_warn("Empty string detected for a given multiple choice label.",
-             class = c("empty_parse_warning", "REDCapTidieR_cond")
+      class = c("empty_parse_warning", "REDCapTidieR_cond")
     )
   }
 
diff --git a/man/add_event_mapping.Rd b/man/add_event_mapping.Rd
index 58a8ee22..3fe9a8a3 100644
--- a/man/add_event_mapping.Rd
+++ b/man/add_event_mapping.Rd
@@ -5,13 +5,16 @@
 \title{Supplement a supertibble from a longitudinal database with information about
 the events associated with each instrument}
 \usage{
-add_event_mapping(supertbl, linked_arms)
+add_event_mapping(supertbl, linked_arms, repeat_event_types)
 }
 \arguments{
 \item{supertbl}{a supertibble object to supplement with metadata}
 
 \item{linked_arms}{the tibble with event mappings created by
 \code{link_arms()}}
+
+\item{repeat_event_types}{a dataframe output from \code{\link[=repeat_event_types]{repeat_event_types()}} which
+specifies NR, RS, and RT types for events}
 }
 \value{
 The original supertibble with an events \code{redcap_events} list column
diff --git a/man/get_repeat_event_types.Rd b/man/get_repeat_event_types.Rd
new file mode 100644
index 00000000..2d861189
--- /dev/null
+++ b/man/get_repeat_event_types.Rd
@@ -0,0 +1,29 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read_redcap.R
+\name{get_repeat_event_types}
+\alias{get_repeat_event_types}
+\title{Add identification for repeat event types}
+\usage{
+get_repeat_event_types(data)
+}
+\arguments{
+\item{data}{the REDCap data}
+}
+\value{
+A dataframe with unique event names mapped to their corresponding repeat types
+}
+\description{
+To correctly assign repeat event types a few assumptions must be made:
+\itemize{
+\item There are only 3 behaviors: nonrepeating, repeat_separately, and repeat_together
+\item If an event only shows \code{redcap_repeat_instance} and \code{redcap_repeat_instrument}
+as \code{NA}, it can be considered a nonrepeat event.
+\item If an event is always \code{NA} for \code{redcap_repeat_instrument} and filled for \code{redcap_repeat_instance}
+it can be assumed to be a repeat_together event
+\item repeat_separate and nonrepeating event types exhibit the same behavior along the
+primary keys of the data. nonrepeating event types can have data display with
+\code{redcap_repeat_instance}values both filled and as \code{NA}. If this is the case,
+it can be assumed the event is a repeating separate event.
+}
+}
+\keyword{internal}
diff --git a/tests/testthat/test-read_redcap.R b/tests/testthat/test-read_redcap.R
index 0683c212..194e63f0 100644
--- a/tests/testthat/test-read_redcap.R
+++ b/tests/testthat/test-read_redcap.R
@@ -641,3 +641,23 @@ test_that("read_redcap handles missing data codes", {
   }) |>
     expect_no_warning()
 })
+
+test_that("get_repeat_event_types() works", {
+  mixed_data_structure <- tibble::tribble(
+    ~"record_id", ~"redcap_event_name", ~"redcap_repeat_instrument", ~"redcap_repeat_instance",
+    1, "nonrepeat", NA, NA,
+    1, "repeat_together", NA, 1,
+    1, "repeat_separate", "mixed_structure_form", 1
+  )
+
+  expected_out <- tibble::tribble(
+    ~"redcap_event_name", ~"repeat_type",
+    "nonrepeat", "nonrepeating",
+    "repeat_together", "repeat_together",
+    "repeat_separate", "repeat_separate"
+  )
+
+  out <- get_repeat_event_types(mixed_data_structure)
+
+  expect_equal(out, expected_out)
+})
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 299be04e..344d4e9a 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -219,8 +219,10 @@ test_that("link_arms works", {
   expect_s3_class(out, "tbl")
 
   # output contains expected columns
-  expected_cols <- c("arm_num", "unique_event_name", "form", "arm_name",
-                     "event_name", "custom_event_label", "event_id")
+  expected_cols <- c(
+    "arm_num", "unique_event_name", "form", "arm_name",
+    "event_name", "custom_event_label", "event_id"
+  )
   expect_setequal(expected_cols, names(out))
 
   # all arms are represented in output (test redcap has 2 arms)
@@ -346,7 +348,8 @@ test_that("add_partial_keys works", {
     1, "nr_event_arm_1", NA, NA,
     1, "nr_event_arm_1", "r_instrument", 1,
     3, "nr_event_arm_1", "r_instrument", 1,
-    4, "r_event_arm_1", NA, 1
+    4, "r_event_arm_1", NA, 1,
+    5, "r_event_arm_1b", NA, 1
   )
 
   out <- test_data %>%
@@ -365,6 +368,9 @@ test_that("add_partial_keys works", {
   expect_true(all(expected_cols %in% names(out)))
   expect_s3_class(out, "data.frame")
   expect_true(nrow(out) > 0)
+
+  expected_redcap_arm_col <- factor(c(1, 1, 1, 1, "1b"))
+  expect_equal(out$redcap_arm, expected_redcap_arm_col)
 })
 
 test_that("create_repeat_instance_vars works", {

From 3ce0e120c3bc67ac07364d5e05022b73887737ca Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Fri, 20 Sep 2024 14:15:57 -0400
Subject: [PATCH 07/17] Update join_data_tibbles

---
 R/join_data_tibbles.R | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R
index d77975a4..e09a4d41 100644
--- a/R/join_data_tibbles.R
+++ b/R/join_data_tibbles.R
@@ -4,13 +4,14 @@ join_data_tibbles <- function(suprtbl,
                               by = NULL,
                               type = "left",
                               suffix = c(".x", ".y")) {
-  record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]])
+  record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]]) # nolint: object_usage_linter
   join_fn <- get_join_fn(type)
 
   # Append the supertibble with the primary keys column
   suprtbl <- suprtbl |>
-    mutate(pks = purrr::map_chr(redcap_data, ~extract_keys(., record_id_field = record_id_field))) %>%
-    select(redcap_form_name, redcap_form_label, redcap_data, redcap_metadata, structure, pks)
+    mutate(pks = purrr::map_chr(.data$redcap_data, ~ extract_keys(., record_id_field = record_id_field))) %>%
+    select(.data$redcap_form_name, .data$redcap_form_label, .data$redcap_data,
+           .data$redcap_metadata, .data$structure, .data$pks)
 
   tbl_x <- extract_tibble(suprtbl, x)
   tbl_x_type <- get_structure(suprtbl, x)
@@ -34,8 +35,10 @@ join_data_tibbles <- function(suprtbl,
 }
 
 extract_keys <- function(suprtbl, record_id_field) {
-  redcap_keys <- c(record_id_field, "redcap_event", "redcap_form_instance",
-                   "redcap_event_instance", "redcap_arm")
+  redcap_keys <- c(
+    record_id_field, "redcap_event", "redcap_form_instance",
+    "redcap_event_instance", "redcap_arm"
+  )
 
   suprtbl |>
     colnames() |>
@@ -44,12 +47,10 @@ extract_keys <- function(suprtbl, record_id_field) {
 }
 
 get_structure <- function(suprtbl, tbl_name) {
-  tbl <- extract_tibble(suprtbl, tbl = tbl_name)
   suprtbl$structure[suprtbl$redcap_form_name == tbl_name]
 }
 
 get_join_fn <- function(type) {
-
   join_functions <- list(
     left = dplyr::left_join,
     right = dplyr::right_join,
@@ -67,7 +68,6 @@ get_join_fn <- function(type) {
 }
 
 build_by <- function(suprtbl, x, y, is_mixed) {
-
   x_pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>%
     stringr::str_split(", ", simplify = TRUE)
   y_pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>%
@@ -89,4 +89,3 @@ add_missing_columns <- function(tbl, columns) {
   tbl[missing_cols] <- NA
   return(tbl)
 }
-

From 374ad4852b84477897f5505896476646a4887f0b Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Fri, 20 Sep 2024 14:35:08 -0400
Subject: [PATCH 08/17] Update tests, no longer nonrepeats with single instance
 repeat

TODO: Check if true, but beleive this is no longer needed now that we've redefined redcap_form_instance/redcap_event_instance
---
 tests/testthat/test-clean_redcap_long.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/testthat/test-clean_redcap_long.R b/tests/testthat/test-clean_redcap_long.R
index 3a68e780..3cf78be7 100644
--- a/tests/testthat/test-clean_redcap_long.R
+++ b/tests/testthat/test-clean_redcap_long.R
@@ -112,7 +112,7 @@ test_that("clean_redcap_long with mixed structure works", {
   # Check redcap_data contents for mixed and nonrepeating structure
   expected_mixed_data <- tibble::tribble(
     ~record_id, ~redcap_event, ~redcap_form_instance, ~mixed_structure_1, ~form_status_complete,
-    1, "event_1", 1, "Mixed Nonrepeat 1", 0,
+    1, "event_1", NA, "Mixed Nonrepeat 1", 0,
     1, "event_2", 1, "Mixed Repeat 1", 0,
     1, "event_2", 2, "Mixed Repeat 2", 0
   )
@@ -309,7 +309,7 @@ test_that("convert_mixed_instrument works", {
   expected_out <- tibble::tribble(
     ~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable,
     ~repeat_form_variable, ~mixed_repeat_var,
-    1, "mixed_structure_form", 1, "A", NA, NA,
+    1, "mixed_structure_form", NA, "A", NA, NA,
     2, "mixed_structure_form", 1, "B", NA, NA,
     3, "repeat_form", 1, NA, "C", NA,
     4, "repeat_form", 2, NA, "D", NA,

From 37b88a811c281d07fca873870ef853b9e9f8aceb Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Fri, 20 Sep 2024 15:40:01 -0400
Subject: [PATCH 09/17] Fix get_repeat_event_types edge case

---
 R/read_redcap.R                   |  5 +++--
 tests/testthat/test-read_redcap.R | 21 +++++++++++++++++++++
 2 files changed, 24 insertions(+), 2 deletions(-)

diff --git a/R/read_redcap.R b/R/read_redcap.R
index f20ff34d..4cb6751f 100644
--- a/R/read_redcap.R
+++ b/R/read_redcap.R
@@ -544,10 +544,11 @@ get_repeat_event_types <- function(data) {
 
   # Check for instances where the same event is labelled as nonrepeating & repeating separate
   # If this is the case, it must be repeating separate (there is just data that qualifies as both)
+
   out %>%
     mutate(
-      is_duplicated = duplicated(.data$repeat_type) | duplicated(.data$repeat_type, fromLast = TRUE)
+      is_duplicated = (duplicated(.data$redcap_event_name) | duplicated(.data$redcap_event_name, fromLast = TRUE))
     ) %>%
-    filter(.data$is_duplicated == FALSE | (.data$is_duplicated == TRUE & .data$ repeat_type == "repeat_separate")) %>%
+    filter(!.data$is_duplicated | (.data$is_duplicated & .data$repeat_type == "repeat_separate")) %>%
     select(-.data$is_duplicated)
 }
diff --git a/tests/testthat/test-read_redcap.R b/tests/testthat/test-read_redcap.R
index 194e63f0..769b0d53 100644
--- a/tests/testthat/test-read_redcap.R
+++ b/tests/testthat/test-read_redcap.R
@@ -660,4 +660,25 @@ test_that("get_repeat_event_types() works", {
   out <- get_repeat_event_types(mixed_data_structure)
 
   expect_equal(out, expected_out)
+
+  # Example with nonrepeating arm that contains repeating and non repeating forms
+  mixed_data_structure <- tibble::tribble(
+    ~"record_id", ~"redcap_event_name", ~"redcap_repeat_instrument", ~"redcap_repeat_instance",
+    1, "nonrepeat", NA, NA,
+    1, "nonrepeat", "repeat_form", 1,
+    1, "repeat_together", NA, 1,
+    1, "repeat_separate", "mixed_structure_form", 1
+  )
+
+  out <- get_repeat_event_types(mixed_data_structure)
+
+  expected_out <- tibble::tribble(
+    ~"redcap_event_name", ~"repeat_type",
+    "nonrepeat", "repeat_separate",
+    "repeat_together", "repeat_together",
+    "repeat_separate", "repeat_separate"
+  )
+
+  expect_equal(out, expected_out)
+
 })

From 7f64232dc27ff7f0cfa816462784b7e5b256743e Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Mon, 23 Sep 2024 10:16:27 -0400
Subject: [PATCH 10/17] Reinstate single instance for nonrepeat

---
 R/clean_redcap_long.R                   | 3 +++
 R/read_redcap.R                         | 2 +-
 man/add_event_mapping.Rd                | 2 +-
 tests/testthat/test-clean_redcap_long.R | 4 ++--
 4 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/R/clean_redcap_long.R b/R/clean_redcap_long.R
index 750578cc..13171692 100644
--- a/R/clean_redcap_long.R
+++ b/R/clean_redcap_long.R
@@ -450,6 +450,9 @@ convert_mixed_instrument <- function(db_data_long, mixed_structure_ref) {
     db_data_long <- db_data_long %>%
       mutate(
         redcap_repeat_instance = case_when(
+          # Add single instance repeat event instance vals when none exist
+          # This handles nonrepeating data in events set to repeat separately
+          update_mask & is.na(redcap_repeat_instance) ~ 1,
           # If repeat-together type, remove values from redcap_repeat_instance
           # (shifted and captured in redcap_event_instance)
           update_mask & is.na(redcap_repeat_instrument) ~ NA,
diff --git a/R/read_redcap.R b/R/read_redcap.R
index 4cb6751f..0e5e8e6f 100644
--- a/R/read_redcap.R
+++ b/R/read_redcap.R
@@ -443,7 +443,7 @@ add_metadata <- function(supertbl, db_metadata, redcap_uri, token, suppress_redc
 #' @param supertbl a supertibble object to supplement with metadata
 #' @param linked_arms the tibble with event mappings created by
 #' \code{link_arms()}
-#' @param repeat_event_types a dataframe output from [repeat_event_types()] which
+#' @param repeat_event_types a dataframe output from [get_repeat_event_types()] which
 #' specifies NR, RS, and RT types for events
 #'
 #' @return
diff --git a/man/add_event_mapping.Rd b/man/add_event_mapping.Rd
index 3fe9a8a3..5c6d68df 100644
--- a/man/add_event_mapping.Rd
+++ b/man/add_event_mapping.Rd
@@ -13,7 +13,7 @@ add_event_mapping(supertbl, linked_arms, repeat_event_types)
 \item{linked_arms}{the tibble with event mappings created by
 \code{link_arms()}}
 
-\item{repeat_event_types}{a dataframe output from \code{\link[=repeat_event_types]{repeat_event_types()}} which
+\item{repeat_event_types}{a dataframe output from \code{\link[=get_repeat_event_types]{get_repeat_event_types()}} which
 specifies NR, RS, and RT types for events}
 }
 \value{
diff --git a/tests/testthat/test-clean_redcap_long.R b/tests/testthat/test-clean_redcap_long.R
index 3cf78be7..3a68e780 100644
--- a/tests/testthat/test-clean_redcap_long.R
+++ b/tests/testthat/test-clean_redcap_long.R
@@ -112,7 +112,7 @@ test_that("clean_redcap_long with mixed structure works", {
   # Check redcap_data contents for mixed and nonrepeating structure
   expected_mixed_data <- tibble::tribble(
     ~record_id, ~redcap_event, ~redcap_form_instance, ~mixed_structure_1, ~form_status_complete,
-    1, "event_1", NA, "Mixed Nonrepeat 1", 0,
+    1, "event_1", 1, "Mixed Nonrepeat 1", 0,
     1, "event_2", 1, "Mixed Repeat 1", 0,
     1, "event_2", 2, "Mixed Repeat 2", 0
   )
@@ -309,7 +309,7 @@ test_that("convert_mixed_instrument works", {
   expected_out <- tibble::tribble(
     ~record_id, ~redcap_repeat_instrument, ~redcap_repeat_instance, ~mixed_structure_variable,
     ~repeat_form_variable, ~mixed_repeat_var,
-    1, "mixed_structure_form", NA, "A", NA, NA,
+    1, "mixed_structure_form", 1, "A", NA, NA,
     2, "mixed_structure_form", 1, "B", NA, NA,
     3, "repeat_form", 1, NA, "C", NA,
     4, "repeat_form", 2, NA, "D", NA,

From 593254d30c1d6997f4f709a2356b7a026e3ae672 Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Mon, 23 Sep 2024 14:34:35 -0400
Subject: [PATCH 11/17] Add in support for mixed-mixed join, to verify

---
 R/join_data_tibbles.R | 83 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 74 insertions(+), 9 deletions(-)

diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R
index e09a4d41..591e4465 100644
--- a/R/join_data_tibbles.R
+++ b/R/join_data_tibbles.R
@@ -11,27 +11,35 @@ join_data_tibbles <- function(suprtbl,
   suprtbl <- suprtbl |>
     mutate(pks = purrr::map_chr(.data$redcap_data, ~ extract_keys(., record_id_field = record_id_field))) %>%
     select(.data$redcap_form_name, .data$redcap_form_label, .data$redcap_data,
-           .data$redcap_metadata, .data$structure, .data$pks)
+           .data$redcap_metadata, .data$structure, .data$pks, .data$redcap_events)
 
   tbl_x <- extract_tibble(suprtbl, x)
-  tbl_x_type <- get_structure(suprtbl, x)
+  tbl_x_structure <- get_structure(suprtbl, x)
   tbl_y <- extract_tibble(suprtbl, y)
-  tbl_y_type <- get_structure(suprtbl, y)
+  tbl_y_structure <- get_structure(suprtbl, y)
 
   # Mixed structure requires special handling
-  is_mixed <- any(c(tbl_x_type, tbl_y_type) == "mixed")
+  is_mixed <- any(c(tbl_x_structure, tbl_y_structure) == "mixed")
 
   if (is_mixed) {
-    required_columns <- c("redcap_event_instance", "redcap_form_instance")
-    tbl_x <- add_missing_columns(tbl_x, required_columns)
-    tbl_y <- add_missing_columns(tbl_y, required_columns)
+    # TODO: Determine if ok to remove
+    # required_columns <- c("redcap_event_instance", "redcap_form_instance") # nolint: commented_code_linter
+    # tbl_x <- add_missing_columns(tbl_x, required_columns) # nolint: commented_code_linter
+    # tbl_y <- add_missing_columns(tbl_y, required_columns) # nolint: commented_code_linter
+
+    tbl_x_type <- get_type(suprtbl, x)
+    tbl_y_type <- get_type(suprtbl, y)
+
+    tbl_x <- left_join(tbl_x, tbl_x_type, by = "redcap_event")
+    tbl_y <- left_join(tbl_y, tbl_y_type, by = "redcap_event")
   }
 
   join_fn <- get_join_fn(type)
   by <- build_by(suprtbl, x, y, is_mixed)
 
-  join_fn(tbl_x, tbl_y, by = by, suffix = suffix) %>%
-    relocate(starts_with("form_status_complete"), .after = everything())
+  join_tbls(tbl_x, tbl_y, join_fn, by, suffix, is_mixed) %>%
+    relocate(starts_with("form_status_complete"), .after = everything()) %>%
+    select(-starts_with(".repeat_type"))
 }
 
 extract_keys <- function(suprtbl, record_id_field) {
@@ -50,6 +58,16 @@ get_structure <- function(suprtbl, tbl_name) {
   suprtbl$structure[suprtbl$redcap_form_name == tbl_name]
 }
 
+get_type <- function(suprtbl, tbl_name) {
+  suprtbl %>%
+    filter(.data$redcap_form_name == tbl_name) %>%
+    pull(.data$redcap_events) %>%
+    pluck(1) %>%
+    select(.data$redcap_event,
+           ".repeat_type" = .data$repeat_type) %>%
+    unique()
+}
+
 get_join_fn <- function(type) {
   join_functions <- list(
     left = dplyr::left_join,
@@ -84,8 +102,55 @@ build_by <- function(suprtbl, x, y, is_mixed) {
   out
 }
 
+# TODO: Determine if ok to remove
 add_missing_columns <- function(tbl, columns) {
   missing_cols <- setdiff(columns, names(tbl))
   tbl[missing_cols] <- NA
   return(tbl)
 }
+
+join_tbls <- function(x, y, join_fn, by, suffix, is_mixed) {
+  if (is_mixed) {
+    # Filter based on .repeat_type
+    x_together <- x %>% filter(.data$.repeat_type == "repeat_together")
+    y_together <- y %>% filter(.data$.repeat_type == "repeat_together")
+
+    x_separate <- x %>% filter(.data$.repeat_type == "repeat_separate")
+    y_separate <- y %>% filter(.data$.repeat_type == "repeat_separate")
+
+    # Join together sets
+    joined_together <- x_together %>%
+      join_fn(y_together, by = by[by != "redcap_form_instance"], suffix = suffix)
+
+    # Join separate sets
+    joined_separate <- x_separate %>%
+      join_fn(y_separate, by = by[by != "redcap_form_instance"], suffix = suffix)
+
+    # Bind rows together
+    result <- bind_rows(joined_together, joined_separate) %>%
+      drop_non_suffix_columns()
+  } else {
+    result <- join_fn(x, y, by = by, suffix = suffix)
+  }
+
+  result
+}
+
+drop_non_suffix_columns <- function(data) {
+  # Extract column names that contain a "."
+  # Note: We can look for periods because REDCap will not allow variables to made
+  # with them. Only user tampering with column names in the output would result in this.
+  dot_columns <- names(data)[grepl("\\.", names(data))]
+
+  # Extract the base column names without the suffixes (everything before the ".")
+  base_columns <- unique(sub("\\..*", "", dot_columns))
+
+  # Filter out base columns that do not exist without a suffix
+  columns_to_drop <- base_columns[base_columns %in% names(data)]
+
+  # Drop only those base columns that exist both with and without suffixes
+  data <- data %>%
+    select(-all_of(columns_to_drop))
+
+  return(data)
+}

From a343d6e6606c2220041620082c56a5bf33a3334c Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Tue, 24 Sep 2024 14:46:02 -0400
Subject: [PATCH 12/17] Update documentation, test methods for mixed joins

---
 NAMESPACE                |   1 +
 R/join_data_tibbles.R    | 148 +++++++++++++++++++++++++++++----------
 man/build_by.Rd          |  24 +++++++
 man/extract_keys.Rd      |  21 ++++++
 man/get_join_fn.Rd       |  19 +++++
 man/get_structure.Rd     |  17 +++++
 man/get_type.Rd          |  17 +++++
 man/join_data_tibbles.Rd |  69 ++++++++++++++++++
 8 files changed, 279 insertions(+), 37 deletions(-)
 create mode 100644 man/build_by.Rd
 create mode 100644 man/extract_keys.Rd
 create mode 100644 man/get_join_fn.Rd
 create mode 100644 man/get_structure.Rd
 create mode 100644 man/get_type.Rd
 create mode 100644 man/join_data_tibbles.Rd

diff --git a/NAMESPACE b/NAMESPACE
index b74e60ed..e584bcaf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -12,6 +12,7 @@ export(fmt_strip_html)
 export(fmt_strip_trailing_colon)
 export(fmt_strip_trailing_punct)
 export(fmt_strip_whitespace)
+export(join_data_tibbles)
 export(make_labelled)
 export(read_redcap)
 export(write_redcap_xlsx)
diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R
index 591e4465..57a9871d 100644
--- a/R/join_data_tibbles.R
+++ b/R/join_data_tibbles.R
@@ -1,65 +1,107 @@
-join_data_tibbles <- function(suprtbl,
+#' @title Join Two Data Tibbles from a Supertibble
+#'
+#' @description
+#' The [join_data_tibbles()] function provides a way to intelligently join two
+#' data tibbles from a REDCaTidieR supertibble. A supertibble is an output of
+#' [read_redcap()].
+#'
+#' [join_data_tibbles()] attempts to correctly assign the `by` when left `NULL` (the default)
+#' based on detecting the data tibble structure of `x` and `y`.
+#'
+#' @inheritParams extract_tibbles
+#' @param type A character string for the type of join to be performed borrowing from
+#' dplyr. One of "left", "right", "inner", or "full". Default "left".
+#' @inheritParams dplyr::inner_join
+#'
+#'
+#' @returns A `tibble`.
+#'
+#' @export
+
+join_data_tibbles <- function(supertbl,
                               x,
                               y,
                               by = NULL,
                               type = "left",
                               suffix = c(".x", ".y")) {
-  record_id_field <- get_record_id_field(suprtbl$redcap_data[[1]]) # nolint: object_usage_linter
+  record_id_field <- get_record_id_field(supertbl$redcap_data[[1]]) # nolint: object_usage_linter
   join_fn <- get_join_fn(type)
 
   # Append the supertibble with the primary keys column
-  suprtbl <- suprtbl |>
+  supertbl <- supertbl |>
     mutate(pks = purrr::map_chr(.data$redcap_data, ~ extract_keys(., record_id_field = record_id_field))) %>%
     select(.data$redcap_form_name, .data$redcap_form_label, .data$redcap_data,
-           .data$redcap_metadata, .data$structure, .data$pks, .data$redcap_events)
+           .data$redcap_metadata, .data$structure, .data$pks, matches("redcap_events"))
 
-  tbl_x <- extract_tibble(suprtbl, x)
-  tbl_x_structure <- get_structure(suprtbl, x)
-  tbl_y <- extract_tibble(suprtbl, y)
-  tbl_y_structure <- get_structure(suprtbl, y)
+  tbl_x <- extract_tibble(supertbl, x)
+  tbl_x_structure <- get_structure(supertbl, x)
+  tbl_y <- extract_tibble(supertbl, y)
+  tbl_y_structure <- get_structure(supertbl, y)
 
   # Mixed structure requires special handling
   is_mixed <- any(c(tbl_x_structure, tbl_y_structure) == "mixed")
 
   if (is_mixed) {
     # TODO: Determine if ok to remove
-    # required_columns <- c("redcap_event_instance", "redcap_form_instance") # nolint: commented_code_linter
-    # tbl_x <- add_missing_columns(tbl_x, required_columns) # nolint: commented_code_linter
-    # tbl_y <- add_missing_columns(tbl_y, required_columns) # nolint: commented_code_linter
+    required_columns <- c("redcap_event_instance", "redcap_form_instance") # nolint: commented_code_linter
+    tbl_x <- add_missing_columns(tbl_x, required_columns) # nolint: commented_code_linter
+    tbl_y <- add_missing_columns(tbl_y, required_columns) # nolint: commented_code_linter
 
-    tbl_x_type <- get_type(suprtbl, x)
-    tbl_y_type <- get_type(suprtbl, y)
+    tbl_x_type <- get_type(supertbl, x)
+    tbl_y_type <- get_type(supertbl, y)
 
+    # Add on .repeat_type specifier for the redcap_event column
     tbl_x <- left_join(tbl_x, tbl_x_type, by = "redcap_event")
     tbl_y <- left_join(tbl_y, tbl_y_type, by = "redcap_event")
   }
 
   join_fn <- get_join_fn(type)
-  by <- build_by(suprtbl, x, y, is_mixed)
+  by <- build_by(supertbl, x, y, is_mixed)
 
-  join_tbls(tbl_x, tbl_y, join_fn, by, suffix, is_mixed) %>%
+  join_fn(tbl_x, tbl_y, by, suffix) %>%
     relocate(starts_with("form_status_complete"), .after = everything()) %>%
     select(-starts_with(".repeat_type"))
 }
 
-extract_keys <- function(suprtbl, record_id_field) {
+#' @title Extract the primary keys associated with a data tibble
+#'
+#' @param data_tbl A data tibble from a supertibble
+#' @param record_id_field The record ID field for the REDCap project, retrieved
+#' as an ouput of [get_record_id_field()]
+#'
+#' @returns a character string
+#'
+#' @keywords internal
+extract_keys <- function(data_tbl, record_id_field) {
   redcap_keys <- c(
     record_id_field, "redcap_event", "redcap_form_instance",
     "redcap_event_instance", "redcap_arm"
   )
 
-  suprtbl |>
+  data_tbl |>
     colnames() |>
     intersect(redcap_keys) |>
     paste(collapse = ", ")
 }
 
-get_structure <- function(suprtbl, tbl_name) {
-  suprtbl$structure[suprtbl$redcap_form_name == tbl_name]
+#' @title Retrieve the structure data for a form from the supertibble
+#'
+#' @inheritParams join_data_tibbles
+#' @param tbl_name the `x` or `y` values assigned to `join_data_tibbles`
+#'
+#' @keywords internal
+get_structure <- function(supertbl, tbl_name) {
+  supertbl$structure[supertbl$redcap_form_name == tbl_name]
 }
 
-get_type <- function(suprtbl, tbl_name) {
-  suprtbl %>%
+#' @title Retrieve the repeat event type data for a form from the supertibble
+#'
+#' @inheritParams join_data_tibbles
+#' @param tbl_name the `x` or `y` values assigned to `join_data_tibbles`
+#'
+#' @keywords internal
+get_type <- function(supertbl, tbl_name) {
+  supertbl %>%
     filter(.data$redcap_form_name == tbl_name) %>%
     pull(.data$redcap_events) %>%
     pluck(1) %>%
@@ -68,6 +110,13 @@ get_type <- function(suprtbl, tbl_name) {
     unique()
 }
 
+#' @title Retrieve the appropriate user specified join function
+#'
+#' @inheritParams join_data_tibbles
+#'
+#' @returns a function
+#'
+#' @keywords internal
 get_join_fn <- function(type) {
   join_functions <- list(
     left = dplyr::left_join,
@@ -76,24 +125,32 @@ get_join_fn <- function(type) {
     full = dplyr::full_join
   )
 
-  # Check if the specified type is valid
-  # TODO: Make a standard check function with cli
   if (!type %in% names(join_functions)) {
-    stop("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.")
+    cli::cli_abort("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.")
   }
 
   join_functions[[type]]
 }
 
-build_by <- function(suprtbl, x, y, is_mixed) {
-  x_pks <- suprtbl$pks[suprtbl$redcap_form_name == x] %>%
+#' @title Intelligently retrieve the join by cols
+#'
+#' @inheritParams join_data_tibbles
+#' @param is_mixed TRUE/FALSE, whether or not the given tables contain a mixed structure
+#'
+#' @returns a character vector
+#'
+#' @keywords interal
+build_by <- function(supertbl, x, y, is_mixed) {
+  x_pks <- supertbl$pks[supertbl$redcap_form_name == x] %>%
     stringr::str_split(", ", simplify = TRUE)
-  y_pks <- suprtbl$pks[suprtbl$redcap_form_name == y] %>%
+  y_pks <- supertbl$pks[supertbl$redcap_form_name == y] %>%
     stringr::str_split(", ", simplify = TRUE)
 
   out <- intersect(x_pks, y_pks)
 
   if (is_mixed) {
+    # For mixed tables, depending on the .repeat_types present tables may not
+    # have event and form instance columns and must be added
     out <- c(out, "redcap_event_instance", "redcap_form_instance") %>%
       # TODO: Make standard, currently needed for repeat/mixed joins
       unique()
@@ -102,37 +159,54 @@ build_by <- function(suprtbl, x, y, is_mixed) {
   out
 }
 
-# TODO: Determine if ok to remove
+#' @keywords intenral
+#' @noRd
 add_missing_columns <- function(tbl, columns) {
   missing_cols <- setdiff(columns, names(tbl))
   tbl[missing_cols] <- NA
   return(tbl)
 }
 
+#' @title Join data tbls of various structures and types
+#'
+#' @description
+#' [join_tbls()] either performs the `join_fun()` specified by the `type` or, in
+#' the event of mixed structure data tibble joins, will seek to split data into
+#' three categories before performing the joins. The key identifiers here are
+#' `redcap_form_instance` and the added `.repeat_type` columns.
+
 join_tbls <- function(x, y, join_fn, by, suffix, is_mixed) {
   if (is_mixed) {
     # Filter based on .repeat_type
+    # If repeating together events, can use redcap_form_instance (NA) and redcap_event_instance
     x_together <- x %>% filter(.data$.repeat_type == "repeat_together")
     y_together <- y %>% filter(.data$.repeat_type == "repeat_together")
 
-    x_separate <- x %>% filter(.data$.repeat_type == "repeat_separate")
-    y_separate <- y %>% filter(.data$.repeat_type == "repeat_separate")
+    # repeating instruments for separately repeating events shouldn't be joined by redcap_form_instance
+    x_separate_repeating <- x %>% filter(.data$.repeat_type == "repeat_separate"  & !is.na(.data$redcap_form_instance))
+    y_separate_repeating <- y %>% filter(.data$.repeat_type == "repeat_separate"  & !is.na(.data$redcap_form_instance))
+
+    # nonrepeating instruments for separately repeating events should be joined by redcap_form_instance
+    x_separate_nonrepeating <- x %>% filter(.data$.repeat_type == "repeat_separate" & is.na(.data$redcap_form_instance))
+    y_separate_nonrepeating <- y %>% filter(.data$.repeat_type == "repeat_separate" & is.na(.data$redcap_form_instance))
 
     # Join together sets
     joined_together <- x_together %>%
-      join_fn(y_together, by = by[by != "redcap_form_instance"], suffix = suffix)
+      join_fn(y_together, by = by, suffix = suffix)
 
-    # Join separate sets
-    joined_separate <- x_separate %>%
-      join_fn(y_separate, by = by[by != "redcap_form_instance"], suffix = suffix)
+    joined_separate_repeating <- x_separate_repeating %>%
+      join_fn(y_separate_repeating, by = by[by != "redcap_form_instance"], suffix = suffix)
 
-    # Bind rows together
-    result <- bind_rows(joined_together, joined_separate) %>%
+    joined_separate_nonrepeating <- x_separate_nonrepeating %>%
+      join_fn(y_separate_nonrepeating, by = by, suffix = suffix)
+
+    # Bind rows together, issue in arrangmenet of output
+    result <- bind_rows(joined_together, joined_separate_repeating) %>%
+      bind_rows(joined_separate_nonrepeating) %>%
       drop_non_suffix_columns()
   } else {
     result <- join_fn(x, y, by = by, suffix = suffix)
   }
-
   result
 }
 
diff --git a/man/build_by.Rd b/man/build_by.Rd
new file mode 100644
index 00000000..e491c439
--- /dev/null
+++ b/man/build_by.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/join_data_tibbles.R
+\name{build_by}
+\alias{build_by}
+\title{Intelligently retrieve the join by cols}
+\usage{
+build_by(supertbl, x, y, is_mixed)
+}
+\arguments{
+\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.}
+
+\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
+lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
+more details.}
+
+\item{is_mixed}{TRUE/FALSE, whether or not the given tables contain a mixed structure}
+}
+\value{
+a character vector
+}
+\description{
+Intelligently retrieve the join by cols
+}
+\keyword{interal}
diff --git a/man/extract_keys.Rd b/man/extract_keys.Rd
new file mode 100644
index 00000000..b497120b
--- /dev/null
+++ b/man/extract_keys.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/join_data_tibbles.R
+\name{extract_keys}
+\alias{extract_keys}
+\title{Extract the primary keys associated with a data tibble}
+\usage{
+extract_keys(data_tbl, record_id_field)
+}
+\arguments{
+\item{data_tbl}{A data tibble from a supertibble}
+
+\item{record_id_field}{The record ID field for the REDCap project, retrieved
+as an ouput of \code{\link[=get_record_id_field]{get_record_id_field()}}}
+}
+\value{
+a character string
+}
+\description{
+Extract the primary keys associated with a data tibble
+}
+\keyword{internal}
diff --git a/man/get_join_fn.Rd b/man/get_join_fn.Rd
new file mode 100644
index 00000000..f99d7b01
--- /dev/null
+++ b/man/get_join_fn.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/join_data_tibbles.R
+\name{get_join_fn}
+\alias{get_join_fn}
+\title{Retrieve the appropriate user specified join function}
+\usage{
+get_join_fn(type)
+}
+\arguments{
+\item{type}{A character string for the type of join to be performed borrowing from
+dplyr. One of "left", "right", "inner", or "full". Default "left".}
+}
+\value{
+a function
+}
+\description{
+Retrieve the appropriate user specified join function
+}
+\keyword{internal}
diff --git a/man/get_structure.Rd b/man/get_structure.Rd
new file mode 100644
index 00000000..b8a901eb
--- /dev/null
+++ b/man/get_structure.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/join_data_tibbles.R
+\name{get_structure}
+\alias{get_structure}
+\title{Retrieve the structure data for a form from the supertibble}
+\usage{
+get_structure(supertbl, tbl_name)
+}
+\arguments{
+\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.}
+
+\item{tbl_name}{the \code{x} or \code{y} values assigned to \code{join_data_tibbles}}
+}
+\description{
+Retrieve the structure data for a form from the supertibble
+}
+\keyword{internal}
diff --git a/man/get_type.Rd b/man/get_type.Rd
new file mode 100644
index 00000000..f3088095
--- /dev/null
+++ b/man/get_type.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/join_data_tibbles.R
+\name{get_type}
+\alias{get_type}
+\title{Retrieve the repeat event type data for a form from the supertibble}
+\usage{
+get_type(supertbl, tbl_name)
+}
+\arguments{
+\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.}
+
+\item{tbl_name}{the \code{x} or \code{y} values assigned to \code{join_data_tibbles}}
+}
+\description{
+Retrieve the repeat event type data for a form from the supertibble
+}
+\keyword{internal}
diff --git a/man/join_data_tibbles.Rd b/man/join_data_tibbles.Rd
new file mode 100644
index 00000000..75dc94b1
--- /dev/null
+++ b/man/join_data_tibbles.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/join_data_tibbles.R
+\name{join_data_tibbles}
+\alias{join_data_tibbles}
+\title{Join Two Data Tibbles from a Supertibble}
+\usage{
+join_data_tibbles(
+  supertbl,
+  x,
+  y,
+  by = NULL,
+  type = "left",
+  suffix = c(".x", ".y")
+)
+}
+\arguments{
+\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.}
+
+\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
+lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
+more details.}
+
+\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
+vector of variables to join by.
+
+If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
+variables in common across \code{x} and \code{y}. A message lists the variables so
+that you can check they're correct; suppress the message by supplying \code{by}
+explicitly.
+
+To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
+specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.
+
+To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
+multiple expressions. For example, \code{join_by(a == b, c == d)} will match
+\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
+\code{x} and \code{y}, you can shorten this by listing only the variable names, like
+\code{join_by(a, c)}.
+
+\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
+joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
+these types of joins.
+
+For simple equality joins, you can alternatively specify a character vector
+of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
+to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
+use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.
+
+To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
+\code{\link[dplyr:cross_join]{cross_join()}}.}
+
+\item{type}{A character string for the type of join to be performed borrowing from
+dplyr. One of "left", "right", "inner", or "full". Default "left".}
+
+\item{suffix}{If there are non-joined duplicate variables in \code{x} and
+\code{y}, these suffixes will be added to the output to disambiguate them.
+Should be a character vector of length 2.}
+}
+\value{
+A \code{tibble}.
+}
+\description{
+The \code{\link[=join_data_tibbles]{join_data_tibbles()}} function provides a way to intelligently join two
+data tibbles from a REDCaTidieR supertibble. A supertibble is an output of
+\code{\link[=read_redcap]{read_redcap()}}.
+
+\code{\link[=join_data_tibbles]{join_data_tibbles()}} attempts to correctly assign the \code{by} when left \code{NULL} (the default)
+based on detecting the data tibble structure of \code{x} and \code{y}.
+}

From 851450fad8dadd080b1bd0d57d9089be924e984d Mon Sep 17 00:00:00 2001
From: Richard Hanna <HANNAR1@EMAIL.CHOP.EDU>
Date: Thu, 26 Sep 2024 13:25:15 -0400
Subject: [PATCH 13/17] Documentation fixes

---
 NAMESPACE                |  2 ++
 R/REDCapTidieR-package.R |  4 +--
 R/clean_redcap_long.R    |  2 +-
 R/join_data_tibbles.R    | 10 ++++++-
 R/read_redcap.R          |  4 +--
 man/build_by.Rd          |  2 +-
 man/join_tbls.Rd         | 60 ++++++++++++++++++++++++++++++++++++++++
 pkgdown/_pkgdown.yml     |  1 +
 8 files changed, 78 insertions(+), 7 deletions(-)
 create mode 100644 man/join_tbls.Rd

diff --git a/NAMESPACE b/NAMESPACE
index e584bcaf..056ce167 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -46,6 +46,7 @@ importFrom(dplyr,bind_rows)
 importFrom(dplyr,case_when)
 importFrom(dplyr,coalesce)
 importFrom(dplyr,cur_column)
+importFrom(dplyr,distinct)
 importFrom(dplyr,filter)
 importFrom(dplyr,first)
 importFrom(dplyr,group_by)
@@ -148,6 +149,7 @@ importFrom(tidyselect,any_of)
 importFrom(tidyselect,ends_with)
 importFrom(tidyselect,eval_select)
 importFrom(tidyselect,everything)
+importFrom(tidyselect,matches)
 importFrom(tidyselect,starts_with)
 importFrom(tidyselect,where)
 importFrom(vctrs,vec_ptype)
diff --git a/R/REDCapTidieR-package.R b/R/REDCapTidieR-package.R
index 5771fc22..4c8d6142 100644
--- a/R/REDCapTidieR-package.R
+++ b/R/REDCapTidieR-package.R
@@ -7,7 +7,7 @@
 #' @importFrom dplyr %>% across bind_rows case_when filter group_by if_any if_else
 #' left_join mutate pull recode relocate rename right_join row_number rowwise
 #' select slice summarise ungroup coalesce cur_column bind_cols first nth n_distinct
-#' first
+#' first distinct
 #' @importFrom formattable percent
 #' @importFrom lobstr obj_size
 #' @importFrom lubridate is.difftime is.period is.POSIXt is.Date
@@ -27,7 +27,7 @@
 #' @importFrom tidyr complete fill pivot_wider nest separate_wider_delim unnest
 #' unnest_wider
 #' @importFrom tidyselect all_of any_of ends_with eval_select everything
-#' starts_with where
+#' starts_with where matches
 #' @importFrom vctrs vec_ptype_abbr vec_ptype
 #' @importFrom pillar tbl_sum
 #' @importFrom readr parse_logical parse_integer parse_double parse_date parse_time
diff --git a/R/clean_redcap_long.R b/R/clean_redcap_long.R
index 13171692..1dc7b549 100644
--- a/R/clean_redcap_long.R
+++ b/R/clean_redcap_long.R
@@ -322,7 +322,7 @@ distill_repeat_table_long <- function(form_name,
     filter(
       (
         !is.na(.data$redcap_form_instance) |
-          if_any(starts_with("redcap_event_instance"), ~ !is.na(.))
+          if_any(matches("redcap_event_instance"), ~ !is.na(.))
       ) &
         .data$redcap_repeat_instrument == my_form
     )
diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R
index 57a9871d..3db4912b 100644
--- a/R/join_data_tibbles.R
+++ b/R/join_data_tibbles.R
@@ -139,7 +139,7 @@ get_join_fn <- function(type) {
 #'
 #' @returns a character vector
 #'
-#' @keywords interal
+#' @keywords internal
 build_by <- function(supertbl, x, y, is_mixed) {
   x_pks <- supertbl$pks[supertbl$redcap_form_name == x] %>%
     stringr::str_split(", ", simplify = TRUE)
@@ -174,6 +174,14 @@ add_missing_columns <- function(tbl, columns) {
 #' the event of mixed structure data tibble joins, will seek to split data into
 #' three categories before performing the joins. The key identifiers here are
 #' `redcap_form_instance` and the added `.repeat_type` columns.
+#'
+#' @inheritParams join_data_tibbles
+#' @param join_fn the user specified join function type output by [get_join_fn()]
+#' @param is_mixed TRUE/FALSE mixed data structure
+#'
+#' @returns a dataframe
+#'
+#' @keywords internal
 
 join_tbls <- function(x, y, join_fn, by, suffix, is_mixed) {
   if (is_mixed) {
diff --git a/R/read_redcap.R b/R/read_redcap.R
index 0e5e8e6f..af098fc7 100644
--- a/R/read_redcap.R
+++ b/R/read_redcap.R
@@ -530,7 +530,7 @@ calc_metadata_stats <- function(data) {
 
 get_repeat_event_types <- function(data) {
   out <- data %>%
-    dplyr::distinct(.data$redcap_event_name, .data$redcap_repeat_instrument, .data$redcap_repeat_instance) %>%
+    distinct(.data$redcap_event_name, .data$redcap_repeat_instrument, .data$redcap_repeat_instance) %>%
     mutate(
       repeat_type = case_when(
         !is.na(redcap_event_name) & !is.na(redcap_repeat_instrument) & !is.na(redcap_repeat_instance) ~
@@ -540,7 +540,7 @@ get_repeat_event_types <- function(data) {
         TRUE ~ "nonrepeating"
       )
     ) %>%
-    dplyr::distinct(.data$redcap_event_name, .data$repeat_type)
+    distinct(.data$redcap_event_name, .data$repeat_type)
 
   # Check for instances where the same event is labelled as nonrepeating & repeating separate
   # If this is the case, it must be repeating separate (there is just data that qualifies as both)
diff --git a/man/build_by.Rd b/man/build_by.Rd
index e491c439..81696fb2 100644
--- a/man/build_by.Rd
+++ b/man/build_by.Rd
@@ -21,4 +21,4 @@ a character vector
 \description{
 Intelligently retrieve the join by cols
 }
-\keyword{interal}
+\keyword{internal}
diff --git a/man/join_tbls.Rd b/man/join_tbls.Rd
new file mode 100644
index 00000000..95fd52b5
--- /dev/null
+++ b/man/join_tbls.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/join_data_tibbles.R
+\name{join_tbls}
+\alias{join_tbls}
+\title{Join data tbls of various structures and types}
+\usage{
+join_tbls(x, y, join_fn, by, suffix, is_mixed)
+}
+\arguments{
+\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
+lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
+more details.}
+
+\item{join_fn}{the user specified join function type output by \code{\link[=get_join_fn]{get_join_fn()}}}
+
+\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
+vector of variables to join by.
+
+If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
+variables in common across \code{x} and \code{y}. A message lists the variables so
+that you can check they're correct; suppress the message by supplying \code{by}
+explicitly.
+
+To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
+specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.
+
+To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
+multiple expressions. For example, \code{join_by(a == b, c == d)} will match
+\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
+\code{x} and \code{y}, you can shorten this by listing only the variable names, like
+\code{join_by(a, c)}.
+
+\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
+joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
+these types of joins.
+
+For simple equality joins, you can alternatively specify a character vector
+of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
+to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
+use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.
+
+To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
+\code{\link[dplyr:cross_join]{cross_join()}}.}
+
+\item{suffix}{If there are non-joined duplicate variables in \code{x} and
+\code{y}, these suffixes will be added to the output to disambiguate them.
+Should be a character vector of length 2.}
+
+\item{is_mixed}{TRUE/FALSE mixed data structure}
+}
+\value{
+a dataframe
+}
+\description{
+\code{\link[=join_tbls]{join_tbls()}} either performs the \code{join_fun()} specified by the \code{type} or, in
+the event of mixed structure data tibble joins, will seek to split data into
+three categories before performing the joins. The key identifiers here are
+\code{redcap_form_instance} and the added \code{.repeat_type} columns.
+}
+\keyword{internal}
diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
index ede73fed..9829a0fc 100644
--- a/pkgdown/_pkgdown.yml
+++ b/pkgdown/_pkgdown.yml
@@ -69,6 +69,7 @@ reference:
     Helpful functions for supertibble data analytics.
   contents:
   - combine_checkboxes
+  - join_data_tibbles
 - title: "Data"
   contents:
   - superheroes_supertbl

From 0fdb9bfb5eb7d43e0a9da367fa61e0322d87f491 Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Mon, 14 Oct 2024 15:24:28 -0400
Subject: [PATCH 14/17] Move join_data_tibbles out to new branch

---
 NAMESPACE                |   1 -
 R/join_data_tibbles.R    | 238 ---------------------------------------
 man/build_by.Rd          |  24 ----
 man/extract_keys.Rd      |  21 ----
 man/get_join_fn.Rd       |  19 ----
 man/get_structure.Rd     |  17 ---
 man/get_type.Rd          |  17 ---
 man/join_data_tibbles.Rd |  69 ------------
 man/join_tbls.Rd         |  60 ----------
 pkgdown/_pkgdown.yml     |   1 -
 10 files changed, 467 deletions(-)
 delete mode 100644 R/join_data_tibbles.R
 delete mode 100644 man/build_by.Rd
 delete mode 100644 man/extract_keys.Rd
 delete mode 100644 man/get_join_fn.Rd
 delete mode 100644 man/get_structure.Rd
 delete mode 100644 man/get_type.Rd
 delete mode 100644 man/join_data_tibbles.Rd
 delete mode 100644 man/join_tbls.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 056ce167..5c16f592 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -12,7 +12,6 @@ export(fmt_strip_html)
 export(fmt_strip_trailing_colon)
 export(fmt_strip_trailing_punct)
 export(fmt_strip_whitespace)
-export(join_data_tibbles)
 export(make_labelled)
 export(read_redcap)
 export(write_redcap_xlsx)
diff --git a/R/join_data_tibbles.R b/R/join_data_tibbles.R
deleted file mode 100644
index 3db4912b..00000000
--- a/R/join_data_tibbles.R
+++ /dev/null
@@ -1,238 +0,0 @@
-#' @title Join Two Data Tibbles from a Supertibble
-#'
-#' @description
-#' The [join_data_tibbles()] function provides a way to intelligently join two
-#' data tibbles from a REDCaTidieR supertibble. A supertibble is an output of
-#' [read_redcap()].
-#'
-#' [join_data_tibbles()] attempts to correctly assign the `by` when left `NULL` (the default)
-#' based on detecting the data tibble structure of `x` and `y`.
-#'
-#' @inheritParams extract_tibbles
-#' @param type A character string for the type of join to be performed borrowing from
-#' dplyr. One of "left", "right", "inner", or "full". Default "left".
-#' @inheritParams dplyr::inner_join
-#'
-#'
-#' @returns A `tibble`.
-#'
-#' @export
-
-join_data_tibbles <- function(supertbl,
-                              x,
-                              y,
-                              by = NULL,
-                              type = "left",
-                              suffix = c(".x", ".y")) {
-  record_id_field <- get_record_id_field(supertbl$redcap_data[[1]]) # nolint: object_usage_linter
-  join_fn <- get_join_fn(type)
-
-  # Append the supertibble with the primary keys column
-  supertbl <- supertbl |>
-    mutate(pks = purrr::map_chr(.data$redcap_data, ~ extract_keys(., record_id_field = record_id_field))) %>%
-    select(.data$redcap_form_name, .data$redcap_form_label, .data$redcap_data,
-           .data$redcap_metadata, .data$structure, .data$pks, matches("redcap_events"))
-
-  tbl_x <- extract_tibble(supertbl, x)
-  tbl_x_structure <- get_structure(supertbl, x)
-  tbl_y <- extract_tibble(supertbl, y)
-  tbl_y_structure <- get_structure(supertbl, y)
-
-  # Mixed structure requires special handling
-  is_mixed <- any(c(tbl_x_structure, tbl_y_structure) == "mixed")
-
-  if (is_mixed) {
-    # TODO: Determine if ok to remove
-    required_columns <- c("redcap_event_instance", "redcap_form_instance") # nolint: commented_code_linter
-    tbl_x <- add_missing_columns(tbl_x, required_columns) # nolint: commented_code_linter
-    tbl_y <- add_missing_columns(tbl_y, required_columns) # nolint: commented_code_linter
-
-    tbl_x_type <- get_type(supertbl, x)
-    tbl_y_type <- get_type(supertbl, y)
-
-    # Add on .repeat_type specifier for the redcap_event column
-    tbl_x <- left_join(tbl_x, tbl_x_type, by = "redcap_event")
-    tbl_y <- left_join(tbl_y, tbl_y_type, by = "redcap_event")
-  }
-
-  join_fn <- get_join_fn(type)
-  by <- build_by(supertbl, x, y, is_mixed)
-
-  join_fn(tbl_x, tbl_y, by, suffix) %>%
-    relocate(starts_with("form_status_complete"), .after = everything()) %>%
-    select(-starts_with(".repeat_type"))
-}
-
-#' @title Extract the primary keys associated with a data tibble
-#'
-#' @param data_tbl A data tibble from a supertibble
-#' @param record_id_field The record ID field for the REDCap project, retrieved
-#' as an ouput of [get_record_id_field()]
-#'
-#' @returns a character string
-#'
-#' @keywords internal
-extract_keys <- function(data_tbl, record_id_field) {
-  redcap_keys <- c(
-    record_id_field, "redcap_event", "redcap_form_instance",
-    "redcap_event_instance", "redcap_arm"
-  )
-
-  data_tbl |>
-    colnames() |>
-    intersect(redcap_keys) |>
-    paste(collapse = ", ")
-}
-
-#' @title Retrieve the structure data for a form from the supertibble
-#'
-#' @inheritParams join_data_tibbles
-#' @param tbl_name the `x` or `y` values assigned to `join_data_tibbles`
-#'
-#' @keywords internal
-get_structure <- function(supertbl, tbl_name) {
-  supertbl$structure[supertbl$redcap_form_name == tbl_name]
-}
-
-#' @title Retrieve the repeat event type data for a form from the supertibble
-#'
-#' @inheritParams join_data_tibbles
-#' @param tbl_name the `x` or `y` values assigned to `join_data_tibbles`
-#'
-#' @keywords internal
-get_type <- function(supertbl, tbl_name) {
-  supertbl %>%
-    filter(.data$redcap_form_name == tbl_name) %>%
-    pull(.data$redcap_events) %>%
-    pluck(1) %>%
-    select(.data$redcap_event,
-           ".repeat_type" = .data$repeat_type) %>%
-    unique()
-}
-
-#' @title Retrieve the appropriate user specified join function
-#'
-#' @inheritParams join_data_tibbles
-#'
-#' @returns a function
-#'
-#' @keywords internal
-get_join_fn <- function(type) {
-  join_functions <- list(
-    left = dplyr::left_join,
-    right = dplyr::right_join,
-    inner = dplyr::inner_join,
-    full = dplyr::full_join
-  )
-
-  if (!type %in% names(join_functions)) {
-    cli::cli_abort("Invalid join type. Choose from 'left', 'right', 'inner', or 'full'.")
-  }
-
-  join_functions[[type]]
-}
-
-#' @title Intelligently retrieve the join by cols
-#'
-#' @inheritParams join_data_tibbles
-#' @param is_mixed TRUE/FALSE, whether or not the given tables contain a mixed structure
-#'
-#' @returns a character vector
-#'
-#' @keywords internal
-build_by <- function(supertbl, x, y, is_mixed) {
-  x_pks <- supertbl$pks[supertbl$redcap_form_name == x] %>%
-    stringr::str_split(", ", simplify = TRUE)
-  y_pks <- supertbl$pks[supertbl$redcap_form_name == y] %>%
-    stringr::str_split(", ", simplify = TRUE)
-
-  out <- intersect(x_pks, y_pks)
-
-  if (is_mixed) {
-    # For mixed tables, depending on the .repeat_types present tables may not
-    # have event and form instance columns and must be added
-    out <- c(out, "redcap_event_instance", "redcap_form_instance") %>%
-      # TODO: Make standard, currently needed for repeat/mixed joins
-      unique()
-  }
-
-  out
-}
-
-#' @keywords intenral
-#' @noRd
-add_missing_columns <- function(tbl, columns) {
-  missing_cols <- setdiff(columns, names(tbl))
-  tbl[missing_cols] <- NA
-  return(tbl)
-}
-
-#' @title Join data tbls of various structures and types
-#'
-#' @description
-#' [join_tbls()] either performs the `join_fun()` specified by the `type` or, in
-#' the event of mixed structure data tibble joins, will seek to split data into
-#' three categories before performing the joins. The key identifiers here are
-#' `redcap_form_instance` and the added `.repeat_type` columns.
-#'
-#' @inheritParams join_data_tibbles
-#' @param join_fn the user specified join function type output by [get_join_fn()]
-#' @param is_mixed TRUE/FALSE mixed data structure
-#'
-#' @returns a dataframe
-#'
-#' @keywords internal
-
-join_tbls <- function(x, y, join_fn, by, suffix, is_mixed) {
-  if (is_mixed) {
-    # Filter based on .repeat_type
-    # If repeating together events, can use redcap_form_instance (NA) and redcap_event_instance
-    x_together <- x %>% filter(.data$.repeat_type == "repeat_together")
-    y_together <- y %>% filter(.data$.repeat_type == "repeat_together")
-
-    # repeating instruments for separately repeating events shouldn't be joined by redcap_form_instance
-    x_separate_repeating <- x %>% filter(.data$.repeat_type == "repeat_separate"  & !is.na(.data$redcap_form_instance))
-    y_separate_repeating <- y %>% filter(.data$.repeat_type == "repeat_separate"  & !is.na(.data$redcap_form_instance))
-
-    # nonrepeating instruments for separately repeating events should be joined by redcap_form_instance
-    x_separate_nonrepeating <- x %>% filter(.data$.repeat_type == "repeat_separate" & is.na(.data$redcap_form_instance))
-    y_separate_nonrepeating <- y %>% filter(.data$.repeat_type == "repeat_separate" & is.na(.data$redcap_form_instance))
-
-    # Join together sets
-    joined_together <- x_together %>%
-      join_fn(y_together, by = by, suffix = suffix)
-
-    joined_separate_repeating <- x_separate_repeating %>%
-      join_fn(y_separate_repeating, by = by[by != "redcap_form_instance"], suffix = suffix)
-
-    joined_separate_nonrepeating <- x_separate_nonrepeating %>%
-      join_fn(y_separate_nonrepeating, by = by, suffix = suffix)
-
-    # Bind rows together, issue in arrangmenet of output
-    result <- bind_rows(joined_together, joined_separate_repeating) %>%
-      bind_rows(joined_separate_nonrepeating) %>%
-      drop_non_suffix_columns()
-  } else {
-    result <- join_fn(x, y, by = by, suffix = suffix)
-  }
-  result
-}
-
-drop_non_suffix_columns <- function(data) {
-  # Extract column names that contain a "."
-  # Note: We can look for periods because REDCap will not allow variables to made
-  # with them. Only user tampering with column names in the output would result in this.
-  dot_columns <- names(data)[grepl("\\.", names(data))]
-
-  # Extract the base column names without the suffixes (everything before the ".")
-  base_columns <- unique(sub("\\..*", "", dot_columns))
-
-  # Filter out base columns that do not exist without a suffix
-  columns_to_drop <- base_columns[base_columns %in% names(data)]
-
-  # Drop only those base columns that exist both with and without suffixes
-  data <- data %>%
-    select(-all_of(columns_to_drop))
-
-  return(data)
-}
diff --git a/man/build_by.Rd b/man/build_by.Rd
deleted file mode 100644
index 81696fb2..00000000
--- a/man/build_by.Rd
+++ /dev/null
@@ -1,24 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/join_data_tibbles.R
-\name{build_by}
-\alias{build_by}
-\title{Intelligently retrieve the join by cols}
-\usage{
-build_by(supertbl, x, y, is_mixed)
-}
-\arguments{
-\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.}
-
-\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
-lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
-more details.}
-
-\item{is_mixed}{TRUE/FALSE, whether or not the given tables contain a mixed structure}
-}
-\value{
-a character vector
-}
-\description{
-Intelligently retrieve the join by cols
-}
-\keyword{internal}
diff --git a/man/extract_keys.Rd b/man/extract_keys.Rd
deleted file mode 100644
index b497120b..00000000
--- a/man/extract_keys.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/join_data_tibbles.R
-\name{extract_keys}
-\alias{extract_keys}
-\title{Extract the primary keys associated with a data tibble}
-\usage{
-extract_keys(data_tbl, record_id_field)
-}
-\arguments{
-\item{data_tbl}{A data tibble from a supertibble}
-
-\item{record_id_field}{The record ID field for the REDCap project, retrieved
-as an ouput of \code{\link[=get_record_id_field]{get_record_id_field()}}}
-}
-\value{
-a character string
-}
-\description{
-Extract the primary keys associated with a data tibble
-}
-\keyword{internal}
diff --git a/man/get_join_fn.Rd b/man/get_join_fn.Rd
deleted file mode 100644
index f99d7b01..00000000
--- a/man/get_join_fn.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/join_data_tibbles.R
-\name{get_join_fn}
-\alias{get_join_fn}
-\title{Retrieve the appropriate user specified join function}
-\usage{
-get_join_fn(type)
-}
-\arguments{
-\item{type}{A character string for the type of join to be performed borrowing from
-dplyr. One of "left", "right", "inner", or "full". Default "left".}
-}
-\value{
-a function
-}
-\description{
-Retrieve the appropriate user specified join function
-}
-\keyword{internal}
diff --git a/man/get_structure.Rd b/man/get_structure.Rd
deleted file mode 100644
index b8a901eb..00000000
--- a/man/get_structure.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/join_data_tibbles.R
-\name{get_structure}
-\alias{get_structure}
-\title{Retrieve the structure data for a form from the supertibble}
-\usage{
-get_structure(supertbl, tbl_name)
-}
-\arguments{
-\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.}
-
-\item{tbl_name}{the \code{x} or \code{y} values assigned to \code{join_data_tibbles}}
-}
-\description{
-Retrieve the structure data for a form from the supertibble
-}
-\keyword{internal}
diff --git a/man/get_type.Rd b/man/get_type.Rd
deleted file mode 100644
index f3088095..00000000
--- a/man/get_type.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/join_data_tibbles.R
-\name{get_type}
-\alias{get_type}
-\title{Retrieve the repeat event type data for a form from the supertibble}
-\usage{
-get_type(supertbl, tbl_name)
-}
-\arguments{
-\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.}
-
-\item{tbl_name}{the \code{x} or \code{y} values assigned to \code{join_data_tibbles}}
-}
-\description{
-Retrieve the repeat event type data for a form from the supertibble
-}
-\keyword{internal}
diff --git a/man/join_data_tibbles.Rd b/man/join_data_tibbles.Rd
deleted file mode 100644
index 75dc94b1..00000000
--- a/man/join_data_tibbles.Rd
+++ /dev/null
@@ -1,69 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/join_data_tibbles.R
-\name{join_data_tibbles}
-\alias{join_data_tibbles}
-\title{Join Two Data Tibbles from a Supertibble}
-\usage{
-join_data_tibbles(
-  supertbl,
-  x,
-  y,
-  by = NULL,
-  type = "left",
-  suffix = c(".x", ".y")
-)
-}
-\arguments{
-\item{supertbl}{A supertibble generated by \code{read_redcap()}. Required.}
-
-\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
-lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
-more details.}
-
-\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
-vector of variables to join by.
-
-If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
-variables in common across \code{x} and \code{y}. A message lists the variables so
-that you can check they're correct; suppress the message by supplying \code{by}
-explicitly.
-
-To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
-specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.
-
-To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
-multiple expressions. For example, \code{join_by(a == b, c == d)} will match
-\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
-\code{x} and \code{y}, you can shorten this by listing only the variable names, like
-\code{join_by(a, c)}.
-
-\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
-joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
-these types of joins.
-
-For simple equality joins, you can alternatively specify a character vector
-of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
-to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
-use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.
-
-To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
-\code{\link[dplyr:cross_join]{cross_join()}}.}
-
-\item{type}{A character string for the type of join to be performed borrowing from
-dplyr. One of "left", "right", "inner", or "full". Default "left".}
-
-\item{suffix}{If there are non-joined duplicate variables in \code{x} and
-\code{y}, these suffixes will be added to the output to disambiguate them.
-Should be a character vector of length 2.}
-}
-\value{
-A \code{tibble}.
-}
-\description{
-The \code{\link[=join_data_tibbles]{join_data_tibbles()}} function provides a way to intelligently join two
-data tibbles from a REDCaTidieR supertibble. A supertibble is an output of
-\code{\link[=read_redcap]{read_redcap()}}.
-
-\code{\link[=join_data_tibbles]{join_data_tibbles()}} attempts to correctly assign the \code{by} when left \code{NULL} (the default)
-based on detecting the data tibble structure of \code{x} and \code{y}.
-}
diff --git a/man/join_tbls.Rd b/man/join_tbls.Rd
deleted file mode 100644
index 95fd52b5..00000000
--- a/man/join_tbls.Rd
+++ /dev/null
@@ -1,60 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/join_data_tibbles.R
-\name{join_tbls}
-\alias{join_tbls}
-\title{Join data tbls of various structures and types}
-\usage{
-join_tbls(x, y, join_fn, by, suffix, is_mixed)
-}
-\arguments{
-\item{x, y}{A pair of data frames, data frame extensions (e.g. a tibble), or
-lazy data frames (e.g. from dbplyr or dtplyr). See \emph{Methods}, below, for
-more details.}
-
-\item{join_fn}{the user specified join function type output by \code{\link[=get_join_fn]{get_join_fn()}}}
-
-\item{by}{A join specification created with \code{\link[dplyr:join_by]{join_by()}}, or a character
-vector of variables to join by.
-
-If \code{NULL}, the default, \verb{*_join()} will perform a natural join, using all
-variables in common across \code{x} and \code{y}. A message lists the variables so
-that you can check they're correct; suppress the message by supplying \code{by}
-explicitly.
-
-To join on different variables between \code{x} and \code{y}, use a \code{\link[dplyr:join_by]{join_by()}}
-specification. For example, \code{join_by(a == b)} will match \code{x$a} to \code{y$b}.
-
-To join by multiple variables, use a \code{\link[dplyr:join_by]{join_by()}} specification with
-multiple expressions. For example, \code{join_by(a == b, c == d)} will match
-\code{x$a} to \code{y$b} and \code{x$c} to \code{y$d}. If the column names are the same between
-\code{x} and \code{y}, you can shorten this by listing only the variable names, like
-\code{join_by(a, c)}.
-
-\code{\link[dplyr:join_by]{join_by()}} can also be used to perform inequality, rolling, and overlap
-joins. See the documentation at \link[dplyr:join_by]{?join_by} for details on
-these types of joins.
-
-For simple equality joins, you can alternatively specify a character vector
-of variable names to join by. For example, \code{by = c("a", "b")} joins \code{x$a}
-to \code{y$a} and \code{x$b} to \code{y$b}. If variable names differ between \code{x} and \code{y},
-use a named character vector like \code{by = c("x_a" = "y_a", "x_b" = "y_b")}.
-
-To perform a cross-join, generating all combinations of \code{x} and \code{y}, see
-\code{\link[dplyr:cross_join]{cross_join()}}.}
-
-\item{suffix}{If there are non-joined duplicate variables in \code{x} and
-\code{y}, these suffixes will be added to the output to disambiguate them.
-Should be a character vector of length 2.}
-
-\item{is_mixed}{TRUE/FALSE mixed data structure}
-}
-\value{
-a dataframe
-}
-\description{
-\code{\link[=join_tbls]{join_tbls()}} either performs the \code{join_fun()} specified by the \code{type} or, in
-the event of mixed structure data tibble joins, will seek to split data into
-three categories before performing the joins. The key identifiers here are
-\code{redcap_form_instance} and the added \code{.repeat_type} columns.
-}
-\keyword{internal}
diff --git a/pkgdown/_pkgdown.yml b/pkgdown/_pkgdown.yml
index 9829a0fc..ede73fed 100644
--- a/pkgdown/_pkgdown.yml
+++ b/pkgdown/_pkgdown.yml
@@ -69,7 +69,6 @@ reference:
     Helpful functions for supertibble data analytics.
   contents:
   - combine_checkboxes
-  - join_data_tibbles
 - title: "Data"
   contents:
   - superheroes_supertbl

From 2d0e61fd80c54640aa23d5d27f6d09580cefc78b Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Mon, 14 Oct 2024 15:24:51 -0400
Subject: [PATCH 15/17] Update snapshot per laballed vignette update

---
 tests/testthat/_snaps/write.md | 154 +++++++++++++++++++++++++++++----
 1 file changed, 138 insertions(+), 16 deletions(-)

diff --git a/tests/testthat/_snaps/write.md b/tests/testthat/_snaps/write.md
index d77388fc..c6943f93 100644
--- a/tests/testthat/_snaps/write.md
+++ b/tests/testthat/_snaps/write.md
@@ -16,7 +16,8 @@
       9              api_no_access_2               API No Access 2
       10                      survey                        Survey
       11               repeat_survey                 Repeat Survey
-      12             REDCap Metadata                          <NA>
+      12           labelled_vignette             Labelled Vignette
+      13             REDCap Metadata                          <NA>
          Repeating or Nonrepeating? # of Rows in Data # of Columns in Data
       2                   structure         data_rows            data_cols
       3                nonrepeating                 4                    4
@@ -28,19 +29,21 @@
       9                nonrepeating                 4                    5
       10               nonrepeating                 4                    9
       11                  repeating                 3                   10
-      12                       <NA>              <NA>                 <NA>
+      12               nonrepeating                 4                    7
+      13                       <NA>              <NA>                 <NA>
          Data size in Memory % of Data Missing                NA Sheet #
       2            data_size       data_na_pct form_complete_pct Sheet #
       3              2.28 kB              0.25                 0       1
       4              1.94 kB               0.5                 0       2
       5              2.58 kB                 0                 0       3
-      6              7.71 kB 0.293103448275862                 0       4
+      6              7.71 kB  0.28448275862069                 0       4
       7              7.40 kB              0.75                 0       5
       8              1.78 kB                 1                 0       6
       9              2.06 kB                 1                 0       7
       10             3.73 kB 0.392857142857143                 0       8
       11             3.94 kB 0.142857142857143                 0       9
-      12                <NA>              <NA>              <NA>      10
+      12             3.04 kB                 0                 0      10
+      13                <NA>              <NA>              <NA>      11
       
       [[1]][[2]]
         Record ID Text Box Input Text Box Input REDCap Instrument Completed?
@@ -77,7 +80,7 @@
       2 record_id text  note calculated dropdown_single radio_single
       3         1 text notes          2             one            B
       4         2 <NA>  <NA>          2           three            C
-      5         3 <NA>  <NA>       <NA>            <NA>         <NA>
+      5         3 <NA>  <NA>          2            <NA>         <NA>
       6         4 <NA>  <NA>          2            <NA>         <NA>
                            NA                    NA                    NA
       2 radio_duplicate_label checkbox_multiple___1 checkbox_multiple___2
@@ -225,6 +228,20 @@
       5     2022-11-09 12:21:04                     Complete
       
       [[1]][[11]]
+        Record ID   Text Box   Radio Buttons  Checkbox: A  Checkbox: B  Checkbox: C
+      2 record_id text_box_1 radio_buttons_1 checkbox___1 checkbox___2 checkbox___3
+      3         1   Record 1               A      Checked    Unchecked    Unchecked
+      4         2   Record 2               B      Checked      Checked    Unchecked
+      5         3   Record 3               C    Unchecked      Checked      Checked
+      6         4   Record 4               A    Unchecked    Unchecked    Unchecked
+        REDCap Instrument Completed?
+      2         form_status_complete
+      3                     Complete
+      4                     Complete
+      5                     Complete
+      6                     Complete
+      
+      [[1]][[12]]
               REDCap Instrument Name REDCap Instrument Description
       2             redcap_form_name             redcap_form_label
       3                         <NA>                          <NA>
@@ -293,6 +310,11 @@
       66               repeat_survey                 Repeat Survey
       67               repeat_survey                 Repeat Survey
       68               repeat_survey                 Repeat Survey
+      69           labelled_vignette             Labelled Vignette
+      70           labelled_vignette             Labelled Vignette
+      71           labelled_vignette             Labelled Vignette
+      72           labelled_vignette             Labelled Vignette
+      73           labelled_vignette             Labelled Vignette
                     Variable / Field Name
       2                        field_name
       3                         record_id
@@ -361,6 +383,11 @@
       66   repeatsurvey_checkbox_v2___one
       67   repeatsurvey_checkbox_v2___two
       68 repeatsurvey_checkbox_v2___three
+      69                       text_box_1
+      70                  radio_buttons_1
+      71                     checkbox___1
+      72                     checkbox___2
+      73                     checkbox___3
                                                              Field Label Field Type
       2                                                      field_label field_type
       3                                                        Record ID       text
@@ -429,6 +456,11 @@
       66                                        Checkbox Field: Choice 1   checkbox
       67                                        Checkbox Field: Choice 2   checkbox
       68                                        Checkbox Field: Choice 3   checkbox
+      69                                                        Text Box       text
+      70                                                   Radio Buttons      radio
+      71                                                     Checkbox: A   checkbox
+      72                                                     Checkbox: B   checkbox
+      73                                                     Checkbox: C   checkbox
          Section Header Prior to this Field
       2                      section_header
       3                                <NA>
@@ -497,6 +529,11 @@
       66                               <NA>
       67                               <NA>
       68                               <NA>
+      69                               <NA>
+      70                               <NA>
+      71                               <NA>
+      72                               <NA>
+      73                               <NA>
                                                                        Choices, Calculations, or Slider Labels
       2                                                                         select_choices_or_calculations
       3                                                                                                   <NA>
@@ -565,6 +602,11 @@
       66                                                       one, Choice 1 | two, Choice 2 | three, Choice 3
       67                                                       one, Choice 1 | two, Choice 2 | three, Choice 3
       68                                                       one, Choice 1 | two, Choice 2 | three, Choice 3
+      69                                                                                                  <NA>
+      70                                                                                    1, A | 2, B | 3, C
+      71                                                                                    1, A | 2, B | 3, C
+      72                                                                                    1, A | 2, B | 3, C
+      73                                                                                    1, A | 2, B | 3, C
          Field Note Text Validation Type OR Show Slider Number
       2  field_note text_validation_type_or_show_slider_number
       3        <NA>                                       <NA>
@@ -633,6 +675,11 @@
       66       <NA>                                       <NA>
       67       <NA>                                       <NA>
       68       <NA>                                       <NA>
+      69       <NA>                                       <NA>
+      70       <NA>                                       <NA>
+      71       <NA>                                       <NA>
+      72       <NA>                                       <NA>
+      73       <NA>                                       <NA>
          Minimum Accepted Value for Text Validation
       2                         text_validation_min
       3                                        <NA>
@@ -701,6 +748,11 @@
       66                                       <NA>
       67                                       <NA>
       68                                       <NA>
+      69                                       <NA>
+      70                                       <NA>
+      71                                       <NA>
+      72                                       <NA>
+      73                                       <NA>
          Maximum Accepted Value for Text Validation Is this Field an Identifier?
       2                         text_validation_max                   identifier
       3                                        <NA>                         <NA>
@@ -769,6 +821,11 @@
       66                                       <NA>                         <NA>
       67                                       <NA>                         <NA>
       68                                       <NA>                         <NA>
+      69                                       <NA>                         <NA>
+      70                                       <NA>                         <NA>
+      71                                       <NA>                         <NA>
+      72                                       <NA>                         <NA>
+      73                                       <NA>                         <NA>
          Branching Logic (Show field only if...) Is this Field Required?
       2                          branching_logic          required_field
       3                                     <NA>                    <NA>
@@ -837,6 +894,11 @@
       66                                    <NA>                    <NA>
       67                                    <NA>                    <NA>
       68                                    <NA>                    <NA>
+      69                                    <NA>                    <NA>
+      70                                    <NA>                    <NA>
+      71                                    <NA>                    <NA>
+      72                                    <NA>                    <NA>
+      73                                    <NA>                    <NA>
          Custom Alignment Question Number (surveys only) Matrix Group Name
       2  custom_alignment                question_number matrix_group_name
       3              <NA>                           <NA>              <NA>
@@ -905,6 +967,11 @@
       66             <NA>                           <NA>              <NA>
       67             <NA>                           <NA>              <NA>
       68             <NA>                           <NA>              <NA>
+      69             <NA>                           <NA>              <NA>
+      70             <NA>                           <NA>              <NA>
+      71             <NA>                           <NA>              <NA>
+      72             <NA>                           <NA>              <NA>
+      73             <NA>                           <NA>              <NA>
          Matrix Ranking? Field Annotation Data Type Count of Missing Values
       2   matrix_ranking field_annotation skim_type               n_missing
       3             <NA>             <NA>      <NA>                    <NA>
@@ -916,7 +983,7 @@
       9             <NA>             <NA> character                       0
       10            <NA>             <NA> character                       3
       11            <NA>             <NA> character                       3
-      12            <NA>             <NA>   numeric                       1
+      12            <NA>             <NA>   numeric                       0
       13            <NA>             <NA>    factor                       2
       14            <NA>             <NA>    factor                       2
       15            <NA>             <NA>    factor                       4
@@ -973,6 +1040,11 @@
       66            <NA>             <NA>   logical                       0
       67            <NA>             <NA>   logical                       0
       68            <NA>             <NA>   logical                       0
+      69            <NA>             <NA> character                       0
+      70            <NA>             <NA>    factor                       0
+      71            <NA>             <NA>   logical                       0
+      72            <NA>             <NA>   logical                       0
+      73            <NA>             <NA>   logical                       0
          Proportion of Non-Missing Values Shortest Value (Fewest Characters)
       2                     complete_rate                      character.min
       3                              <NA>                               <NA>
@@ -984,7 +1056,7 @@
       9                                 1                                  1
       10                             0.25                                  4
       11                             0.25                                  5
-      12                             0.75                               <NA>
+      12                                1                               <NA>
       13                              0.5                               <NA>
       14                              0.5                               <NA>
       15                                0                               <NA>
@@ -1041,6 +1113,11 @@
       66                                1                               <NA>
       67                                1                               <NA>
       68                                1                               <NA>
+      69                                1                                  8
+      70                                1                               <NA>
+      71                                1                               <NA>
+      72                                1                               <NA>
+      73                                1                               <NA>
          Longest Value (Most Characters) Count of Empty Values Count of Unique Values
       2                    character.max       character.empty     character.n_unique
       3                             <NA>                  <NA>                   <NA>
@@ -1109,6 +1186,11 @@
       66                            <NA>                  <NA>                   <NA>
       67                            <NA>                  <NA>                   <NA>
       68                            <NA>                  <NA>                   <NA>
+      69                               8                     0                      4
+      70                            <NA>                  <NA>                   <NA>
+      71                            <NA>                  <NA>                   <NA>
+      72                            <NA>                  <NA>                   <NA>
+      73                            <NA>                  <NA>                   <NA>
          Count of Values that are all Whitespace         Mean Standard Deviation 
       2                     character.whitespace numeric.mean          numeric.sd
       3                                     <NA>         <NA>                <NA>
@@ -1177,6 +1259,11 @@
       66                                    <NA>         <NA>                <NA>
       67                                    <NA>         <NA>                <NA>
       68                                    <NA>         <NA>                <NA>
+      69                                       0         <NA>                <NA>
+      70                                    <NA>         <NA>                <NA>
+      71                                    <NA>         <NA>                <NA>
+      72                                    <NA>         <NA>                <NA>
+      73                                    <NA>         <NA>                <NA>
             Minimum 25th Percentile      Median 75th Percentile      Maximum
       2  numeric.p0     numeric.p25 numeric.p50     numeric.p75 numeric.p100
       3        <NA>            <NA>        <NA>            <NA>         <NA>
@@ -1245,6 +1332,11 @@
       66       <NA>            <NA>        <NA>            <NA>         <NA>
       67       <NA>            <NA>        <NA>            <NA>         <NA>
       68       <NA>            <NA>        <NA>            <NA>         <NA>
+      69       <NA>            <NA>        <NA>            <NA>         <NA>
+      70       <NA>            <NA>        <NA>            <NA>         <NA>
+      71       <NA>            <NA>        <NA>            <NA>         <NA>
+      72       <NA>            <NA>        <NA>            <NA>         <NA>
+      73       <NA>            <NA>        <NA>            <NA>         <NA>
             Histogram Is the Categorical Value Ordered? Count of Unique Values
       2  numeric.hist                    factor.ordered        factor.n_unique
       3          <NA>                              <NA>                   <NA>
@@ -1313,6 +1405,11 @@
       66         <NA>                              <NA>                   <NA>
       67         <NA>                              <NA>                   <NA>
       68         <NA>                              <NA>                   <NA>
+      69         <NA>                              <NA>                   <NA>
+      70         <NA>                             FALSE                      3
+      71         <NA>                              <NA>                   <NA>
+      72         <NA>                              <NA>                   <NA>
+      73         <NA>                              <NA>                   <NA>
            Most Frequent Values Proportion of TRUE Values Count of Logical Values
       2       factor.top_counts              logical.mean           logical.count
       3                    <NA>                      <NA>                    <NA>
@@ -1381,6 +1478,11 @@
       66                   <NA>         0.666666666666667          TRU: 2, FAL: 1
       67                   <NA>         0.333333333333333          FAL: 2, TRU: 1
       68                   <NA>         0.333333333333333          FAL: 2, TRU: 1
+      69                   <NA>                      <NA>                    <NA>
+      70       A: 2, B: 1, C: 1                      <NA>                    <NA>
+      71                   <NA>                       0.5          FAL: 2, TRU: 2
+      72                   <NA>                       0.5          FAL: 2, TRU: 2
+      73                   <NA>                      0.25          FAL: 3, TRU: 1
            Earliest     Latest      Median Count of Unique Values            Earliest
       2    Date.min   Date.max Date.median          Date.n_unique         POSIXct.min
       3        <NA>       <NA>        <NA>                   <NA>                <NA>
@@ -1449,6 +1551,11 @@
       66       <NA>       <NA>        <NA>                   <NA>                <NA>
       67       <NA>       <NA>        <NA>                   <NA>                <NA>
       68       <NA>       <NA>        <NA>                   <NA>                <NA>
+      69       <NA>       <NA>        <NA>                   <NA>                <NA>
+      70       <NA>       <NA>        <NA>                   <NA>                <NA>
+      71       <NA>       <NA>        <NA>                   <NA>                <NA>
+      72       <NA>       <NA>        <NA>                   <NA>                <NA>
+      73       <NA>       <NA>        <NA>                   <NA>                <NA>
                       Latest              Median Count of Unique Values      Minimum
       2          POSIXct.max      POSIXct.median       POSIXct.n_unique difftime.min
       3                 <NA>                <NA>                   <NA>         <NA>
@@ -1517,6 +1624,11 @@
       66                <NA>                <NA>                   <NA>         <NA>
       67                <NA>                <NA>                   <NA>         <NA>
       68                <NA>                <NA>                   <NA>         <NA>
+      69                <NA>                <NA>                   <NA>         <NA>
+      70                <NA>                <NA>                   <NA>         <NA>
+      71                <NA>                <NA>                   <NA>         <NA>
+      72                <NA>                <NA>                   <NA>         <NA>
+      73                <NA>                <NA>                   <NA>         <NA>
               Maximum          Median Count of Unique Values
       2  difftime.max difftime.median      difftime.n_unique
       3          <NA>            <NA>                   <NA>
@@ -1585,11 +1697,16 @@
       66         <NA>            <NA>                   <NA>
       67         <NA>            <NA>                   <NA>
       68         <NA>            <NA>                   <NA>
+      69         <NA>            <NA>                   <NA>
+      70         <NA>            <NA>                   <NA>
+      71         <NA>            <NA>                   <NA>
+      72         <NA>            <NA>                   <NA>
+      73         <NA>            <NA>                   <NA>
       
       
       [[2]]
          tab_name tab_sheet tab_ref
-      1    Table1         1  A2:I12
+      1    Table1         1  A2:I13
       2    Table2         2   A2:D6
       3    Table3         3   A2:D6
       4    Table4         4   A2:E6
@@ -1599,9 +1716,10 @@
       8    Table8         8   A2:E6
       9    Table9         9   A2:I6
       10  Table10        10   A2:J5
-      11  Table11        11 A2:AZ68
+      11  Table11        11   A2:G6
+      12  Table12        12 A2:AZ73
                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    tab_xml
-      1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       <table xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" id="1" name="Table1" displayName="Table1" ref="A2:I12" totalsRowCount="0" totalsRowShown="0"><autoFilter ref="A2:I12"/><tableColumns count="9"><tableColumn id="1" name="redcap_form_name"/><tableColumn id="2" name="redcap_form_label"/><tableColumn id="3" name="structure"/><tableColumn id="4" name="data_rows"/><tableColumn id="5" name="data_cols"/><tableColumn id="6" name="data_size"/><tableColumn id="7" name="data_na_pct"/><tableColumn id="8" name="form_complete_pct"/><tableColumn id="9" name="Sheet #"/></tableColumns><tableStyleInfo name="TableStyleLight8" showFirstColumn="0" showLastColumn="0" showRowStripes="1" showColumnStripes="0"/></table>
+      1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       <table xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" id="1" name="Table1" displayName="Table1" ref="A2:I13" totalsRowCount="0" totalsRowShown="0"><autoFilter ref="A2:I13"/><tableColumns count="9"><tableColumn id="1" name="redcap_form_name"/><tableColumn id="2" name="redcap_form_label"/><tableColumn id="3" name="structure"/><tableColumn id="4" name="data_rows"/><tableColumn id="5" name="data_cols"/><tableColumn id="6" name="data_size"/><tableColumn id="7" name="data_na_pct"/><tableColumn id="8" name="form_complete_pct"/><tableColumn id="9" name="Sheet #"/></tableColumns><tableStyleInfo name="TableStyleLight8" showFirstColumn="0" showLastColumn="0" showRowStripes="1" showColumnStripes="0"/></table>
       2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               <table xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" id="2" name="Table2" displayName="Table2" ref="A2:D6" totalsRowCount="0" totalsRowShown="0"><autoFilter ref="A2:D6"/><tableColumns count="4"><tableColumn id="1" name="record_id"/><tableColumn id="2" name="nonrepeat_1"/><tableColumn id="3" name="nonrepeat_2"/><tableColumn id="4" name="form_status_complete"/></tableColumns><tableStyleInfo name="TableStyleLight8" showFirstColumn="0" showLastColumn="0" showRowStripes="1" showColumnStripes="0"/></table>
       3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               <table xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" id="3" name="Table3" displayName="Table3" ref="A2:D6" totalsRowCount="0" totalsRowShown="0"><autoFilter ref="A2:D6"/><tableColumns count="4"><tableColumn id="1" name="record_id"/><tableColumn id="2" name="nonrepeat_3"/><tableColumn id="3" name="nonrepeat_4"/><tableColumn id="4" name="form_status_complete"/></tableColumns><tableStyleInfo name="TableStyleLight8" showFirstColumn="0" showLastColumn="0" showRowStripes="1" showColumnStripes="0"/></table>
       4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    <table xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" id="4" name="Table4" displayName="Table4" ref="A2:E6" totalsRowCount="0" totalsRowShown="0"><autoFilter ref="A2:E6"/><tableColumns count="5"><tableColumn id="1" name="record_id"/><tableColumn id="2" name="redcap_form_instance"/><tableColumn id="3" name="repeat_1"/><tableColumn id="4" name="repeat_2"/><tableColumn id="5" name="form_status_complete"/></tableColumns><tableStyleInfo name="TableStyleLight8" showFirstColumn="0" showLastColumn="0" showRowStripes="1" showColumnStripes="0"/></table>
@@ -1611,7 +1729,8 @@
       8                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          <table xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" id="8" name="Table8" displayName="Table8" ref="A2:E6" totalsRowCount="0" totalsRowShown="0"><autoFilter ref="A2:E6"/><tableColumns count="5"><tableColumn id="1" name="record_id"/><tableColumn id="2" name="api_text_2"/><tableColumn id="3" name="api_text_3"/><tableColumn id="4" name="api_text_4"/><tableColumn id="5" name="form_status_complete"/></tableColumns><tableStyleInfo name="TableStyleLight8" showFirstColumn="0" showLastColumn="0" showRowStripes="1" showColumnStripes="0"/></table>
       9                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            <table xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" id="9" name="Table9" displayName="Table9" ref="A2:I6" totalsRowCount="0" totalsRowShown="0"><autoFilter ref="A2:I6"/><tableColumns count="9"><tableColumn id="1" name="record_id"/><tableColumn id="2" name="survey_yesno"/><tableColumn id="3" name="survey_radio"/><tableColumn id="4" name="survey_checkbox___one"/><tableColumn id="5" name="survey_checkbox___two"/><tableColumn id="6" name="survey_checkbox___three"/><tableColumn id="7" name="redcap_survey_identifier"/><tableColumn id="8" name="redcap_survey_timestamp"/><tableColumn id="9" name="form_status_complete"/></tableColumns><tableStyleInfo name="TableStyleLight8" showFirstColumn="0" showLastColumn="0" showRowStripes="1" showColumnStripes="0"/></table>
       10                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           <table xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" id="10" name="Table10" displayName="Table10" ref="A2:J5" totalsRowCount="0" totalsRowShown="0"><autoFilter ref="A2:J5"/><tableColumns count="10"><tableColumn id="1" name="record_id"/><tableColumn id="2" name="redcap_form_instance"/><tableColumn id="3" name="repeatsurvey_yesno"/><tableColumn id="4" name="repeatsurvey_radio_v2"/><tableColumn id="5" name="repeatsurvey_checkbox_v2___one"/><tableColumn id="6" name="repeatsurvey_checkbox_v2___two"/><tableColumn id="7" name="repeatsurvey_checkbox_v2___three"/><tableColumn id="8" name="redcap_survey_identifier"/><tableColumn id="9" name="redcap_survey_timestamp"/><tableColumn id="10" name="form_status_complete"/></tableColumns><tableStyleInfo name="TableStyleLight8" showFirstColumn="0" showLastColumn="0" showRowStripes="1" showColumnStripes="0"/></table>
-      11 <table xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" id="11" name="Table11" displayName="Table11" ref="A2:AZ68" totalsRowCount="0" totalsRowShown="0"><autoFilter ref="A2:AZ68"/><tableColumns count="52"><tableColumn id="1" name="redcap_form_name"/><tableColumn id="2" name="redcap_form_label"/><tableColumn id="3" name="field_name"/><tableColumn id="4" name="field_label"/><tableColumn id="5" name="field_type"/><tableColumn id="6" name="section_header"/><tableColumn id="7" name="select_choices_or_calculations"/><tableColumn id="8" name="field_note"/><tableColumn id="9" name="text_validation_type_or_show_slider_number"/><tableColumn id="10" name="text_validation_min"/><tableColumn id="11" name="text_validation_max"/><tableColumn id="12" name="identifier"/><tableColumn id="13" name="branching_logic"/><tableColumn id="14" name="required_field"/><tableColumn id="15" name="custom_alignment"/><tableColumn id="16" name="question_number"/><tableColumn id="17" name="matrix_group_name"/><tableColumn id="18" name="matrix_ranking"/><tableColumn id="19" name="field_annotation"/><tableColumn id="20" name="skim_type"/><tableColumn id="21" name="n_missing"/><tableColumn id="22" name="complete_rate"/><tableColumn id="23" name="character.min"/><tableColumn id="24" name="character.max"/><tableColumn id="25" name="character.empty"/><tableColumn id="26" name="character.n_unique"/><tableColumn id="27" name="character.whitespace"/><tableColumn id="28" name="numeric.mean"/><tableColumn id="29" name="numeric.sd"/><tableColumn id="30" name="numeric.p0"/><tableColumn id="31" name="numeric.p25"/><tableColumn id="32" name="numeric.p50"/><tableColumn id="33" name="numeric.p75"/><tableColumn id="34" name="numeric.p100"/><tableColumn id="35" name="numeric.hist"/><tableColumn id="36" name="factor.ordered"/><tableColumn id="37" name="factor.n_unique"/><tableColumn id="38" name="factor.top_counts"/><tableColumn id="39" name="logical.mean"/><tableColumn id="40" name="logical.count"/><tableColumn id="41" name="Date.min"/><tableColumn id="42" name="Date.max"/><tableColumn id="43" name="Date.median"/><tableColumn id="44" name="Date.n_unique"/><tableColumn id="45" name="POSIXct.min"/><tableColumn id="46" name="POSIXct.max"/><tableColumn id="47" name="POSIXct.median"/><tableColumn id="48" name="POSIXct.n_unique"/><tableColumn id="49" name="difftime.min"/><tableColumn id="50" name="difftime.max"/><tableColumn id="51" name="difftime.median"/><tableColumn id="52" name="difftime.n_unique"/></tableColumns><tableStyleInfo name="TableStyleLight8" showFirstColumn="0" showLastColumn="0" showRowStripes="1" showColumnStripes="0"/></table>
+      11                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             <table xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" id="11" name="Table11" displayName="Table11" ref="A2:G6" totalsRowCount="0" totalsRowShown="0"><autoFilter ref="A2:G6"/><tableColumns count="7"><tableColumn id="1" name="record_id"/><tableColumn id="2" name="text_box_1"/><tableColumn id="3" name="radio_buttons_1"/><tableColumn id="4" name="checkbox___1"/><tableColumn id="5" name="checkbox___2"/><tableColumn id="6" name="checkbox___3"/><tableColumn id="7" name="form_status_complete"/></tableColumns><tableStyleInfo name="TableStyleLight8" showFirstColumn="0" showLastColumn="0" showRowStripes="1" showColumnStripes="0"/></table>
+      12 <table xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main" xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" id="12" name="Table12" displayName="Table12" ref="A2:AZ73" totalsRowCount="0" totalsRowShown="0"><autoFilter ref="A2:AZ73"/><tableColumns count="52"><tableColumn id="1" name="redcap_form_name"/><tableColumn id="2" name="redcap_form_label"/><tableColumn id="3" name="field_name"/><tableColumn id="4" name="field_label"/><tableColumn id="5" name="field_type"/><tableColumn id="6" name="section_header"/><tableColumn id="7" name="select_choices_or_calculations"/><tableColumn id="8" name="field_note"/><tableColumn id="9" name="text_validation_type_or_show_slider_number"/><tableColumn id="10" name="text_validation_min"/><tableColumn id="11" name="text_validation_max"/><tableColumn id="12" name="identifier"/><tableColumn id="13" name="branching_logic"/><tableColumn id="14" name="required_field"/><tableColumn id="15" name="custom_alignment"/><tableColumn id="16" name="question_number"/><tableColumn id="17" name="matrix_group_name"/><tableColumn id="18" name="matrix_ranking"/><tableColumn id="19" name="field_annotation"/><tableColumn id="20" name="skim_type"/><tableColumn id="21" name="n_missing"/><tableColumn id="22" name="complete_rate"/><tableColumn id="23" name="character.min"/><tableColumn id="24" name="character.max"/><tableColumn id="25" name="character.empty"/><tableColumn id="26" name="character.n_unique"/><tableColumn id="27" name="character.whitespace"/><tableColumn id="28" name="numeric.mean"/><tableColumn id="29" name="numeric.sd"/><tableColumn id="30" name="numeric.p0"/><tableColumn id="31" name="numeric.p25"/><tableColumn id="32" name="numeric.p50"/><tableColumn id="33" name="numeric.p75"/><tableColumn id="34" name="numeric.p100"/><tableColumn id="35" name="numeric.hist"/><tableColumn id="36" name="factor.ordered"/><tableColumn id="37" name="factor.n_unique"/><tableColumn id="38" name="factor.top_counts"/><tableColumn id="39" name="logical.mean"/><tableColumn id="40" name="logical.count"/><tableColumn id="41" name="Date.min"/><tableColumn id="42" name="Date.max"/><tableColumn id="43" name="Date.median"/><tableColumn id="44" name="Date.n_unique"/><tableColumn id="45" name="POSIXct.min"/><tableColumn id="46" name="POSIXct.max"/><tableColumn id="47" name="POSIXct.median"/><tableColumn id="48" name="POSIXct.n_unique"/><tableColumn id="49" name="difftime.min"/><tableColumn id="50" name="difftime.max"/><tableColumn id="51" name="difftime.median"/><tableColumn id="52" name="difftime.n_unique"/></tableColumns><tableStyleInfo name="TableStyleLight8" showFirstColumn="0" showLastColumn="0" showRowStripes="1" showColumnStripes="0"/></table>
          tab_act
       1        1
       2        1
@@ -1624,6 +1743,7 @@
       9        1
       10       1
       11       1
+      12       1
       
       [[3]]
       [[3]]$fileVersion
@@ -1661,7 +1781,8 @@
        [8] "<sheet name=\"API No Access 2\" sheetId=\"8\" state=\"visible\" r:id=\"rId8\"/>"            
        [9] "<sheet name=\"Survey\" sheetId=\"9\" state=\"visible\" r:id=\"rId9\"/>"                     
       [10] "<sheet name=\"Repeat Survey\" sheetId=\"10\" state=\"visible\" r:id=\"rId10\"/>"            
-      [11] "<sheet name=\"REDCap Metadata\" sheetId=\"11\" state=\"visible\" r:id=\"rId11\"/>"          
+      [11] "<sheet name=\"Labelled Vignette\" sheetId=\"11\" state=\"visible\" r:id=\"rId11\"/>"        
+      [12] "<sheet name=\"REDCap Metadata\" sheetId=\"12\" state=\"visible\" r:id=\"rId12\"/>"          
       
       [[3]]$functionGroups
       NULL
@@ -1715,11 +1836,12 @@
        [9] "<Relationship Id=\"rId9\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet\" Target=\"worksheets/sheet9.xml\"/>"  
       [10] "<Relationship Id=\"rId10\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet\" Target=\"worksheets/sheet10.xml\"/>"
       [11] "<Relationship Id=\"rId11\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet\" Target=\"worksheets/sheet11.xml\"/>"
-      [12] "<Relationship Id=\"rId12\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme\" Target=\"theme/theme1.xml\"/>"          
-      [13] "<Relationship Id=\"rId13\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles\" Target=\"styles.xml\"/>"               
+      [12] "<Relationship Id=\"rId12\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet\" Target=\"worksheets/sheet12.xml\"/>"
+      [13] "<Relationship Id=\"rId13\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme\" Target=\"theme/theme1.xml\"/>"          
+      [14] "<Relationship Id=\"rId14\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles\" Target=\"styles.xml\"/>"               
       
       [[5]]
-       [1]  1  2  3  4  5  6  7  8  9 10 11
+       [1]  1  2  3  4  5  6  7  8  9 10 11 12
       
       [[6]]
        [1] "Table of Contents"           "Nonrepeated"                
@@ -1727,6 +1849,6 @@
        [5] "Data Field Types"            "Text Input Validation Types"
        [7] "API No Access"               "API No Access 2"            
        [9] "Survey"                      "Repeat Survey"              
-      [11] "REDCap Metadata"            
+      [11] "Labelled Vignette"           "REDCap Metadata"            
       
 

From 2c5046b56773cc83ae8edaab4fd6dcc21cd7de13 Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Tue, 15 Oct 2024 12:04:19 -0400
Subject: [PATCH 16/17] add_partial_keys redcap_arm factor -> character

---
 R/utils.R                   | 2 +-
 tests/testthat/test-utils.R | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/utils.R b/R/utils.R
index e660031b..302aab5b 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -24,7 +24,7 @@ add_partial_keys <- function(db_data,
     db_data <- db_data %>%
       mutate(
         redcap_event = sub(pattern, "\\1", {{ var }}),
-        redcap_arm   = as.factor(sub(pattern, "\\2", {{ var }}))
+        redcap_arm   = as.character(sub(pattern, "\\2", {{ var }}))
       )
   }
 
diff --git a/tests/testthat/test-utils.R b/tests/testthat/test-utils.R
index 344d4e9a..7eb23520 100644
--- a/tests/testthat/test-utils.R
+++ b/tests/testthat/test-utils.R
@@ -369,7 +369,7 @@ test_that("add_partial_keys works", {
   expect_s3_class(out, "data.frame")
   expect_true(nrow(out) > 0)
 
-  expected_redcap_arm_col <- factor(c(1, 1, 1, 1, "1b"))
+  expected_redcap_arm_col <- c("1", "1", "1", "1", "1b")
   expect_equal(out$redcap_arm, expected_redcap_arm_col)
 })
 

From 14d2587bba2c54a3f691859b19ff606dbde31829 Mon Sep 17 00:00:00 2001
From: Richard Hanna <richardshanna91@gmail.com>
Date: Tue, 15 Oct 2024 15:22:21 -0400
Subject: [PATCH 17/17] Update NEWS.md

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index 14c724ee..cad90c90 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -3,6 +3,7 @@
 - Added `combine_checkboxes()` analytics function
    - Use `combine_checkboxes()` to consolidate multiple checkbox fields in a REDCap data tibble under a single column
 - Fixed a bug for mixed structure databases resulting in data loss when some fields had dual repeating-separately/repeating-together behavior
+- Fixed a bug where partial keys taken from REDCap arms could be incorrectly specified
 - Various improvements and additions with CRAN release of REDCapR 1.2.0:
    - `event_name` added as a column to the `redcap_event` column of longitudinal supertibbles
    - `guess_max` parameter in `read_redcap()` default updated to `Inf`