Public-Health-Scotland · Jennit07 · Sep 18, 2023 · Sep 12, 2023 · Sep 12, 2023 · Sep 12, 2023
diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
@@ -7,7 +7,7 @@
 #' @importFrom data.table .SD
 #'
 #' @inheritParams create_individual_file
-aggregate_by_chi <- function(episode_file) {
+aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
   cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")
 
   # Convert to data.table
@@ -28,17 +28,33 @@ aggregate_by_chi <- function(episode_file) {
     )
   )
 
-  data.table::setnames(
-    episode_file,
-    c(
-      "ch_chi_cis", "cij_marker", "ooh_case_id"
-      # ,"hh_in_fy"
-    ),
-    c(
-      "ch_cis_episodes", "cij_total", "ooh_cases"
-      # ,"hl1_in_fy"
+  if (exclude_sc_var) {
+    data.table::setnames(
+      episode_file,
+      c(
+        "cij_marker",
+        "ooh_case_id"
+      ),
+      c(
+        "cij_total",
+        "ooh_cases"
+      )
     )
-  )
+  } else {
+    data.table::setnames(
+      episode_file,
+      c(
+        "ch_chi_cis",
+        "cij_marker",
+        "ooh_case_id"
+      ),
+      c(
+        "ch_cis_episodes",
+        "cij_total",
+        "ooh_cases"
+      )
+    )
+  }
 
   # column specification, grouped by chi
   # columns to select last
@@ -48,6 +64,9 @@ aggregate_by_chi <- function(episode_file) {
     "gpprac",
     vars_start_with(episode_file, "sc_")
   )
+  if (exclude_sc_var) {
+    cols2 <- cols2[!(cols2 %in% vars_start_with(episode_file, "sc_"))]
+  }
   # columns to count unique rows
   cols3 <- c(
     "ch_cis_episodes",
@@ -59,6 +78,9 @@ aggregate_by_chi <- function(episode_file) {
     "ooh_cases",
     "preventable_admissions"
   )
+  if (exclude_sc_var) {
+    cols3 <- cols3[!(cols3 %in% "ch_cis_episodes")]
+  }
   # columns to sum up
   cols4 <- c(
     vars_end_with(
@@ -91,6 +113,22 @@ aggregate_by_chi <- function(episode_file) {
     "health_net_cost_inc_dnas"
   )
   cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")]
+  if (exclude_sc_var) {
+    cols4 <-
+      cols4[!(cols4 %in% c(
+        vars_end_with(
+          episode_file,
+          c(
+            "alarms",
+            "telecare"
+          )
+        ),
+        vars_start_with(
+          episode_file,
+          "sds_option"
+        )
+      ))]
+  }
   # columns to select maximum
   cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy"))
   data.table::setnafill(episode_file, fill = 0L, cols = cols5)
@@ -222,165 +260,3 @@ aggregate_ch_episodes <- function(episode_file) {
 
   return(episode_file)
 }
-
-
-#' Aggregate by CHI with no social care variables
-#'
-#' @description Aggregate episode file by CHI to convert into
-#' individual file. Exclude social care variables for the creation of
-#' the newest file with Q1 data.
-#'
-#' @importFrom data.table .N
-#' @importFrom data.table .SD
-#'
-#' @inheritParams create_individual_file
-aggregate_by_chi_no_sc <- function(episode_file) {
-  cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")
-
-  # Convert to data.table
-  data.table::setDT(episode_file)
-
-  # Ensure all variable names are lowercase
-  data.table::setnames(episode_file, stringr::str_to_lower)
-
-  # Sort the data
-  data.table::setkeyv(
-    episode_file,
-    c(
-      "chi",
-      "record_keydate1",
-      "keytime1",
-      "record_keydate2",
-      "keytime2"
-    )
-  )
-
-  data.table::setnames(
-    episode_file,
-    c(
-      # "ch_chi_cis",
-      "cij_marker",
-      "ooh_case_id",
-      "hh_in_fy"
-    ),
-    c(
-      # "ch_cis_episodes",
-      "cij_total",
-      "ooh_cases",
-      "hl1_in_fy"
-    )
-  )
-
-  # column specification, grouped by chi
-  # columns to select last
-  cols2 <- c(
-    "postcode",
-    "dob",
-    "gpprac" # ,
-    # vars_start_with(episode_file, "sc_")
-  )
-  # columns to count unique rows
-  cols3 <- c(
-    # "ch_cis_episodes",
-    "cij_total",
-    "cij_el",
-    "cij_non_el",
-    "cij_mat",
-    "cij_delay",
-    "ooh_cases",
-    "preventable_admissions"
-  )
-  # columns to sum up
-  cols4 <- c(
-    vars_end_with(
-      episode_file,
-      c(
-        "episodes",
-        "beddays",
-        "cost",
-        "attendances",
-        "attend",
-        "contacts",
-        "hours",
-        # "alarms",
-        # "telecare",
-        "paid_items",
-        "advice",
-        "homev",
-        "time",
-        "assessment",
-        "other",
-        "dn",
-        "nhs24",
-        "pcc"
-        # )
-      ),
-      # vars_start_with(
-      # episode_file,
-      # "sds_option"
-    ),
-    "health_net_cost_inc_dnas"
-  )
-  # cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")]
-  # columns to select maximum
-  cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy"))
-  data.table::setnafill(episode_file, fill = 0L, cols = cols5)
-  # compute
-  individual_file_cols1 <- episode_file[,
-    .(gender = mean(gender)),
-    by = "chi"
-  ]
-  individual_file_cols2 <- episode_file[,
-    .SD[.N],
-    .SDcols = cols2,
-    by = "chi"
-  ]
-  individual_file_cols3 <- episode_file[,
-    lapply(.SD, function(x) {
-      data.table::uniqueN(x, na.rm = TRUE)
-    }),
-    .SDcols = cols3,
-    by = "chi"
-  ]
-  individual_file_cols4 <- episode_file[,
-    lapply(.SD, function(x) {
-      sum(x, na.rm = TRUE)
-    }),
-    .SDcols = cols4,
-    by = "chi"
-  ]
-  individual_file_cols5 <- episode_file[,
-    lapply(.SD, function(x) max(x, na.rm = TRUE)),
-    .SDcols = cols5,
-    by = "chi"
-  ]
-  individual_file_cols6 <- episode_file[,
-    .(
-      preventable_beddays = ifelse(
-        any(cij_ppa, na.rm = TRUE),
-        as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))),
-        NA_integer_
-      )
-    ),
-    # cij_marker has been renamed as cij_total
-    by = c("chi", "cij_total")
-  ]
-  individual_file_cols6 <- individual_file_cols6[,
-    .(
-      preventable_beddays = sum(preventable_beddays, na.rm = TRUE)
-    ),
-    by = "chi"
-  ]
-
-  individual_file <- dplyr::bind_cols(
-    individual_file_cols1,
-    individual_file_cols2[, chi := NULL],
-    individual_file_cols3[, chi := NULL],
-    individual_file_cols4[, chi := NULL],
-    individual_file_cols5[, chi := NULL],
-    individual_file_cols6[, chi := NULL]
-  )
-
-  # convert back to tibble
-  return(dplyr::as_tibble(individual_file))
-}
diff --git a/R/create_individual_file.R b/R/create_individual_file.R
@@ -76,12 +76,12 @@ create_individual_file <- function(
 
   if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
     individual_file <- individual_file %>%
-      aggregate_by_chi_no_sc()
+      aggregate_by_chi(exclude_sc_var = TRUE)
   } else {
     individual_file <- individual_file %>%
       aggregate_ch_episodes() %>%
       clean_up_ch(year) %>%
-      aggregate_by_chi() %>%
+      aggregate_by_chi(exclude_sc_var = FALSE) %>%
       join_sc_client(year)
   }
 

diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd
diff --git a/man/aggregate_by_chi_no_sc.Rd b/man/aggregate_by_chi_no_sc.Rd