Public-Health-Scotland · lizihao-anu · Sep 12, 2023 · Sep 8, 2023 · Sep 8, 2023 · Sep 8, 2023
diff --git a/R/add_hri_variables.R b/R/add_hri_variables.R
@@ -71,7 +71,7 @@ add_hri_variables <- function(
   hri_lookup <- data %>%
     dplyr::select(
       "year",
-      "chi",
+      chi_variable,
       "postcode",
       "gpprac",
       "lca",

diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
@@ -222,3 +222,165 @@ aggregate_ch_episodes <- function(episode_file) {
 
   return(episode_file)
 }
+
+
+#' Aggregate by CHI with no social care variables
+#'
+#' @description Aggregate the episode file by CHI to convert it into the
+#' individual file. Social care variables are excluded so that the newest
+#' file, with Q1 data, can be created.
+#'
+#' @details `episode_file` is converted to a `data.table`, renamed and keyed
+#' by reference, so the object passed in may be modified as well.
+#'
+#' @importFrom data.table .N
+#' @importFrom data.table .SD
+#'
+#' @inheritParams create_individual_file
+#'
+#' @return A tibble with one row per CHI.
+aggregate_by_chi_no_sc <- function(episode_file) {
+  cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")
+
+  # Convert to data.table
+  data.table::setDT(episode_file)
+
+  # Ensure all variable names are lowercase
+  data.table::setnames(episode_file, stringr::str_to_lower)
+
+  # Sort the data
+  data.table::setkeyv(
+    episode_file,
+    c(
+      "chi",
+      "record_keydate1",
+      "keytime1",
+      "record_keydate2",
+      "keytime2"
+    )
+  )
+
+  # Rename the columns that are aggregated under a different name below.
+  # "ch_chi_cis" is deliberately not renamed to "ch_cis_episodes" here.
+  data.table::setnames(
+    episode_file,
+    c("cij_marker", "ooh_case_id", "hh_in_fy"),
+    c("cij_total", "ooh_cases", "hl1_in_fy")
+  )
+
+  # column specification, grouped by chi
+  # columns to select last (no "sc_" columns in this variant)
+  cols2 <- c("postcode", "dob", "gpprac")
+  # columns to count unique rows (no "ch_cis_episodes" in this variant)
+  cols3 <- c(
+    "cij_total",
+    "cij_el",
+    "cij_non_el",
+    "cij_mat",
+    "cij_delay",
+    "ooh_cases",
+    "preventable_admissions"
+  )
+  # columns to sum up (no "alarms", "telecare" or "sds_option" columns in
+  # this variant)
+  cols4 <- c(
+    vars_end_with(
+      episode_file,
+      c(
+        "episodes",
+        "beddays",
+        "cost",
+        "attendances",
+        "attend",
+        "contacts",
+        "hours",
+        "paid_items",
+        "advice",
+        "homev",
+        "time",
+        "assessment",
+        "other",
+        "dn",
+        "nhs24",
+        "pcc"
+      )
+    ),
+    "health_net_cost_inc_dnas"
+  )
+  # columns to select maximum
+  cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy"))
+  # Replace missing values in these columns with 0 before taking the maximum
+  data.table::setnafill(episode_file, fill = 0L, cols = cols5)
+
+  # Mean of gender per CHI
+  individual_file_cols1 <- episode_file[,
+    .(gender = mean(gender)),
+    by = "chi"
+  ]
+  # Last row per CHI, in the key order set above
+  individual_file_cols2 <- episode_file[,
+    .SD[.N],
+    .SDcols = cols2,
+    by = "chi"
+  ]
+  # Number of distinct non-missing values per CHI
+  individual_file_cols3 <- episode_file[,
+    lapply(.SD, function(x) {
+      data.table::uniqueN(x, na.rm = TRUE)
+    }),
+    .SDcols = cols3,
+    by = "chi"
+  ]
+  # Totals per CHI
+  individual_file_cols4 <- episode_file[,
+    lapply(.SD, function(x) {
+      sum(x, na.rm = TRUE)
+    }),
+    .SDcols = cols4,
+    by = "chi"
+  ]
+  # Maximum per CHI
+  individual_file_cols5 <- episode_file[,
+    lapply(.SD, function(x) max(x, na.rm = TRUE)),
+    .SDcols = cols5,
+    by = "chi"
+  ]
+  # Preventable beddays per CHI and CIJ marker: when any `cij_ppa` is set, the
+  # days between the dates, clipped to `start_fy(year)` and `end_fy(year)`
+  individual_file_cols6 <- episode_file[,
+    .(
+      # The condition is a single value per group, so plain if/else is used
+      # rather than the vectorised ifelse()
+      preventable_beddays = if (any(cij_ppa, na.rm = TRUE)) {
+        as.integer(
+          min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))
+        )
+      } else {
+        NA_integer_
+      }
+    ),
+    # cij_marker has been renamed as cij_total
+    by = c("chi", "cij_total")
+  ]
+  # Then total the per-CIJ values for each CHI
+  individual_file_cols6 <- individual_file_cols6[,
+    .(
+      preventable_beddays = sum(preventable_beddays, na.rm = TRUE)
+    ),
+    by = "chi"
+  ]
+
+  # Every table above has one row per CHI, so bind them side by side and
+  # keep the chi column from the first table only
+  individual_file <- dplyr::bind_cols(
+    individual_file_cols1,
+    individual_file_cols2[, chi := NULL],
+    individual_file_cols3[, chi := NULL],
+    individual_file_cols4[, chi := NULL],
+    individual_file_cols5[, chi := NULL],
+    individual_file_cols6[, chi := NULL]
+  )
+
+  # convert back to tibble
+  return(dplyr::as_tibble(individual_file))
+}
diff --git a/R/create_individual_file.R b/R/create_individual_file.R
@@ -72,23 +72,59 @@ create_individual_file <- function(
     ))) %>%
     remove_blank_chi() %>%
     add_cij_columns() %>%
-    add_all_columns() %>%
-    aggregate_ch_episodes() %>%
-    clean_up_ch(year) %>%
+    add_all_columns()
+
+  if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
+    individual_file <- individual_file %>%
+      aggregate_by_chi_no_sc()
+  } else {
+    individual_file <- individual_file %>%
+      aggregate_ch_episodes() %>%
+      clean_up_ch(year) %>%
+      aggregate_by_chi() %>%
+      join_sc_client(year)
+  }
+
+  individual_file <- individual_file %>%
     recode_gender() %>%
-    aggregate_by_chi() %>%
     clean_individual_file(year) %>%
     join_cohort_lookups(year) %>%
     add_homelessness_flag(year, lookup = homelessness_lookup) %>%
     match_on_ltcs(year) %>%
     join_deaths_data(year) %>%
     join_sparra_hhg(year) %>%
     join_slf_lookup_vars() %>%
-    join_sc_client(year) %>%
-    dplyr::mutate(year = year, .before = dplyr::everything()) %>%
+    dplyr::mutate(year = year) %>%
     add_hri_variables(chi_variable = "chi")
 
-
+  if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
+    individual_file <- individual_file %>%
+      dplyr::mutate(
+        ch_cis_episodes = NA,
+        ch_beddays = NA,
+        ch_cost = NA,
+        hc_episodes = NA,
+        hc_personal_episodes = NA,
+        hc_non_personal_episodes = NA,
+        hc_reablement_episodes = NA,
+        hc_total_cost = NA,
+        hc_total_hours = NA,
+        hc_personal_hours = NA,
+        hc_non_personal_hours = NA,
+        hc_reablement_hours = NA,
+        at_alarms = NA,
+        at_telecare = NA,
+        sds_option_1 = NA,
+        sds_option_2 = NA,
+        sds_option_3 = NA,
+        sds_option_4 = NA,
+        sc_living_alone = NA,
+        sc_support_from_unpaid_carer = NA,
+        sc_social_worker = NA,
+        sc_meals = NA,
+        sc_day_care = NA
+      )
+  }
 
   if (anon_chi_out) {
     individual_file <- individual_file %>%
@@ -173,7 +209,7 @@ add_cij_columns <- function(episode_file) {
 add_all_columns <- function(episode_file) {
   cli::cli_alert_info("Add all columns function started at {Sys.time()}")
 
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_acute_columns("Acute", (.data$smrtype == "Acute-DC" | .data$smrtype == "Acute-IP") & .data$cij_pattype != "Maternity") %>%
     add_mat_columns("Mat", .data$recid == "02B" | .data$cij_pattype == "Maternity") %>%
     add_mh_columns("MH", .data$recid == "04B" & .data$cij_pattype != "Maternity") %>%
@@ -187,11 +223,17 @@ add_all_columns <- function(episode_file) {
     add_dd_columns("DD", .data$recid == "DD") %>%
     add_nsu_columns("NSU", .data$recid == "NSU") %>%
     add_nrs_columns("NRS", .data$recid == "NRS") %>%
-    add_hl1_columns("HL1", .data$recid == "HL1") %>%
-    add_ch_columns("CH", .data$recid == "CH") %>%
-    add_hc_columns("HC", .data$recid == "HC") %>%
-    add_at_columns("AT", .data$recid == "AT") %>%
-    add_sds_columns("SDS", .data$recid == "SDS") %>%
+    add_hl1_columns("HL1", .data$recid == "HL1")
+
+  if (check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
+    episode_file <- episode_file %>%
+      add_ch_columns("CH", .data$recid == "CH") %>%
+      add_hc_columns("HC", .data$recid == "HC") %>%
+      add_at_columns("AT", .data$recid == "AT") %>%
+      add_sds_columns("SDS", .data$recid == "SDS")
+  }
+
+  episode_file <- episode_file %>%
     dplyr::mutate(
       health_net_cost = rowSums(
         dplyr::pick(

diff --git a/R/run_episode_file.R b/R/run_episode_file.R
@@ -115,6 +115,38 @@ run_episode_file <- function(
     join_deaths_data(year) %>%
     load_ep_file_vars(year)
 
+  if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
+    episode_file <- episode_file %>%
+      dplyr::mutate(
+        sc_send_lca = NA,
+        sc_living_alone = NA,
+        sc_support_from_unpaid_carer = NA,
+        sc_social_worker = NA,
+        sc_type_of_housing = NA,
+        sc_meals = NA,
+        sc_day_care = NA,
+        sc_latest_submission = NA,
+        ch_chi_cis = NA,
+        sc_id_cis = NA,
+        ch_name = NA,
+        ch_adm_reason = NA,
+        ch_provider = NA,
+        ch_nursing = NA,
+        hc_hours_annual = NA,
+        hc_hours_q1 = NA,
+        hc_hours_q2 = NA,
+        hc_hours_q3 = NA,
+        hc_hours_q4 = NA,
+        hc_cost_q1 = NA,
+        hc_cost_q2 = NA,
+        hc_cost_q3 = NA,
+        hc_cost_q4 = NA,
+        hc_provider = NA,
+        hc_reablement = NA,
+        sds_option_4 = NA,
+      )
+  }
+
   if (anon_chi_out) {
     episode_file <- slfhelper::get_anon_chi(episode_file)
   }

diff --git a/man/aggregate_by_chi_no_sc.Rd b/man/aggregate_by_chi_no_sc.Rd