From ff51536611038173483b8c72162f3d4b8fd180bb Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 12 Sep 2023 15:56:26 +0100
Subject: [PATCH 1/5] test commit push

---
 R/create_individual_file.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index df67f8c0f..b59c497f3 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -74,6 +74,8 @@ create_individual_file <- function(
     add_cij_columns() %>%
     add_all_columns()
 
+  adfasdfasdfasdfasdf
+
   if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
     individual_file <- individual_file %>%
       aggregate_by_chi_no_sc()

From 1af102e0b9b008bc177c001b17189a19c52cb5c9 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 12 Sep 2023 15:56:26 +0100
Subject: [PATCH 2/5] Revert "test commit push"

This reverts commit ff51536611038173483b8c72162f3d4b8fd180bb.
---
 R/create_individual_file.R | 2 --
 1 file changed, 2 deletions(-)

diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index b59c497f3..df67f8c0f 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -74,8 +74,6 @@ create_individual_file <- function(
     add_cij_columns() %>%
     add_all_columns()
 
-  adfasdfasdfasdfasdf
-
   if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
     individual_file <- individual_file %>%
       aggregate_by_chi_no_sc()

From d38fe2995a4e6aefecdbe338d80c9d35164d2919 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 12 Sep 2023 19:04:35 +0100
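Subject: [PATCH 3/5] Merge aggregate_by_chi_no_sc into aggregate_by_chi

Add an `exclude_sc_var` argument (default FALSE) to aggregate_by_chi() and
remove the duplicated aggregate_by_chi_no_sc(). When `exclude_sc_var` is
TRUE, the social care columns are left out of the aggregation.
create_individual_file() now calls aggregate_by_chi() in both branches.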

---
 R/aggregate_by_chi.R       | 275 +++++++++----------------------------
 R/create_individual_file.R |   4 +-
 2 files changed, 69 insertions(+), 210 deletions(-)

diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
index d2f8ca799..676d59c41 100644
--- a/R/aggregate_by_chi.R
+++ b/R/aggregate_by_chi.R
@@ -7,7 +7,7 @@
 #' @importFrom data.table .SD
 #'
 #' @inheritParams create_individual_file
-aggregate_by_chi <- function(episode_file) {
+aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
   cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")
 
   # Convert to data.table
@@ -28,17 +28,23 @@ aggregate_by_chi <- function(episode_file) {
     )
   )
 
-  data.table::setnames(
-    episode_file,
-    c(
-      "ch_chi_cis", "cij_marker", "ooh_case_id"
-      # ,"hh_in_fy"
-    ),
-    c(
-      "ch_cis_episodes", "cij_total", "ooh_cases"
-      # ,"hl1_in_fy"
+  if (exclude_sc_var) {
+    data.table::setnames(episode_file,
+                         c("cij_marker",
+                           "ooh_case_id"),
+                         c("cij_total",
+                           "ooh_cases"))
+  } else{
+    data.table::setnames(
+      episode_file,
+      c("ch_chi_cis",
+        "cij_marker",
+        "ooh_case_id"),
+      c("ch_cis_episodes",
+        "cij_total",
+        "ooh_cases")
     )
-  )
+  }
 
   # column specification, grouped by chi
   # columns to select last
@@ -48,6 +54,9 @@ aggregate_by_chi <- function(episode_file) {
     "gpprac",
     vars_start_with(episode_file, "sc_")
   )
+  if (exclude_sc_var) {
+    cols2 <- cols2[!(cols2 %in% vars_start_with(episode_file, "sc_"))]
+  }
   # columns to count unique rows
   cols3 <- c(
     "ch_cis_episodes",
@@ -59,6 +68,9 @@ aggregate_by_chi <- function(episode_file) {
     "ooh_cases",
     "preventable_admissions"
   )
+  if (exclude_sc_var) {
+    cols3 <- cols3[!(cols3 %in% "ch_cis_episodes")]
+  }
   # columns to sum up
   cols4 <- c(
     vars_end_with(
@@ -84,61 +96,70 @@ aggregate_by_chi <- function(episode_file) {
         "pcc"
       )
     ),
-    vars_start_with(
-      episode_file,
-      "sds_option"
-    ),
+    vars_start_with(episode_file,
+                    "sds_option"),
     "health_net_cost_inc_dnas"
   )
   cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")]
+  if (exclude_sc_var) {
+    cols4 <-
+      cols4[!(cols4 %in% c(
+        vars_end_with(
+        episode_file,
+        c("alarms",
+          "telecare")),
+        vars_start_with(episode_file,
+                        "sds_option")
+      ))]
+  }
   # columns to select maximum
   cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy"))
   data.table::setnafill(episode_file, fill = 0L, cols = cols5)
   # compute
   individual_file_cols1 <- episode_file[,
-    .(gender = mean(gender)),
-    by = "chi"
+                                        .(gender = mean(gender)),
+                                        by = "chi"
   ]
   individual_file_cols2 <- episode_file[,
-    .SD[.N],
-    .SDcols = cols2,
-    by = "chi"
+                                        .SD[.N],
+                                        .SDcols = cols2,
+                                        by = "chi"
   ]
   individual_file_cols3 <- episode_file[,
-    lapply(.SD, function(x) {
-      data.table::uniqueN(x, na.rm = TRUE)
-    }),
-    .SDcols = cols3,
-    by = "chi"
+                                        lapply(.SD, function(x) {
+                                          data.table::uniqueN(x, na.rm = TRUE)
+                                        }),
+                                        .SDcols = cols3,
+                                        by = "chi"
   ]
   individual_file_cols4 <- episode_file[,
-    lapply(.SD, function(x) {
-      sum(x, na.rm = TRUE)
-    }),
-    .SDcols = cols4,
-    by = "chi"
+                                        lapply(.SD, function(x) {
+                                          sum(x, na.rm = TRUE)
+                                        }),
+                                        .SDcols = cols4,
+                                        by = "chi"
   ]
   individual_file_cols5 <- episode_file[,
-    lapply(.SD, function(x) max(x, na.rm = TRUE)),
-    .SDcols = cols5,
-    by = "chi"
+                                        lapply(.SD, function(x) max(x, na.rm = TRUE)),
+                                        .SDcols = cols5,
+                                        by = "chi"
   ]
   individual_file_cols6 <- episode_file[,
-    .(
-      preventable_beddays = ifelse(
-        any(cij_ppa, na.rm = TRUE),
-        as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))),
-        NA_integer_
-      )
-    ),
-    # cij_marker has been renamed as cij_total
-    by = c("chi", "cij_total")
+                                        .(
+                                          preventable_beddays = ifelse(
+                                            any(cij_ppa, na.rm = TRUE),
+                                            as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))),
+                                            NA_integer_
+                                          )
+                                        ),
+                                        # cij_marker has been renamed as cij_total
+                                        by = c("chi", "cij_total")
   ]
   individual_file_cols6 <- individual_file_cols6[,
-    .(
-      preventable_beddays = sum(preventable_beddays, na.rm = TRUE)
-    ),
-    by = "chi"
+                                                 .(
+                                                   preventable_beddays = sum(preventable_beddays, na.rm = TRUE)
+                                                 ),
+                                                 by = "chi"
   ]
 
   individual_file <- dplyr::bind_cols(
@@ -222,165 +243,3 @@ aggregate_ch_episodes <- function(episode_file) {
 
   return(episode_file)
 }
-
-
-#' Aggregate by CHI with no social care variables
-#'
-#' @description Aggregate episode file by CHI to convert into
-#' individual file. Exclude social care variables for the creation of
-#' the newest file with Q1 data.
-#'
-#' @importFrom data.table .N
-#' @importFrom data.table .SD
-#'
-#' @inheritParams create_individual_file
-aggregate_by_chi_no_sc <- function(episode_file) {
-  cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")
-
-  # Convert to data.table
-  data.table::setDT(episode_file)
-
-  # Ensure all variable names are lowercase
-  data.table::setnames(episode_file, stringr::str_to_lower)
-
-  # Sort the data
-  data.table::setkeyv(
-    episode_file,
-    c(
-      "chi",
-      "record_keydate1",
-      "keytime1",
-      "record_keydate2",
-      "keytime2"
-    )
-  )
-
-  data.table::setnames(
-    episode_file,
-    c(
-      # "ch_chi_cis",
-      "cij_marker",
-      "ooh_case_id",
-      "hh_in_fy"
-    ),
-    c(
-      # "ch_cis_episodes",
-      "cij_total",
-      "ooh_cases",
-      "hl1_in_fy"
-    )
-  )
-
-  # column specification, grouped by chi
-  # columns to select last
-  cols2 <- c(
-    "postcode",
-    "dob",
-    "gpprac" # ,
-    # vars_start_with(episode_file, "sc_")
-  )
-  # columns to count unique rows
-  cols3 <- c(
-    # "ch_cis_episodes",
-    "cij_total",
-    "cij_el",
-    "cij_non_el",
-    "cij_mat",
-    "cij_delay",
-    "ooh_cases",
-    "preventable_admissions"
-  )
-  # columns to sum up
-  cols4 <- c(
-    vars_end_with(
-      episode_file,
-      c(
-        "episodes",
-        "beddays",
-        "cost",
-        "attendances",
-        "attend",
-        "contacts",
-        "hours",
-        # "alarms",
-        # "telecare",
-        "paid_items",
-        "advice",
-        "homev",
-        "time",
-        "assessment",
-        "other",
-        "dn",
-        "nhs24",
-        "pcc"
-        # )
-      ),
-      # vars_start_with(
-      # episode_file,
-      # "sds_option"
-    ),
-    "health_net_cost_inc_dnas"
-  )
-  # cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")]
-  # columns to select maximum
-  cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy"))
-  data.table::setnafill(episode_file, fill = 0L, cols = cols5)
-  # compute
-  individual_file_cols1 <- episode_file[,
-    .(gender = mean(gender)),
-    by = "chi"
-  ]
-  individual_file_cols2 <- episode_file[,
-    .SD[.N],
-    .SDcols = cols2,
-    by = "chi"
-  ]
-  individual_file_cols3 <- episode_file[,
-    lapply(.SD, function(x) {
-      data.table::uniqueN(x, na.rm = TRUE)
-    }),
-    .SDcols = cols3,
-    by = "chi"
-  ]
-  individual_file_cols4 <- episode_file[,
-    lapply(.SD, function(x) {
-      sum(x, na.rm = TRUE)
-    }),
-    .SDcols = cols4,
-    by = "chi"
-  ]
-  individual_file_cols5 <- episode_file[,
-    lapply(.SD, function(x) max(x, na.rm = TRUE)),
-    .SDcols = cols5,
-    by = "chi"
-  ]
-  individual_file_cols6 <- episode_file[,
-    .(
-      preventable_beddays = ifelse(
-        any(cij_ppa, na.rm = TRUE),
-        as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))),
-        NA_integer_
-      )
-    ),
-    # cij_marker has been renamed as cij_total
-    by = c("chi", "cij_total")
-  ]
-  individual_file_cols6 <- individual_file_cols6[,
-    .(
-      preventable_beddays = sum(preventable_beddays, na.rm = TRUE)
-    ),
-    by = "chi"
-  ]
-
-  individual_file <- dplyr::bind_cols(
-    individual_file_cols1,
-    individual_file_cols2[, chi := NULL],
-    individual_file_cols3[, chi := NULL],
-    individual_file_cols4[, chi := NULL],
-    individual_file_cols5[, chi := NULL],
-    individual_file_cols6[, chi := NULL]
-  )
-
-  # convert back to tibble
-  return(dplyr::as_tibble(individual_file))
-}
diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index df67f8c0f..4dc389d6a 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -76,12 +76,12 @@ create_individual_file <- function(
 
   if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
     individual_file <- individual_file %>%
-      aggregate_by_chi_no_sc()
+      aggregate_by_chi(exclude_sc_var = TRUE)
   } else {
     individual_file <- individual_file %>%
       aggregate_ch_episodes() %>%
       clean_up_ch(year) %>%
-      aggregate_by_chi() %>%
+      aggregate_by_chi(exclude_sc_var = FALSE) %>%
       join_sc_client(year)
   }
 

From 7bba9d04c1b7846cb64eea3849f281cecfd34654 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Tue, 12 Sep 2023 18:08:02 +0000
Subject: [PATCH 4/5] Style code

---
 R/aggregate_by_chi.R | 113 +++++++++++++++++++++++++------------------
 1 file changed, 65 insertions(+), 48 deletions(-)

diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
index 676d59c41..8d9dff96d 100644
--- a/R/aggregate_by_chi.R
+++ b/R/aggregate_by_chi.R
@@ -29,20 +29,30 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
   )
 
   if (exclude_sc_var) {
-    data.table::setnames(episode_file,
-                         c("cij_marker",
-                           "ooh_case_id"),
-                         c("cij_total",
-                           "ooh_cases"))
-  } else{
     data.table::setnames(
       episode_file,
-      c("ch_chi_cis",
+      c(
+        "cij_marker",
+        "ooh_case_id"
+      ),
+      c(
+        "cij_total",
+        "ooh_cases"
+      )
+    )
+  } else {
+    data.table::setnames(
+      episode_file,
+      c(
+        "ch_chi_cis",
         "cij_marker",
-        "ooh_case_id"),
-      c("ch_cis_episodes",
+        "ooh_case_id"
+      ),
+      c(
+        "ch_cis_episodes",
         "cij_total",
-        "ooh_cases")
+        "ooh_cases"
+      )
     )
   }
 
@@ -96,8 +106,10 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
         "pcc"
       )
     ),
-    vars_start_with(episode_file,
-                    "sds_option"),
+    vars_start_with(
+      episode_file,
+      "sds_option"
+    ),
     "health_net_cost_inc_dnas"
   )
   cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")]
@@ -105,11 +117,16 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
     cols4 <-
       cols4[!(cols4 %in% c(
         vars_end_with(
-        episode_file,
-        c("alarms",
-          "telecare")),
-        vars_start_with(episode_file,
-                        "sds_option")
+          episode_file,
+          c(
+            "alarms",
+            "telecare"
+          )
+        ),
+        vars_start_with(
+          episode_file,
+          "sds_option"
+        )
       ))]
   }
   # columns to select maximum
@@ -117,49 +134,49 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
   data.table::setnafill(episode_file, fill = 0L, cols = cols5)
   # compute
   individual_file_cols1 <- episode_file[,
-                                        .(gender = mean(gender)),
-                                        by = "chi"
+    .(gender = mean(gender)),
+    by = "chi"
   ]
   individual_file_cols2 <- episode_file[,
-                                        .SD[.N],
-                                        .SDcols = cols2,
-                                        by = "chi"
+    .SD[.N],
+    .SDcols = cols2,
+    by = "chi"
   ]
   individual_file_cols3 <- episode_file[,
-                                        lapply(.SD, function(x) {
-                                          data.table::uniqueN(x, na.rm = TRUE)
-                                        }),
-                                        .SDcols = cols3,
-                                        by = "chi"
+    lapply(.SD, function(x) {
+      data.table::uniqueN(x, na.rm = TRUE)
+    }),
+    .SDcols = cols3,
+    by = "chi"
   ]
   individual_file_cols4 <- episode_file[,
-                                        lapply(.SD, function(x) {
-                                          sum(x, na.rm = TRUE)
-                                        }),
-                                        .SDcols = cols4,
-                                        by = "chi"
+    lapply(.SD, function(x) {
+      sum(x, na.rm = TRUE)
+    }),
+    .SDcols = cols4,
+    by = "chi"
   ]
   individual_file_cols5 <- episode_file[,
-                                        lapply(.SD, function(x) max(x, na.rm = TRUE)),
-                                        .SDcols = cols5,
-                                        by = "chi"
+    lapply(.SD, function(x) max(x, na.rm = TRUE)),
+    .SDcols = cols5,
+    by = "chi"
   ]
   individual_file_cols6 <- episode_file[,
-                                        .(
-                                          preventable_beddays = ifelse(
-                                            any(cij_ppa, na.rm = TRUE),
-                                            as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))),
-                                            NA_integer_
-                                          )
-                                        ),
-                                        # cij_marker has been renamed as cij_total
-                                        by = c("chi", "cij_total")
+    .(
+      preventable_beddays = ifelse(
+        any(cij_ppa, na.rm = TRUE),
+        as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))),
+        NA_integer_
+      )
+    ),
+    # cij_marker has been renamed as cij_total
+    by = c("chi", "cij_total")
   ]
   individual_file_cols6 <- individual_file_cols6[,
-                                                 .(
-                                                   preventable_beddays = sum(preventable_beddays, na.rm = TRUE)
-                                                 ),
-                                                 by = "chi"
+    .(
+      preventable_beddays = sum(preventable_beddays, na.rm = TRUE)
+    ),
+    by = "chi"
   ]
 
   individual_file <- dplyr::bind_cols(

From 55300d52f6d826e2c7da4f0d422667ae88fe82b1 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Wed, 13 Sep 2023 13:15:10 +0000
Subject: [PATCH 5/5] Update documentation

---
 man/aggregate_by_chi.Rd       |  2 +-
 man/aggregate_by_chi_no_sc.Rd | 16 ----------------
 2 files changed, 1 insertion(+), 17 deletions(-)
 delete mode 100644 man/aggregate_by_chi_no_sc.Rd

diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd
index 013123902..5d5983ce2 100644
--- a/man/aggregate_by_chi.Rd
+++ b/man/aggregate_by_chi.Rd
@@ -4,7 +4,7 @@
 \alias{aggregate_by_chi}
 \title{Aggregate by CHI}
 \usage{
-aggregate_by_chi(episode_file)
+aggregate_by_chi(episode_file, exclude_sc_var = FALSE)
 }
 \arguments{
 \item{episode_file}{Tibble containing episodic data}
diff --git a/man/aggregate_by_chi_no_sc.Rd b/man/aggregate_by_chi_no_sc.Rd
deleted file mode 100644
index 02a20d709..000000000
--- a/man/aggregate_by_chi_no_sc.Rd
+++ /dev/null
@@ -1,16 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/aggregate_by_chi.R
-\name{aggregate_by_chi_no_sc}
-\alias{aggregate_by_chi_no_sc}
-\title{Aggregate by CHI with no social care variables}
-\usage{
-aggregate_by_chi_no_sc(episode_file)
-}
-\arguments{
-\item{episode_file}{Tibble containing episodic data}
-}
-\description{
-Aggregate episode file by CHI to convert into
-individual file. Exclude social care variables for the creation of
-the newest file with Q1 data.
-}