From 7d05ce9c8f5f678abd147c8325cb9c64b221b03c Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 16 Jan 2024 15:39:38 +0000
Subject: [PATCH 001/186] Remove redundant code

---
 R/process_tests_district_nursing.R | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/R/process_tests_district_nursing.R b/R/process_tests_district_nursing.R
index d3d55a15a..df3c876f0 100644
--- a/R/process_tests_district_nursing.R
+++ b/R/process_tests_district_nursing.R
@@ -13,14 +13,7 @@ process_tests_district_nursing <- function(data, year) {
     return(data)
   }
 
-  old_data <- get_existing_data_for_tests(data) %>%
-    # TODO: remove this bit after SPSS stopped
-    # replace NA by 0 in monthly costs
-    dplyr::mutate(dplyr::across(
-      dplyr::ends_with("_cost"),
-      ~ tidyr::replace_na(.x, 0.0)
-    ))
-
+  old_data <- get_existing_data_for_tests(data)
   data <- rename_hscp(data)
 
   comparison <- produce_test_comparison(

From d1718f0a2a630b854cd9fd2add7912cd984e5514 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Tue, 16 Jan 2024 15:41:28 +0000
Subject: [PATCH 002/186] Update documentation

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5123289dd..4bb0c6f18 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -73,4 +73,4 @@ Encoding: UTF-8
 Language: en-GB
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.0

From 6aec7b1ce6ca0be3d1902240fa6ae371ef82bb3b Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Tue, 16 Jan 2024 15:44:11 +0000
Subject: [PATCH 003/186] Style code

---
 R/create_individual_file.R                     | 3 ++-
 R/get_fy_quarter_dates.R                       | 8 ++++----
 Run_SLF_Files_manually/run_episode_file_1718.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_1819.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_1920.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2021.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2122.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2223.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2324.R | 3 ++-
 9 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index d9316b41b..4ca2f96d7 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -483,7 +483,8 @@ add_ch_columns <- function(episode_file, prefix, condition) {
       ch_ep_end = dplyr::if_else(
         eval(condition),
         .data$record_keydate2,
-        lubridate::NA_Date_  ),
+        lubridate::NA_Date_
+      ),
       # If end date is missing use the first day of next FY quarter
       ch_ep_end = dplyr::if_else(
         eval(condition) & is.na(.data$ch_ep_end),
diff --git a/R/get_fy_quarter_dates.R b/R/get_fy_quarter_dates.R
index cd4c3492c..a772099b8 100644
--- a/R/get_fy_quarter_dates.R
+++ b/R/get_fy_quarter_dates.R
@@ -15,7 +15,7 @@
 start_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -47,7 +47,7 @@ start_fy_quarter <- function(quarter) {
 end_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -80,7 +80,7 @@ end_fy_quarter <- function(quarter) {
 start_next_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -112,7 +112,7 @@ start_next_fy_quarter <- function(quarter) {
 end_next_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R
index 9be2eb9c6..ab75b94d7 100644
--- a/Run_SLF_Files_manually/run_episode_file_1718.R
+++ b/Run_SLF_Files_manually/run_episode_file_1718.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1718"
 
 processed_data_list <- targets::tar_read("processed_data_list_1718",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R
index 7dec9e5c1..cd5a7435f 100644
--- a/Run_SLF_Files_manually/run_episode_file_1819.R
+++ b/Run_SLF_Files_manually/run_episode_file_1819.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1819"
 
 processed_data_list <- targets::tar_read("processed_data_list_1819",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R
index 066bd27b7..a9dc591b1 100644
--- a/Run_SLF_Files_manually/run_episode_file_1920.R
+++ b/Run_SLF_Files_manually/run_episode_file_1920.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1920"
 
 processed_data_list <- targets::tar_read("processed_data_list_1920",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R
index 8354f49ae..37708ee8b 100644
--- a/Run_SLF_Files_manually/run_episode_file_2021.R
+++ b/Run_SLF_Files_manually/run_episode_file_2021.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2021"
 
 processed_data_list <- targets::tar_read("processed_data_list_2021",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R
index 4057770d1..47400e2d1 100644
--- a/Run_SLF_Files_manually/run_episode_file_2122.R
+++ b/Run_SLF_Files_manually/run_episode_file_2122.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2122"
 
 processed_data_list <- targets::tar_read("processed_data_list_2122",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R
index 5df7b5db6..e64a57f32 100644
--- a/Run_SLF_Files_manually/run_episode_file_2223.R
+++ b/Run_SLF_Files_manually/run_episode_file_2223.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2223"
 
 processed_data_list <- targets::tar_read("processed_data_list_2223",
-                      store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R
index af9a3efe5..4a7f0ad29 100644
--- a/Run_SLF_Files_manually/run_episode_file_2324.R
+++ b/Run_SLF_Files_manually/run_episode_file_2324.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2324"
 
 processed_data_list <- targets::tar_read("processed_data_list_2324",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%

From 65e8caa56008ec4eccac7f828e329064de9219e0 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 22 Jan 2024 10:14:00 +0000
Subject: [PATCH 004/186] Reorder when we match on client variables. This was
 causing NSUs to show a social care id. Matching earlier resolves this.

---
 R/create_episode_file.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index 3de9223dd..493d71bd3 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -103,6 +103,8 @@ create_episode_file <- function(
         "mar_beddays"
       )
     ) %>%
+    # match on sc client variables
+    join_sc_client(year, sc_client = sc_client, file_type = "episode") %>%
     # Check chi is valid using phsmethods function
     # If the CHI is invalid for whatever reason, set the CHI to NA
     dplyr::mutate(
@@ -135,7 +137,6 @@ create_episode_file <- function(
       year,
       slf_deaths_lookup
     ) %>%
-    join_sc_client(year, sc_client = sc_client, file_type = "episode") %>%
     load_ep_file_vars(year)
 
   if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {

From 35bcddcbfc18a3d034dc7ae1ba1cd2ecdfdec437 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Mon, 22 Jan 2024 10:16:22 +0000
Subject: [PATCH 005/186] Update documentation

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5123289dd..4bb0c6f18 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -73,4 +73,4 @@ Encoding: UTF-8
 Language: en-GB
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.0

From 800083a72d212e82d08898effc8f602a290922e2 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Mon, 22 Jan 2024 10:23:44 +0000
Subject: [PATCH 006/186] Style code

---
 R/create_individual_file.R                     | 3 ++-
 R/get_fy_quarter_dates.R                       | 8 ++++----
 Run_SLF_Files_manually/run_episode_file_1718.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_1819.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_1920.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2021.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2122.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2223.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2324.R | 3 ++-
 9 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index d9316b41b..4ca2f96d7 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -483,7 +483,8 @@ add_ch_columns <- function(episode_file, prefix, condition) {
       ch_ep_end = dplyr::if_else(
         eval(condition),
         .data$record_keydate2,
-        lubridate::NA_Date_  ),
+        lubridate::NA_Date_
+      ),
       # If end date is missing use the first day of next FY quarter
       ch_ep_end = dplyr::if_else(
         eval(condition) & is.na(.data$ch_ep_end),
diff --git a/R/get_fy_quarter_dates.R b/R/get_fy_quarter_dates.R
index cd4c3492c..a772099b8 100644
--- a/R/get_fy_quarter_dates.R
+++ b/R/get_fy_quarter_dates.R
@@ -15,7 +15,7 @@
 start_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -47,7 +47,7 @@ start_fy_quarter <- function(quarter) {
 end_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -80,7 +80,7 @@ end_fy_quarter <- function(quarter) {
 start_next_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -112,7 +112,7 @@ start_next_fy_quarter <- function(quarter) {
 end_next_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R
index 9be2eb9c6..ab75b94d7 100644
--- a/Run_SLF_Files_manually/run_episode_file_1718.R
+++ b/Run_SLF_Files_manually/run_episode_file_1718.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1718"
 
 processed_data_list <- targets::tar_read("processed_data_list_1718",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R
index 7dec9e5c1..cd5a7435f 100644
--- a/Run_SLF_Files_manually/run_episode_file_1819.R
+++ b/Run_SLF_Files_manually/run_episode_file_1819.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1819"
 
 processed_data_list <- targets::tar_read("processed_data_list_1819",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R
index 066bd27b7..a9dc591b1 100644
--- a/Run_SLF_Files_manually/run_episode_file_1920.R
+++ b/Run_SLF_Files_manually/run_episode_file_1920.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1920"
 
 processed_data_list <- targets::tar_read("processed_data_list_1920",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R
index 8354f49ae..37708ee8b 100644
--- a/Run_SLF_Files_manually/run_episode_file_2021.R
+++ b/Run_SLF_Files_manually/run_episode_file_2021.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2021"
 
 processed_data_list <- targets::tar_read("processed_data_list_2021",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R
index 4057770d1..47400e2d1 100644
--- a/Run_SLF_Files_manually/run_episode_file_2122.R
+++ b/Run_SLF_Files_manually/run_episode_file_2122.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2122"
 
 processed_data_list <- targets::tar_read("processed_data_list_2122",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R
index 5df7b5db6..e64a57f32 100644
--- a/Run_SLF_Files_manually/run_episode_file_2223.R
+++ b/Run_SLF_Files_manually/run_episode_file_2223.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2223"
 
 processed_data_list <- targets::tar_read("processed_data_list_2223",
-                      store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R
index af9a3efe5..4a7f0ad29 100644
--- a/Run_SLF_Files_manually/run_episode_file_2324.R
+++ b/Run_SLF_Files_manually/run_episode_file_2324.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2324"
 
 processed_data_list <- targets::tar_read("processed_data_list_2324",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%

From 996db4cf576834dfc4549b07288d5990aef68748 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 22 Jan 2024 12:16:46 +0000
Subject: [PATCH 007/186] Revert "Update logic to use end of Quarter"

This reverts commit 004e831449f56f898bb48596c491c2acc954acc9.
---
 R/create_individual_file.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index d9316b41b..4ca2f96d7 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -483,7 +483,8 @@ add_ch_columns <- function(episode_file, prefix, condition) {
       ch_ep_end = dplyr::if_else(
         eval(condition),
         .data$record_keydate2,
-        lubridate::NA_Date_  ),
+        lubridate::NA_Date_
+      ),
       # If end date is missing use the first day of next FY quarter
       ch_ep_end = dplyr::if_else(
         eval(condition) & is.na(.data$ch_ep_end),

From d10376ddc84871b82b8a8844d527739f5fb6f789 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Mon, 22 Jan 2024 12:18:30 +0000
Subject: [PATCH 008/186] Style code

---
 R/get_fy_quarter_dates.R                       | 8 ++++----
 Run_SLF_Files_manually/run_episode_file_1718.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_1819.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_1920.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2021.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2122.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2223.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2324.R | 3 ++-
 8 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/R/get_fy_quarter_dates.R b/R/get_fy_quarter_dates.R
index cd4c3492c..a772099b8 100644
--- a/R/get_fy_quarter_dates.R
+++ b/R/get_fy_quarter_dates.R
@@ -15,7 +15,7 @@
 start_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -47,7 +47,7 @@ start_fy_quarter <- function(quarter) {
 end_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -80,7 +80,7 @@ end_fy_quarter <- function(quarter) {
 start_next_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -112,7 +112,7 @@ start_next_fy_quarter <- function(quarter) {
 end_next_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R
index 9be2eb9c6..ab75b94d7 100644
--- a/Run_SLF_Files_manually/run_episode_file_1718.R
+++ b/Run_SLF_Files_manually/run_episode_file_1718.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1718"
 
 processed_data_list <- targets::tar_read("processed_data_list_1718",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R
index 7dec9e5c1..cd5a7435f 100644
--- a/Run_SLF_Files_manually/run_episode_file_1819.R
+++ b/Run_SLF_Files_manually/run_episode_file_1819.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1819"
 
 processed_data_list <- targets::tar_read("processed_data_list_1819",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R
index 066bd27b7..a9dc591b1 100644
--- a/Run_SLF_Files_manually/run_episode_file_1920.R
+++ b/Run_SLF_Files_manually/run_episode_file_1920.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1920"
 
 processed_data_list <- targets::tar_read("processed_data_list_1920",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R
index 8354f49ae..37708ee8b 100644
--- a/Run_SLF_Files_manually/run_episode_file_2021.R
+++ b/Run_SLF_Files_manually/run_episode_file_2021.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2021"
 
 processed_data_list <- targets::tar_read("processed_data_list_2021",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R
index 4057770d1..47400e2d1 100644
--- a/Run_SLF_Files_manually/run_episode_file_2122.R
+++ b/Run_SLF_Files_manually/run_episode_file_2122.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2122"
 
 processed_data_list <- targets::tar_read("processed_data_list_2122",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R
index 5df7b5db6..e64a57f32 100644
--- a/Run_SLF_Files_manually/run_episode_file_2223.R
+++ b/Run_SLF_Files_manually/run_episode_file_2223.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2223"
 
 processed_data_list <- targets::tar_read("processed_data_list_2223",
-                      store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R
index af9a3efe5..4a7f0ad29 100644
--- a/Run_SLF_Files_manually/run_episode_file_2324.R
+++ b/Run_SLF_Files_manually/run_episode_file_2324.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2324"
 
 processed_data_list <- targets::tar_read("processed_data_list_2324",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%

From b8e1dd250095eb92a1273aeb0abd45b4c9f2bdda Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Mon, 22 Jan 2024 12:21:31 +0000
Subject: [PATCH 009/186] Update documentation

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5123289dd..4bb0c6f18 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -73,4 +73,4 @@ Encoding: UTF-8
 Language: en-GB
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.0

From 3591aca7a3e77b39fd8405b2da23cfd39028c783 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 22 Jan 2024 12:23:48 +0000
Subject: [PATCH 010/186] add check comment (TO DO for this PR)

---
 R/calculate_stay.R         | 1 +
 R/create_individual_file.R | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/R/calculate_stay.R b/R/calculate_stay.R
index ae80b33c1..ad4f5f53c 100644
--- a/R/calculate_stay.R
+++ b/R/calculate_stay.R
@@ -51,6 +51,7 @@ calculate_stay <- function(year, start_date, end_date, sc_qtr = NULL) {
       lubridate::period(1L, "days")
     )
 
+    # check logic here for care home methodology
     dummy_end_date <- dplyr::case_when(
       # If end_date is not missing use the end date
       !is.na(end_date) ~ end_date,
diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index 4ca2f96d7..70066b42d 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -485,7 +485,8 @@ add_ch_columns <- function(episode_file, prefix, condition) {
         .data$record_keydate2,
         lubridate::NA_Date_
       ),
-      # If end date is missing use the first day of next FY quarter
+      # check logic here for care home methodology
+      # If end date is missing use the end of the FY quarter
       ch_ep_end = dplyr::if_else(
         eval(condition) & is.na(.data$ch_ep_end),
         start_next_fy_quarter(.data$sc_latest_submission),

From 47769e31f78eaf507eeb594975184f004bc47ccc Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 22 Jan 2024 12:32:52 +0000
Subject: [PATCH 011/186] Remove `check_quarter_format` function

---
 R/get_fy_quarter_dates.R | 25 -------------------------
 1 file changed, 25 deletions(-)

diff --git a/R/get_fy_quarter_dates.R b/R/get_fy_quarter_dates.R
index a772099b8..86e9ed95f 100644
--- a/R/get_fy_quarter_dates.R
+++ b/R/get_fy_quarter_dates.R
@@ -128,28 +128,3 @@ end_next_fy_quarter <- function(quarter) {
 
   return(end_next_fy_quarter)
 }
-
-#' Check quarter format
-#'
-#' @inheritParams start_fy_quarter
-#'
-#' @return `quarter` invisibly if no issues were found
-#'
-#' @family date functions
-# check_quarter_format <- function(quarter) {
-#   stopifnot(typeof(quarter) == "character")
-#
-#   if (any(
-#     stringr::str_detect(quarter, "^\\d{4}Q[1-4]$", negate = TRUE),
-#     na.rm = TRUE
-#   )) {
-#     cli::cli_abort(
-#       c("{.var quarter} must be in the format {.val YYYYQx}
-#                    where {.val x} is the quarter number.",
-#         "v" = "For example {.val 2019Q1}."
-#       )
-#     )
-#   }
-#
-#   return(invisible(quarter))
-# }

From 85c22ad7f1f64321f03a24bf17c0c01e9ec9e179 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 22 Jan 2024 12:35:40 +0000
Subject: [PATCH 012/186] Remove `check_quarter_format`

---
 R/calculate_stay.R       | 3 ---
 R/get_fy_quarter_dates.R | 8 --------
 2 files changed, 11 deletions(-)

diff --git a/R/calculate_stay.R b/R/calculate_stay.R
index ad4f5f53c..d1748a470 100644
--- a/R/calculate_stay.R
+++ b/R/calculate_stay.R
@@ -37,9 +37,6 @@ calculate_stay <- function(year, start_date, end_date, sc_qtr = NULL) {
     if (anyNA(sc_qtr)) {
       cli::cli_abort("Some of the submitted quarters are missing")
     }
-    # else {
-    #   sc_qtr <- check_quarter_format(sc_qtr)
-    # }
 
     # Set Quarters
     qtr_end <- lubridate::add_with_rollback(
diff --git a/R/get_fy_quarter_dates.R b/R/get_fy_quarter_dates.R
index 86e9ed95f..68ac3266e 100644
--- a/R/get_fy_quarter_dates.R
+++ b/R/get_fy_quarter_dates.R
@@ -15,8 +15,6 @@
 start_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  # check_quarter_format(quarter)
-
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
   fy_quarter_date_unique <- lubridate::add_with_rollback(
@@ -47,8 +45,6 @@ start_fy_quarter <- function(quarter) {
 end_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  # check_quarter_format(quarter)
-
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
   fy_quarter_date_unique <- lubridate::add_with_rollback(
@@ -80,8 +76,6 @@ end_fy_quarter <- function(quarter) {
 start_next_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  # check_quarter_format(quarter)
-
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
   fy_quarter_date_unique <- lubridate::add_with_rollback(
@@ -112,8 +106,6 @@ start_next_fy_quarter <- function(quarter) {
 end_next_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  # check_quarter_format(quarter)
-
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
   fy_quarter_date_unique <- lubridate::add_with_rollback(

From e4d91284ecfbc82700c4b36d5d668b6d82ebb15f Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 22 Jan 2024 12:59:08 +0000
Subject: [PATCH 013/186] Add chi parameter to `create_demog_test_flags`

---
 DESCRIPTION                    |  2 +-
 R/create_demog_test_flags.R    | 10 +++++-----
 man/create_demog_test_flags.Rd |  4 +++-
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5123289dd..4bb0c6f18 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -73,4 +73,4 @@ Encoding: UTF-8
 Language: en-GB
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.0
diff --git a/R/create_demog_test_flags.R b/R/create_demog_test_flags.R
index 3023292ce..b909679d9 100644
--- a/R/create_demog_test_flags.R
+++ b/R/create_demog_test_flags.R
@@ -3,19 +3,19 @@
 #' @description Create the demographic flags for testing
 #'
 #' @param data a dataframe containing demographic variables e.g. chi
+#' @param chi Specify chi or anon_chi.
 #'
 #' @return a dataframe with flag (1 or 0) for each demographic variable.
 #' Missing value flag from [is_missing()]
 #'
 #' @family flag functions
-create_demog_test_flags <- function(data) {
+create_demog_test_flags <- function(data, chi = c(chi, anon_chi)) {
   data %>%
-    dplyr::arrange(.data$chi) %>%
+    dplyr::arrange({{ chi }}) %>%
     # create test flags
     dplyr::mutate(
-      valid_chi = phsmethods::chi_check(.data$chi) == "Valid CHI",
-      unique_chi = dplyr::lag(.data$chi) != .data$chi,
-      n_missing_chi = is_missing(.data$chi),
+      unique_chi = dplyr::lag({{ chi }}) != {{ chi }},
+      n_missing_chi = is_missing({{ chi }}),
       n_males = .data$gender == 1L,
       n_females = .data$gender == 2L,
       n_postcode = !is.na(.data$postcode) | !.data$postcode == "",
diff --git a/man/create_demog_test_flags.Rd b/man/create_demog_test_flags.Rd
index 589877738..fbc0fadcc 100644
--- a/man/create_demog_test_flags.Rd
+++ b/man/create_demog_test_flags.Rd
@@ -4,10 +4,12 @@
 \alias{create_demog_test_flags}
 \title{Create demographic test flags}
 \usage{
-create_demog_test_flags(data)
+create_demog_test_flags(data, chi = c(chi, anon_chi))
 }
 \arguments{
 \item{data}{a dataframe containing demographic variables e.g. chi}
+
+\item{chi}{Unquoted name of the CHI column to use: \code{chi} or \code{anon_chi}.}
 }
 \value{
 a dataframe with flag (1 or 0) for each demographic variable.

From daa9ee7a87ba8e5daa4c42fca7c9256a32f84246 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Mon, 22 Jan 2024 13:02:10 +0000
Subject: [PATCH 014/186] Style code

---
 R/create_individual_file.R                     | 3 ++-
 R/get_fy_quarter_dates.R                       | 8 ++++----
 Run_SLF_Files_manually/run_episode_file_1718.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_1819.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_1920.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2021.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2122.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2223.R | 3 ++-
 Run_SLF_Files_manually/run_episode_file_2324.R | 3 ++-
 9 files changed, 20 insertions(+), 12 deletions(-)

diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index d9316b41b..4ca2f96d7 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -483,7 +483,8 @@ add_ch_columns <- function(episode_file, prefix, condition) {
       ch_ep_end = dplyr::if_else(
         eval(condition),
         .data$record_keydate2,
-        lubridate::NA_Date_  ),
+        lubridate::NA_Date_
+      ),
       # If end date is missing use the first day of next FY quarter
       ch_ep_end = dplyr::if_else(
         eval(condition) & is.na(.data$ch_ep_end),
diff --git a/R/get_fy_quarter_dates.R b/R/get_fy_quarter_dates.R
index cd4c3492c..a772099b8 100644
--- a/R/get_fy_quarter_dates.R
+++ b/R/get_fy_quarter_dates.R
@@ -15,7 +15,7 @@
 start_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -47,7 +47,7 @@ start_fy_quarter <- function(quarter) {
 end_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -80,7 +80,7 @@ end_fy_quarter <- function(quarter) {
 start_next_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
@@ -112,7 +112,7 @@ start_next_fy_quarter <- function(quarter) {
 end_next_fy_quarter <- function(quarter) {
   quarter_unique <- unique(quarter)
 
-  #check_quarter_format(quarter)
+  # check_quarter_format(quarter)
 
   cal_quarter_date_unique <- lubridate::yq(quarter_unique)
 
diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R
index 9be2eb9c6..ab75b94d7 100644
--- a/Run_SLF_Files_manually/run_episode_file_1718.R
+++ b/Run_SLF_Files_manually/run_episode_file_1718.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1718"
 
 processed_data_list <- targets::tar_read("processed_data_list_1718",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R
index 7dec9e5c1..cd5a7435f 100644
--- a/Run_SLF_Files_manually/run_episode_file_1819.R
+++ b/Run_SLF_Files_manually/run_episode_file_1819.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1819"
 
 processed_data_list <- targets::tar_read("processed_data_list_1819",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R
index 066bd27b7..a9dc591b1 100644
--- a/Run_SLF_Files_manually/run_episode_file_1920.R
+++ b/Run_SLF_Files_manually/run_episode_file_1920.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "1920"
 
 processed_data_list <- targets::tar_read("processed_data_list_1920",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R
index 8354f49ae..37708ee8b 100644
--- a/Run_SLF_Files_manually/run_episode_file_2021.R
+++ b/Run_SLF_Files_manually/run_episode_file_2021.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2021"
 
 processed_data_list <- targets::tar_read("processed_data_list_2021",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R
index 4057770d1..47400e2d1 100644
--- a/Run_SLF_Files_manually/run_episode_file_2122.R
+++ b/Run_SLF_Files_manually/run_episode_file_2122.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2122"
 
 processed_data_list <- targets::tar_read("processed_data_list_2122",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R
index 5df7b5db6..e64a57f32 100644
--- a/Run_SLF_Files_manually/run_episode_file_2223.R
+++ b/Run_SLF_Files_manually/run_episode_file_2223.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2223"
 
 processed_data_list <- targets::tar_read("processed_data_list_2223",
-                      store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R
index af9a3efe5..4a7f0ad29 100644
--- a/Run_SLF_Files_manually/run_episode_file_2324.R
+++ b/Run_SLF_Files_manually/run_episode_file_2324.R
@@ -4,7 +4,8 @@ library(createslf)
 year <- "2324"
 
 processed_data_list <- targets::tar_read("processed_data_list_2324",
-                                         store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets"))
+  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+)
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%

From 702225fd6d31ca64d30067bc1b42c2c0dab4313f Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 22 Jan 2024 14:40:00 +0000
Subject: [PATCH 015/186] Use CHI parameter for ep/indiv tests

---
 R/process_tests_episode_file.R    | 12 ++----------
 R/process_tests_individual_file.R | 12 ++----------
 2 files changed, 4 insertions(+), 20 deletions(-)

diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R
index eaa946e3e..6b66cd655 100644
--- a/R/process_tests_episode_file.R
+++ b/R/process_tests_episode_file.R
@@ -73,15 +73,7 @@ produce_episode_file_tests <- function(
   test_flags <- data %>%
     dplyr::group_by(.data$recid) %>%
     # use functions to create HB and partnership flags
-    dplyr::mutate(
-      unique_anon_chi = dplyr::lag(.data$anon_chi) != .data$anon_chi,
-      n_missing_anon_chi = is_missing(.data$anon_chi),
-      n_males = .data$gender == 1L,
-      n_females = .data$gender == 2L,
-      n_postcode = !is.na(.data$postcode) | !.data$postcode == "",
-      n_missing_postcode = is_missing(.data$postcode),
-      missing_dob = is.na(.data$dob)
-    ) %>%
+    create_demog_test_flags(chi = anon_chi) %>%
     create_hb_test_flags(.data$hbtreatcode) %>%
     create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>%
     create_hscp_test_flags(.data$hscp2018) %>%
@@ -111,7 +103,7 @@ produce_episode_file_tests <- function(
 
   test_flags <- test_flags %>%
     # keep variables for comparison
-    dplyr::select("unique_anon_chi":dplyr::last_col()) %>%
+    dplyr::select("unique_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum", group_by = "recid")
 
diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R
index bbd13948c..35ad7443a 100644
--- a/R/process_tests_individual_file.R
+++ b/R/process_tests_individual_file.R
@@ -60,19 +60,11 @@ produce_individual_file_tests <- function(data) {
 
   test_flags <- data %>%
     # use functions to create HB and partnership flags
-    dplyr::mutate(
-      unique_anon_chi = dplyr::lag(.data$anon_chi) != .data$anon_chi,
-      n_missing_anon_chi = is_missing(.data$anon_chi),
-      n_males = .data$gender == 1L,
-      n_females = .data$gender == 2L,
-      n_postcode = !is.na(.data$postcode) | !.data$postcode == "",
-      n_missing_postcode = is_missing(.data$postcode),
-      missing_dob = is.na(.data$dob)
-    ) %>%
+    create_demog_test_flags(chi = anon_chi) %>%
     create_hb_test_flags(.data$hbrescode) %>%
     create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>%
     # keep variables for comparison
-    dplyr::select(c("unique_anon_chi":dplyr::last_col())) %>%
+    dplyr::select(c("unique_chi":dplyr::last_col())) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 

From d0fb3cdcb8b4120244d748544c70f910fe35ec31 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 22 Jan 2024 14:48:35 +0000
Subject: [PATCH 016/186] Use CHI parameter for extract tests (chi)

---
 R/process_tests_alarms_telecare.R  | 4 ++--
 R/process_tests_care_home.R        | 4 ++--
 R/process_tests_cmh.R              | 4 ++--
 R/process_tests_district_nursing.R | 4 ++--
 R/process_tests_home_care.R        | 4 ++--
 R/process_tests_homelessness.R     | 4 ++--
 R/process_tests_nrs_deaths.R       | 4 ++--
 R/process_tests_prescribing.R      | 4 ++--
 R/process_tests_sc_demographics.R  | 2 +-
 R/process_tests_sds.R              | 4 ++--
 R/produce_sc_all_episodes_tests.R  | 4 ++--
 R/produce_source_extract_tests.R   | 4 ++--
 12 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/R/process_tests_alarms_telecare.R b/R/process_tests_alarms_telecare.R
index d7f9fa699..3c70c8cab 100644
--- a/R/process_tests_alarms_telecare.R
+++ b/R/process_tests_alarms_telecare.R
@@ -37,14 +37,14 @@ produce_source_at_tests <- function(data,
                                     max_min_vars = c("record_keydate1", "record_keydate2")) {
   test_flags <- data %>%
     # create test flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     dplyr::mutate(
       n_at_alarms = .data$smrtype == "AT-Alarm",
       n_at_telecare = .data$smrtype == "AT-Tele"
     ) %>%
     create_lca_test_flags(.data$sc_send_lca) %>%
     # remove variables that won't be summed
-    dplyr::select(.data$valid_chi:.data$West_Lothian) %>%
+    dplyr::select(.data$unique_chi:.data$West_Lothian) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_care_home.R b/R/process_tests_care_home.R
index 2032c2473..a46071c22 100644
--- a/R/process_tests_care_home.R
+++ b/R/process_tests_care_home.R
@@ -47,7 +47,7 @@ produce_source_ch_tests <- function(data,
                                     )) {
   test_flags <- data %>%
     # use functions to create HB and partnership flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     dplyr::mutate(
       n_episodes = 1L,
       ch_name_missing = is.na(.data$ch_name),
@@ -60,7 +60,7 @@ produce_source_ch_tests <- function(data,
     ) %>%
     create_lca_test_flags(.data$sc_send_lca) %>%
     # keep variables for comparison
-    dplyr::select("valid_chi":dplyr::last_col()) %>%
+    dplyr::select("unique_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_cmh.R b/R/process_tests_cmh.R
index 09a17bdbb..4a34c08f9 100644
--- a/R/process_tests_cmh.R
+++ b/R/process_tests_cmh.R
@@ -43,11 +43,11 @@ process_tests_cmh <- function(data, year) {
 produce_source_cmh_tests <- function(data) {
   test_flags <- data %>%
     # create test flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     create_hb_test_flags(hb_var = .data$hbrescode) %>%
     dplyr::mutate(n_episodes = 1L) %>%
     # keep variables for comparison
-    dplyr::select("valid_chi":dplyr::last_col()) %>%
+    dplyr::select("unique_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_district_nursing.R b/R/process_tests_district_nursing.R
index d3d55a15a..8f428b954 100644
--- a/R/process_tests_district_nursing.R
+++ b/R/process_tests_district_nursing.R
@@ -65,11 +65,11 @@ produce_source_dn_tests <- function(data,
                                     )) {
   test_flags <- data %>%
     # use functions to create HB and partnership flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     create_hb_test_flags(.data$hbtreatcode) %>%
     create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>%
     # keep variables for comparison
-    dplyr::select(.data$valid_chi:.data$NHS_Lanarkshire_cost) %>%
+    dplyr::select(.data$unique_chi:.data$NHS_Lanarkshire_cost) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_home_care.R b/R/process_tests_home_care.R
index c1af63e97..a8ee76672 100644
--- a/R/process_tests_home_care.R
+++ b/R/process_tests_home_care.R
@@ -49,7 +49,7 @@ produce_source_hc_tests <- function(data,
                                     )) {
   test_flags <- data %>%
     # use functions to create HB and partnership flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     dplyr::mutate(
       n_episodes = 1L,
       hc_per = dplyr::if_else(.data$smrtype == "HC-Per", 1L, 0L),
@@ -61,7 +61,7 @@ produce_source_hc_tests <- function(data,
     ) %>%
     create_lca_test_flags(.data$sc_send_lca) %>%
     # keep variables for comparison
-    dplyr::select("valid_chi":dplyr::last_col()) %>%
+    dplyr::select("unique_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_homelessness.R b/R/process_tests_homelessness.R
index 4d49f1aa4..0b2c33880 100644
--- a/R/process_tests_homelessness.R
+++ b/R/process_tests_homelessness.R
@@ -38,10 +38,10 @@ produce_slf_homelessness_tests <- function(data,
   test_flags <- data %>%
     dplyr::arrange(.data$chi) %>%
     # create test flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     create_lca_test_flags(.data$hl1_sending_lca) %>%
     # keep variables for comparison
-    dplyr::select("valid_chi":dplyr::last_col()) %>%
+    dplyr::select("unique_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_nrs_deaths.R b/R/process_tests_nrs_deaths.R
index c1a963dcf..3796476c5 100644
--- a/R/process_tests_nrs_deaths.R
+++ b/R/process_tests_nrs_deaths.R
@@ -38,10 +38,10 @@ process_tests_nrs_deaths <- function(data, year) {
 produce_source_nrs_tests <- function(data) {
   test_flags <- data %>%
     # create test flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     dplyr::mutate(n_deaths = 1L) %>%
     # keep variables for comparison
-    dplyr::select("valid_chi":dplyr::last_col()) %>%
+    dplyr::select("unique_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_prescribing.R b/R/process_tests_prescribing.R
index bac0e3c52..8a7b0e73f 100644
--- a/R/process_tests_prescribing.R
+++ b/R/process_tests_prescribing.R
@@ -41,10 +41,10 @@ process_tests_prescribing <- function(data, year) {
 produce_source_pis_tests <- function(data) {
   test_flags <- data %>%
     # use functions to create HB and partnership flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     dplyr::mutate(n_episodes = 1L) %>%
     # keep variables for comparison
-    dplyr::select("valid_chi":dplyr::last_col()) %>%
+    dplyr::select("unique_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/process_tests_sc_demographics.R b/R/process_tests_sc_demographics.R
index dfb110aa9..b503969ef 100644
--- a/R/process_tests_sc_demographics.R
+++ b/R/process_tests_sc_demographics.R
@@ -36,7 +36,7 @@ process_tests_sc_demographics <- function(data) {
 produce_sc_demog_lookup_tests <- function(data) {
   data %>%
     # create test flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     dplyr::mutate(
       n_missing_sending_loc = is.na(.data$sending_location),
       n_missing_sc_id = is.na(.data$social_care_id)
diff --git a/R/process_tests_sds.R b/R/process_tests_sds.R
index f624f504b..ce6de656b 100644
--- a/R/process_tests_sds.R
+++ b/R/process_tests_sds.R
@@ -35,10 +35,10 @@ produce_source_sds_tests <- function(data,
                                      max_min_vars = c("record_keydate1", "record_keydate2")) {
   test_flags <- data %>%
     # create test flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     create_lca_test_flags(.data$sc_send_lca) %>%
     # remove variables that won't be summed
-    dplyr::select("valid_chi":"West_Lothian") %>%
+    dplyr::select("unique_chi":"West_Lothian") %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
diff --git a/R/produce_sc_all_episodes_tests.R b/R/produce_sc_all_episodes_tests.R
index efe980cd4..4c5f736bb 100644
--- a/R/produce_sc_all_episodes_tests.R
+++ b/R/produce_sc_all_episodes_tests.R
@@ -10,7 +10,7 @@
 produce_sc_all_episodes_tests <- function(data) {
   data %>%
     # create test flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     dplyr::mutate(
       n_missing_sending_loc = dplyr::if_else(
         is.na(.data$sending_location),
@@ -24,7 +24,7 @@ produce_sc_all_episodes_tests <- function(data) {
       )
     ) %>%
     # keep variables for comparison
-    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
+    dplyr::select(c("unique_chi":dplyr::last_col())) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 }
diff --git a/R/produce_source_extract_tests.R b/R/produce_source_extract_tests.R
index d9a07c893..13b33d549 100644
--- a/R/produce_source_extract_tests.R
+++ b/R/produce_source_extract_tests.R
@@ -33,7 +33,7 @@ produce_source_extract_tests <- function(data,
                                          add_hscp_count = TRUE) {
   test_flags <- data %>%
     # use functions to create HB and partnership flags
-    create_demog_test_flags() %>%
+    create_demog_test_flags(chi = chi) %>%
     create_hb_test_flags(.data$hbtreatcode) %>%
     create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net)
 
@@ -43,7 +43,7 @@ produce_source_extract_tests <- function(data,
 
   test_flags <- test_flags %>%
     # keep variables for comparison
-    dplyr::select("valid_chi":dplyr::last_col()) %>%
+    dplyr::select("unique_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 

From bbf28dd6ad0f535800e17bc16d5abd8ea08f4811 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 23 Jan 2024 11:09:42 +0000
Subject: [PATCH 017/186] Change test sheet names to lowercase

---
 R/process_tests_acute.R              | 2 +-
 R/process_tests_ae.R                 | 2 +-
 R/process_tests_alarms_telecare.R    | 2 +-
 R/process_tests_care_home.R          | 2 +-
 R/process_tests_cmh.R                | 2 +-
 R/process_tests_delayed_discharges.R | 2 +-
 R/process_tests_gp_ooh.R             | 2 +-
 R/process_tests_home_care.R          | 2 +-
 R/process_tests_homelessness.R       | 2 +-
 R/process_tests_maternity.R          | 2 +-
 R/process_tests_mental_health.R      | 2 +-
 R/process_tests_nrs_deaths.R         | 2 +-
 R/process_tests_outpatients.R        | 2 +-
 R/process_tests_prescribing.R        | 2 +-
 14 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/R/process_tests_acute.R b/R/process_tests_acute.R
index 759d866b7..8a974e442 100644
--- a/R/process_tests_acute.R
+++ b/R/process_tests_acute.R
@@ -18,7 +18,7 @@ process_tests_acute <- function(data, year) {
     old_data = produce_source_extract_tests(old_data),
     new_data = produce_source_extract_tests(data)
   ) %>%
-    write_tests_xlsx(sheet_name = "01B", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "01b", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_ae.R b/R/process_tests_ae.R
index 5bcd6a3c9..2ec97b8d0 100644
--- a/R/process_tests_ae.R
+++ b/R/process_tests_ae.R
@@ -21,7 +21,7 @@ process_tests_ae <- function(data, year) {
       max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net")
     )
   ) %>%
-    write_tests_xlsx(sheet_name = "AE2", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "ae2", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_alarms_telecare.R b/R/process_tests_alarms_telecare.R
index d7f9fa699..c99aaa857 100644
--- a/R/process_tests_alarms_telecare.R
+++ b/R/process_tests_alarms_telecare.R
@@ -18,7 +18,7 @@ process_tests_alarms_telecare <- function(data, year) {
   )
 
   comparison %>%
-    write_tests_xlsx(sheet_name = "AT", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "at", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_care_home.R b/R/process_tests_care_home.R
index 2032c2473..dd54bd98b 100644
--- a/R/process_tests_care_home.R
+++ b/R/process_tests_care_home.R
@@ -15,7 +15,7 @@ process_tests_care_home <- function(data, year) {
     old_data = produce_source_ch_tests(old_data),
     new_data = produce_source_ch_tests(data)
   ) %>%
-    write_tests_xlsx(sheet_name = "CH", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "ch", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_cmh.R b/R/process_tests_cmh.R
index 09a17bdbb..7ddec1657 100644
--- a/R/process_tests_cmh.R
+++ b/R/process_tests_cmh.R
@@ -20,7 +20,7 @@ process_tests_cmh <- function(data, year) {
     old_data = produce_source_cmh_tests(old_data),
     new_data = produce_source_cmh_tests(data)
   ) %>%
-    write_tests_xlsx(sheet_name = "CMH", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "cmh", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_delayed_discharges.R b/R/process_tests_delayed_discharges.R
index c2370eb76..86a60968d 100644
--- a/R/process_tests_delayed_discharges.R
+++ b/R/process_tests_delayed_discharges.R
@@ -18,7 +18,7 @@ process_tests_delayed_discharges <- function(data, year) {
     old_data = produce_source_dd_tests(old_data),
     new_data = produce_source_dd_tests(data)
   ) %>%
-    write_tests_xlsx(sheet_name = "DD", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "dd", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_gp_ooh.R b/R/process_tests_gp_ooh.R
index fd3ec5f59..6ce3ab5e5 100644
--- a/R/process_tests_gp_ooh.R
+++ b/R/process_tests_gp_ooh.R
@@ -19,7 +19,7 @@ process_tests_gp_ooh <- function(data, year) {
       sum_mean_vars = "cost"
     )
   ) %>%
-    write_tests_xlsx(sheet_name = "GPOoH", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "gpooh", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_home_care.R b/R/process_tests_home_care.R
index c1af63e97..f39cf8f5d 100644
--- a/R/process_tests_home_care.R
+++ b/R/process_tests_home_care.R
@@ -17,7 +17,7 @@ process_tests_home_care <- function(data, year) {
   )
 
   comparison %>%
-    write_tests_xlsx(sheet_name = "home_care", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "hc", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_homelessness.R b/R/process_tests_homelessness.R
index 4d49f1aa4..9c70161e1 100644
--- a/R/process_tests_homelessness.R
+++ b/R/process_tests_homelessness.R
@@ -16,7 +16,7 @@ process_tests_homelessness <- function(data, year) {
     old_data = produce_slf_homelessness_tests(old_data),
     new_data = produce_slf_homelessness_tests(data)
   ) %>%
-    write_tests_xlsx(sheet_name = "HL1", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "hl1", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_maternity.R b/R/process_tests_maternity.R
index 90f0ec449..39d7c8aa1 100644
--- a/R/process_tests_maternity.R
+++ b/R/process_tests_maternity.R
@@ -15,7 +15,7 @@ process_tests_maternity <- function(data, year) {
     old_data = produce_source_extract_tests(old_data),
     new_data = produce_source_extract_tests(data)
   ) %>%
-    write_tests_xlsx(sheet_name = "02B", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "02b", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_mental_health.R b/R/process_tests_mental_health.R
index 96283d47b..5b5cb9001 100644
--- a/R/process_tests_mental_health.R
+++ b/R/process_tests_mental_health.R
@@ -15,7 +15,7 @@ process_tests_mental_health <- function(data, year) {
     old_data = produce_source_extract_tests(old_data),
     new_data = produce_source_extract_tests(data)
   ) %>%
-    write_tests_xlsx(sheet_name = "04B", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "04b", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_nrs_deaths.R b/R/process_tests_nrs_deaths.R
index c1a963dcf..cbfff2d54 100644
--- a/R/process_tests_nrs_deaths.R
+++ b/R/process_tests_nrs_deaths.R
@@ -15,7 +15,7 @@ process_tests_nrs_deaths <- function(data, year) {
     old_data = produce_source_nrs_tests(old_data),
     new_data = produce_source_nrs_tests(data)
   ) %>%
-    write_tests_xlsx(sheet_name = "NRS", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "nrs", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R
index 5787e6884..6a377fcf2 100644
--- a/R/process_tests_outpatients.R
+++ b/R/process_tests_outpatients.R
@@ -23,7 +23,7 @@ process_tests_outpatients <- function(data, year) {
       add_hscp_count = FALSE
     )
   ) %>%
-    write_tests_xlsx(sheet_name = "00B", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "00b", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/process_tests_prescribing.R b/R/process_tests_prescribing.R
index bac0e3c52..003f00091 100644
--- a/R/process_tests_prescribing.R
+++ b/R/process_tests_prescribing.R
@@ -15,7 +15,7 @@ process_tests_prescribing <- function(data, year) {
     old_data = produce_source_pis_tests(old_data),
     new_data = produce_source_pis_tests(data)
   ) %>%
-    write_tests_xlsx(sheet_name = "PIS", year, workbook_name = "extract")
+    write_tests_xlsx(sheet_name = "pis", year, workbook_name = "extract")
 
   return(comparison)
 }

From b3d826bb1ff034b1ba5573299490a0ee1f5b6071 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 23 Jan 2024 11:10:37 +0000
Subject: [PATCH 018/186] Change date to lowercase

---
 R/write_tests_xlsx.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R
index c6a962857..f05f20025 100644
--- a/R/write_tests_xlsx.R
+++ b/R/write_tests_xlsx.R
@@ -89,6 +89,9 @@ write_tests_xlsx <- function(comparison_data,
 
   # add a new sheet for tests
   date_today <- format(Sys.Date(), "%d_%b")
+
+  date_today<- stringr::str_to_lower(date_today)
+
   sheet_name_dated <- ifelse(
     is.null(year),
     stringr::str_glue("{sheet_name}_{date_today}"),

From 4ca03b7f738056c618a8a18adb11d5db7c483d1c Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Tue, 23 Jan 2024 11:12:23 +0000
Subject: [PATCH 019/186] Update documentation

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5123289dd..4bb0c6f18 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -73,4 +73,4 @@ Encoding: UTF-8
 Language: en-GB
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.0

From 0e69e503643c08ee33a559ac8e7010c118c164f8 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Wed, 24 Jan 2024 13:24:51 +0000
Subject: [PATCH 020/186] Update documentation

---
 DESCRIPTION                    | 2 +-
 man/calculate_stay.Rd          | 4 ++--
 man/compute_mid_year_age.Rd    | 4 ++--
 man/convert_date_to_numeric.Rd | 4 ++--
 man/convert_numeric_to_date.Rd | 4 ++--
 man/end_fy.Rd                  | 2 +-
 man/end_fy_quarter.Rd          | 2 +-
 man/end_next_fy_quarter.Rd     | 4 ++--
 man/fy_interval.Rd             | 4 ++--
 man/is_date_in_fyyear.Rd       | 4 ++--
 man/last_date_month.Rd         | 4 ++--
 man/midpoint_fy.Rd             | 4 ++--
 man/next_fy.Rd                 | 4 ++--
 man/start_fy.Rd                | 2 +-
 man/start_fy_quarter.Rd        | 2 +-
 man/start_next_fy_quarter.Rd   | 6 +++---
 16 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4bb0c6f18..3a75852e2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -73,4 +73,4 @@ Encoding: UTF-8
 Language: en-GB
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.0
+RoxygenNote: 7.3.1
diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd
index 43b7bd166..5e9266b10 100644
--- a/man/calculate_stay.Rd
+++ b/man/calculate_stay.Rd
@@ -34,16 +34,16 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd
index 142fa4aab..5a50370e0 100644
--- a/man/compute_mid_year_age.Rd
+++ b/man/compute_mid_year_age.Rd
@@ -31,16 +31,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd
index 5511fec84..b67eaa778 100644
--- a/man/convert_date_to_numeric.Rd
+++ b/man/convert_date_to_numeric.Rd
@@ -24,16 +24,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd
index f786e0319..a09b7b9b9 100644
--- a/man/convert_numeric_to_date.Rd
+++ b/man/convert_numeric_to_date.Rd
@@ -24,16 +24,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_fy.Rd b/man/end_fy.Rd
index 2925ffe60..6220f5f32 100644
--- a/man/end_fy.Rd
+++ b/man/end_fy.Rd
@@ -34,8 +34,8 @@ Other date functions:
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd
index 0efe9624a..26c439a04 100644
--- a/man/end_fy_quarter.Rd
+++ b/man/end_fy_quarter.Rd
@@ -33,8 +33,8 @@ Other date functions:
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd
index f9cc1720a..702446e82 100644
--- a/man/end_next_fy_quarter.Rd
+++ b/man/end_next_fy_quarter.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd
index 12d1d36bb..00b9ea52c 100644
--- a/man/fy_interval.Rd
+++ b/man/fy_interval.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd
index 97a0f3639..e74bd5734 100644
--- a/man/is_date_in_fyyear.Rd
+++ b/man/is_date_in_fyyear.Rd
@@ -41,15 +41,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd
index f52305356..3d3b9544e 100644
--- a/man/last_date_month.Rd
+++ b/man/last_date_month.Rd
@@ -25,15 +25,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd
index 7bac9b6b3..2363df773 100644
--- a/man/midpoint_fy.Rd
+++ b/man/midpoint_fy.Rd
@@ -27,15 +27,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/next_fy.Rd b/man/next_fy.Rd
index 19e1193f4..7524c5f11 100644
--- a/man/next_fy.Rd
+++ b/man/next_fy.Rd
@@ -27,15 +27,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/start_fy.Rd b/man/start_fy.Rd
index 4996bfb72..9951af2ec 100644
--- a/man/start_fy.Rd
+++ b/man/start_fy.Rd
@@ -27,8 +27,8 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd
index f5729dcb0..9936736a8 100644
--- a/man/start_fy_quarter.Rd
+++ b/man/start_fy_quarter.Rd
@@ -26,8 +26,8 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd
index 098f0bf73..fdac297a7 100644
--- a/man/start_next_fy_quarter.Rd
+++ b/man/start_next_fy_quarter.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
-\code{\link{start_fy}()}
+\code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()}
 }
 \concept{date functions}

From cbf5ae4cd6f6d6f12ee0de18a01313abd3aaa3df Mon Sep 17 00:00:00 2001
From: SwiftySalmon <SwiftySalmon@users.noreply.github.com>
Date: Wed, 24 Jan 2024 13:58:36 +0000
Subject: [PATCH 021/186] Update documentation

---
 DESCRIPTION                    | 2 +-
 man/calculate_stay.Rd          | 4 ++--
 man/compute_mid_year_age.Rd    | 4 ++--
 man/convert_date_to_numeric.Rd | 4 ++--
 man/convert_numeric_to_date.Rd | 4 ++--
 man/end_fy.Rd                  | 2 +-
 man/end_fy_quarter.Rd          | 2 +-
 man/end_next_fy_quarter.Rd     | 4 ++--
 man/fy_interval.Rd             | 4 ++--
 man/is_date_in_fyyear.Rd       | 4 ++--
 man/last_date_month.Rd         | 4 ++--
 man/midpoint_fy.Rd             | 4 ++--
 man/next_fy.Rd                 | 4 ++--
 man/start_fy.Rd                | 2 +-
 man/start_fy_quarter.Rd        | 2 +-
 man/start_next_fy_quarter.Rd   | 6 +++---
 16 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4bb0c6f18..3a75852e2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -73,4 +73,4 @@ Encoding: UTF-8
 Language: en-GB
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.0
+RoxygenNote: 7.3.1
diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd
index 43b7bd166..5e9266b10 100644
--- a/man/calculate_stay.Rd
+++ b/man/calculate_stay.Rd
@@ -34,16 +34,16 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd
index 142fa4aab..5a50370e0 100644
--- a/man/compute_mid_year_age.Rd
+++ b/man/compute_mid_year_age.Rd
@@ -31,16 +31,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd
index 5511fec84..b67eaa778 100644
--- a/man/convert_date_to_numeric.Rd
+++ b/man/convert_date_to_numeric.Rd
@@ -24,16 +24,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd
index f786e0319..a09b7b9b9 100644
--- a/man/convert_numeric_to_date.Rd
+++ b/man/convert_numeric_to_date.Rd
@@ -24,16 +24,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_fy.Rd b/man/end_fy.Rd
index 2925ffe60..6220f5f32 100644
--- a/man/end_fy.Rd
+++ b/man/end_fy.Rd
@@ -34,8 +34,8 @@ Other date functions:
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd
index 0efe9624a..26c439a04 100644
--- a/man/end_fy_quarter.Rd
+++ b/man/end_fy_quarter.Rd
@@ -33,8 +33,8 @@ Other date functions:
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd
index f9cc1720a..702446e82 100644
--- a/man/end_next_fy_quarter.Rd
+++ b/man/end_next_fy_quarter.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd
index 12d1d36bb..00b9ea52c 100644
--- a/man/fy_interval.Rd
+++ b/man/fy_interval.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd
index 97a0f3639..e74bd5734 100644
--- a/man/is_date_in_fyyear.Rd
+++ b/man/is_date_in_fyyear.Rd
@@ -41,15 +41,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd
index f52305356..3d3b9544e 100644
--- a/man/last_date_month.Rd
+++ b/man/last_date_month.Rd
@@ -25,15 +25,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd
index 7bac9b6b3..2363df773 100644
--- a/man/midpoint_fy.Rd
+++ b/man/midpoint_fy.Rd
@@ -27,15 +27,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/next_fy.Rd b/man/next_fy.Rd
index 19e1193f4..7524c5f11 100644
--- a/man/next_fy.Rd
+++ b/man/next_fy.Rd
@@ -27,15 +27,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/start_fy.Rd b/man/start_fy.Rd
index 4996bfb72..9951af2ec 100644
--- a/man/start_fy.Rd
+++ b/man/start_fy.Rd
@@ -27,8 +27,8 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd
index f5729dcb0..9936736a8 100644
--- a/man/start_fy_quarter.Rd
+++ b/man/start_fy_quarter.Rd
@@ -26,8 +26,8 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd
index 098f0bf73..fdac297a7 100644
--- a/man/start_next_fy_quarter.Rd
+++ b/man/start_next_fy_quarter.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
-\code{\link{start_fy}()}
+\code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()}
 }
 \concept{date functions}

From 3055d54f80f75b3b8c29306116a0ad83837d8645 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Mon, 29 Jan 2024 09:56:34 +0000
Subject: [PATCH 022/186] Style code

---
 R/write_tests_xlsx.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R
index f05f20025..ffe86f48f 100644
--- a/R/write_tests_xlsx.R
+++ b/R/write_tests_xlsx.R
@@ -90,7 +90,7 @@ write_tests_xlsx <- function(comparison_data,
   # add a new sheet for tests
   date_today <- format(Sys.Date(), "%d_%b")
 
-  date_today<- stringr::str_to_lower(date_today)
+  date_today <- stringr::str_to_lower(date_today)
 
   sheet_name_dated <- ifelse(
     is.null(year),

From 30cb567fe1df6cfab6e0818017a0e6aef5014c51 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 5 Feb 2024 13:41:25 +0000
Subject: [PATCH 023/186] Fix pick variables: this was not taking the correct
 variables, leading to NSUs being assigned psychiatry

---
 R/create_service_use_lookup.R | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/R/create_service_use_lookup.R b/R/create_service_use_lookup.R
index 4acbfc507..242e0b351 100644
--- a/R/create_service_use_lookup.R
+++ b/R/create_service_use_lookup.R
@@ -908,7 +908,13 @@ assign_cohort_names <- function(data) {
         # Situation where no cost is greater than another,
         # so the maximum is the same  as the mean
         .data$cost_max == rowSums(
-          dplyr::pick("psychiatry_cost":"residential_care_cost")
+          dplyr::pick(c(
+            "psychiatry_cost", "maternity_cost", "geriatric_cost",
+            "elective_inpatient_cost", "limited_daycases_cost",
+            "routine_daycase_cost", "single_emergency_cost",
+            "multiple_emergency_cost", "prescribing_cost",
+            "outpatient_cost", "ae2_cost", "residential_care_cost"
+          ))
         ) / 12.0 ~ "Unassigned",
         .data$cost_max == .data$psychiatry_cost ~ "Psychiatry",
         .data$cost_max == .data$maternity_cost ~ "Maternity",

From 744bbc024b5cba09ebe9514cd36ad794b1e24fff Mon Sep 17 00:00:00 2001
From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
Date: Wed, 7 Feb 2024 14:28:21 +0000
Subject: [PATCH 024/186] SC Demographics and SDS (#900)

* Style code

* # read in sc demographics

Different variables: removed extract date as it is not accurate, and using chi over upi after discussion with social care data management. Added in date of death just for fun.

* social care demographics first draft

removed a lot of the submitted variables and instead using chi variables from chi seeding. Other changes:
- Fill in missing values,
- create flag for latest social care id (one from database is not accurate), this makes sure that each chi only has ONE sc id as the latest to stop it creating duplicates
- change postcode to choose chi over submitted

* Style code

* Had a GitHub error? Not sure what happened, but committing first draft of sc demographics

* Style code

* first draft sds.
No major changes - only how demographics is matched on and how latest social care id is selected

* Update documentation

* demographics - add sending location to group by

* Style code

* Update documentation

* Added ungroup()

* Remove comments

* Remove comments

* Style code

---------

Co-authored-by: SwiftySalmon <SwiftySalmon@users.noreply.github.com>
Co-authored-by: marjom02 <megan.mcnicol2@nhs.scot>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Zihao Li <zihao.li@phs.scot>
---
 R/fix_sc_dates.R                   |  8 +--
 R/process_lookup_sc_demographics.R | 97 +++++++++++++++++++-----------
 R/process_sc_all_sds.R             | 19 ++++--
 R/read_lookup_sc_demographics.R    | 15 +++--
 R/read_sc_all_sds.R                |  8 +--
 R/replace_sc_id_with_latest.R      | 25 +++-----
 man/fix_sc_end_dates.Rd            |  2 +-
 7 files changed, 98 insertions(+), 76 deletions(-)

diff --git a/R/fix_sc_dates.R b/R/fix_sc_dates.R
index c636980a6..117acbaab 100644
--- a/R/fix_sc_dates.R
+++ b/R/fix_sc_dates.R
@@ -9,7 +9,7 @@
 #' @return A date vector with replaced end dates
 fix_sc_start_dates <- function(start_date, period_start) {
   # Fix sds_start_date is missing by setting start_date to be the start of
-  # financial year
+  # financial period
   start_date <- dplyr::if_else(
     is.na(start_date),
     period_start,
@@ -30,12 +30,12 @@ fix_sc_start_dates <- function(start_date, period_start) {
 #' @param period Social care latest submission period.
 #'
 #' @return A date vector with replaced end dates
-fix_sc_end_dates <- function(start_date, end_date, period) {
+fix_sc_end_dates <- function(start_date, end_date, period_end_date) {
   # Fix sds_end_date is earlier than sds_start_date by setting end_date to be
   # the end of financial year
   end_date <- dplyr::if_else(
     start_date > end_date,
-    end_fy(year = stringr::str_sub(period, 1L, 4L), "alternate"),
+    period_end_date,
     end_date
   )
 
@@ -57,7 +57,7 @@ fix_sc_end_dates <- function(start_date, end_date, period) {
 #' @return A date vector with replaced end dates
 fix_sc_missing_end_dates <- function(end_date, period_end) {
   # Fix sds_end_date is earlier than sds_start_date by setting end_date to be
-  # the end of financial year
+  # the end of financial period
   end_date <- dplyr::if_else(
     is.na(end_date),
     period_end,
diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R
index 8c363f547..96adc985e 100644
--- a/R/process_lookup_sc_demographics.R
+++ b/R/process_lookup_sc_demographics.R
@@ -28,30 +28,46 @@ process_lookup_sc_demographics <- function(
     dplyr::pull(.data$pc7)
 
 
-  # Data Cleaning ---------------------------------------
-
+  #  Fill in missing data and flag latest cases to keep ---------------------------------------
   sc_demog <- data %>%
-    dplyr::mutate(
-      # use chi if upi is NA
-      upi = dplyr::coalesce(.data$upi, .data$chi_upi),
-      # check gender code - replace code 99 with 9
-      submitted_gender = replace(.data$submitted_gender, .data$submitted_gender == 99L, 9L)
+    dplyr::rename(
+      chi = chi_upi,
+      gender = chi_gender_code,
+      dob = chi_date_of_birth
     ) %>%
+    # fill in missing demographic details
+    dplyr::arrange(period, social_care_id) %>%
+    dplyr::group_by(social_care_id, sending_location) %>%
+    tidyr::fill(chi, .direction = ("updown")) %>%
+    tidyr::fill(dob, .direction = ("updown")) %>%
+    tidyr::fill(date_of_death, .direction = ("updown")) %>%
+    tidyr::fill(gender, .direction = ("updown")) %>%
+    tidyr::fill(chi_postcode, .direction = ("updown")) %>%
+    tidyr::fill(submitted_postcode, .direction = ("updown")) %>%
+    dplyr::ungroup() %>%
+    # format postcodes using `phsmethods`
+    dplyr::mutate(dplyr::across(tidyselect::contains("postcode"), ~ phsmethods::format_postcode(.x, format = "pc7"))) # are sc postcodes even used anywhere?
+
+
+  # flag unique cases of chi and sc_id, and flag the latest record (sc_demographics latest flag is not accurate)
+  sc_demog <- sc_demog %>%
+    dplyr::group_by(chi, sending_location) %>%
+    dplyr::mutate(latest = dplyr::last(period)) %>% # flag latest period for chi
+    dplyr::group_by(chi, social_care_id, sending_location) %>%
+    dplyr::mutate(latest_sc_id = dplyr::last(period)) %>% # flag latest period for social care
+    dplyr::group_by(chi, sending_location) %>%
+    dplyr::mutate(last_sc_id = dplyr::last(social_care_id)) %>%
     dplyr::mutate(
-      # use CHI sex if available
-      gender = dplyr::if_else(
-        is.na(.data$chi_gender_code) | .data$chi_gender_code == 9L,
-        .data$submitted_gender,
-        .data$chi_gender_code
-      ),
-      # Use CHI DoB if available
-      dob = dplyr::coalesce(.data$chi_date_of_birth, .data$submitted_date_of_birth)
+      latest_flag = ifelse((latest == period & last_sc_id == social_care_id) | is.na(chi), 1, 0),
+      keep = ifelse(latest_sc_id == period, 1, 0)
     ) %>%
-    # format postcodes using `phsmethods`
-    dplyr::mutate(dplyr::across(
-      tidyselect::contains("postcode"),
-      ~ phsmethods::format_postcode(.x, format = "pc7")
-    ))
+    dplyr::ungroup()
+
+  sc_demog <- sc_demog %>%
+    dplyr::select(-period, -latest_record_flag, -latest, -last_sc_id, -latest_sc_id) %>%
+    dplyr::distinct()
+
+  # postcodes ---------------------------------------------------------------
 
   # count number of na postcodes
   na_postcodes <- sc_demog %>%
@@ -69,29 +85,32 @@ process_lookup_sc_demographics <- function(
       ~ dplyr::if_else(stringr::str_detect(.x, uk_pc_regexp), .x, NA)
     )) %>%
     dplyr::select(
-      "latest_record_flag",
-      "extract_date",
       "sending_location",
       "social_care_id",
-      "upi",
+      "chi",
       "gender",
       "dob",
+      "date_of_death",
       "submitted_postcode",
-      "chi_postcode"
+      "chi_postcode",
+      "keep",
+      "latest_flag"
     ) %>%
     # check if submitted_postcode matches with postcode lookup
     dplyr::mutate(
-      valid_pc = .data$submitted_postcode %in% valid_spd_postcodes
+      valid_pc_submitted = .data$submitted_postcode %in% valid_spd_postcodes,
+      valid_pc_chi = .data$chi_postcode %in% valid_spd_postcodes
     ) %>%
     # use submitted_postcode if valid, otherwise use chi_postcode
     dplyr::mutate(postcode = dplyr::case_when(
-      (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ .data$submitted_postcode,
-      (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ .data$chi_postcode
+      (!is.na(.data$chi_postcode) & .data$valid_pc_chi) ~ .data$chi_postcode,
+      ((is.na(.data$chi_postcode) | !(.data$valid_pc_chi)) & !(is.na(.data$submitted_postcode)) & .data$valid_pc_submitted) ~ .data$submitted_postcode,
+      (is.na(.data$submitted_postcode) & !.data$valid_pc_submitted) ~ .data$chi_postcode
     )) %>%
     dplyr::mutate(postcode_type = dplyr::case_when(
-      (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ "submitted",
-      (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ "chi",
-      (is.na(.data$submitted_postcode) & is.na(.data$chi_postcode)) ~ "missing"
+      (postcode == chi_postcode) ~ "chi",
+      (postcode == submitted_postcode) ~ "submitted",
+      (is.na(.data$submitted_postcode) & is.na(.data$chi_postcode) | is.na(.data$postcode)) ~ "missing"
     ))
 
   # Check where the postcodes are coming from
@@ -102,26 +121,32 @@ process_lookup_sc_demographics <- function(
   na_replaced_postcodes <- sc_demog %>%
     dplyr::count(dplyr::across(tidyselect::ends_with("_postcode"), ~ is.na(.x)))
 
-
   sc_demog_lookup <- sc_demog %>%
+    dplyr::filter(keep == 1) %>% # filter to only keep latest record for sc id and chi
+    dplyr::select(-postcode_type, -valid_pc_submitted, -valid_pc_chi, -submitted_postcode, -chi_postcode) %>%
+    dplyr::distinct() %>%
     # group by sending location and ID
-    dplyr::group_by(.data$sending_location, .data$social_care_id) %>%
+    dplyr::group_by(.data$sending_location, .data$chi, .data$social_care_id, .data$latest_flag) %>%
     # arrange so latest submissions are last
     dplyr::arrange(
       .data$sending_location,
       .data$social_care_id,
-      .data$latest_record_flag,
-      .data$extract_date
+      .data$latest_flag
     ) %>%
     # summarise to select the last (non NA) submission
     dplyr::summarise(
-      chi = dplyr::last(.data$upi),
       gender = dplyr::last(.data$gender),
       dob = dplyr::last(.data$dob),
-      postcode = dplyr::last(.data$postcode)
+      postcode = dplyr::last(.data$postcode),
+      date_of_death = dplyr::last(.data$date_of_death)
     ) %>%
     dplyr::ungroup()
 
+  # check to make sure all cases of chi are still there
+  dplyr::n_distinct(sc_demog_lookup$chi) # 524810
+  dplyr::n_distinct(sc_demog_lookup$social_care_id) # 636404
+
+
   if (write_to_disk) {
     write_file(
       sc_demog_lookup,
diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R
index f9ca52f24..453db3e40 100644
--- a/R/process_sc_all_sds.R
+++ b/R/process_sc_all_sds.R
@@ -15,14 +15,17 @@ process_sc_all_sds <- function(
     sc_demog_lookup,
     write_to_disk = TRUE) {
   # Match on demographics data (chi, gender, dob and postcode)
-  matched_sds_data <- data %>%
-    dplyr::left_join(
+  matched_sds_data <- data %>% #
+    dplyr::filter(.data$sds_start_date_after_period_end_date != 1) %>%
+    dplyr::right_join(
       sc_demog_lookup,
       by = c("sending_location", "social_care_id")
     ) %>%
     # when multiple social_care_id from sending_location for single CHI
     # replace social_care_id with latest
-    replace_sc_id_with_latest()
+    replace_sc_id_with_latest() %>%
+    dplyr::select(-latest_sc_id, -latest_flag, -sds_start_date_after_period_end_date) %>%
+    dplyr::distinct()
 
   # Data Cleaning ---------------------------------------
   sds_full_clean <- matched_sds_data %>%
@@ -50,7 +53,7 @@ process_sc_all_sds <- function(
         .data$sds_start_date,
         .data$sds_period_start_date
       ),
-      # If SDS end date is missing, assign end of FY
+      # If SDS end date is missing, assign end of financial period
       sds_end_date = fix_sc_missing_end_dates(
         .data$sds_end_date,
         .data$sds_period_end_date
@@ -59,14 +62,19 @@ process_sc_all_sds <- function(
       sds_end_date = fix_sc_end_dates(
         .data$sds_start_date,
         .data$sds_end_date,
-        .data$period
+        .data$sds_period_end_date
       )
     ) %>%
+    dplyr::select(
+      -sds_period_start_date, -sds_period_end_date,
+      -sds_start_date_after_end_date
+    ) %>%
     # rename for matching source variables
     dplyr::rename(
       record_keydate1 = .data$sds_start_date,
       record_keydate2 = .data$sds_end_date
     ) %>%
+    dplyr::distinct() %>%
     # Pivot longer on sds option variables
     tidyr::pivot_longer(
       cols = tidyselect::contains("sds_option_"),
@@ -103,6 +111,7 @@ process_sc_all_sds <- function(
     ) %>%
     dplyr::arrange(.data$period,
       .data$record_keydate1,
+      .data$record_keydate2,
       .by_group = TRUE
     ) %>%
     # Create a flag for episodes that are going to be merged
diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R
index fcdde5417..fe9a5e71f 100644
--- a/R/read_lookup_sc_demographics.R
+++ b/R/read_lookup_sc_demographics.R
@@ -12,16 +12,15 @@ read_lookup_sc_demographics <- function(sc_connection = phs_db_connection(dsn =
   ) %>%
     dplyr::select(
       "latest_record_flag",
-      "extract_date",
+      "period",
       "sending_location",
+      "sending_location_name",
       "social_care_id",
-      "upi",
       "chi_upi",
-      "submitted_postcode",
-      "chi_postcode",
-      "submitted_date_of_birth",
       "chi_date_of_birth",
-      "submitted_gender",
+      "date_of_death",
+      "chi_postcode",
+      "submitted_postcode",
       "chi_gender_code"
     ) %>%
     dplyr::collect() %>%
@@ -29,10 +28,10 @@ read_lookup_sc_demographics <- function(sc_connection = phs_db_connection(dsn =
       dplyr::across(c(
         "latest_record_flag",
         "sending_location",
-        "submitted_gender",
         "chi_gender_code"
       ), as.integer)
-    )
+    ) %>%
+    dplyr::distinct()
 
   return(sc_demog)
 }
diff --git a/R/read_sc_all_sds.R b/R/read_sc_all_sds.R
index 18c5b52ec..ab9bb20e1 100644
--- a/R/read_sc_all_sds.R
+++ b/R/read_sc_all_sds.R
@@ -22,9 +22,8 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
       "sds_option_1",
       "sds_option_2",
       "sds_option_3",
-      "sds_start_date_after_end_date",
-      "sds_start_date_after_period_end_date",
-      "sds_end_date_not_within_period"
+      "sds_start_date_after_end_date", # get fixed
+      "sds_start_date_after_period_end_date" # get removed
     ) %>%
     dplyr::collect() %>%
     dplyr::distinct() %>%
@@ -33,8 +32,7 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
       "sds_option_1",
       "sds_option_2",
       "sds_option_3"
-    ), as.integer)) %>%
-    dplyr::filter(.data$sds_start_date_after_period_end_date != 1)
+    ), as.integer))
 
   return(sds_full_data)
 }
diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R
index 73c1a3706..2c32bbb93 100644
--- a/R/replace_sc_id_with_latest.R
+++ b/R/replace_sc_id_with_latest.R
@@ -7,33 +7,23 @@ replace_sc_id_with_latest <- function(data) {
   # Check for required variables
   check_variables_exist(
     data,
-    c("sending_location", "social_care_id", "chi", "period")
+    c("sending_location", "social_care_id", "chi", "latest_flag")
   )
 
   # select variables we need
   filter_data <- data %>%
     dplyr::select(
-      "sending_location", "social_care_id", "chi", "period"
+      "sending_location", "social_care_id", "chi", "latest_flag"
     ) %>%
-    dplyr::filter(!(is.na(.data$chi)))
+    dplyr::filter(!(is.na(.data$chi))) %>%
+    dplyr::distinct()
 
   change_sc_id <- filter_data %>%
-    # Sort (by sending_location, chi and period) for unique chi/sending location
-    dplyr::arrange(
-      .data$sending_location,
-      .data$chi,
-      dplyr::desc(.data$period)
-    ) %>%
-    # Find the latest sc_id for each chi/sending location by keeping latest period
-    dplyr::distinct(
-      .data$sending_location,
-      .data$chi,
-      .keep_all = TRUE
-    ) %>%
+    dplyr::filter(latest_flag == 1) %>%
     # Rename for latest sc id
     dplyr::rename(latest_sc_id = "social_care_id") %>%
-    # drop period for matching
-    dplyr::select(-"period")
+    # drop latest_flag for matching
+    dplyr::select(-"latest_flag")
 
   return_data <- change_sc_id %>%
     # Match back onto data
@@ -41,6 +31,7 @@ replace_sc_id_with_latest <- function(data) {
       by = c("sending_location", "chi"),
       multiple = "all"
     ) %>%
+    dplyr::filter(!(is.na(period))) %>%
     # Overwrite sc id with the latest
     dplyr::mutate(
       social_care_id = dplyr::if_else(
diff --git a/man/fix_sc_end_dates.Rd b/man/fix_sc_end_dates.Rd
index 1bf808bea..041751319 100644
--- a/man/fix_sc_end_dates.Rd
+++ b/man/fix_sc_end_dates.Rd
@@ -4,7 +4,7 @@
 \alias{fix_sc_end_dates}
 \title{Fix sc end dates}
 \usage{
-fix_sc_end_dates(start_date, end_date, period)
+fix_sc_end_dates(start_date, end_date, period_end_date)
 }
 \arguments{
 \item{start_date}{A vector containing dates.}

From cd8b35948abf588a8eea7fa0474be8e8cd8c03a0 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Wed, 7 Feb 2024 15:32:47 +0000
Subject: [PATCH 025/186] Sc all at speedup (#904)

* speed up process_sc_all_alarms_telecare function with data.table package

* Update documentation

---------

Co-authored-by: lizihao-anu <lizihao-anu@users.noreply.github.com>
Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/process_sc_all_alarms_telecare.R | 181 +++++++++++++++++------------
 1 file changed, 104 insertions(+), 77 deletions(-)

diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R
index 988d1f3e7..bc417a8cd 100644
--- a/R/process_sc_all_alarms_telecare.R
+++ b/R/process_sc_all_alarms_telecare.R
@@ -17,58 +17,85 @@ process_sc_all_alarms_telecare <- function(
     write_to_disk = TRUE) {
   # Data Cleaning-----------------------------------------------------
 
-  replaced_dates <- data %>%
-    # If the end date is missing, set this to the end of the period
-    dplyr::mutate(
-      service_end_date = fix_sc_missing_end_dates(
-        .data$service_end_date,
-        .data$period_end_date
-      ),
-      # If the start_date is missing, set this to the start of the period
-      service_start_date = fix_sc_start_dates(
-        .data$service_start_date,
-        .data$period_start_date
-      ),
-      # Fix service_end_date if earlier than service_start_date by setting end_date to the end of fy
-      service_end_date = fix_sc_end_dates(
-        .data$service_start_date,
-        .data$service_end_date,
-        .data$period
-      )
+  # Convert to data.table
+  data.table::setDT(data)
+  data.table::setDT(sc_demog_lookup)
+
+  # Fix dates and create new variables
+  data[
+    ,
+    service_end_date := fix_sc_missing_end_dates(
+      service_end_date,
+      period_end_date
     )
+  ]
+  data[
+    ,
+    service_start_date := fix_sc_start_dates(
+      service_start_date,
+      period_start_date
+    )
+  ]
+  data[
+    ,
+    service_end_date := fix_sc_end_dates(
+      service_start_date,
+      service_end_date,
+      period
+    )
+  ]
 
 
-  at_full_clean <- replaced_dates %>%
-    # rename for matching source variables
-    dplyr::rename(
-      record_keydate1 = "service_start_date",
-      record_keydate2 = "service_end_date"
-    ) %>%
-    # Include source variables
-    dplyr::mutate(
-      recid = "AT",
-      smrtype = dplyr::case_when(
-        .data$service_type == 1L ~ "AT-Alarm",
-        .data$service_type == 2L ~ "AT-Tele"
+  # Rename columns
+  data.table::setnames(
+    data,
+    old = c("service_start_date", "service_end_date"),
+    new = c("record_keydate1", "record_keydate2")
+  )
+
+  # Additional mutations
+  data[
+    ,
+    c(
+      "recid",
+      "smrtype",
+      "sc_send_lca"
+    ) := list(
+      "AT",
+      data.table::fcase(
+        service_type == 1L,
+        "AT-Alarm",
+        service_type == 2L,
+        "AT-Tele",
+        default,
+        NA_character_
       ),
-      # Create person id variable
-      person_id = stringr::str_glue("{sending_location}-{social_care_id}"),
-      # Use function for creating sc send lca variables
-      sc_send_lca = convert_sc_sending_location_to_lca(.data$sending_location)
-    ) %>%
-    # Match on demographics data (chi, gender, dob and postcode)
-    dplyr::left_join(
-      sc_demog_lookup,
-      by = c("sending_location", "social_care_id")
-    ) %>%
-    # when multiple social_care_id from sending_location for single CHI
-    # replace social_care_id with latest
-    replace_sc_id_with_latest()
+      convert_sc_sending_location_to_lca(sending_location)
+    )
+  ]
+  data$person_id <- paste0(
+    data$sending_location,
+    "-",
+    data$social_care_id
+  )
+
+  # Join with sc_demog_lookup
+  data <- sc_demog_lookup[data, on = .(sending_location, social_care_id)]
 
-  # Deal with episodes which have a package across quarters.
-  qtr_merge <- at_full_clean %>%
-    # use as.data.table to change the data format to data.table to accelerate
-    data.table::as.data.table() %>%
+  # Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function)
+  data <- replace_sc_id_with_latest(data)
+
+  # Deal with episodes that have a package across quarters
+  data[, pkg_count := seq_len(.N), by = .(
+    sending_location,
+    social_care_id,
+    record_keydate1,
+    smrtype,
+    period
+  )]
+
+  # Order data before summarizing
+  data <- data %>%
     dplyr::group_by(
       .data$sending_location,
       .data$social_care_id,
@@ -76,38 +103,38 @@ process_sc_all_alarms_telecare <- function(
       .data$smrtype,
       .data$period
     ) %>%
-    # Create a count for the package number across episodes
-    dplyr::mutate(pkg_count = dplyr::row_number()) %>%
     # Sort prior to merging
     dplyr::arrange(.by_group = TRUE) %>%
-    # group for merging episodes
-    dplyr::group_by(
-      .data$sending_location,
-      .data$social_care_id,
-      .data$record_keydate1,
-      .data$smrtype,
-      .data$pkg_count
-    ) %>%
-    # merge episodes with packages across quarters
-    # drop variables not needed
-    dplyr::summarise(
-      sending_location = dplyr::last(.data$sending_location),
-      social_care_id = dplyr::last(.data$social_care_id),
-      sc_latest_submission = dplyr::last(.data$period),
-      record_keydate1 = dplyr::last(.data$record_keydate1),
-      record_keydate2 = dplyr::last(.data$record_keydate2),
-      smrtype = dplyr::last(.data$smrtype),
-      pkg_count = dplyr::last(.data$pkg_count),
-      chi = dplyr::last(.data$chi),
-      gender = dplyr::last(.data$gender),
-      dob = dplyr::last(.data$dob),
-      postcode = dplyr::last(.data$postcode),
-      recid = dplyr::last(.data$recid),
-      person_id = dplyr::last(.data$person_id),
-      sc_send_lca = dplyr::last(.data$sc_send_lca)
-    ) %>%
-    # change the data format from data.table to data.frame
-    tibble::as_tibble()
+    dplyr::ungroup() %>%
+    data.table::as.data.table()
+
+  # Summarize to merge episodes
+  qtr_merge <- data[, .(
+    sending_location = data.table::last(sending_location),
+    social_care_id = data.table::last(social_care_id),
+    sc_latest_submission = data.table::last(period),
+    record_keydate1 = data.table::last(record_keydate1),
+    record_keydate2 = data.table::last(record_keydate2),
+    smrtype = data.table::last(smrtype),
+    pkg_count = data.table::last(pkg_count),
+    chi = data.table::last(chi),
+    gender = data.table::last(gender),
+    dob = data.table::last(dob),
+    postcode = data.table::last(postcode),
+    recid = data.table::last(recid),
+    person_id = data.table::last(person_id),
+    sc_send_lca = data.table::last(sc_send_lca)
+  ), by = .(
+    sending_location,
+    social_care_id,
+    record_keydate1,
+    smrtype,
+    pkg_count
+  )]
+
+  # Convert back to data.frame if necessary
+  qtr_merge <- as.data.frame(qtr_merge)
+
 
   if (write_to_disk) {
     write_file(

From b1a9523623b740144098418d59891228a005e74e Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Wed, 7 Feb 2024 16:40:51 +0000
Subject: [PATCH 026/186] Add case_when statement for `high_cc` cohort

---
 R/create_demographic_lookup.R | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/R/create_demographic_lookup.R b/R/create_demographic_lookup.R
index 2b252a151..d0e0c9988 100644
--- a/R/create_demographic_lookup.R
+++ b/R/create_demographic_lookup.R
@@ -344,18 +344,21 @@ assign_d_cohort_high_cc <- function(dementia,
                                     liver,
                                     cancer,
                                     spec) {
-  high_cc <-
+  high_cc <- dplyr::case_when(
+    spec == "G5" ~ TRUE,
     # FOR FUTURE: PhysicalandSensoryDisabilityClientGroup or LearningDisabilityClientGroup = "Y",
     # then high_cc_cohort = TRUE
     # FOR FUTURE: Care home removed, here's the code: .data$recid = "CH" & age < 65
-    rowSums(dplyr::pick(c(
+    (rowSums(dplyr::pick(c(
       "dementia",
       "hefailure",
       "refailure",
       "liver",
       "cancer"
-    )), na.rm = TRUE) >= 1L |
-      spec == "G5"
+    )), na.rm = TRUE) >= 1L) ~ TRUE,
+    .default = FALSE
+  )
+
   return(high_cc)
 }
 

From 6829c1acb55586050a786edc478c11281e675eb6 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Mon, 12 Feb 2024 12:04:34 +0000
Subject: [PATCH 027/186] Bug - `high_cc` in demographic cohort showing `NAs`
 instead of `TRUE/FALSE` (#911)

Add case_when statement for `high_cc` cohort
---
 R/create_demographic_lookup.R | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/R/create_demographic_lookup.R b/R/create_demographic_lookup.R
index 2b252a151..d0e0c9988 100644
--- a/R/create_demographic_lookup.R
+++ b/R/create_demographic_lookup.R
@@ -344,18 +344,21 @@ assign_d_cohort_high_cc <- function(dementia,
                                     liver,
                                     cancer,
                                     spec) {
-  high_cc <-
+  high_cc <- dplyr::case_when(
+    spec == "G5" ~ TRUE,
     # FOR FUTURE: PhysicalandSensoryDisabilityClientGroup or LearningDisabilityClientGroup = "Y",
     # then high_cc_cohort = TRUE
     # FOR FUTURE: Care home removed, here's the code: .data$recid = "CH" & age < 65
-    rowSums(dplyr::pick(c(
+    (rowSums(dplyr::pick(c(
       "dementia",
       "hefailure",
       "refailure",
       "liver",
       "cancer"
-    )), na.rm = TRUE) >= 1L |
-      spec == "G5"
+    )), na.rm = TRUE) >= 1L) ~ TRUE,
+    .default = FALSE
+  )
+
   return(high_cc)
 }
 

From c7a140068bc1376cd8ae3951e96a1aca74bcd7fd Mon Sep 17 00:00:00 2001
From: marjom02 <megan.mcnicol2@nhs.scot>
Date: Tue, 13 Feb 2024 11:55:27 +0000
Subject: [PATCH 028/186] added a casewhen to update property type description
 for homelessness

---
 R/process_extract_homelessness.R | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index 3211f0fb7..3b9756183 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -100,6 +100,36 @@ process_extract_homelessness <- function(
         )
       )
     ) %>%
+    dplyr::mutate(property_type_code = as.character(property_type_code)) %>%
+    dplyr::mutate(
+      property_type_code = dplyr::case_when(
+        property_type_code == "1" ~ "1 - Own Property - LA Tenancy",
+        property_type_code == "2" ~ "2 - Own Property - RSL Tenancy",
+        property_type_code == "3" ~ "3 - Own Property - private rented tenancy",
+        property_type_code == "4" ~ "4 - Own Property - tenancy secured through employment/tied house",
+        property_type_code == "5" ~ "5 - Own Property - owning/buying",
+        property_type_code == "6" ~ "6 - Parental / family home / relatives",
+        property_type_code == "7" ~ " 7 - Friends / partners",
+        property_type_code == "8" ~ "8 - Armed Services Accommodation",
+        property_type_code == "9" ~ "9 - Prison",
+        property_type_code == "10" ~ "10 - Hospital",
+        property_type_code == "11" ~ "11 - Children's residential accommodation (looked after by the local authority)",
+        property_type_code == "12" ~ "12 - Supported accommodation",
+        property_type_code == "13" ~ "13 - Hostel (unsupported)",
+        property_type_code == "14" ~ "14 - Bed & Breakfast",
+        property_type_code == "15" ~ "15 - Caravan / mobile home",
+        property_type_code == "16" ~ "16 - Long-term roofless",
+        property_type_code == "17" ~ "17 - Long-term sofa surfing",
+        property_type_code == "18" ~ "18 - Other",
+        property_type_code == "19" ~ "19 - Not known / refused",
+        property_type_code == "20" ~ "20 - Own property - Shared ownership/Shared equity/ LCHO",
+        property_type_code == "21" ~ "21 - Lodger",
+        property_type_code == "22" ~ "22 - Shared Property - Private Rented Sector",
+        property_type_code == "23" ~ "23 - Shared Property - Local Authority",
+        property_type_code == "24" ~ "24 - Shared Property - RSL",
+        TRUE ~ property_type_code
+      )
+    ) %>%
     dplyr::left_join(
       la_code_lookup,
       by = dplyr::join_by("sending_local_authority_code_9" == "CA")
@@ -117,7 +147,7 @@ process_extract_homelessness <- function(
   if (!is.null(completeness_data)) {
     filtered_data <- data %>%
       dplyr::left_join(completeness_data,
-        by = c("year", "sending_local_authority_name")
+                       by = c("year", "sending_local_authority_name")
       ) %>%
       dplyr::filter(
         dplyr::between(.data[["pct_complete_all"]], 0.90, 1.05) |

From ea192202d8b57c2182ac0d01c2761c14745abc0a Mon Sep 17 00:00:00 2001
From: SwiftySalmon <SwiftySalmon@users.noreply.github.com>
Date: Tue, 13 Feb 2024 11:58:45 +0000
Subject: [PATCH 029/186] Update documentation

---
 DESCRIPTION                    | 2 +-
 man/calculate_stay.Rd          | 4 ++--
 man/compute_mid_year_age.Rd    | 4 ++--
 man/convert_date_to_numeric.Rd | 4 ++--
 man/convert_numeric_to_date.Rd | 4 ++--
 man/end_fy.Rd                  | 2 +-
 man/end_fy_quarter.Rd          | 2 +-
 man/end_next_fy_quarter.Rd     | 4 ++--
 man/fy_interval.Rd             | 4 ++--
 man/is_date_in_fyyear.Rd       | 4 ++--
 man/last_date_month.Rd         | 4 ++--
 man/midpoint_fy.Rd             | 4 ++--
 man/next_fy.Rd                 | 4 ++--
 man/start_fy.Rd                | 2 +-
 man/start_fy_quarter.Rd        | 2 +-
 man/start_next_fy_quarter.Rd   | 6 +++---
 16 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 5123289dd..3a75852e2 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -73,4 +73,4 @@ Encoding: UTF-8
 Language: en-GB
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.1
diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd
index 43b7bd166..5e9266b10 100644
--- a/man/calculate_stay.Rd
+++ b/man/calculate_stay.Rd
@@ -34,16 +34,16 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd
index 142fa4aab..5a50370e0 100644
--- a/man/compute_mid_year_age.Rd
+++ b/man/compute_mid_year_age.Rd
@@ -31,16 +31,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd
index 5511fec84..b67eaa778 100644
--- a/man/convert_date_to_numeric.Rd
+++ b/man/convert_date_to_numeric.Rd
@@ -24,16 +24,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd
index f786e0319..a09b7b9b9 100644
--- a/man/convert_numeric_to_date.Rd
+++ b/man/convert_numeric_to_date.Rd
@@ -24,16 +24,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_fy.Rd b/man/end_fy.Rd
index 2925ffe60..6220f5f32 100644
--- a/man/end_fy.Rd
+++ b/man/end_fy.Rd
@@ -34,8 +34,8 @@ Other date functions:
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd
index 0efe9624a..26c439a04 100644
--- a/man/end_fy_quarter.Rd
+++ b/man/end_fy_quarter.Rd
@@ -33,8 +33,8 @@ Other date functions:
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd
index f9cc1720a..702446e82 100644
--- a/man/end_next_fy_quarter.Rd
+++ b/man/end_next_fy_quarter.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd
index 12d1d36bb..00b9ea52c 100644
--- a/man/fy_interval.Rd
+++ b/man/fy_interval.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd
index 97a0f3639..e74bd5734 100644
--- a/man/is_date_in_fyyear.Rd
+++ b/man/is_date_in_fyyear.Rd
@@ -41,15 +41,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd
index f52305356..3d3b9544e 100644
--- a/man/last_date_month.Rd
+++ b/man/last_date_month.Rd
@@ -25,15 +25,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd
index 7bac9b6b3..2363df773 100644
--- a/man/midpoint_fy.Rd
+++ b/man/midpoint_fy.Rd
@@ -27,15 +27,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/next_fy.Rd b/man/next_fy.Rd
index 19e1193f4..7524c5f11 100644
--- a/man/next_fy.Rd
+++ b/man/next_fy.Rd
@@ -27,15 +27,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/start_fy.Rd b/man/start_fy.Rd
index 4996bfb72..9951af2ec 100644
--- a/man/start_fy.Rd
+++ b/man/start_fy.Rd
@@ -27,8 +27,8 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd
index f5729dcb0..9936736a8 100644
--- a/man/start_fy_quarter.Rd
+++ b/man/start_fy_quarter.Rd
@@ -26,8 +26,8 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd
index 098f0bf73..fdac297a7 100644
--- a/man/start_next_fy_quarter.Rd
+++ b/man/start_next_fy_quarter.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
-\code{\link{start_fy}()}
+\code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()}
 }
 \concept{date functions}

From a634ea74a511d4cee207bcc256fb5b7078c094d1 Mon Sep 17 00:00:00 2001
From: SwiftySalmon <SwiftySalmon@users.noreply.github.com>
Date: Tue, 13 Feb 2024 11:59:37 +0000
Subject: [PATCH 030/186] Style code

---
 R/process_extract_homelessness.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index 3b9756183..04d7082e7 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -147,7 +147,7 @@ process_extract_homelessness <- function(
   if (!is.null(completeness_data)) {
     filtered_data <- data %>%
       dplyr::left_join(completeness_data,
-                       by = c("year", "sending_local_authority_name")
+        by = c("year", "sending_local_authority_name")
       ) %>%
       dplyr::filter(
         dplyr::between(.data[["pct_complete_all"]], 0.90, 1.05) |

From 14cde166bffd7d1d9ac77c8732407fe17b5268d0 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Fri, 16 Feb 2024 09:06:50 +0000
Subject: [PATCH 031/186] Bug - deal with missing variables (#914)

* Add missing sc variables for no sc data

* Fix code for including `_inc_dna` variables

* Remove commented line
---
 R/add_hri_variables.R   | 2 +-
 R/aggregate_by_chi.R    | 4 ++--
 R/create_episode_file.R | 7 +++++--
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/R/add_hri_variables.R b/R/add_hri_variables.R
index 710324646..519ce3694 100644
--- a/R/add_hri_variables.R
+++ b/R/add_hri_variables.R
@@ -82,7 +82,7 @@ add_hri_variables <- function(
       "mh_episodes",
       "gls_episodes",
       "op_newcons_attendances",
-      # op_newcons_dnas,
+      "op_newcons_dnas",
       "ae_attendances",
       "pis_paid_items",
       "ooh_cases"
diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
index 8d9dff96d..6f5032242 100644
--- a/R/aggregate_by_chi.R
+++ b/R/aggregate_by_chi.R
@@ -89,6 +89,7 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
         "episodes",
         "beddays",
         "cost",
+        "_dnas",
         "attendances",
         "attend",
         "contacts",
@@ -109,8 +110,7 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
     vars_start_with(
       episode_file,
       "sds_option"
-    ),
-    "health_net_cost_inc_dnas"
+    )
   )
   cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")]
   if (exclude_sc_var) {
diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index 493d71bd3..a9503e83c 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -143,8 +143,9 @@ create_episode_file <- function(
     episode_file <- episode_file %>%
       dplyr::mutate(
         ch_chi_cis = NA,
-        sc_id_cis = NA,
+        ch_sc_id_cis = NA,
         ch_name = NA,
+        ch_postcode = NA,
         ch_adm_reason = NA,
         ch_provider = NA,
         ch_nursing = NA,
@@ -159,7 +160,9 @@ create_episode_file <- function(
         hc_cost_q4 = NA,
         hc_provider = NA,
         hc_reablement = NA,
-        sds_option_4 = NA,
+        person_id = NA,
+        sc_latest_submission = NA,
+        sc_send_lca = NA,
         sc_living_alone = NA,
         sc_support_from_unpaid_carer = NA,
         sc_social_worker = NA,

From 625402b52f717b1f6b35344f7fb1ebaf4a9cecff Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Fri, 16 Feb 2024 09:07:48 +0000
Subject: [PATCH 032/186] Bug - Fix get pop path failing and preventing the
 indiv file from running.  (#913)

Fix bug - pop file paths breaking indiv file
---
 R/add_keep_population_flag.R           | 2 +-
 R/get_lookup_paths.R                   | 2 +-
 Rmarkdown/costs_district_nursing.Rmd   | 2 +-
 tests/testthat/test-get_lookup_paths.R | 8 +++-----
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/R/add_keep_population_flag.R b/R/add_keep_population_flag.R
index 6050b278f..d418ac18c 100644
--- a/R/add_keep_population_flag.R
+++ b/R/add_keep_population_flag.R
@@ -15,7 +15,7 @@ add_keep_population_flag <- function(individual_file, year) {
   } else {
     ## Obtain the population estimates for Locality AgeGroup and Gender.
     pop_estimates <-
-      readr::read_rds(get_datazone_pop_path("DataZone2011_pop_est_2011_2021.rds")) %>%
+      readr::read_rds(get_pop_path(type = "datazone")) %>%
       dplyr::select(year, datazone2011, sex, age0:age90plus)
 
     # Step 1: Obtain the population estimates for Locality, AgeGroup, and Gender
diff --git a/R/get_lookup_paths.R b/R/get_lookup_paths.R
index fe35a7d2f..7df5c52e2 100644
--- a/R/get_lookup_paths.R
+++ b/R/get_lookup_paths.R
@@ -126,7 +126,7 @@ get_pop_path <- function(file_name = NULL,
     "intzone" ~ stringr::str_glue("IntZone_pop_est_2011_\\d+?\\.{ext}")
   )
 
-  datazone_pop_path <- get_file_path(
+  pop_path <- get_file_path(
     directory = pop_dir,
     file_name = file_name,
     ext = ext,
diff --git a/Rmarkdown/costs_district_nursing.Rmd b/Rmarkdown/costs_district_nursing.Rmd
index e3c9bba13..fb198bccb 100644
--- a/Rmarkdown/costs_district_nursing.Rmd
+++ b/Rmarkdown/costs_district_nursing.Rmd
@@ -75,7 +75,7 @@ dn_raw_costs_contacts <- left_join(dn_raw_contacts,
 # Of the two HSCPs, Argyll and Bute provides the
 # District Nursing data which is 27% of the population.
 
-population_lookup <- read_file(get_datazone_pop_path("HSCP2019_pop_est_1981_2021.rds")) %>%
+population_lookup <- read_file(get_pop_path(type = "datazone")) %>%
   # Select only the HSCPs for NHS Highland & years since 2015
   filter(
     hscp2019 %in% c("S37000004", "S37000016"),
diff --git a/tests/testthat/test-get_lookup_paths.R b/tests/testthat/test-get_lookup_paths.R
index c56752b03..29d538cc1 100644
--- a/tests/testthat/test-get_lookup_paths.R
+++ b/tests/testthat/test-get_lookup_paths.R
@@ -48,13 +48,11 @@ test_that("SIMD file path returns as expected", {
 
 test_that("population estimates file path returns as expected", {
   suppressMessages({
-    expect_s3_class(get_datazone_pop_path(), "fs_path")
+    expect_s3_class(get_pop_path(type = "datazone"), "fs_path")
 
-    expect_equal(fs::path_ext(get_datazone_pop_path()), "rds")
+    expect_equal(fs::path_ext(get_pop_path(type = "datazone")), "rds")
 
-    expect_match(get_datazone_pop_path(), "DataZone2011_pop_est_2001_\\d+?")
-
-    expect_true(fs::file_exists(get_datazone_pop_path()))
+    expect_true(fs::file_exists(get_pop_path(type = "datazone")))
   })
 })
 

From 36c5e74ed28444a7f44eff390bd96009bb0f0b51 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 16 Feb 2024 09:21:13 +0000
Subject: [PATCH 033/186] Correct HSCP population file path

---
 Rmarkdown/costs_district_nursing.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Rmarkdown/costs_district_nursing.Rmd b/Rmarkdown/costs_district_nursing.Rmd
index fb198bccb..59b8353f8 100644
--- a/Rmarkdown/costs_district_nursing.Rmd
+++ b/Rmarkdown/costs_district_nursing.Rmd
@@ -75,7 +75,7 @@ dn_raw_costs_contacts <- left_join(dn_raw_contacts,
 # Of the two HSCPs, Argyll and Bute provides the
 # District Nursing data which is 27% of the population.
 
-population_lookup <- read_file(get_pop_path(type = "datazone")) %>%
+population_lookup <- read_file(get_pop_path(type = "hscp")) %>%
   # Select only the HSCPs for NHS Highland & years since 2015
   filter(
     hscp2019 %in% c("S37000004", "S37000016"),

From ad629b27676d6b24b1d0f4552a6ce78c50e3996d Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Mon, 26 Feb 2024 10:48:28 +0000
Subject: [PATCH 034/186] Update process_sc_all_home_care.R

A small issue was identified when running targets. Linked with changes to the function `fix_sc_end_dates()`
---
 R/process_sc_all_home_care.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R
index bc3d3bdfc..0083bedaa 100644
--- a/R/process_sc_all_home_care.R
+++ b/R/process_sc_all_home_care.R
@@ -28,7 +28,7 @@ process_sc_all_home_care <- function(
       hc_service_end_date = fix_sc_end_dates(
         .data$hc_service_start_date,
         .data$hc_service_end_date,
-        .data$period
+        .data$hc_period_end_date
       )
     )
 

From 640548bfc6a1ef5a6f6435c4fbbc3f2207f2d5bf Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Mon, 26 Feb 2024 11:06:19 +0000
Subject: [PATCH 035/186] Update process_sc_all_alarms_telecare.R

---
 R/process_sc_all_alarms_telecare.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R
index bc417a8cd..e40e93241 100644
--- a/R/process_sc_all_alarms_telecare.R
+++ b/R/process_sc_all_alarms_telecare.R
@@ -41,7 +41,7 @@ process_sc_all_alarms_telecare <- function(
     service_end_date := fix_sc_end_dates(
       service_start_date,
       service_end_date,
-      period
+      period_end_date
     )
   ]
 

From e0da70ca8b66b5485be3fad9c1d97dcda5c948e9 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Mon, 26 Feb 2024 13:08:29 +0000
Subject: [PATCH 036/186] remove duplicate columns

---
 R/process_sc_all_alarms_telecare.R | 1 -
 1 file changed, 1 deletion(-)

diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R
index e40e93241..08cb9faa7 100644
--- a/R/process_sc_all_alarms_telecare.R
+++ b/R/process_sc_all_alarms_telecare.R
@@ -113,7 +113,6 @@ process_sc_all_alarms_telecare <- function(
     sending_location = data.table::last(sending_location),
     social_care_id = data.table::last(social_care_id),
     sc_latest_submission = data.table::last(period),
-    record_keydate1 = data.table::last(record_keydate1),
     record_keydate2 = data.table::last(record_keydate2),
     smrtype = data.table::last(smrtype),
     pkg_count = data.table::last(pkg_count),

From 9699394500d1d11bd27c425ad6482732d9e502ee Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Tue, 27 Feb 2024 08:56:36 +0000
Subject: [PATCH 037/186] Fix targets (#892)

* fix sc_client_lookup sc_send_lca

* fix an issue of get_pop_path

* Style code

* fix the rest of get_pop_path from get_datazone_pop_path

* Update documentation

* fix sc_send_lca

* add missing year column

* explicitly specify the argument year to avoid corruption of targets

* Update documentation

* new data pipeline with targets
remove create_individual_files from targets and append it to run_targets script

* minor changes

* Style code

* undo sc_send_lca bit

* Update targets scripts

* Remove top level targets scripts

---------

Co-authored-by: lizihao-anu <lizihao-anu@users.noreply.github.com>
Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Co-authored-by: Jennifer Thom <jennifer.thom@phs.scot>
---
 R/aggregate_by_chi.R                     |  3 ++-
 R/create_individual_file.R               |  6 ++---
 Run_SLF_Files_targets/run_targets_1718.R | 14 +++++++++++
 Run_SLF_Files_targets/run_targets_1819.R | 14 +++++++++++
 Run_SLF_Files_targets/run_targets_1920.R | 14 +++++++++++
 Run_SLF_Files_targets/run_targets_2021.R | 14 +++++++++++
 Run_SLF_Files_targets/run_targets_2122.R | 14 +++++++++++
 Run_SLF_Files_targets/run_targets_2223.R | 14 +++++++++++
 Run_SLF_Files_targets/run_targets_2324.R | 14 +++++++++++
 _targets.R                               | 32 ++++++++++++------------
 man/add_all_columns.Rd                   |  4 ++-
 man/aggregate_by_chi.Rd                  |  4 ++-
 12 files changed, 125 insertions(+), 22 deletions(-)

diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
index 6f5032242..d207b221a 100644
--- a/R/aggregate_by_chi.R
+++ b/R/aggregate_by_chi.R
@@ -7,7 +7,7 @@
 #' @importFrom data.table .SD
 #'
 #' @inheritParams create_individual_file
-aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
+aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) {
   cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")
 
   # Convert to data.table
@@ -187,6 +187,7 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
     individual_file_cols5[, chi := NULL],
     individual_file_cols6[, chi := NULL]
   )
+  individual_file <- individual_file[, year := year]
 
   # convert back to tibble
   return(dplyr::as_tibble(individual_file))
diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index 70066b42d..e5b0fd2fd 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -73,7 +73,7 @@ create_individual_file <- function(
     ))) %>%
     remove_blank_chi() %>%
     add_cij_columns() %>%
-    add_all_columns()
+    add_all_columns(year = year)
 
   if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
     individual_file <- individual_file %>%
@@ -82,7 +82,7 @@ create_individual_file <- function(
     individual_file <- individual_file %>%
       aggregate_ch_episodes() %>%
       clean_up_ch(year) %>%
-      aggregate_by_chi(exclude_sc_var = FALSE)
+      aggregate_by_chi(year = year, exclude_sc_var = FALSE)
   }
 
   individual_file <- individual_file %>%
@@ -202,7 +202,7 @@ add_cij_columns <- function(episode_file) {
 #' of prefixed column names created based on some condition.
 #' @family individual_file
 #' @inheritParams create_individual_file
-add_all_columns <- function(episode_file) {
+add_all_columns <- function(episode_file, year) {
   cli::cli_alert_info("Add all columns function started at {Sys.time()}")
 
   episode_file <- episode_file %>%
diff --git a/Run_SLF_Files_targets/run_targets_1718.R b/Run_SLF_Files_targets/run_targets_1718.R
index ebc58895f..ac03edd3f 100644
--- a/Run_SLF_Files_targets/run_targets_1718.R
+++ b/Run_SLF_Files_targets/run_targets_1718.R
@@ -1,4 +1,18 @@
 library(targets)
+
+year <- "1718"
+
+# use targets for the process until testing episode files
 tar_make_future(
+  # it does not recognise `contains(year)`
   names = (targets::contains("1718"))
 )
+
+# create individual files outside of targets due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_1819.R b/Run_SLF_Files_targets/run_targets_1819.R
index 83bbcedef..b60728359 100644
--- a/Run_SLF_Files_targets/run_targets_1819.R
+++ b/Run_SLF_Files_targets/run_targets_1819.R
@@ -1,4 +1,18 @@
 library(targets)
+
+year <- "1819"
+
+# use targets for the process until testing episode files
 tar_make_future(
+  # it does not recognise `contains(year)`
   names = (targets::contains("1819"))
 )
+
+# create individual files outside of targets due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_1920.R b/Run_SLF_Files_targets/run_targets_1920.R
index 1640d1900..897ee0b7a 100644
--- a/Run_SLF_Files_targets/run_targets_1920.R
+++ b/Run_SLF_Files_targets/run_targets_1920.R
@@ -1,4 +1,18 @@
 library(targets)
+
+year <- "1920"
+
+# use targets for the process until testing episode files
 tar_make_future(
+  # it does not recognise `contains(year)`
   names = (targets::contains("1920"))
 )
+
+# create individual files outside of targets due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_2021.R b/Run_SLF_Files_targets/run_targets_2021.R
index 80749e81a..53333c014 100644
--- a/Run_SLF_Files_targets/run_targets_2021.R
+++ b/Run_SLF_Files_targets/run_targets_2021.R
@@ -1,4 +1,18 @@
 library(targets)
+
+year <- "2021"
+
+# use targets for the process until testing episode files
 tar_make_future(
+  # it does not recognise `contains(year)`
   names = (targets::contains("2021"))
 )
+
+# create individual files outside of targets due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_2122.R b/Run_SLF_Files_targets/run_targets_2122.R
index aa95d7b24..457fe33e7 100644
--- a/Run_SLF_Files_targets/run_targets_2122.R
+++ b/Run_SLF_Files_targets/run_targets_2122.R
@@ -1,4 +1,18 @@
 library(targets)
+
+year <- "2122"
+
+# use targets for the process until testing episode files
 tar_make_future(
+  # it does not recognise `contains(year)`
   names = (targets::contains("2122"))
 )
+
+# create individual files outside of targets due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_2223.R b/Run_SLF_Files_targets/run_targets_2223.R
index 2ded7d5fd..fc851f3f7 100644
--- a/Run_SLF_Files_targets/run_targets_2223.R
+++ b/Run_SLF_Files_targets/run_targets_2223.R
@@ -1,4 +1,18 @@
 library(targets)
+
+year <- "2223"
+
+# use targets for the process until testing episode files
 tar_make_future(
+  # it does not recognise `contains(year)`
   names = (targets::contains("2223"))
 )
+
+# create individual files outside of targets due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_2324.R b/Run_SLF_Files_targets/run_targets_2324.R
index b875984f4..3b4c9b240 100644
--- a/Run_SLF_Files_targets/run_targets_2324.R
+++ b/Run_SLF_Files_targets/run_targets_2324.R
@@ -1,4 +1,18 @@
 library(targets)
+
+year <- "2324"
+
+# use targets for the process until testing episode files
 tar_make_future(
+  # it does not recognise `contains(year)`
   names = (targets::contains("2324"))
 )
+
+# create individual files outside of targets due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/_targets.R b/_targets.R
index 81adbf7c2..15d2584bb 100644
--- a/_targets.R
+++ b/_targets.R
@@ -591,24 +591,24 @@ list(
         data = episode_file,
         year = year
       )
-    ),
-    tar_target(
-      individual_file,
-      create_individual_file(
-        episode_file = episode_file,
-        year = year,
-        homelessness_lookup = homelessness_lookup,
-        write_to_disk = write_to_disk
-      )
-    ),
-    tar_target(
-      individual_file_tests,
-      process_tests_individual_file(
-        data = individual_file,
-        year = year
-      )
     ) # ,
     # tar_target(
+    #   individual_file,
+    #   create_individual_file(
+    #     episode_file = episode_file,
+    #     year = year,
+    #     homelessness_lookup = homelessness_lookup,
+    #     write_to_disk = write_to_disk
+    #   )
+    # ),
+    # tar_target(
+    #   individual_file_tests,
+    #   process_tests_individual_file(
+    #     data = individual_file,
+    #     year = year
+    #   )
+    # ) # ,
+    # tar_target(
     #   episode_file_dataset,
     #   arrow::write_dataset(
     #     dataset = episode_file,
diff --git a/man/add_all_columns.Rd b/man/add_all_columns.Rd
index 345a59e01..deb1594b3 100644
--- a/man/add_all_columns.Rd
+++ b/man/add_all_columns.Rd
@@ -4,10 +4,12 @@
 \alias{add_all_columns}
 \title{Add all columns}
 \usage{
-add_all_columns(episode_file)
+add_all_columns(episode_file, year)
 }
 \arguments{
 \item{episode_file}{Tibble containing episodic data.}
+
+\item{year}{The year to process, in FY format.}
 }
 \description{
 Add new columns based on SMRType and recid which follow a pattern
diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd
index 84c9c0ad3..16bf7d792 100644
--- a/man/aggregate_by_chi.Rd
+++ b/man/aggregate_by_chi.Rd
@@ -4,10 +4,12 @@
 \alias{aggregate_by_chi}
 \title{Aggregate by CHI}
 \usage{
-aggregate_by_chi(episode_file, exclude_sc_var = FALSE)
+aggregate_by_chi(episode_file, year, exclude_sc_var = FALSE)
 }
 \arguments{
 \item{episode_file}{Tibble containing episodic data.}
+
+\item{year}{The year to process, in FY format.}
 }
 \description{
 Aggregate episode file by CHI to convert into

From f5c744875f0620f11677d93d1117aebc46207288 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 27 Feb 2024 11:34:28 +0000
Subject: [PATCH 038/186] Remove cases where the start date is after the period end date

---
 R/process_sc_all_home_care.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R
index 0083bedaa..3ada9a2da 100644
--- a/R/process_sc_all_home_care.R
+++ b/R/process_sc_all_home_care.R
@@ -16,6 +16,7 @@ process_sc_all_home_care <- function(
     sc_demog_lookup,
     write_to_disk = TRUE) {
   replaced_dates <- data %>%
+    dplyr::filter(.data$hc_start_date_after_period_end_date != 1) %>%
     dplyr::mutate(
       hc_service_end_date = fix_sc_missing_end_dates(
         .data$hc_service_end_date,

From b7e713834f8f09f762ce197c2ede78ddee436a9c Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 27 Feb 2024 15:35:13 +0000
Subject: [PATCH 039/186] Update Refs for March24 SLF update

---
 R/00-update_refs.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/00-update_refs.R b/R/00-update_refs.R
index 9d119e74e..2052b938f 100644
--- a/R/00-update_refs.R
+++ b/R/00-update_refs.R
@@ -7,7 +7,7 @@
 #'
 #' @family initialisation
 latest_update <- function() {
-  "Dec_2023"
+  "Mar_2024"
 }
 
 #' Previous update
@@ -61,7 +61,7 @@ previous_update <- function(months_ago = 3L, override = NULL) {
 #'
 #' @family initialisation
 get_dd_period <- function() {
-  "Jul16_Sep23"
+  "Jul16_Dec23"
 }
 
 #' The latest financial year for Cost uplift setting

From b829eab03c0b7d4a2b315644304fe6ed7fa2385f Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 26 Mar 2024 15:20:43 +0000
Subject: [PATCH 040/186] update documentation

---
 man/calculate_stay.Rd              | 4 ++--
 man/compute_mid_year_age.Rd        | 4 ++--
 man/convert_date_to_numeric.Rd     | 4 ++--
 man/convert_numeric_to_date.Rd     | 4 ++--
 man/end_fy.Rd                      | 2 +-
 man/end_fy_quarter.Rd              | 2 +-
 man/end_next_fy_quarter.Rd         | 4 ++--
 man/fy_interval.Rd                 | 4 ++--
 man/is_date_in_fyyear.Rd           | 4 ++--
 man/last_date_month.Rd             | 4 ++--
 man/midpoint_fy.Rd                 | 4 ++--
 man/next_fy.Rd                     | 4 ++--
 man/read_lookup_sc_demographics.Rd | 6 ++++--
 man/start_fy.Rd                    | 2 +-
 man/start_fy_quarter.Rd            | 2 +-
 man/start_next_fy_quarter.Rd       | 6 +++---
 16 files changed, 31 insertions(+), 29 deletions(-)

diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd
index 5e9266b10..43b7bd166 100644
--- a/man/calculate_stay.Rd
+++ b/man/calculate_stay.Rd
@@ -34,16 +34,16 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd
index 5a50370e0..142fa4aab 100644
--- a/man/compute_mid_year_age.Rd
+++ b/man/compute_mid_year_age.Rd
@@ -31,16 +31,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd
index b67eaa778..5511fec84 100644
--- a/man/convert_date_to_numeric.Rd
+++ b/man/convert_date_to_numeric.Rd
@@ -24,16 +24,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd
index a09b7b9b9..f786e0319 100644
--- a/man/convert_numeric_to_date.Rd
+++ b/man/convert_numeric_to_date.Rd
@@ -24,16 +24,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_fy.Rd b/man/end_fy.Rd
index 6220f5f32..2925ffe60 100644
--- a/man/end_fy.Rd
+++ b/man/end_fy.Rd
@@ -34,8 +34,8 @@ Other date functions:
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd
index 26c439a04..0efe9624a 100644
--- a/man/end_fy_quarter.Rd
+++ b/man/end_fy_quarter.Rd
@@ -33,8 +33,8 @@ Other date functions:
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd
index 702446e82..f9cc1720a 100644
--- a/man/end_next_fy_quarter.Rd
+++ b/man/end_next_fy_quarter.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd
index 00b9ea52c..12d1d36bb 100644
--- a/man/fy_interval.Rd
+++ b/man/fy_interval.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd
index e74bd5734..97a0f3639 100644
--- a/man/is_date_in_fyyear.Rd
+++ b/man/is_date_in_fyyear.Rd
@@ -41,15 +41,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd
index 3d3b9544e..f52305356 100644
--- a/man/last_date_month.Rd
+++ b/man/last_date_month.Rd
@@ -25,15 +25,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd
index 2363df773..7bac9b6b3 100644
--- a/man/midpoint_fy.Rd
+++ b/man/midpoint_fy.Rd
@@ -27,15 +27,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/next_fy.Rd b/man/next_fy.Rd
index 7524c5f11..19e1193f4 100644
--- a/man/next_fy.Rd
+++ b/man/next_fy.Rd
@@ -27,15 +27,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
-\code{\link{start_fy}()},
 \code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/read_lookup_sc_demographics.Rd b/man/read_lookup_sc_demographics.Rd
index 3bda889fe..6c7dd049e 100644
--- a/man/read_lookup_sc_demographics.Rd
+++ b/man/read_lookup_sc_demographics.Rd
@@ -4,10 +4,12 @@
 \alias{read_lookup_sc_demographics}
 \title{Read SC demographics}
 \usage{
-read_lookup_sc_demographics(sc_connection = phs_db_connection(dsn = "DVPROD"))
+read_lookup_sc_demographics(
+  sc_dvprod_connection = phs_db_connection(dsn = "DVPROD")
+)
 }
 \arguments{
-\item{sc_connection}{Connection to the sc platform}
+\item{sc_dvprod_connection}{Connection to the sc platform}
 }
 \value{
 a \link[tibble:tibble-package]{tibble}
diff --git a/man/start_fy.Rd b/man/start_fy.Rd
index 9951af2ec..4996bfb72 100644
--- a/man/start_fy.Rd
+++ b/man/start_fy.Rd
@@ -27,8 +27,8 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd
index 9936736a8..f5729dcb0 100644
--- a/man/start_fy_quarter.Rd
+++ b/man/start_fy_quarter.Rd
@@ -26,8 +26,8 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd
index fdac297a7..098f0bf73 100644
--- a/man/start_next_fy_quarter.Rd
+++ b/man/start_next_fy_quarter.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy}()},
 \code{\link{end_fy_quarter}()},
+\code{\link{end_fy}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy}()},
-\code{\link{start_fy_quarter}()}
+\code{\link{start_fy_quarter}()},
+\code{\link{start_fy}()}
 }
 \concept{date functions}

From 99f9c2daf9df621f84ca405338b0fa320fa1a482 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 26 Mar 2024 15:21:17 +0000
Subject: [PATCH 041/186] Update sc connection name

---
 R/read_lookup_sc_demographics.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R
index 020542baa..cb3cea3c2 100644
--- a/R/read_lookup_sc_demographics.R
+++ b/R/read_lookup_sc_demographics.R
@@ -1,13 +1,13 @@
 #' Read SC demographics
 #'
-#' @param sc_connection Connection to the sc platform
+#' @param sc_dvprod_connection Connection to the sc platform
 #'
 #' @return a [tibble][tibble::tibble-package]
 #' @export
 #'
-read_lookup_sc_demographics <- function(sc_connection = phs_db_connection(dsn = "DVPROD")) {
+read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPROD")) {
   sc_demog <- dplyr::tbl(
-    sc_connection,
+    sc_dvprod_connection,
     dbplyr::in_schema("social_care_2", "demographic_snapshot")
   ) %>%
     dplyr::select(

From 298e61351441841a0edb0bdf220da34a2887e4b6 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Tue, 26 Mar 2024 15:23:34 +0000
Subject: [PATCH 042/186] Update documentation

---
 man/calculate_stay.Rd          | 4 ++--
 man/compute_mid_year_age.Rd    | 4 ++--
 man/convert_date_to_numeric.Rd | 4 ++--
 man/convert_numeric_to_date.Rd | 4 ++--
 man/end_fy.Rd                  | 2 +-
 man/end_fy_quarter.Rd          | 2 +-
 man/end_next_fy_quarter.Rd     | 4 ++--
 man/fy_interval.Rd             | 4 ++--
 man/is_date_in_fyyear.Rd       | 4 ++--
 man/last_date_month.Rd         | 4 ++--
 man/midpoint_fy.Rd             | 4 ++--
 man/next_fy.Rd                 | 4 ++--
 man/start_fy.Rd                | 2 +-
 man/start_fy_quarter.Rd        | 2 +-
 man/start_next_fy_quarter.Rd   | 6 +++---
 15 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd
index 43b7bd166..5e9266b10 100644
--- a/man/calculate_stay.Rd
+++ b/man/calculate_stay.Rd
@@ -34,16 +34,16 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd
index 142fa4aab..5a50370e0 100644
--- a/man/compute_mid_year_age.Rd
+++ b/man/compute_mid_year_age.Rd
@@ -31,16 +31,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd
index 5511fec84..b67eaa778 100644
--- a/man/convert_date_to_numeric.Rd
+++ b/man/convert_date_to_numeric.Rd
@@ -24,16 +24,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd
index f786e0319..a09b7b9b9 100644
--- a/man/convert_numeric_to_date.Rd
+++ b/man/convert_numeric_to_date.Rd
@@ -24,16 +24,16 @@ Other date functions:
 \code{\link{calculate_stay}()},
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_fy.Rd b/man/end_fy.Rd
index 2925ffe60..6220f5f32 100644
--- a/man/end_fy.Rd
+++ b/man/end_fy.Rd
@@ -34,8 +34,8 @@ Other date functions:
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd
index 0efe9624a..26c439a04 100644
--- a/man/end_fy_quarter.Rd
+++ b/man/end_fy_quarter.Rd
@@ -33,8 +33,8 @@ Other date functions:
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd
index f9cc1720a..702446e82 100644
--- a/man/end_next_fy_quarter.Rd
+++ b/man/end_next_fy_quarter.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd
index 12d1d36bb..00b9ea52c 100644
--- a/man/fy_interval.Rd
+++ b/man/fy_interval.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd
index 97a0f3639..e74bd5734 100644
--- a/man/is_date_in_fyyear.Rd
+++ b/man/is_date_in_fyyear.Rd
@@ -41,15 +41,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd
index f52305356..3d3b9544e 100644
--- a/man/last_date_month.Rd
+++ b/man/last_date_month.Rd
@@ -25,15 +25,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd
index 7bac9b6b3..2363df773 100644
--- a/man/midpoint_fy.Rd
+++ b/man/midpoint_fy.Rd
@@ -27,15 +27,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/next_fy.Rd b/man/next_fy.Rd
index 19e1193f4..7524c5f11 100644
--- a/man/next_fy.Rd
+++ b/man/next_fy.Rd
@@ -27,15 +27,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
-\code{\link{start_fy_quarter}()},
 \code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()},
 \code{\link{start_next_fy_quarter}()}
 }
 \concept{date functions}
diff --git a/man/start_fy.Rd b/man/start_fy.Rd
index 4996bfb72..9951af2ec 100644
--- a/man/start_fy.Rd
+++ b/man/start_fy.Rd
@@ -27,8 +27,8 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd
index f5729dcb0..9936736a8 100644
--- a/man/start_fy_quarter.Rd
+++ b/man/start_fy_quarter.Rd
@@ -26,8 +26,8 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd
index 098f0bf73..fdac297a7 100644
--- a/man/start_next_fy_quarter.Rd
+++ b/man/start_next_fy_quarter.Rd
@@ -26,15 +26,15 @@ Other date functions:
 \code{\link{compute_mid_year_age}()},
 \code{\link{convert_date_to_numeric}()},
 \code{\link{convert_numeric_to_date}()},
-\code{\link{end_fy_quarter}()},
 \code{\link{end_fy}()},
+\code{\link{end_fy_quarter}()},
 \code{\link{end_next_fy_quarter}()},
 \code{\link{fy_interval}()},
 \code{\link{is_date_in_fyyear}()},
 \code{\link{last_date_month}()},
 \code{\link{midpoint_fy}()},
 \code{\link{next_fy}()},
-\code{\link{start_fy_quarter}()},
-\code{\link{start_fy}()}
+\code{\link{start_fy}()},
+\code{\link{start_fy_quarter}()}
 }
 \concept{date functions}

From fe189a94d7cccaa156fb6a96d5e3f7f7ad22cb90 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Mon, 8 Apr 2024 15:53:03 +0100
Subject: [PATCH 043/186] 936 - Update parameters with file paths  (#939)

Specify file paths in sc function parameters
---
 R/process_sc_all_alarms_telecare.R    | 2 +-
 R/process_sc_all_care_home.R          | 8 ++++----
 R/process_sc_all_home_care.R          | 2 +-
 R/process_sc_all_sds.R                | 2 +-
 man/process_sc_all_alarms_telecare.Rd | 6 +++++-
 man/process_sc_all_care_home.Rd       | 8 ++++----
 man/process_sc_all_home_care.Rd       | 6 +++++-
 man/process_sc_all_sds.Rd             | 6 +++++-
 8 files changed, 26 insertions(+), 14 deletions(-)

diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R
index 77877d584..55bc42d4e 100644
--- a/R/process_sc_all_alarms_telecare.R
+++ b/R/process_sc_all_alarms_telecare.R
@@ -13,7 +13,7 @@
 #'
 process_sc_all_alarms_telecare <- function(
     data,
-    sc_demog_lookup,
+    sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
     write_to_disk = TRUE) {
   # Data Cleaning-----------------------------------------------------
 
diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R
index d287f2042..f0b6c3db4 100644
--- a/R/process_sc_all_care_home.R
+++ b/R/process_sc_all_care_home.R
@@ -22,10 +22,10 @@
 #'
 process_sc_all_care_home <- function(
     data,
-    sc_demog_lookup,
-    it_chi_deaths_data,
-    ch_name_lookup_path = get_slf_ch_name_lookup_path(),
-    spd_path = get_spd_path(),
+    sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+    it_chi_deaths_data = read_file(get_slf_chi_deaths_path()),
+    ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()),
+    spd_path = read_file(get_spd_path()),
     write_to_disk = TRUE) {
   ## Data Cleaning-----------------------------------------------------
   ch_clean <- data %>%
diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R
index 3ada9a2da..331a682e1 100644
--- a/R/process_sc_all_home_care.R
+++ b/R/process_sc_all_home_care.R
@@ -13,7 +13,7 @@
 #'
 process_sc_all_home_care <- function(
     data,
-    sc_demog_lookup,
+    sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
     write_to_disk = TRUE) {
   replaced_dates <- data %>%
     dplyr::filter(.data$hc_start_date_after_period_end_date != 1) %>%
diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R
index a1a1db24a..3ebc064c0 100644
--- a/R/process_sc_all_sds.R
+++ b/R/process_sc_all_sds.R
@@ -12,7 +12,7 @@
 #'
 process_sc_all_sds <- function(
     data,
-    sc_demog_lookup,
+    sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
     write_to_disk = TRUE) {
   # Match on demographics data (chi, gender, dob and postcode)
   matched_sds_data <- data %>%
diff --git a/man/process_sc_all_alarms_telecare.Rd b/man/process_sc_all_alarms_telecare.Rd
index 1dded751d..031fd5028 100644
--- a/man/process_sc_all_alarms_telecare.Rd
+++ b/man/process_sc_all_alarms_telecare.Rd
@@ -4,7 +4,11 @@
 \alias{process_sc_all_alarms_telecare}
 \title{Process the all Alarms Telecare extract}
 \usage{
-process_sc_all_alarms_telecare(data, sc_demog_lookup, write_to_disk = TRUE)
+process_sc_all_alarms_telecare(
+  data,
+  sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+  write_to_disk = TRUE
+)
 }
 \arguments{
 \item{data}{The extract to process}
diff --git a/man/process_sc_all_care_home.Rd b/man/process_sc_all_care_home.Rd
index 37d6332ca..a137119b7 100644
--- a/man/process_sc_all_care_home.Rd
+++ b/man/process_sc_all_care_home.Rd
@@ -6,10 +6,10 @@
 \usage{
 process_sc_all_care_home(
   data,
-  sc_demog_lookup,
-  it_chi_deaths_data,
-  ch_name_lookup_path = get_slf_ch_name_lookup_path(),
-  spd_path = get_spd_path(),
+  sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+  it_chi_deaths_data = read_file(get_slf_chi_deaths_path()),
+  ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()),
+  spd_path = read_file(get_spd_path()),
   write_to_disk = TRUE
 )
 }
diff --git a/man/process_sc_all_home_care.Rd b/man/process_sc_all_home_care.Rd
index 1e0afcafd..ff18aac6a 100644
--- a/man/process_sc_all_home_care.Rd
+++ b/man/process_sc_all_home_care.Rd
@@ -4,7 +4,11 @@
 \alias{process_sc_all_home_care}
 \title{Process the all home care extract}
 \usage{
-process_sc_all_home_care(data, sc_demog_lookup, write_to_disk = TRUE)
+process_sc_all_home_care(
+  data,
+  sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+  write_to_disk = TRUE
+)
 }
 \arguments{
 \item{data}{The extract to process}
diff --git a/man/process_sc_all_sds.Rd b/man/process_sc_all_sds.Rd
index 69d79fc9d..3454ef35b 100644
--- a/man/process_sc_all_sds.Rd
+++ b/man/process_sc_all_sds.Rd
@@ -4,7 +4,11 @@
 \alias{process_sc_all_sds}
 \title{Process the all SDS extract}
 \usage{
-process_sc_all_sds(data, sc_demog_lookup, write_to_disk = TRUE)
+process_sc_all_sds(
+  data,
+  sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+  write_to_disk = TRUE
+)
 }
 \arguments{
 \item{data}{The extract to process}

From 65d70e7056a767a7ef3eba9f052ca3d05b55610f Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Wed, 10 Apr 2024 11:32:03 +0100
Subject: [PATCH 044/186] Add test for `n_records` in ep file tests

---
 R/process_tests_episode_file.R | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R
index 6f2c73fcb..a91154676 100644
--- a/R/process_tests_episode_file.R
+++ b/R/process_tests_episode_file.R
@@ -72,6 +72,9 @@ produce_episode_file_tests <- function(
     )) {
   test_flags <- data %>%
     dplyr::group_by(.data$recid) %>%
+    dplyr::mutate(
+      n_records = 1L
+    ) %>%
     # use functions to create HB and partnership flags
     create_demog_test_flags(chi = anon_chi) %>%
     create_hb_test_flags(.data$hbtreatcode) %>%
@@ -103,7 +106,7 @@ produce_episode_file_tests <- function(
 
   test_flags <- test_flags %>%
     # keep variables for comparison
-    dplyr::select("unique_chi":dplyr::last_col()) %>%
+    dplyr::select("n_records":dplyr::last_col()) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum", group_by = "recid")
 

From f03c5c60841f65f70ce69b8873ecc4597ef03dfa Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Fri, 29 Mar 2024 10:42:47 +0000
Subject: [PATCH 045/186] remove and merge overlapping records in GP OoHs

---
 R/process_extract_consultations.R | 55 +++++++++++++++++++++----------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/R/process_extract_consultations.R b/R/process_extract_consultations.R
index 6dc175cb8..cab126bc5 100644
--- a/R/process_extract_consultations.R
+++ b/R/process_extract_consultations.R
@@ -81,25 +81,44 @@ process_extract_ooh_consultations <- function(data, year) {
   # Clean up some overlapping episodes
   # Only merge if they look like duplicates other than the time,
   # In which case take the earliest start and latest end.
-  consultations_clean <- consultations_covid
+  consultations_clean <- consultations_covid %>%
+    # Sort in reverse order so we can use coalesce which takes the first non-missing value
+    dplyr::arrange(chi,
+                   ooh_case_id,
+                   record_keydate1,
+                   record_keydate2) %>%
+    data.table::as.data.table()
 
-  # TODO Remove / merge overlapping records in GP OoHs
-  # dtplyr::lazy_dt() %>%
-  # # Sort in reverse order so we can use coalesce which takes the first non-missing value
-  # dplyr::arrange(chi, ooh_case_id, dplyr::desc(record_keydate1), dplyr::desc(record_keydate2)) %>%
-  # # This seems to be enough to identify a unique episode
-  # dplyr::group_by(chi, ooh_case_id, consultation_type, location) %>%
-  # # Records will be merged if they don't look unique and there is overlap or no time between them
-  # dplyr::mutate(episode_counter = replace_na(record_keydate1 > lag(record_keydate2), TRUE) %>%
-  #   cumsum()) %>%
-  # dplyr::group_by(chi, ooh_case_id, consultation_type, location, episode_counter) %>%
-  # dplyr::summarise(
-  #   record_keydate1 = min(record_keydate1),
-  #   record_keydate2 = max(record_keydate2),
-  #   dplyr::across(c(dplyr::everything(), -"record_keydate1", -"record_keydate2"), dplyr::coalesce)
-  # ) %>%
-  # dplyr::ungroup() %>%
-  # dplyr::as_tibble()
+  consultations_clean[, distinct_check := (
+    record_keydate1 > data.table::shift(record_keydate2, fill = NA, type = "lag")
+  ),
+  by = .(chi, ooh_case_id, consultation_type, location)]
+  consultations_clean[, distinct_check := tidyr::replace_na(distinct_check, TRUE)]
+  consultations_clean[, episode_counter := cumsum(distinct_check),
+                      by = .(chi, ooh_case_id, consultation_type, location)]
+  consultations_clean[,
+                      c("record_keydate1",
+                        "record_keydate2") := list(min(record_keydate1),
+                                                   max(record_keydate2)),
+                      by = .(chi,
+                             ooh_case_id,
+                             consultation_type,
+                             location,
+                             episode_counter)]
+
+  # replace NA with previous non-NA value in each column
+  col_sel = names(consultations_clean)
+  col_sel = col_sel[!(col_sel %in% c("record_keydate1", "record_keydate2"))]
+  consultations_clean[,
+                      (col_sel) := lapply(.SD, zoo::na.locf, na.rm = FALSE),
+                      .SDcols = col_sel]
+
+  consultations_clean[,
+                      c("distinct_check",
+                        "episode_counter") := list(NULL, NULL)]
+  consultations_clean = unique(consultations_clean) %>%
+    dplyr::as_tibble()
+  # cleaning up overlapping episodes done
 
   return(consultations_clean)
 }

From fda0c515e0b411868b9fed003258915c450a5793 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Fri, 29 Mar 2024 10:44:40 +0000
Subject: [PATCH 046/186] Style code

---
 R/process_extract_consultations.R | 60 +++++++++++++++++++------------
 1 file changed, 38 insertions(+), 22 deletions(-)

diff --git a/R/process_extract_consultations.R b/R/process_extract_consultations.R
index cab126bc5..2aafdbdca 100644
--- a/R/process_extract_consultations.R
+++ b/R/process_extract_consultations.R
@@ -83,40 +83,56 @@ process_extract_ooh_consultations <- function(data, year) {
   # In which case take the earliest start and latest end.
   consultations_clean <- consultations_covid %>%
     # Sort in reverse order so we can use coalesce which takes the first non-missing value
-    dplyr::arrange(chi,
-                   ooh_case_id,
-                   record_keydate1,
-                   record_keydate2) %>%
+    dplyr::arrange(
+      chi,
+      ooh_case_id,
+      record_keydate1,
+      record_keydate2
+    ) %>%
     data.table::as.data.table()
 
   consultations_clean[, distinct_check := (
     record_keydate1 > data.table::shift(record_keydate2, fill = NA, type = "lag")
   ),
-  by = .(chi, ooh_case_id, consultation_type, location)]
+  by = .(chi, ooh_case_id, consultation_type, location)
+  ]
   consultations_clean[, distinct_check := tidyr::replace_na(distinct_check, TRUE)]
   consultations_clean[, episode_counter := cumsum(distinct_check),
-                      by = .(chi, ooh_case_id, consultation_type, location)]
+    by = .(chi, ooh_case_id, consultation_type, location)
+  ]
   consultations_clean[,
-                      c("record_keydate1",
-                        "record_keydate2") := list(min(record_keydate1),
-                                                   max(record_keydate2)),
-                      by = .(chi,
-                             ooh_case_id,
-                             consultation_type,
-                             location,
-                             episode_counter)]
+    c(
+      "record_keydate1",
+      "record_keydate2"
+    ) := list(
+      min(record_keydate1),
+      max(record_keydate2)
+    ),
+    by = .(
+      chi,
+      ooh_case_id,
+      consultation_type,
+      location,
+      episode_counter
+    )
+  ]
 
   # replace NA with previous non-NA value in each column
-  col_sel = names(consultations_clean)
-  col_sel = col_sel[!(col_sel %in% c("record_keydate1", "record_keydate2"))]
+  col_sel <- names(consultations_clean)
+  col_sel <- col_sel[!(col_sel %in% c("record_keydate1", "record_keydate2"))]
   consultations_clean[,
-                      (col_sel) := lapply(.SD, zoo::na.locf, na.rm = FALSE),
-                      .SDcols = col_sel]
+    (col_sel) := lapply(.SD, zoo::na.locf, na.rm = FALSE),
+    .SDcols = col_sel
+  ]
 
-  consultations_clean[,
-                      c("distinct_check",
-                        "episode_counter") := list(NULL, NULL)]
-  consultations_clean = unique(consultations_clean) %>%
+  consultations_clean[
+    ,
+    c(
+      "distinct_check",
+      "episode_counter"
+    ) := list(NULL, NULL)
+  ]
+  consultations_clean <- unique(consultations_clean) %>%
     dplyr::as_tibble()
   # cleaning up overlapping episodes done
 

From 713e7a76ec6e40f395b90475c2a89f9c24f80c1f Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 15 Apr 2024 10:12:50 +0100
Subject: [PATCH 047/186] update spelling to lowercases

---
 .github/actions/spelling/expect.txt | 73 +++++++++++++++--------------
 1 file changed, 39 insertions(+), 34 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index d4124911f..af62783db 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -1,7 +1,8 @@
-Accom
+acc
+accom
 admloc
 admtype
-ADPE
+adpe
 adtf
 arrivalmode
 arth
@@ -13,23 +14,23 @@ birthtime
 bodyloc
 boxi
 callr
-Canx
+canx
 carehome
 careinspectorate
 categorises
 cattend
-CCYY
+ccyy
 cdn
 cennum
 chp
 chpstart
 cij
-Classificat
+classificat
 cls
 cmh
-CNWs
+cnws
 codecov
-Comhairle
+comhairle
 commhosp
 congen
 costincdnas
@@ -53,7 +54,7 @@ deathdiag
 demog
 devhints
 dfc
-Diagramme
+diagramme
 disch
 dischloc
 dischto
@@ -71,7 +72,8 @@ fcase
 feb
 fifelse
 fileext
-Finalise
+finalise
+fnc
 fst
 ftm
 fyear
@@ -81,7 +83,7 @@ ggplot
 github
 gls
 gms
-GPOo
+gpoo
 gpprac
 gss
 hbnames
@@ -91,7 +93,7 @@ hbrescode
 hbtreatcode
 hbtreatname
 hci
-HCP
+hcp
 hhg
 hjust
 hms
@@ -102,12 +104,12 @@ hscdiip
 hscp
 hscpnames
 htmlwidgets
-IDPC
+idpc
 infyyear
 intzone
 ipdc
 issuenumber
-itle
+istle
 iwalk
 jaccard
 jan
@@ -120,7 +122,7 @@ keytimex
 kis
 lazydt
 lcap
-LCHO
+lcho
 lgl
 lintr
 los
@@ -128,16 +130,17 @@ ltc
 lubridate
 magrittr
 markdownguide
-Matern
-Mcbride
+matern
+mcbride
 mcmahon
-MMMYY
-MONTHFLAG
+miu
+mmmyy
+monthflag
 mpat
 multiday
 multisession
 multistaff
-NAs
+na
 newcons
 nhs
 nhshosp
@@ -148,31 +151,33 @@ oldtadm
 opendata
 openxl
 openxlsx
-ORCID
+orcid
 outfile
 pandoc
 patflow
 pattype
-PCEC
+pcec
 phs
 phsmethods
 phsopendata
 pkgdown
 placeinc
 plics
-PMS
+pms
 popluation
 postcodes
-PPAs
+ppas
+prac
+praccode
 prac
 praccode
 ptypes
 purrr
 quickstart
 rankdir
-Rbuildignore
+rbuildignore
 rcmdcheck
-RDD
+rdd
 rds
 reabl
 reablement
@@ -187,28 +192,28 @@ renviron
 rlang
 rmarkdown
 roxygen
-Rprofile
-Rscript
+rprofile
+rscript
 rspm
 rstudio
 rstudioapi
-Rtype
+rtype
 scoial
 scotp
-SDcols
+sdcols
 seealso
 selfharm
 setkeyv
 setnafill
 setnames
 setorder
-Siar
+siar
 sigfac
 simd
 slf
 slfhelper
 smr
-SMRA
+smra
 smrtype
 sparra
 spd
@@ -228,10 +233,10 @@ thom
 tibble
 tidyr
 tidyselect
-TODOs
+todo
 uid
 ungroup
-Unicode
+unicode
 updown
 upi
 vline
@@ -240,7 +245,7 @@ xintercept
 xlsx
 yearstay
 yml
-YYYYQX
+yyyyqx
 zihao
 zsav
 zstd

From a0ac03060842d8cebc602b77a24797c954e967d1 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 15 Apr 2024 10:28:19 +0100
Subject: [PATCH 048/186] update spelling

---
 .github/actions/spelling/expect.txt | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index af62783db..d27b6e755 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -12,7 +12,7 @@ aut
 bedday
 birthtime
 bodyloc
-boxi
+BOXI
 callr
 canx
 carehome
@@ -109,7 +109,7 @@ infyyear
 intzone
 ipdc
 issuenumber
-istle
+itle
 iwalk
 jaccard
 jan
@@ -133,7 +133,7 @@ markdownguide
 matern
 mcbride
 mcmahon
-miu
+MIU
 mmmyy
 monthflag
 mpat
@@ -200,7 +200,7 @@ rstudioapi
 rtype
 scoial
 scotp
-sdcols
+SDcols
 seealso
 selfharm
 setkeyv

From 641d175488722531228c7bf32df9d922b0064425 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 15 Apr 2024 17:18:17 +0100
Subject: [PATCH 049/186] Add function for reading Dev SLF file Uses SLFhelper
 for easy access to Source_Linkage_Files

---
 NAMESPACE                |  1 +
 R/read_dev_slf_file.R    | 23 +++++++++++++++++++++++
 man/read_dev_slf_file.Rd | 22 ++++++++++++++++++++++
 3 files changed, 46 insertions(+)
 create mode 100644 R/read_dev_slf_file.R
 create mode 100644 man/read_dev_slf_file.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 91f6b66d9..2a033dca9 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -149,6 +149,7 @@ export(process_tests_sds)
 export(produce_episode_file_tests)
 export(produce_source_extract_tests)
 export(produce_test_comparison)
+export(read_dev_slf_file)
 export(read_extract_acute)
 export(read_extract_ae)
 export(read_extract_cmh)
diff --git a/R/read_dev_slf_file.R b/R/read_dev_slf_file.R
new file mode 100644
index 000000000..ed11eda5f
--- /dev/null
+++ b/R/read_dev_slf_file.R
@@ -0,0 +1,23 @@
+#' Read development SLF files (using SLFhelper)
+#'
+#' @param year Year of the file to be read, you can specify multiple years
+#'  which will then be returned as one file. See SLFhelper for more info.
+#' @param type Type of file to be read. Supply either Episode or Individual file.
+#' @param col_select Supply the columns you would like to select.
+#'
+#' @return a tibble with development SLF file
+#' @export
+#'
+read_dev_slf_file <- function(year, type = c("episode", "individual"), col_select = NULL) {
+  if (type == "episode") {
+    slf_file <- slfhelper::read_slf_episode(year,
+      col_select = col_select,
+      dev = TRUE
+    )
+  } else {
+    slf_file <- slfhelper::read_slf_individual(year,
+      col_select = col_select,
+      dev = TRUE
+    )
+  }
+}
diff --git a/man/read_dev_slf_file.Rd b/man/read_dev_slf_file.Rd
new file mode 100644
index 000000000..6519701cd
--- /dev/null
+++ b/man/read_dev_slf_file.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/process_tests_cross_year.R
+\name{read_dev_slf_file}
+\alias{read_dev_slf_file}
+\title{Read development SLF files (using SLFhelper)}
+\usage{
+read_dev_slf_file(year, type = c("episode", "individual"), col_select = NULL)
+}
+\arguments{
+\item{year}{Year of the file to be read, you can specify multiple years
+which will then be returned as one file. See SLFhelper for more info.}
+
+\item{type}{Type of file to be read. Supply either Episode or Individual file.}
+
+\item{col_select}{Supply the columns you would like to select.}
+}
+\value{
+a tibble with development SLF file
+}
+\description{
+Read development SLF files (using SLFhelper)
+}

From c2090664750fc328d83e31b11783603d539cfc19 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 15 Apr 2024 17:24:24 +0100
Subject: [PATCH 050/186] Add cross year tests using SLFhelper (WIP) - still
 need to add write to disk and possibly develop visuals

---
 NAMESPACE                       |  1 +
 R/process_tests_cross_year.R    | 57 +++++++++++++++++++++++++++++++++
 man/process_tests_cross_year.Rd | 19 +++++++++++
 man/read_dev_slf_file.Rd        |  2 +-
 4 files changed, 78 insertions(+), 1 deletion(-)
 create mode 100644 R/process_tests_cross_year.R
 create mode 100644 man/process_tests_cross_year.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 2a033dca9..dd10da9a7 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -123,6 +123,7 @@ export(process_tests_ae)
 export(process_tests_alarms_telecare)
 export(process_tests_care_home)
 export(process_tests_cmh)
+export(process_tests_cross_year)
 export(process_tests_delayed_discharges)
 export(process_tests_district_nursing)
 export(process_tests_episode_file)
diff --git a/R/process_tests_cross_year.R b/R/process_tests_cross_year.R
new file mode 100644
index 000000000..18a5b40cc
--- /dev/null
+++ b/R/process_tests_cross_year.R
@@ -0,0 +1,57 @@
+#' Process cross year tests
+#'
+#' @description Process high level tests (e.g the number of records in each recid)
+#' across years.
+#'
+#' @param year Year of the file to be read, you can specify multiple years
+#'  which will then be returned as one file. See SLFhelper for more info.
+#'
+#' @return a tibble with a test summary across years
+#' @export
+#'
+process_tests_cross_year <- function(year) {
+  ep_file <- read_dev_slf_file(year,
+    type = "episode",
+    col_select = c("year", "recid", "anon_chi", "record_keydate1", "record_keydate2")
+  )
+
+  total_test <- ep_file %>%
+    dplyr::group_by(.data$year, .data$recid) %>%
+    dplyr::mutate(
+      n_records = 1L
+    ) %>%
+    dplyr::summarise(
+      n = sum(n_records)
+    ) %>%
+    dplyr::mutate(
+      fy_qtr = "total"
+    )
+
+  qtr_test <- ep_file %>%
+    dplyr::mutate(
+      fy_qtr = dplyr::if_else(recid != "PIS", lubridate::quarter(record_keydate1, fiscal_start = 4), NA)
+    ) %>%
+    dplyr::group_by(.data$year, .data$recid, .data$fy_qtr) %>%
+    dplyr::mutate(
+      n_records = 1L
+    ) %>%
+    dplyr::summarise(
+      n = sum(n_records)
+    ) %>%
+    dplyr::mutate(
+      fy_qtr = as.character(fy_qtr)
+    )
+
+  join_tests <- dplyr::bind_rows(total_test, qtr_test) %>%
+    dplyr::arrange(year, recid, fy_qtr)
+
+  pivot_tests <- join_tests %>%
+    tidyr::pivot_wider(
+      names_from = c("year", "fy_qtr"),
+      names_glue = "{year}_qtr_{fy_qtr}",
+      values_from = "n"
+    ) %>%
+    dplyr::select(-tidyselect::ends_with("NA"))
+
+  return(pivot_tests)
+}
diff --git a/man/process_tests_cross_year.Rd b/man/process_tests_cross_year.Rd
new file mode 100644
index 000000000..0519e5f0d
--- /dev/null
+++ b/man/process_tests_cross_year.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/process_tests_cross_year.R
+\name{process_tests_cross_year}
+\alias{process_tests_cross_year}
+\title{Process cross year tests}
+\usage{
+process_tests_cross_year(year)
+}
+\arguments{
+\item{year}{Year of the file to be read, you can specify multiple years
+which will then be returned as one file. See SLFhelper for more info.}
+}
+\value{
+a tibble with a test summary across years
+}
+\description{
+Process high level tests (e.g the number of records in each recid)
+across years.
+}
diff --git a/man/read_dev_slf_file.Rd b/man/read_dev_slf_file.Rd
index 6519701cd..fe2e10d4f 100644
--- a/man/read_dev_slf_file.Rd
+++ b/man/read_dev_slf_file.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/process_tests_cross_year.R
+% Please edit documentation in R/read_dev_slf_file.R
 \name{read_dev_slf_file}
 \alias{read_dev_slf_file}
 \title{Read development SLF files (using SLFhelper)}

From 2cff80d9e8bf2602f8579ac31edd24d734a6cb28 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Wed, 17 Apr 2024 12:55:41 +0100
Subject: [PATCH 051/186] Create tests for social care sandpit extracts (#943)

* Update `write_tests_xlsx`

* Update documentation

* Add in sandpit tests where the extract is saved

* Setup tests for sandpit
Further checks needed for writing to disk

* Update documentation

* Amend case_when statement

* rename function to include 'sc'

* Update documentation

* Use `is.null` instead of `missing`

* Update documentation

* Add `year` as a parameter

* Update documentation

* Setup for writing sandpit tests to disk

* Update parameters for sandpit tests

* Update documentation

* Use `process_tests_sc_sandpit`

* Apply styling

* Style code

* update documentation

Co-authored-by: Zihao Li <zihao.li@phs.scot>

* Rename variable sc_id

Co-authored-by: Zihao Li <zihao.li@phs.scot>

* Rename variable

Co-authored-by: Zihao Li <zihao.li@phs.scot>

* Rename variable

Co-authored-by: Zihao Li <zihao.li@phs.scot>

* Update documentation

* [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/8689503990/attempts/1
Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/943#issuecomment-2056794120

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>

* update spelling

* update spelling expect variant

---------

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Zihao Li <zihao.li@phs.scot>
Co-authored-by: Zihao Li <lizihao_anu@outlook.com>
---
 .github/actions/spelling/expect.txt |   8 +-
 NAMESPACE                           |   2 +
 R/process_tests_sc_sandpit.R        | 144 ++++++++++++++++++++++++++++
 R/read_lookup_sc_client.R           |   3 +
 R/read_lookup_sc_demographics.R     |   3 +
 R/read_sc_all_alarms_telecare.R     |   3 +
 R/read_sc_all_care_home.R           |   3 +
 R/read_sc_all_home_care.R           |   3 +
 R/read_sc_all_sds.R                 |   3 +
 R/write_tests_xlsx.R                |  25 +++--
 man/process_tests_sc_sandpit.Rd     |  20 ++++
 man/produce_sc_sandpit_tests.Rd     |  24 +++++
 man/write_tests_xlsx.Rd             |   2 +-
 13 files changed, 228 insertions(+), 15 deletions(-)
 create mode 100644 R/process_tests_sc_sandpit.R
 create mode 100644 man/process_tests_sc_sandpit.Rd
 create mode 100644 man/produce_sc_sandpit_tests.Rd

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index d27b6e755..a4a34a58b 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -12,7 +12,7 @@ aut
 bedday
 birthtime
 bodyloc
-BOXI
+boxi
 callr
 canx
 carehome
@@ -169,8 +169,6 @@ postcodes
 ppas
 prac
 praccode
-prac
-praccode
 ptypes
 purrr
 quickstart
@@ -207,7 +205,7 @@ setkeyv
 setnafill
 setnames
 setorder
-siar
+Siar
 sigfac
 simd
 slf
@@ -246,6 +244,6 @@ xlsx
 yearstay
 yml
 yyyyqx
-zihao
+Zihao
 zsav
 zstd
diff --git a/NAMESPACE b/NAMESPACE
index 91f6b66d9..4606cf3f2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -145,8 +145,10 @@ export(process_tests_sc_all_hc_episodes)
 export(process_tests_sc_all_sds_episodes)
 export(process_tests_sc_client_lookup)
 export(process_tests_sc_demographics)
+export(process_tests_sc_sandpit)
 export(process_tests_sds)
 export(produce_episode_file_tests)
+export(produce_sc_sandpit_tests)
 export(produce_source_extract_tests)
 export(produce_test_comparison)
 export(read_extract_acute)
diff --git a/R/process_tests_sc_sandpit.R b/R/process_tests_sc_sandpit.R
new file mode 100644
index 000000000..089f61aa1
--- /dev/null
+++ b/R/process_tests_sc_sandpit.R
@@ -0,0 +1,144 @@
+#' Process tests for the social care sandpit extracts
+#'
+#' @param type Name of sandpit extract.
+#'
+#' @return a [tibble][tibble::tibble-package] containing a test comparison.
+#' @export
+#'
+process_tests_sc_sandpit <- function(type = c("at", "hc", "ch", "sds", "demographics", "client"), year = NULL) {
+  comparison <- produce_test_comparison(
+    old_data = produce_sc_sandpit_tests(
+      read_file(get_sandpit_extract_path(type = {{ type }}, year = year, update = previous_update())),
+      type = {{ type }}
+    ),
+    new_data = produce_sc_sandpit_tests(
+      read_file(get_sandpit_extract_path(type = {{ type }}, year = year, update = latest_update())),
+      type = {{ type }}
+    )
+  )
+
+  comparison %>%
+    write_tests_xlsx(sheet_name = {{ type }}, year = year, workbook_name = "sandpit")
+
+  return(comparison)
+}
+
+
+#' Produce tests for social care sandpit extracts.
+#'
+#' @param data new or old data for testing summary flags
+#' (data is from [get_sandpit_extract_path()])
+#' @param type Name of sandpit extract.
+#'
+#' @return a dataframe with a count of each flag
+#' from [calculate_measures()]
+#' @export
+#'
+produce_sc_sandpit_tests <- function(data, type = c("demographics", "client", "at", "ch", "hc", "sds")) {
+  if (type == "demographics") {
+    missing_tests <- data %>%
+      dplyr::mutate(
+        n_missing_chi = is_missing(.data$chi_upi),
+        n_missing_sc_id = is_missing(.data$social_care_id),
+        n_missing_dob = is.na(.data$chi_date_of_birth),
+        n_missing_postcode = is_missing(.data$chi_postcode),
+        n_missing_gender = is_missing(.data$chi_gender_code)
+      ) %>%
+      dplyr::select(n_missing_chi:n_missing_gender) %>%
+      calculate_measures(measure = "sum")
+
+    latest_flag_tests <- data %>%
+      dplyr::filter(!(is.na(.data$chi_upi))) %>%
+      dplyr::group_by(.data$chi_upi, .data$sending_location) %>%
+      dplyr::summarise(latest_count = sum(.data$latest_record_flag)) %>%
+      dplyr::ungroup() %>%
+      dplyr::mutate(
+        chi_latest_flag_0 = dplyr::if_else(.data$latest_count == 0, 1, 0),
+        chi_latest_flag_1 = dplyr::if_else(.data$latest_count == 1, 1, 0),
+        chi_latest_flag_2 = dplyr::if_else(.data$latest_count == 2, 1, 0),
+        chi_latest_flag_3 = dplyr::if_else(.data$latest_count == 3, 1, 0),
+        chi_latest_flag_4 = dplyr::if_else(.data$latest_count == 4, 1, 0),
+        chi_latest_flag_5 = dplyr::if_else(.data$latest_count == 5, 1, 0),
+        chi_latest_flag_6 = dplyr::if_else(.data$latest_count == 6, 1, 0),
+        chi_latest_flag_7 = dplyr::if_else(.data$latest_count == 7, 1, 0),
+        chi_latest_flag_8 = dplyr::if_else(.data$latest_count == 8, 1, 0),
+        chi_latest_flag_9 = dplyr::if_else(.data$latest_count == 9, 1, 0),
+        chi_latest_flag_10 = dplyr::if_else(.data$latest_count == 10, 1, 0)
+      ) %>%
+      dplyr::select(.data$chi_latest_flag_0:.data$chi_latest_flag_10) %>%
+      calculate_measures(measure = "sum")
+
+    # add a flag for sc ids where there is multiple chi associated
+    sc_id_multi_chi <- data %>%
+      dplyr::distinct() %>%
+      dplyr::filter(!(is.na(.data$chi_upi))) %>%
+      dplyr::group_by(.data$social_care_id, .data$sending_location) %>%
+      dplyr::distinct(.data$chi_upi, .keep_all = TRUE) %>%
+      dplyr::mutate(distinct_chi_count = dplyr::n_distinct(.data$chi_upi)) %>%
+      dplyr::filter(distinct_chi_count > 1) %>%
+      dplyr::distinct(.data$social_care_id, .data$sending_location, .keep_all = TRUE) %>%
+      dplyr::mutate(sc_id_multi_chi = 1) %>%
+      create_sending_location_test_flags(.data$sending_location) %>%
+      dplyr::ungroup() %>%
+      dplyr::rename(
+        sc_id_multi_chi_Aberdeen_City = Aberdeen_City,
+        sc_id_multi_chi_Aberdeenshire = Aberdeenshire,
+        sc_id_multi_chi_Angus = Angus,
+        sc_id_multi_chi_Argyll_and_Bute = Argyll_and_Bute,
+        sc_id_multi_chi_City_of_Edinburgh = City_of_Edinburgh,
+        sc_id_multi_chi_Clackmannanshire = Clackmannanshire,
+        sc_id_multi_chi_Dumfries_and_Galloway = Dumfries_and_Galloway,
+        sc_id_multi_chi_Dundee_City = Dundee_City,
+        sc_id_multi_chi_East_Ayrshire = East_Ayrshire,
+        sc_id_multi_chi_East_Dunbartonshire = East_Dunbartonshire,
+        sc_id_multi_chi_East_Lothian = East_Lothian,
+        sc_id_multi_chi_East_Renfrewshire = East_Renfrewshire,
+        sc_id_multi_chi_Falkirk = Falkirk,
+        sc_id_multi_chi_Fife = Fife,
+        sc_id_multi_chi_Glasgow_City = Glasgow_City,
+        sc_id_multi_chi_Highland = Highland,
+        sc_id_multi_chi_Inverclyde = Inverclyde,
+        sc_id_multi_chi_Midlothian = Midlothian,
+        sc_id_multi_chi_Moray = Moray,
+        sc_id_multi_chi_Na_h_Eileanan_Siar = Na_h_Eileanan_Siar,
+        sc_id_multi_chi_North_Ayrshire = North_Ayrshire,
+        sc_id_multi_chi_North_Lanarkshire = North_Lanarkshire,
+        sc_id_multi_chi_Orkney_Islands = Orkney_Islands,
+        sc_id_multi_chi_Perth_and_Kinross = Perth_and_Kinross,
+        sc_id_multi_chi_Renfrewshire = Renfrewshire,
+        sc_id_multi_chi_Scottish_Borders = Scottish_Borders,
+        sc_id_multi_chi_Shetland_Islands = Shetland_Islands,
+        sc_id_multi_chi_South_Ayrshire = South_Ayrshire,
+        sc_id_multi_chi_South_Lanarkshire = South_Lanarkshire,
+        sc_id_multi_chi_Stirling = Stirling,
+        sc_id_multi_chi_West_Dunbartonshire = West_Dunbartonshire,
+        sc_id_multi_chi_West_Lothian = West_Lothian
+      ) %>%
+      dplyr::select(.data$sc_id_multi_chi, .data$sc_id_multi_chi_Aberdeen_City:.data$sc_id_multi_chi_West_Lothian) %>%
+      calculate_measures(measure = "sum")
+
+    output <- list(
+      missing_tests,
+      latest_flag_tests,
+      sc_id_multi_chi
+    ) %>%
+      purrr::reduce(dplyr::full_join, by = c("measure", "value"))
+
+    return(output)
+  } else if (type == "client" | type == "at" | type == "ch" |
+    type == "hc" | type == "sds") {
+    output <- data %>%
+      # create test flags
+      dplyr::mutate(
+        unique_sc_id = dplyr::lag(.data$social_care_id) != .data$social_care_id,
+        n_missing_sc_id = is_missing(.data$social_care_id)
+      ) %>%
+      create_sending_location_test_flags(.data$sending_location) %>%
+      # remove variables that won't be summed
+      dplyr::select(c("unique_sc_id":"West_Lothian")) %>%
+      # use function to sum new test flags
+      calculate_measures(measure = "sum")
+
+    return(output)
+  }
+}
diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R
index 370a15722..d2b549671 100644
--- a/R/read_lookup_sc_client.R
+++ b/R/read_lookup_sc_client.R
@@ -80,6 +80,9 @@ read_lookup_sc_client <- function(fyyear,
   if (!fs::file_exists(get_sandpit_extract_path(type = "client", year = fyyear))) {
     client_data %>%
       write_file(get_sandpit_extract_path(type = "client", year = fyyear))
+
+    client_data %>%
+      process_tests_sc_sandpit(type = "client", year = fyyear)
   } else {
     client_data <- client_data
   }
diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R
index cb3cea3c2..729f3a445 100644
--- a/R/read_lookup_sc_demographics.R
+++ b/R/read_lookup_sc_demographics.R
@@ -28,6 +28,9 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection
   if (!fs::file_exists(get_sandpit_extract_path(type = "demographics"))) {
     sc_demog %>%
       write_file(get_sandpit_extract_path(type = "demographics"))
+
+    sc_demog %>%
+      process_tests_sc_sandpit(type = "demographics")
   } else {
     sc_demog <- sc_demog
   }
diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R
index 5abd9bc7b..4af57d857 100644
--- a/R/read_sc_all_alarms_telecare.R
+++ b/R/read_sc_all_alarms_telecare.R
@@ -31,6 +31,9 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
   if (!fs::file_exists(get_sandpit_extract_path(type = "at"))) {
     at_full_data %>%
       write_file(get_sandpit_extract_path(type = "at"))
+
+    at_full_data %>%
+      process_tests_sandpit(type = "at")
   } else {
     at_full_data <- at_full_data
   }
diff --git a/R/read_sc_all_care_home.R b/R/read_sc_all_care_home.R
index 870a94ded..0e74d6623 100644
--- a/R/read_sc_all_care_home.R
+++ b/R/read_sc_all_care_home.R
@@ -33,6 +33,9 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn =
   if (!fs::file_exists(get_sandpit_extract_path(type = "ch"))) {
     ch_data %>%
       write_file(get_sandpit_extract_path(type = "ch"))
+
+    ch_data %>%
+      process_tests_sandpit(type = "ch")
   } else {
     ch_data <- ch_data
   }
diff --git a/R/read_sc_all_home_care.R b/R/read_sc_all_home_care.R
index cca2d0a9b..3741785a7 100644
--- a/R/read_sc_all_home_care.R
+++ b/R/read_sc_all_home_care.R
@@ -50,6 +50,9 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn =
   if (!fs::file_exists(get_sandpit_extract_path(type = "hc"))) {
     home_care_data %>%
       write_file(get_sandpit_extract_path(type = "hc"))
+
+    home_care_date %>%
+      process_tests_sandpit(type = "hc")
   } else {
     home_care_data <- home_care_data
   }
diff --git a/R/read_sc_all_sds.R b/R/read_sc_all_sds.R
index d9d5b8b1d..e184ffaeb 100644
--- a/R/read_sc_all_sds.R
+++ b/R/read_sc_all_sds.R
@@ -31,6 +31,9 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
   if (!fs::file_exists(get_sandpit_extract_path(type = "sds"))) {
     sds_full_data %>%
       write_file(get_sandpit_extract_path(type = "sds"))
+
+    sds_full_data %>%
+      process_tests_sandpit(type = "sds")
   } else {
     sds_full_data <- sds_full_data
   }
diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R
index ffe86f48f..d2e1490f2 100644
--- a/R/write_tests_xlsx.R
+++ b/R/write_tests_xlsx.R
@@ -19,15 +19,22 @@
 write_tests_xlsx <- function(comparison_data,
                              sheet_name,
                              year = NULL,
-                             workbook_name = c("ep_file", "indiv_file", "lookup", "extract")) {
+                             workbook_name = c("ep_file", "indiv_file", "lookup", "extract", "sandpit")) {
   # Set up the workbook ----
 
-  if (workbook_name == "lookup" | missing(year) & workbook_name == "lookup") {
-    tests_workbook_name <- stringr::str_glue(latest_update(), "_lookups_tests")
-  } else {
+  if (is.null(year)) {
     tests_workbook_name <- dplyr::case_when(
       workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"),
       workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"),
+      workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"),
+      workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests")
+    )
+  } else if (workbook_name == "sandpit" & !is.null(year)) {
+    tests_workbook_name <- dplyr::case_when(
+      workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests")
+    )
+  } else {
+    tests_workbook_name <- dplyr::case_when(
       workbook_name == "extract" ~ stringr::str_glue(latest_update(), "_{year}_extract_tests")
     )
   }
@@ -92,11 +99,11 @@ write_tests_xlsx <- function(comparison_data,
 
   date_today <- stringr::str_to_lower(date_today)
 
-  sheet_name_dated <- ifelse(
-    is.null(year),
-    stringr::str_glue("{sheet_name}_{date_today}"),
-    stringr::str_glue("{year}_{sheet_name}_{date_today}")
-  )
+  if (is.null(year)) {
+    sheet_name_dated <- stringr::str_glue("{sheet_name}_{date_today}")
+  } else {
+    sheet_name_dated <- stringr::str_glue("{year}_{sheet_name}_{date_today}")
+  }
 
   # If there has already been a sheet created today, append the time
   if (sheet_name_dated %in% names(wb)) {
diff --git a/man/process_tests_sc_sandpit.Rd b/man/process_tests_sc_sandpit.Rd
new file mode 100644
index 000000000..d3c1f5984
--- /dev/null
+++ b/man/process_tests_sc_sandpit.Rd
@@ -0,0 +1,20 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/process_tests_sc_sandpit.R
+\name{process_tests_sc_sandpit}
+\alias{process_tests_sc_sandpit}
+\title{Process tests for the social care sandpit extracts}
+\usage{
+process_tests_sc_sandpit(
+  type = c("at", "hc", "ch", "sds", "demographics", "client"),
+  year = NULL
+)
+}
+\arguments{
+\item{type}{Name of sandpit extract.}
+}
+\value{
+a \link[tibble:tibble-package]{tibble} containing a test comparison.
+}
+\description{
+Process tests for the social care sandpit extracts
+}
diff --git a/man/produce_sc_sandpit_tests.Rd b/man/produce_sc_sandpit_tests.Rd
new file mode 100644
index 000000000..4f34d506b
--- /dev/null
+++ b/man/produce_sc_sandpit_tests.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/process_tests_sc_sandpit.R
+\name{produce_sc_sandpit_tests}
+\alias{produce_sc_sandpit_tests}
+\title{Produce tests for social care sandpit extracts.}
+\usage{
+produce_sc_sandpit_tests(
+  data,
+  type = c("demographics", "client", "at", "ch", "hc", "sds")
+)
+}
+\arguments{
+\item{data}{new or old data for testing summary flags
+(data is from \code{\link[=get_sandpit_extract_path]{get_sandpit_extract_path()}})}
+
+\item{type}{Name of sandpit extract.}
+}
+\value{
+a dataframe with a count of each flag
+from \code{\link[=calculate_measures]{calculate_measures()}}
+}
+\description{
+Produce tests for social care sandpit extracts.
+}
diff --git a/man/write_tests_xlsx.Rd b/man/write_tests_xlsx.Rd
index c510e2570..0788d0080 100644
--- a/man/write_tests_xlsx.Rd
+++ b/man/write_tests_xlsx.Rd
@@ -8,7 +8,7 @@ write_tests_xlsx(
   comparison_data,
   sheet_name,
   year = NULL,
-  workbook_name = c("ep_file", "indiv_file", "lookup", "extract")
+  workbook_name = c("ep_file", "indiv_file", "lookup", "extract", "sandpit")
 )
 }
 \arguments{

From 3cee1513b298331cb6842f7ac100ed6c08e40cd8 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 22 Apr 2024 11:14:52 +0100
Subject: [PATCH 052/186] Remove filtering between 90-105% completeness

---
 R/process_extract_homelessness.R | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index 59541cf4e..fb824ed09 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -145,19 +145,6 @@ process_extract_homelessness <- function(
     sg_pub_path = sg_pub_path
   )
 
-  if (!is.null(completeness_data)) {
-    filtered_data <- data %>%
-      dplyr::left_join(completeness_data,
-        by = c("year", "sending_local_authority_name")
-      ) %>%
-      dplyr::filter(
-        dplyr::between(.data[["pct_complete_all"]], 0.90, 1.05) |
-          .data[["sending_local_authority_name"]] == "East Ayrshire"
-      )
-  } else {
-    filtered_data <- data
-  }
-
   # TODO - Include person_id (from client_id)
   final_data <- filtered_data %>%
     dplyr::select(

From 4adefae65b91f33758b768d8f2b8f98833e3e714 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 22 Apr 2024 15:08:31 +0100
Subject: [PATCH 053/186] Keep percentage comparison

---
 R/process_extract_homelessness.R | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index fb824ed09..2b35114cc 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -145,8 +145,16 @@ process_extract_homelessness <- function(
     sg_pub_path = sg_pub_path
   )
 
+  data <- data %>%
+    dplyr::left_join(
+      completeness_data %>%
+        dplyr::select(sending_local_authority_name, pct_complete_all),
+      by = dplyr::join_by("sending_local_authority_name")
+    ) %>%
+    dplyr::rename(hl1_completeness = pct_complete_all)
+
   # TODO - Include person_id (from client_id)
-  final_data <- filtered_data %>%
+  final_data <- data %>%
     dplyr::select(
       "year",
       "recid",
@@ -161,7 +169,8 @@ process_extract_homelessness <- function(
       hl1_application_ref = "application_reference_number",
       hl1_sending_lca = "sending_local_authority_code_9",
       hl1_property_type = "property_type_code",
-      "hl1_reason_ftm"
+      "hl1_reason_ftm",
+      hl1_completeness
     )
 
   if (write_to_disk) {

From 440699d71a0558a3cce09a58aa242cb7648403ea Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 23 Apr 2024 13:56:11 +0100
Subject: [PATCH 054/186] Add new variable pre/post hl1 application

---
 R/process_lookup_homelessness.R | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R
index 7137c6393..62199c9e8 100644
--- a/R/process_lookup_homelessness.R
+++ b/R/process_lookup_homelessness.R
@@ -101,6 +101,16 @@ add_homelessness_date_flags <- function(data, year, lookup = create_homelessness
       homeless_flag,
       by = c("chi", "record_keydate1", "record_keydate2", "recid"),
       relationship = "many-to-one"
+    ) %>%
+    dplyr::mutate(
+      hl1_12_months_pre_app = lubridate::rollback(.data$record_keydate1,
+        months(-12),
+        roll_to_first = TRUE
+      ),
+      hl1_12_months_post_app = lubridate::add_with_rollback(.data$record_keydate2,
+        months(12),
+        roll_to_first = TRUE
+      )
     )
 
   return(data)

From 9a72fe4036e06c9df72e0010e632189dd342aab1 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 29 Apr 2024 15:52:27 +0100
Subject: [PATCH 055/186] re-write the logic of fill_ch_names

---
 R/fill_ch_names.R | 724 ++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 607 insertions(+), 117 deletions(-)

diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index b7bdcf81d..dca87d05a 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -10,40 +10,48 @@
 #' @return the same data with improved accuracy and completeness of the Care
 #' Home names and postcodes, as a [tibble][tibble::tibble-package].
 fill_ch_names <- function(ch_data,
-                          ch_name_lookup_path = get_slf_ch_name_lookup_path(),
-                          spd_path = get_spd_path()) {
+                           ch_name_lookup_path = get_slf_ch_name_lookup_path(),
+                           spd_path = get_spd_path()) {
   ch_data <- ch_data %>%
     # Make the care home name more uniform
     dplyr::mutate(ch_name = clean_up_free_text(.data[["ch_name"]])) %>%
     # correct postcode formatting
     dplyr::mutate(
-      dplyr::across(
-        dplyr::contains("postcode"),
-        phsmethods::format_postcode
-      ),
+      dplyr::across(dplyr::contains("postcode"),
+                    phsmethods::format_postcode),
       # Replace invalid postcode with NA
       # Get a list of confirmed valid Scottish postcodes from the SPD
       ch_postcode = dplyr::if_else(
-        .data[["ch_postcode"]] %in% dplyr::pull(
-          read_file(spd_path, col_select = "pc7"),
-          "pc7"
-        ),
+        .data[["ch_postcode"]] %in% dplyr::pull(read_file(spd_path, col_select = "pc7"),
+                                                "pc7"),
         .data[["ch_postcode"]],
         NA_character_
-      )
+      ),
+      ch_name_keyword = ch_name_extract_keyword(ch_name)
+    ) %>%
+    # add unique identifier
+    dplyr::mutate(
+      unique_identifier = dplyr::row_number(),
+      ch_pc_partial = stringr::str_sub(ch_postcode, 1,-2),
+      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1,-3),
+      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1,-5),
+      ch_pc_partial4 = gsub("\\d.*", "", stringr::str_sub(ch_postcode, 1, 2))
     )
+  # There are many cases where a patient have many same ch_name and ch_pc, but
+  # there is one episode where ch_pc is different while ch_name is the same.
+  # fix this case here
 
-  # Care Home name lookup from the Care Inspectorate
-  # Previous contact 'Al Scougal' <Al.Scougal@careinspectorate.gov.scot>
+  # Contact: IntelligenceTeam@careinspectorate.gov.scot
+  # for an updated lookup list
   ch_name_lookup <- openxlsx::read.xlsx(ch_name_lookup_path,
-    detectDates = TRUE
-  ) %>%
+                                        detectDates = TRUE) %>%
     # Drop any Care Homes that were closed before 2017/18
     dplyr::select(
       ch_postcode = "AccomPostCodeNo",
       ch_name_validated = "ServiceName",
       ch_date_registered = "DateReg",
-      ch_date_cancelled = "DateCanx"
+      ch_date_cancelled = "DateCanx",
+      ch_active = tidyselect::contains("ServiceStatusAt")
     ) %>%
     dplyr::filter(
       is.na(.data[["ch_date_cancelled"]]) |
@@ -54,12 +62,16 @@ fill_ch_names <- function(ch_data,
       ch_postcode = phsmethods::format_postcode(.data[["ch_postcode"]]),
       ch_name_validated = clean_up_free_text(.data[["ch_name_validated"]]),
       ch_date_registered = lubridate::as_date(.data[["ch_date_registered"]]),
-      ch_date_cancelled = lubridate::as_date(.data[["ch_date_cancelled"]])
+      ch_date_cancelled = lubridate::as_date(.data[["ch_date_cancelled"]]),
+      ch_active = dplyr::case_match(ch_active,
+                                    "Active" ~ TRUE,
+                                    c("Cancelled", "Inactive") ~ FALSE) # new chagnes
     ) %>%
     # Merge any duplicates, and get the interval each CH name was active
     dplyr::group_by(.data[["ch_postcode"]], .data[["ch_name_validated"]]) %>%
     dplyr::summarise(
       # Find the latest date for each CH name / postcode
+      ch_date_registered = dplyr::first(ch_date_registered),
       latest_close_date = dplyr::if_else(
         is.na(max(.data[["ch_date_cancelled"]])),
         Sys.Date(),
@@ -68,19 +80,35 @@ fill_ch_names <- function(ch_data,
       open_interval = lubridate::interval(
         min(.data[["ch_date_registered"]]),
         .data[["latest_close_date"]]
-      )
+      ),
+      ch_active = any(ch_active) # new changes
     ) %>%
-    dplyr::ungroup()
+    dplyr::ungroup() %>%
+    dplyr::rename(ch_postcode_lookup = ch_postcode) %>%
+    dplyr::mutate(
+      ch_pc_partial = stringr::str_sub(ch_postcode_lookup, 1, -2), # new chagnes
+      ch_pc_partial2 = stringr::str_sub(ch_postcode_lookup, 1,-3),
+      ch_pc_partial3 = stringr::str_sub(ch_postcode_lookup, 1,-5),
+      ch_pc_partial4 = gsub("\\d.*", "", stringr::str_sub(ch_postcode_lookup, 1, 2)),
+      ch_name_validated_keyword = ch_name_extract_keyword(ch_name_validated)
+    )
+
+
+  # When matching the name, we need to consider episode time
+  # because the ch_name may match best with the one closed
+  # while the episode happen after it close.
+  # Namely, it is supposed to a new care home
+  # although the ch_name is not as alike as the closed one.
 
+
+  ## postcode matching process ----
   # Generate some metrics for how the submitted names connect to the valid names
-  ch_name_best_match <- ch_data %>%
-    dplyr::distinct(.data[["ch_postcode"]], .data[["ch_name"]]) %>%
+  ch_pc_match <- ch_data %>%
     dplyr::left_join(ch_name_lookup,
-      by = dplyr::join_by("ch_postcode"),
-      multiple = "all",
-      na_matches = "never"
+                     by = "ch_pc_partial",
+                     multiple = "all",
+                     na_matches = "never"
     ) %>%
-    tidyr::drop_na() %>%
     # Work out string distances between names for each postcode
     dplyr::mutate(
       match_distance_jaccard = stringdist::stringdist(
@@ -94,136 +122,598 @@ fill_ch_names <- function(ch_data,
         method = "cosine"
       ),
       match_mean = (.data[["match_distance_jaccard"]] +
-        .data[["match_distance_cosine"]]) / 2.0
-    ) %>%
-    # Drop any name matches which aren't very close
-    dplyr::filter(.data[["match_distance_jaccard"]] <= 0.25 |
-      .data[["match_distance_cosine"]] <= 0.3) %>%
-    dplyr::group_by(
-      .data[["ch_postcode"]],
-      .data[["ch_name"]],
-      .data[["open_interval"]]
+                      .data[["match_distance_cosine"]]) / 2.0,
+      # ch_name_keyword distances
+      match_distance_jaccard2 = stringdist::stringdist(
+        .data[["ch_name_keyword"]],
+        .data[["ch_name_validated_keyword"]],
+        method = "jaccard"
+      ),
+      match_distance_cosine2 = stringdist::stringdist(
+        .data[["ch_name_keyword"]],
+        .data[["ch_name_validated_keyword"]],
+        method = "cosine"
+      ),
+      match_mean2 = (.data[["match_distance_jaccard2"]] +
+                       .data[["match_distance_cosine2"]]) / 2.0
     ) %>%
+    # ch_admission_date might be inaccurate.
+    # So ch_admission_date >= ch_date_registered is NOT used.
+    dplyr::filter(ch_admission_date <= latest_close_date) %>%
     dplyr::mutate(
-      min_match_mean = min(.data[["match_mean"]], na.rm = TRUE)
-    ) %>%
-    # Identify the closest match in case there are multiple close matches
-    # If there's still multiple matches just pick the shortest
-    dplyr::arrange(
-      "min_match_mean",
-      length(.data[["ch_name_validated"]])
-    ) %>%
-    dplyr::ungroup() %>%
-    dplyr::distinct(.data[["ch_postcode"]],
-      .data[["ch_name"]],
-      .keep_all = TRUE
+      postcode_matching = (ch_postcode == ch_postcode_lookup),
+      ### quality 1L-12L ----
+      matching_quality_indicator_overall = dplyr::case_when(
+        # 1 to 9, perfect to ok.
+        # 20 is unacceptable
+
+        # if care home postcode perfectly match, then
+        # even if care home name is NA,
+        # we still overwrite the ch_name from ch_name_lookup
+
+        # we probably do not overwrite ch_name and ch_postcode if 10?
+        match_mean  < 0.001 &  postcode_matching ~ 1L,
+        match_mean2 < 0.001 &  postcode_matching ~ 2L,
+        match_mean  < 0.001 & !postcode_matching ~ 3L,
+        match_mean2 < 0.001 & !postcode_matching ~ 4L,
+
+        match_mean  < 0.1   &  postcode_matching ~ 5L,
+        match_mean2 < 0.1   &  postcode_matching ~ 6L,
+        match_mean  < 0.1   & !postcode_matching ~ 7L,
+        match_mean2 < 0.1   & !postcode_matching ~ 8L,
+
+        (match_mean  < 0.4 | match_mean2 < 0.4)   &  postcode_matching ~ 9L,
+        (match_mean  < 0.4 | match_mean2 < 0.4)   & !postcode_matching ~ 10L,
+
+        is.na(ch_name)      &  postcode_matching ~ 11L,
+        is.na(ch_name)      & !postcode_matching ~ 12L,
+        .default = 100L
+        # cases 100L will be improved in the next section
+        # 100L means no matching
+      )
     ) %>%
     dplyr::select(
+      "unique_identifier",
+      "chi",
       "ch_postcode",
+      "ch_postcode_lookup",
+      "postcode",
       "ch_name",
+      "ch_name_keyword",
       "ch_name_validated",
-      "open_interval",
-      "latest_close_date"
+      "ch_name_validated_keyword",
+      "match_mean",
+      "match_mean2",
+      # "open_interval",
+      "ch_admission_date",
+      "qtr_start",
+      "ch_date_registered",
+      "latest_close_date",
+      "ch_active",
+      "postcode_matching",
+      "matching_quality_indicator_overall",
+      everything()
     ) %>%
-    dplyr::arrange(
-      "ch_postcode",
-      "ch_name",
-      "open_interval"
+    dplyr::arrange(unique_identifier, matching_quality_indicator_overall) %>%
+    dplyr::distinct(.data[["unique_identifier"]],
+                    .keep_all = TRUE
     )
 
-  no_match_pc_name_bad <- ch_data %>%
-    dplyr::anti_join(ch_name_lookup,
-      by = dplyr::join_by("ch_postcode"),
-      na_matches = "never"
-    ) %>%
-    dplyr::filter(
-      !is.na(.data[["ch_name"]]) & !is.na(.data[["ch_postcode"]])
+
+  # fix matching quality being 100, meaning bad
+  # After this great process,
+  # there are around 7.5% with matching_quality_indicator_overall being 100
+  # This means that
+  # cases coming from postcode matching does not matching names at all
+  # But some of them may vaguely matching name
+  # but match only main area of postcode (say EH1, G1)
+  # We now try to find out these cases
+
+  ### quality 13L ----
+  # continuous episodes with consistent ch_name and inconsistent ch_postcode
+  # here to fix some postcode, which is part of fixing matching quality being 100.
+  # Cases to be fixed here:
+  # For some ch records, for a chi number,
+  # ch_name are consistent while
+  # ch_postcode are different,
+  # and those episodes seem consistent, indicated by good matching quality.
+  # Then, overwrite the minority of records with matching quality being 100.
+  ch_pc_match = ch_pc_match %>%
+    dplyr::arrange(chi, ch_name, matching_quality_indicator_overall) %>%
+    dplyr::group_by(chi, ch_name) %>%
+    dplyr::mutate(
+      # Best_quality_within_group_chi_name is supposed to be minimum within a group.
+      # Since we sort matching_quality_indicator_overall, first is ok.
+      best_quality_within_group_chi_name = dplyr::first(matching_quality_indicator_overall),
+      ch_postcode_lookup_best = dplyr::first(ch_postcode_lookup),
+      ch_name_validated_best = dplyr::first(ch_name_validated),
+      ch_name_validated_keyword_best = dplyr::first(ch_name_validated_keyword)
     ) %>%
-    dplyr::left_join(ch_name_best_match,
-      by = dplyr::join_by(
-        "ch_name",
-        closest("ch_admission_date" <= "latest_close_date")
+    dplyr::ungroup() %>%
+    dplyr::mutate(
+      overwrite_pc = (
+        matching_quality_indicator_overall == 100L &
+          best_quality_within_group_chi_name <= 10L
       ),
-      multiple = "last",
-      na_matches = "never",
-      suffix = c("_old", "")
-    ) %>%
+      matching_quality_indicator_overall = dplyr::if_else(overwrite_pc,
+                                                          13L,
+                                                          matching_quality_indicator_overall),
+      ch_postcode_lookup = dplyr::if_else(overwrite_pc,
+                                          ch_postcode_lookup_best,
+                                          ch_postcode_lookup),
+      ch_name_validated = dplyr::if_else(overwrite_pc,
+                                         ch_name_validated_best,
+                                         ch_name_validated),
+      ch_name_validated_keyword = dplyr::if_else(
+        overwrite_pc,
+        ch_name_validated_keyword_best,
+        ch_name_validated_keyword
+      )
+    )
+
+  ### quality 14L, ch_postcode match ----
+  # if ch_postcode perfect match,
+  # then we accept ch_name_lookup and overwrite ch_name
+
+  col_to_select = c(
+    "unique_identifier",
+    "matching_quality_indicator_overall",
+    "sending_location",
+    "latest_sc_id",
+    "chi",
+    "ch_name",
+    "ch_postcode",
+    "social_care_id",
+    "period",
+    "period_start_date",
+    "period_end_date",
+    "ch_provider",
+    "reason_for_admission",
+    "type_of_admission",
+    "nursing_care_provision",
+    "ch_admission_date",
+    "ch_discharge_date",
+    "age",
+    "record_date",
+    "qtr_start",
+    "latest_flag",
+    "gender",
+    "dob",
+    "postcode",
+    "date_of_death",
+    "ch_name_validated",
+    "open_interval",
+    "latest_close_date",
+    "ch_name_old",
+    "ch_postcode_old",
+    "ch_name_keyword"
+  )
+
+  ch_pc_match = ch_pc_match %>%
     dplyr::mutate(
-      ch_postcode = dplyr::if_else(!is_missing(.data[["ch_postcode"]]),
-        .data[["ch_postcode"]],
-        .data[["ch_postcode_old"]]
+      matching_quality_indicator_overall = dplyr::if_else(
+        matching_quality_indicator_overall == 100L & postcode_matching,
+        14L,
+        matching_quality_indicator_overall
       )
+    ) %>%
+    # now remove cases of quality being 100L for the next section:
+    # ch_name matching
+    dplyr::filter(matching_quality_indicator_overall != 100L) %>%
+    dplyr::mutate(
+      ch_name_old = ch_name,
+      ch_postcode_old = ch_postcode,
+      ch_name = ch_name_validated,
+      ch_postcode = ch_postcode_lookup
+    ) %>%
+    dplyr::select(
+      dplyr::all_of(col_to_select)
     )
 
-  no_match_pc_name_missing <- ch_data %>%
-    dplyr::anti_join(ch_name_lookup,
-      by = dplyr::join_by("ch_postcode"),
+  ## matching by ch_name, quality 15L-21L ----
+  ### perfect matching by ch_name, and main part of postcode, quality 15L ----
+  # ch_name matching, then overwrite postcode from ch_lookup
+  # 15L means perfect matching name,
+  # and relevant dates align,
+  # but not the main part of the postcode, say "EH12"
+
+  ch_name_match1 = ch_data %>%
+    dplyr::anti_join(ch_pc_match,
+                     by = dplyr::join_by(unique_identifier)) %>%
+    dplyr::inner_join(
+      ch_name_lookup,
+      by = dplyr::join_by(
+        ch_name == ch_name_validated,
+        ch_admission_date <= latest_close_date,
+        ch_admission_date >= ch_date_registered,
+        # some care homes have same name, so use ch_pc_parital2 to filter
+        ch_pc_partial3
+      )
+    ) %>%
+    dplyr::mutate(
+      ch_name_old = ch_name,
+      ch_postcode_old = ch_postcode,
+      ch_name_validated = ch_name,
+      # ch_name_validated is omitted because of join_by(), add back
+      ch_postcode = ch_postcode_lookup,
+      matching_quality_indicator_overall = 15L
+    ) %>%
+    dplyr::select(dplyr::all_of(col_to_select))
+
+  ### fizzy matching by ch_name, and matching main part of postcode, quality 16L ----
+  ch_match = dplyr::bind_rows(ch_pc_match, ch_name_match1)
+
+  ch_name_match2 = ch_data %>%
+    dplyr::anti_join(ch_match,
+                     by = dplyr::join_by(unique_identifier)) %>%
+    dplyr::inner_join(
+      ch_name_lookup,
+      by = dplyr::join_by(
+        ch_name_keyword == ch_name_validated_keyword,
+        ch_admission_date <= latest_close_date,
+        ch_admission_date >= ch_date_registered,
+        ch_pc_partial3
+      ),
       na_matches = "never"
     ) %>%
-    dplyr::filter(is.na(.data[["ch_name"]]) & is.na(.data[["ch_postcode"]]))
+    dplyr::mutate(
+      ch_name_old = ch_name,
+      ch_postcode_old = ch_postcode,
+      ch_name = ch_name_validated,
+      ch_postcode = ch_postcode_lookup,
+      matching_quality_indicator_overall = 16L,
+      match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
+                                                      .data[["ch_name_validated"]],
+                                                      method = "jaccard")
+    ) %>%
+    dplyr::arrange(
+      unique_identifier,
+      match_distance_jaccard
+    ) %>%
+    dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
+    dplyr::select(dplyr::all_of(col_to_select))
+
+  ### fizzy matching by ch_name, and same city, quality 17L ----
+  ch_match = dplyr::bind_rows(ch_pc_match, ch_name_match1, ch_name_match2)
 
-  no_match_pc_missing <- ch_data %>%
-    dplyr::anti_join(ch_name_lookup,
-      by = dplyr::join_by("ch_postcode"),
+  ch_name_match3 = ch_data %>%
+    dplyr::anti_join(ch_match,
+                     by = dplyr::join_by(unique_identifier)) %>%
+    dplyr::inner_join(
+      ch_name_lookup,
+      by = dplyr::join_by(
+        ch_name_keyword == ch_name_validated_keyword,
+        ch_admission_date <= latest_close_date,
+        ch_admission_date >= ch_date_registered,
+        ch_pc_partial4
+      ),
       na_matches = "never"
     ) %>%
-    dplyr::filter(
-      !is.na(.data[["ch_name"]]) & is.na(.data[["ch_postcode"]])
+    dplyr::mutate(
+      ch_name_old = ch_name,
+      ch_postcode_old = ch_postcode,
+      ch_name = ch_name_validated,
+      ch_postcode = ch_postcode_lookup,
+      matching_quality_indicator_overall = 17L,
+      match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
+                                                      .data[["ch_name_validated"]],
+                                                      method = "jaccard")
     ) %>%
-    dplyr::left_join(ch_name_best_match,
+    dplyr::arrange(
+      unique_identifier,
+      match_distance_jaccard
+    ) %>%
+    dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
+    dplyr::select(dplyr::all_of(col_to_select))
+
+  ch_match = dplyr::bind_rows(ch_pc_match,
+                              ch_name_match1,
+                              ch_name_match2,
+                              ch_name_match3)
+
+  ### ch_postcode and postcode exchange, then matching, quality 18L----
+  ch_pc_exchange_match1 = ch_data %>%
+    dplyr::anti_join(ch_match,
+                     by = dplyr::join_by(unique_identifier)) %>%
+    dplyr::mutate(
+      intermediate_pc = ch_postcode,
+      ch_postcode = postcode,
+      postcode = ch_postcode,
+      ch_pc_partial = stringr::str_sub(ch_postcode, 1,-2),
+      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1,-3),
+      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1,-5)
+    ) %>%
+    dplyr::select(-intermediate_pc) %>%
+    dplyr::inner_join(
+      ch_name_lookup,
       by = dplyr::join_by(
-        "ch_name",
-        closest("ch_admission_date" <= "latest_close_date")
+        ch_name_keyword == ch_name_validated_keyword,
+        ch_admission_date <= latest_close_date,
+        ch_admission_date >= ch_date_registered,
+        ch_pc_partial
       ),
-      multiple = "last",
-      na_matches = "never",
-      suffix = c("_old", "")
+      na_matches = "never"
     ) %>%
     dplyr::mutate(
-      ch_postcode = dplyr::if_else(!is_missing(.data[["ch_postcode"]]),
-        .data[["ch_postcode"]],
-        .data[["ch_postcode_old"]]
-      )
-    )
+      ch_name_old = ch_name,
+      ch_postcode_old = ch_postcode,
+      ch_name = ch_name_validated,
+      ch_postcode = ch_postcode_lookup,
+      matching_quality_indicator_overall = 18L,
+      match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
+                                                      .data[["ch_name_validated"]],
+                                                      method = "jaccard")
+    ) %>%
+    dplyr::arrange(
+      unique_identifier,
+      match_distance_jaccard
+    ) %>%
+    dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
+    dplyr::select(dplyr::all_of(col_to_select))
+
+  ch_match = dplyr::bind_rows(ch_pc_match,
+                              ch_name_match1,
+                              ch_name_match2,
+                              ch_name_match3,
+                              ch_pc_exchange_match1)
 
-  no_match_name_missing <- ch_data %>%
-    dplyr::anti_join(ch_name_lookup,
-      by = dplyr::join_by("ch_postcode"),
+  ## Other matching processes ----
+  ### quality 19L----
+  # ch_postcode and postcode exchange,
+  # then fizzy match ch_name, and matching main part of postcode
+  ch_pc_exchange_match2 = ch_data %>%
+    dplyr::anti_join(ch_match,
+                     by = dplyr::join_by(unique_identifier)) %>%
+    dplyr::mutate(
+      intermediate_pc = ch_postcode,
+      ch_postcode = postcode,
+      postcode = ch_postcode,
+      ch_pc_partial = stringr::str_sub(ch_postcode, 1,-2),
+      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1,-3),
+      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1,-5)
+    ) %>%
+    dplyr::select(-intermediate_pc) %>%
+    dplyr::inner_join(
+      ch_name_lookup,
+      by = dplyr::join_by(
+        ch_name_keyword == ch_name_validated_keyword,
+        ch_admission_date <= latest_close_date,
+        ch_admission_date >= ch_date_registered,
+        ch_pc_partial3
+      ),
       na_matches = "never"
     ) %>%
-    dplyr::filter(is.na(.data[["ch_name"]]) & !is.na(.data[["ch_postcode"]]))
+    dplyr::mutate(
+      ch_name_old = ch_name,
+      ch_postcode_old = ch_postcode,
+      ch_name = ch_name_validated,
+      ch_postcode = ch_postcode_lookup,
+      matching_quality_indicator_overall = 19L,
+      match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
+                                                      .data[["ch_name_validated"]],
+                                                      method = "jaccard")
+    ) %>%
+    dplyr::arrange(
+      unique_identifier,
+      match_distance_jaccard
+    ) %>%
+    dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
+    dplyr::select(dplyr::all_of(col_to_select))
+
+  ch_match = dplyr::bind_rows(ch_pc_match,
+                              ch_name_match1,
+                              ch_name_match2,
+                              ch_name_match3,
+                              ch_pc_exchange_match1,
+                              ch_pc_exchange_match2)
+
 
-  ch_name_pc_clean <- ch_data %>%
-    # Remove records with no matching postcode, we'll add them back later
-    dplyr::semi_join(ch_name_lookup,
-      by = dplyr::join_by("ch_postcode"),
+  ### quality 20L----
+  # perfect match care home name, regardless of postcode,
+  # excluding those duplicated care home names.
+  unique_ch_name = unique(ch_name_lookup$ch_name_validated)
+
+  ch_name_match4 = ch_data %>%
+    dplyr::anti_join(ch_match,
+                     by = dplyr::join_by(unique_identifier)) %>%
+    dplyr::inner_join(
+      ch_name_lookup,
+      by = dplyr::join_by(
+        ch_name == ch_name_validated,
+        ch_admission_date <= latest_close_date,
+        ch_admission_date >= ch_date_registered
+      ),
       na_matches = "never"
     ) %>%
-    dplyr::left_join(ch_name_best_match,
+    dplyr::filter(
+      ch_name %in% unique_ch_name
+    ) %>%
+    dplyr::mutate(
+      ch_name_old = ch_name,
+      ch_postcode_old = ch_postcode,
+      # add ch_name_validated back since omitted in join_by()
+      ch_name_validated = ch_name,
+      ch_postcode = ch_postcode_lookup,
+      matching_quality_indicator_overall = 20L
+    ) %>%
+    dplyr::select(dplyr::all_of(col_to_select))
+
+  ch_match = dplyr::bind_rows(ch_pc_match,
+                              ch_name_match1,
+                              ch_name_match2,
+                              ch_name_match3,
+                              ch_pc_exchange_match1,
+                              ch_pc_exchange_match2,
+                              ch_name_match4)
+
+  ### quality 21L----
+  # fizzy match care home name, regardless of postcode,
+  # excluding those duplicated care home names.
+  ch_name_match5 = ch_data %>%
+    dplyr::anti_join(ch_match,
+                     by = dplyr::join_by(unique_identifier)) %>%
+    dplyr::inner_join(
+      ch_name_lookup,
       by = dplyr::join_by(
-        "ch_postcode",
-        "ch_name",
-        closest("ch_admission_date" <= "latest_close_date")
+        ch_name_keyword == ch_name_validated_keyword,
+        ch_admission_date <= latest_close_date,
+        ch_admission_date >= ch_date_registered
       ),
       na_matches = "never"
     ) %>%
+    dplyr::filter(
+      ch_name %in% unique_ch_name
+    ) %>%
     dplyr::mutate(
-      ch_name_old = .data[["ch_name"]],
-      ch_name = dplyr::if_else(!is_missing(.data[["ch_name_validated"]]),
-        .data[["ch_name_validated"]],
-        .data[["ch_name"]]
-      )
+      ch_name_old = ch_name,
+      ch_postcode_old = ch_postcode,
+      ch_name = ch_name_validated,
+      ch_postcode = ch_postcode_lookup,
+      matching_quality_indicator_overall = 21L,
+      match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
+                                                      .data[["ch_name_validated"]],
+                                                      method = "jaccard")
     ) %>%
-    # Bring back the records which had no postcode match
-    dplyr::bind_rows(
-      no_match_pc_name_bad,
-      no_match_pc_name_missing,
-      no_match_pc_missing,
-      no_match_name_missing
-    )
+    dplyr::arrange(
+      unique_identifier,
+      match_distance_jaccard
+    ) %>%
+    dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
+    dplyr::select(dplyr::all_of(col_to_select))
+
+  ch_match = dplyr::bind_rows(ch_pc_match,
+                              ch_name_match1,
+                              ch_name_match2,
+                              ch_name_match3,
+                              ch_pc_exchange_match1,
+                              ch_pc_exchange_match2,
+                              ch_name_match4,
+                              ch_name_match5)
+
+
+
+  # add 100L for non-matching episodes
+  ch_no_match = ch_data %>%
+    dplyr::anti_join(ch_match,
+                     by = dplyr::join_by(unique_identifier)) %>%
+    # dplyr::distinct(ch_name, .keep_all = TRUE) %>%
+    dplyr::mutate(
+      matching_quality_indicator_overall = 100L,
+      ch_name_old = ch_name,
+      ch_postcode_old = ch_postcode,
+      ch_name_validated = NA_character_,
+      open_interval = NA,
+      latest_close_date = NA,
+      ch_date_registered = NA
+    ) %>%
+    dplyr::select(dplyr::all_of(col_to_select))
+
+  ### quality 30L----
+  # episodes sharing common chi
+  # and ch_name with those episodes with good match quality
+  ch_data_final = dplyr::bind_rows(ch_match, ch_no_match) %>%
+    dplyr::arrange(chi, ch_name_keyword, matching_quality_indicator_overall) %>%
+    dplyr::group_by(chi, ch_name_keyword) %>%
+    dplyr::mutate(
+      same_ch_name = (
+        dplyr::first(matching_quality_indicator_overall) <= 10L &
+          matching_quality_indicator_overall == 100L
+      ),
+      ch_name = dplyr::if_else(same_ch_name,
+                               dplyr::first(ch_name),
+                               ch_name),
+      ch_postcode = dplyr::if_else(same_ch_name,
+                                   dplyr::first(ch_postcode),
+                                   ch_postcode),
+      matching_quality_indicator_overall = dplyr::if_else(same_ch_name,
+                                                          30L,
+                                                          matching_quality_indicator_overall)
+    ) %>%
+    dplyr::ungroup() %>%
+    dplyr::arrange(unique_identifier) %>%
+    dplyr::select(dplyr::all_of(col_to_select))
+
+  ## For any future amendment or quality check ----
+  # ch_data_final %>%
+  #   dplyr::group_by(matching_quality_indicator_overall) %>%
+  #   dplyr::summarise(n = dplyr::n()) %>%
+  #   dplyr::mutate(pct = n/sum(n)*100) %>%
+  #   print(n=100) %>%
+  #   write.csv("ch_quality.csv")
+
+  ## produce output ----
+  col_output = c(
+    "sending_location",
+    "latest_sc_id",
+    "chi",
+    "ch_name",
+    "ch_postcode",
+    "social_care_id",
+    "period",
+    "period_start_date",
+    "period_end_date",
+    "ch_provider",
+    "reason_for_admission",
+    "type_of_admission",
+    "nursing_care_provision",
+    "ch_admission_date",
+    "ch_discharge_date",
+    "age",
+    "record_date",
+    "qtr_start",
+    "latest_flag",
+    "gender",
+    "dob",
+    "postcode",
+    "date_of_death",
+    "ch_name_validated",
+    "open_interval",
+    "latest_close_date",
+    "ch_name_old",
+    "ch_postcode_old"
+  )
+
+  return(ch_data_final %>%
+           dplyr::select(dplyr::all_of(col_output)))
+}
 
-  # TODO Check if we can fill in ch_names or ch_postcodes when a client has
-  # multiple episodes
 
-  return(ch_name_pc_clean)
+#' extract keyword in a care home name
+#' @param ch_name care home names
+ch_name_extract_keyword = function(ch_name){
+  ch_stopwords = c(
+    "home",
+    "homes",
+    "care",
+    "house",
+    "nursing",
+    "centre",
+    "court",
+    "lodge",
+    "residential",
+    "view",
+    "st",
+    "park",
+    "manor",
+    "grange",
+    "grove",
+    "futures",
+    "respite",
+    "unit",
+    "hall",
+    "ltd",
+    "the",
+    "for",
+    "elderly",
+    "limited",
+    "service",
+    "services",
+    "place",
+    "suite",
+    "luxury"
+  ) %>% stringr::str_to_title()
+  # Drop whole stopwords only ("St" must not be cut out of "Stirling") and
+  # squish whitespace; returns the remaining keyword(s), one per input name.
+  stopword_rx <- paste0("\\b(", paste0(ch_stopwords, collapse = "|"), ")\\b")
+  return(stringr::str_squish(gsub(stopword_rx, "", ch_name, perl = TRUE)))
 }

From 375ece2298828186ea5b72984fd4046cae8b7931 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Mon, 29 Apr 2024 14:59:56 +0000
Subject: [PATCH 056/186] Update documentation

---
 man/ch_name_extract_keyword.Rd | 14 ++++++++++++++
 1 file changed, 14 insertions(+)
 create mode 100644 man/ch_name_extract_keyword.Rd

diff --git a/man/ch_name_extract_keyword.Rd b/man/ch_name_extract_keyword.Rd
new file mode 100644
index 000000000..ab37cc94d
--- /dev/null
+++ b/man/ch_name_extract_keyword.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fill_ch_names.R
+\name{ch_name_extract_keyword}
+\alias{ch_name_extract_keyword}
+\title{extract keyword in a care home name}
+\usage{
+ch_name_extract_keyword(ch_name)
+}
+\arguments{
+\item{ch_name}{care home names}
+}
+\description{
+extract keyword in a care home name
+}

From fdcab301c19b10a08463d082ec8540b6467e26cd Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Mon, 29 Apr 2024 15:13:18 +0000
Subject: [PATCH 057/186] Style code

---
 R/fill_ch_names.R | 273 ++++++++++++++++++++++++++--------------------
 1 file changed, 153 insertions(+), 120 deletions(-)

diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index dca87d05a..1ac116708 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -10,20 +10,24 @@
 #' @return the same data with improved accuracy and completeness of the Care
 #' Home names and postcodes, as a [tibble][tibble::tibble-package].
 fill_ch_names <- function(ch_data,
-                           ch_name_lookup_path = get_slf_ch_name_lookup_path(),
-                           spd_path = get_spd_path()) {
+                          ch_name_lookup_path = get_slf_ch_name_lookup_path(),
+                          spd_path = get_spd_path()) {
   ch_data <- ch_data %>%
     # Make the care home name more uniform
     dplyr::mutate(ch_name = clean_up_free_text(.data[["ch_name"]])) %>%
     # correct postcode formatting
     dplyr::mutate(
-      dplyr::across(dplyr::contains("postcode"),
-                    phsmethods::format_postcode),
+      dplyr::across(
+        dplyr::contains("postcode"),
+        phsmethods::format_postcode
+      ),
       # Replace invalid postcode with NA
       # Get a list of confirmed valid Scottish postcodes from the SPD
       ch_postcode = dplyr::if_else(
-        .data[["ch_postcode"]] %in% dplyr::pull(read_file(spd_path, col_select = "pc7"),
-                                                "pc7"),
+        .data[["ch_postcode"]] %in% dplyr::pull(
+          read_file(spd_path, col_select = "pc7"),
+          "pc7"
+        ),
         .data[["ch_postcode"]],
         NA_character_
       ),
@@ -32,9 +36,9 @@ fill_ch_names <- function(ch_data,
     # add unique identifier
     dplyr::mutate(
       unique_identifier = dplyr::row_number(),
-      ch_pc_partial = stringr::str_sub(ch_postcode, 1,-2),
-      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1,-3),
-      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1,-5),
+      ch_pc_partial = stringr::str_sub(ch_postcode, 1, -2),
+      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1, -3),
+      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1, -5),
       ch_pc_partial4 = gsub("\\d.*", "", stringr::str_sub(ch_postcode, 1, 2))
     )
   # There are many cases where a patient have many same ch_name and ch_pc, but
@@ -44,7 +48,8 @@ fill_ch_names <- function(ch_data,
   # Contact: IntelligenceTeam@careinspectorate.gov.scot
   # for an updated lookup list
   ch_name_lookup <- openxlsx::read.xlsx(ch_name_lookup_path,
-                                        detectDates = TRUE) %>%
+    detectDates = TRUE
+  ) %>%
     # Drop any Care Homes that were closed before 2017/18
     dplyr::select(
       ch_postcode = "AccomPostCodeNo",
@@ -63,9 +68,11 @@ fill_ch_names <- function(ch_data,
       ch_name_validated = clean_up_free_text(.data[["ch_name_validated"]]),
       ch_date_registered = lubridate::as_date(.data[["ch_date_registered"]]),
       ch_date_cancelled = lubridate::as_date(.data[["ch_date_cancelled"]]),
-      ch_active = dplyr::case_match(ch_active,
-                                    "Active" ~ TRUE,
-                                    c("Cancelled", "Inactive") ~ FALSE) # new chagnes
+      ch_active = dplyr::case_match(
+        ch_active,
+        "Active" ~ TRUE,
+        c("Cancelled", "Inactive") ~ FALSE
+      ) # new chagnes
     ) %>%
     # Merge any duplicates, and get the interval each CH name was active
     dplyr::group_by(.data[["ch_postcode"]], .data[["ch_name_validated"]]) %>%
@@ -87,8 +94,8 @@ fill_ch_names <- function(ch_data,
     dplyr::rename(ch_postcode_lookup = ch_postcode) %>%
     dplyr::mutate(
       ch_pc_partial = stringr::str_sub(ch_postcode_lookup, 1, -2), # new chagnes
-      ch_pc_partial2 = stringr::str_sub(ch_postcode_lookup, 1,-3),
-      ch_pc_partial3 = stringr::str_sub(ch_postcode_lookup, 1,-5),
+      ch_pc_partial2 = stringr::str_sub(ch_postcode_lookup, 1, -3),
+      ch_pc_partial3 = stringr::str_sub(ch_postcode_lookup, 1, -5),
       ch_pc_partial4 = gsub("\\d.*", "", stringr::str_sub(ch_postcode_lookup, 1, 2)),
       ch_name_validated_keyword = ch_name_extract_keyword(ch_name_validated)
     )
@@ -105,9 +112,9 @@ fill_ch_names <- function(ch_data,
   # Generate some metrics for how the submitted names connect to the valid names
   ch_pc_match <- ch_data %>%
     dplyr::left_join(ch_name_lookup,
-                     by = "ch_pc_partial",
-                     multiple = "all",
-                     na_matches = "never"
+      by = "ch_pc_partial",
+      multiple = "all",
+      na_matches = "never"
     ) %>%
     # Work out string distances between names for each postcode
     dplyr::mutate(
@@ -122,7 +129,7 @@ fill_ch_names <- function(ch_data,
         method = "cosine"
       ),
       match_mean = (.data[["match_distance_jaccard"]] +
-                      .data[["match_distance_cosine"]]) / 2.0,
+        .data[["match_distance_cosine"]]) / 2.0,
       # ch_name_keyword distances
       match_distance_jaccard2 = stringdist::stringdist(
         .data[["ch_name_keyword"]],
@@ -135,7 +142,7 @@ fill_ch_names <- function(ch_data,
         method = "cosine"
       ),
       match_mean2 = (.data[["match_distance_jaccard2"]] +
-                       .data[["match_distance_cosine2"]]) / 2.0
+        .data[["match_distance_cosine2"]]) / 2.0
     ) %>%
     # ch_admission_date might be inaccurate.
     # So ch_admission_date >= ch_date_registered is NOT used.
@@ -152,21 +159,18 @@ fill_ch_names <- function(ch_data,
         # we still overwrite the ch_name from ch_name_lookup
 
         # we probably do not overwrite ch_name and ch_postcode if 10?
-        match_mean  < 0.001 &  postcode_matching ~ 1L,
-        match_mean2 < 0.001 &  postcode_matching ~ 2L,
-        match_mean  < 0.001 & !postcode_matching ~ 3L,
+        match_mean < 0.001 & postcode_matching ~ 1L,
+        match_mean2 < 0.001 & postcode_matching ~ 2L,
+        match_mean < 0.001 & !postcode_matching ~ 3L,
         match_mean2 < 0.001 & !postcode_matching ~ 4L,
-
-        match_mean  < 0.1   &  postcode_matching ~ 5L,
-        match_mean2 < 0.1   &  postcode_matching ~ 6L,
-        match_mean  < 0.1   & !postcode_matching ~ 7L,
-        match_mean2 < 0.1   & !postcode_matching ~ 8L,
-
-        (match_mean  < 0.4 | match_mean2 < 0.4)   &  postcode_matching ~ 9L,
-        (match_mean  < 0.4 | match_mean2 < 0.4)   & !postcode_matching ~ 10L,
-
-        is.na(ch_name)      &  postcode_matching ~ 11L,
-        is.na(ch_name)      & !postcode_matching ~ 12L,
+        match_mean < 0.1 & postcode_matching ~ 5L,
+        match_mean2 < 0.1 & postcode_matching ~ 6L,
+        match_mean < 0.1 & !postcode_matching ~ 7L,
+        match_mean2 < 0.1 & !postcode_matching ~ 8L,
+        (match_mean < 0.4 | match_mean2 < 0.4) & postcode_matching ~ 9L,
+        (match_mean < 0.4 | match_mean2 < 0.4) & !postcode_matching ~ 10L,
+        is.na(ch_name) & postcode_matching ~ 11L,
+        is.na(ch_name) & !postcode_matching ~ 12L,
         .default = 100L
         # cases 100L will be improved in the next section
         # 100L means no matching
@@ -196,7 +200,7 @@ fill_ch_names <- function(ch_data,
     ) %>%
     dplyr::arrange(unique_identifier, matching_quality_indicator_overall) %>%
     dplyr::distinct(.data[["unique_identifier"]],
-                    .keep_all = TRUE
+      .keep_all = TRUE
     )
 
 
@@ -218,7 +222,7 @@ fill_ch_names <- function(ch_data,
   # ch_postcode are different,
   # and those episodes seem consistent, indicated by good matching quality.
   # Then, overwrite the minority of records with matching quality being 100.
-  ch_pc_match = ch_pc_match %>%
+  ch_pc_match <- ch_pc_match %>%
     dplyr::arrange(chi, ch_name, matching_quality_indicator_overall) %>%
     dplyr::group_by(chi, ch_name) %>%
     dplyr::mutate(
@@ -236,14 +240,17 @@ fill_ch_names <- function(ch_data,
           best_quality_within_group_chi_name <= 10L
       ),
       matching_quality_indicator_overall = dplyr::if_else(overwrite_pc,
-                                                          13L,
-                                                          matching_quality_indicator_overall),
+        13L,
+        matching_quality_indicator_overall
+      ),
       ch_postcode_lookup = dplyr::if_else(overwrite_pc,
-                                          ch_postcode_lookup_best,
-                                          ch_postcode_lookup),
+        ch_postcode_lookup_best,
+        ch_postcode_lookup
+      ),
       ch_name_validated = dplyr::if_else(overwrite_pc,
-                                         ch_name_validated_best,
-                                         ch_name_validated),
+        ch_name_validated_best,
+        ch_name_validated
+      ),
       ch_name_validated_keyword = dplyr::if_else(
         overwrite_pc,
         ch_name_validated_keyword_best,
@@ -255,7 +262,7 @@ fill_ch_names <- function(ch_data,
   # if ch_postcode perfect match,
   # then we accept ch_name_lookup and overwrite ch_name
 
-  col_to_select = c(
+  col_to_select <- c(
     "unique_identifier",
     "matching_quality_indicator_overall",
     "sending_location",
@@ -289,7 +296,7 @@ fill_ch_names <- function(ch_data,
     "ch_name_keyword"
   )
 
-  ch_pc_match = ch_pc_match %>%
+  ch_pc_match <- ch_pc_match %>%
     dplyr::mutate(
       matching_quality_indicator_overall = dplyr::if_else(
         matching_quality_indicator_overall == 100L & postcode_matching,
@@ -317,9 +324,10 @@ fill_ch_names <- function(ch_data,
   # and relevant dates align,
   # but not the main part of the postcode, say "EH12"
 
-  ch_name_match1 = ch_data %>%
+  ch_name_match1 <- ch_data %>%
     dplyr::anti_join(ch_pc_match,
-                     by = dplyr::join_by(unique_identifier)) %>%
+      by = dplyr::join_by(unique_identifier)
+    ) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
@@ -341,11 +349,12 @@ fill_ch_names <- function(ch_data,
     dplyr::select(dplyr::all_of(col_to_select))
 
   ### fizzy matching by ch_name, and matching main part of postcode, quality 16L ----
-  ch_match = dplyr::bind_rows(ch_pc_match, ch_name_match1)
+  ch_match <- dplyr::bind_rows(ch_pc_match, ch_name_match1)
 
-  ch_name_match2 = ch_data %>%
+  ch_name_match2 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by(unique_identifier)) %>%
+      by = dplyr::join_by(unique_identifier)
+    ) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
@@ -363,8 +372,9 @@ fill_ch_names <- function(ch_data,
       ch_postcode = ch_postcode_lookup,
       matching_quality_indicator_overall = 16L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-                                                      .data[["ch_name_validated"]],
-                                                      method = "jaccard")
+        .data[["ch_name_validated"]],
+        method = "jaccard"
+      )
     ) %>%
     dplyr::arrange(
       unique_identifier,
@@ -374,11 +384,12 @@ fill_ch_names <- function(ch_data,
     dplyr::select(dplyr::all_of(col_to_select))
 
   ### fizzy matching by ch_name, and same city, quality 17L ----
-  ch_match = dplyr::bind_rows(ch_pc_match, ch_name_match1, ch_name_match2)
+  ch_match <- dplyr::bind_rows(ch_pc_match, ch_name_match1, ch_name_match2)
 
-  ch_name_match3 = ch_data %>%
+  ch_name_match3 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by(unique_identifier)) %>%
+      by = dplyr::join_by(unique_identifier)
+    ) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
@@ -396,8 +407,9 @@ fill_ch_names <- function(ch_data,
       ch_postcode = ch_postcode_lookup,
       matching_quality_indicator_overall = 17L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-                                                      .data[["ch_name_validated"]],
-                                                      method = "jaccard")
+        .data[["ch_name_validated"]],
+        method = "jaccard"
+      )
     ) %>%
     dplyr::arrange(
       unique_identifier,
@@ -406,22 +418,25 @@ fill_ch_names <- function(ch_data,
     dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match = dplyr::bind_rows(ch_pc_match,
-                              ch_name_match1,
-                              ch_name_match2,
-                              ch_name_match3)
+  ch_match <- dplyr::bind_rows(
+    ch_pc_match,
+    ch_name_match1,
+    ch_name_match2,
+    ch_name_match3
+  )
 
   ### ch_postcode and postcode exchange, then matching, quality 18L----
-  ch_pc_exchange_match1 = ch_data %>%
+  ch_pc_exchange_match1 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by(unique_identifier)) %>%
+      by = dplyr::join_by(unique_identifier)
+    ) %>%
     dplyr::mutate(
       intermediate_pc = ch_postcode,
       ch_postcode = postcode,
       postcode = ch_postcode,
-      ch_pc_partial = stringr::str_sub(ch_postcode, 1,-2),
-      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1,-3),
-      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1,-5)
+      ch_pc_partial = stringr::str_sub(ch_postcode, 1, -2),
+      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1, -3),
+      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1, -5)
     ) %>%
     dplyr::select(-intermediate_pc) %>%
     dplyr::inner_join(
@@ -441,8 +456,9 @@ fill_ch_names <- function(ch_data,
       ch_postcode = ch_postcode_lookup,
       matching_quality_indicator_overall = 18L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-                                                      .data[["ch_name_validated"]],
-                                                      method = "jaccard")
+        .data[["ch_name_validated"]],
+        method = "jaccard"
+      )
     ) %>%
     dplyr::arrange(
       unique_identifier,
@@ -451,26 +467,29 @@ fill_ch_names <- function(ch_data,
     dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match = dplyr::bind_rows(ch_pc_match,
-                              ch_name_match1,
-                              ch_name_match2,
-                              ch_name_match3,
-                              ch_pc_exchange_match1)
+  ch_match <- dplyr::bind_rows(
+    ch_pc_match,
+    ch_name_match1,
+    ch_name_match2,
+    ch_name_match3,
+    ch_pc_exchange_match1
+  )
 
   ## Other matching processes ----
   ### quality 19L----
   # ch_postcode and postcode exchange,
   # then fizzy match ch_name, and matching main part of postcode
-  ch_pc_exchange_match2 = ch_data %>%
+  ch_pc_exchange_match2 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by(unique_identifier)) %>%
+      by = dplyr::join_by(unique_identifier)
+    ) %>%
     dplyr::mutate(
       intermediate_pc = ch_postcode,
       ch_postcode = postcode,
       postcode = ch_postcode,
-      ch_pc_partial = stringr::str_sub(ch_postcode, 1,-2),
-      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1,-3),
-      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1,-5)
+      ch_pc_partial = stringr::str_sub(ch_postcode, 1, -2),
+      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1, -3),
+      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1, -5)
     ) %>%
     dplyr::select(-intermediate_pc) %>%
     dplyr::inner_join(
@@ -490,8 +509,9 @@ fill_ch_names <- function(ch_data,
       ch_postcode = ch_postcode_lookup,
       matching_quality_indicator_overall = 19L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-                                                      .data[["ch_name_validated"]],
-                                                      method = "jaccard")
+        .data[["ch_name_validated"]],
+        method = "jaccard"
+      )
     ) %>%
     dplyr::arrange(
       unique_identifier,
@@ -500,22 +520,25 @@ fill_ch_names <- function(ch_data,
     dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match = dplyr::bind_rows(ch_pc_match,
-                              ch_name_match1,
-                              ch_name_match2,
-                              ch_name_match3,
-                              ch_pc_exchange_match1,
-                              ch_pc_exchange_match2)
+  ch_match <- dplyr::bind_rows(
+    ch_pc_match,
+    ch_name_match1,
+    ch_name_match2,
+    ch_name_match3,
+    ch_pc_exchange_match1,
+    ch_pc_exchange_match2
+  )
 
 
   ### quality 20L----
   # perfect match care home name, regardless of postcode,
   # excluding those duplicated care home names.
-  unique_ch_name = unique(ch_name_lookup$ch_name_validated)
+  unique_ch_name <- unique(ch_name_lookup$ch_name_validated)
 
-  ch_name_match4 = ch_data %>%
+  ch_name_match4 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by(unique_identifier)) %>%
+      by = dplyr::join_by(unique_identifier)
+    ) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
@@ -538,20 +561,23 @@ fill_ch_names <- function(ch_data,
     ) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match = dplyr::bind_rows(ch_pc_match,
-                              ch_name_match1,
-                              ch_name_match2,
-                              ch_name_match3,
-                              ch_pc_exchange_match1,
-                              ch_pc_exchange_match2,
-                              ch_name_match4)
+  ch_match <- dplyr::bind_rows(
+    ch_pc_match,
+    ch_name_match1,
+    ch_name_match2,
+    ch_name_match3,
+    ch_pc_exchange_match1,
+    ch_pc_exchange_match2,
+    ch_name_match4
+  )
 
   ### quality 21L----
   # fizzy match care home name, regardless of postcode,
   # excluding those duplicated care home names.
-  ch_name_match5 = ch_data %>%
+  ch_name_match5 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by(unique_identifier)) %>%
+      by = dplyr::join_by(unique_identifier)
+    ) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
@@ -571,8 +597,9 @@ fill_ch_names <- function(ch_data,
       ch_postcode = ch_postcode_lookup,
       matching_quality_indicator_overall = 21L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-                                                      .data[["ch_name_validated"]],
-                                                      method = "jaccard")
+        .data[["ch_name_validated"]],
+        method = "jaccard"
+      )
     ) %>%
     dplyr::arrange(
       unique_identifier,
@@ -581,21 +608,24 @@ fill_ch_names <- function(ch_data,
     dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match = dplyr::bind_rows(ch_pc_match,
-                              ch_name_match1,
-                              ch_name_match2,
-                              ch_name_match3,
-                              ch_pc_exchange_match1,
-                              ch_pc_exchange_match2,
-                              ch_name_match4,
-                              ch_name_match5)
+  ch_match <- dplyr::bind_rows(
+    ch_pc_match,
+    ch_name_match1,
+    ch_name_match2,
+    ch_name_match3,
+    ch_pc_exchange_match1,
+    ch_pc_exchange_match2,
+    ch_name_match4,
+    ch_name_match5
+  )
 
 
 
   # add 100L for non-matching episodes
-  ch_no_match = ch_data %>%
+  ch_no_match <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by(unique_identifier)) %>%
+      by = dplyr::join_by(unique_identifier)
+    ) %>%
     # dplyr::distinct(ch_name, .keep_all = TRUE) %>%
     dplyr::mutate(
       matching_quality_indicator_overall = 100L,
@@ -611,7 +641,7 @@ fill_ch_names <- function(ch_data,
   ### quality 30L----
   # episodes sharing common chi
   # and ch_name with those episodes with good match quality
-  ch_data_final = dplyr::bind_rows(ch_match, ch_no_match) %>%
+  ch_data_final <- dplyr::bind_rows(ch_match, ch_no_match) %>%
     dplyr::arrange(chi, ch_name_keyword, matching_quality_indicator_overall) %>%
     dplyr::group_by(chi, ch_name_keyword) %>%
     dplyr::mutate(
@@ -620,14 +650,17 @@ fill_ch_names <- function(ch_data,
           matching_quality_indicator_overall == 100L
       ),
       ch_name = dplyr::if_else(same_ch_name,
-                               dplyr::first(ch_name),
-                               ch_name),
+        dplyr::first(ch_name),
+        ch_name
+      ),
       ch_postcode = dplyr::if_else(same_ch_name,
-                                   dplyr::first(ch_postcode),
-                                   ch_postcode),
+        dplyr::first(ch_postcode),
+        ch_postcode
+      ),
       matching_quality_indicator_overall = dplyr::if_else(same_ch_name,
-                                                          30L,
-                                                          matching_quality_indicator_overall)
+        30L,
+        matching_quality_indicator_overall
+      )
     ) %>%
     dplyr::ungroup() %>%
     dplyr::arrange(unique_identifier) %>%
@@ -642,7 +675,7 @@ fill_ch_names <- function(ch_data,
   #   write.csv("ch_quality.csv")
 
   ## produce output ----
-  col_output = c(
+  col_output <- c(
     "sending_location",
     "latest_sc_id",
     "chi",
@@ -674,14 +707,14 @@ fill_ch_names <- function(ch_data,
   )
 
   return(ch_data_final %>%
-           dplyr::select(dplyr::all_of(col_output)))
+    dplyr::select(dplyr::all_of(col_output)))
 }
 
 
 #' extract keyword in a care home name
 #' @param ch_name care home names
-ch_name_extract_keyword = function(ch_name){
-  ch_stopwords = c(
+ch_name_extract_keyword <- function(ch_name) {
+  ch_stopwords <- c(
     "home",
     "homes",
     "care",

From cd818507aa85588d814d99fd91c4c8d035609c19 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 29 Apr 2024 16:32:24 +0100
Subject: [PATCH 058/186] minor typo fix

---
 R/fill_ch_names.R | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index 1ac116708..2ac8cfa99 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -72,7 +72,7 @@ fill_ch_names <- function(ch_data,
         ch_active,
         "Active" ~ TRUE,
         c("Cancelled", "Inactive") ~ FALSE
-      ) # new chagnes
+      )
     ) %>%
     # Merge any duplicates, and get the interval each CH name was active
     dplyr::group_by(.data[["ch_postcode"]], .data[["ch_name_validated"]]) %>%
@@ -88,12 +88,12 @@ fill_ch_names <- function(ch_data,
         min(.data[["ch_date_registered"]]),
         .data[["latest_close_date"]]
       ),
-      ch_active = any(ch_active) # new changes
+      ch_active = any(ch_active)
     ) %>%
     dplyr::ungroup() %>%
     dplyr::rename(ch_postcode_lookup = ch_postcode) %>%
     dplyr::mutate(
-      ch_pc_partial = stringr::str_sub(ch_postcode_lookup, 1, -2), # new chagnes
+      ch_pc_partial = stringr::str_sub(ch_postcode_lookup, 1, -2),
       ch_pc_partial2 = stringr::str_sub(ch_postcode_lookup, 1, -3),
       ch_pc_partial3 = stringr::str_sub(ch_postcode_lookup, 1, -5),
       ch_pc_partial4 = gsub("\\d.*", "", stringr::str_sub(ch_postcode_lookup, 1, 2)),
@@ -151,14 +151,13 @@ fill_ch_names <- function(ch_data,
       postcode_matching = (ch_postcode == ch_postcode_lookup),
       ### quality 1L-12L ----
       matching_quality_indicator_overall = dplyr::case_when(
-        # 1 to 9, perfect to ok.
-        # 20 is unacceptable
+        # 1 to 12, from perfect to ok.
+        # 100L, terrible.
 
         # if care home postcode perfectly match, then
         # even if care home name is NA,
         # we still overwrite the ch_name from ch_name_lookup
 
-        # we probably do not overwrite ch_name and ch_postcode if 10?
         match_mean < 0.001 & postcode_matching ~ 1L,
         match_mean2 < 0.001 & postcode_matching ~ 2L,
         match_mean < 0.001 & !postcode_matching ~ 3L,
@@ -334,7 +333,7 @@ fill_ch_names <- function(ch_data,
         ch_name == ch_name_validated,
         ch_admission_date <= latest_close_date,
         ch_admission_date >= ch_date_registered,
-        # some care homes have same name, so use ch_pc_parital2 to filter
+        # some care homes have same name, so use ch_pc_partial2 to filter
         ch_pc_partial3
       )
     ) %>%

From 9d8ba9f0b04b92fce56f07450edbb5d8657af751 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 29 Apr 2024 15:41:58 +0000
Subject: [PATCH 059/186] [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/8881311681/attempts/1
Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/946#issuecomment-2083071047

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
---
 .github/actions/spelling/expect.txt | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index a4a34a58b..1681e090d 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -1,4 +1,5 @@
 acc
+Accom
 accom
 admloc
 admtype
@@ -14,6 +15,7 @@ birthtime
 bodyloc
 boxi
 callr
+Canx
 canx
 carehome
 careinspectorate
@@ -61,6 +63,7 @@ dischto
 disdest
 dminutes
 dna
+docx
 dontrun
 downup
 dplyr
@@ -108,6 +111,7 @@ idpc
 infyyear
 intzone
 ipdc
+Isdsf
 issuenumber
 itle
 iwalk
@@ -127,6 +131,7 @@ lgl
 lintr
 los
 ltc
+ltd
 lubridate
 magrittr
 markdownguide
@@ -213,12 +218,14 @@ slfhelper
 smr
 smra
 smrtype
+sourcedev
 sparra
 spd
 spss
 stadm
 starwars
 stefanzweifel
+stopwords
 stringdist
 stringr
 submis

From 25079bcf48e3e1db14d2f33ca1edab4fd97bc6c3 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 29 Apr 2024 16:53:53 +0100
Subject: [PATCH 060/186] update spelling expect

---
 .github/actions/spelling/expect.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 1681e090d..81d17b189 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -1,5 +1,4 @@
 acc
-Accom
 accom
 admloc
 admtype
@@ -15,7 +14,6 @@ birthtime
 bodyloc
 boxi
 callr
-Canx
 canx
 carehome
 careinspectorate

From 5e27ef72519edf7198518588162ff1150985e4c9 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 29 Apr 2024 16:53:53 +0100
Subject: [PATCH 061/186] update spelling expect and WORDLIST

---
 .github/actions/spelling/expect.txt |  2 ++
 inst/WORDLIST                       | 53 +++++++++++++++++++++++------
 2 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 81d17b189..016f2a2d5 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -1,5 +1,6 @@
 acc
 accom
+Accom
 admloc
 admtype
 adpe
@@ -15,6 +16,7 @@ bodyloc
 boxi
 callr
 canx
+Canx
 carehome
 careinspectorate
 categorises
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 4056fb36f..00f129e64 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -13,22 +13,30 @@ CVD
 Codecov
 DD
 DN
-DVPROD
+DSN
 Datazone
+DoB
 EoL
 FV
 GLS
 HB
+HC
 HHG
+HL
 HRI
+HRI's
+HRIs
 HSCDIIP
 HSCP
 ICD
+IPDC
+Inf
 LAs
 LCA
 LTC
 LTCs
 MH
+MLS
 MMM
 MMMYY
 NRS
@@ -39,42 +47,68 @@ PIS
 PLICS
 PPA
 PPAs
-RStudio
+RSP
+Renviron
 Rmd
+SCTASK
 SDS
 SLF
+SLFhelper
 SLFs
-SMRA
+SMRType
 SPARRA
 SPD
 Telecare
+Tibble
+WIP
 YYYY
 YYYYMMDD
 bedday
 beddays
-bz
+ch
+chr
+cij
 datamart
-csv
+datazone
+daycase
+daycases
 dd
 diag
-dsn
+dir
+dna
+dob
+eg
+ep
 etc
+funs
 fy
 fyear
 fyyear
 gpprac
-gz
+hb
 hbpraccode
+hbrescode
+hc
 hscdiip
-keydate
+hscp
+inc
+interzone
 lca
+lgl
+normal’
+old’
+opendata
 overcounting
+pattype
+popluation
 qtr
 rds
 reablement
 recid
 recids
 sc
+scoial
+sds
 slf
 slfhelper
 smrtype
@@ -84,7 +118,4 @@ specialty
 sysmis
 telecare
 tibble
-xz
-zlib
-zsav
 ️

From 4f4c0a9c7469a796495389d6490f508c4ca67351 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 30 Apr 2024 11:40:26 +0100
Subject: [PATCH 062/186] fix R CMD check note of no visible binding

---
 R/fill_ch_names.R | 539 ++++++++++++++++++++++------------------------
 1 file changed, 252 insertions(+), 287 deletions(-)

diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index 2ac8cfa99..63dacd185 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -12,34 +12,34 @@
 fill_ch_names <- function(ch_data,
                           ch_name_lookup_path = get_slf_ch_name_lookup_path(),
                           spd_path = get_spd_path()) {
+
+  # fix the issue "no visible binding for global variable x, y"
+  x <- y <- NULL
+
   ch_data <- ch_data %>%
     # Make the care home name more uniform
     dplyr::mutate(ch_name = clean_up_free_text(.data[["ch_name"]])) %>%
     # correct postcode formatting
     dplyr::mutate(
-      dplyr::across(
-        dplyr::contains("postcode"),
-        phsmethods::format_postcode
-      ),
+      dplyr::across(dplyr::contains("postcode"),
+                    phsmethods::format_postcode),
       # Replace invalid postcode with NA
       # Get a list of confirmed valid Scottish postcodes from the SPD
       ch_postcode = dplyr::if_else(
-        .data[["ch_postcode"]] %in% dplyr::pull(
-          read_file(spd_path, col_select = "pc7"),
-          "pc7"
-        ),
+        .data[["ch_postcode"]] %in% dplyr::pull(read_file(spd_path, col_select = "pc7"),
+                                                "pc7"),
         .data[["ch_postcode"]],
         NA_character_
       ),
-      ch_name_keyword = ch_name_extract_keyword(ch_name)
+      ch_name_keyword = ch_name_extract_keyword(.data[["ch_name"]])
     ) %>%
     # add unique identifier
     dplyr::mutate(
       unique_identifier = dplyr::row_number(),
-      ch_pc_partial = stringr::str_sub(ch_postcode, 1, -2),
-      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1, -3),
-      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1, -5),
-      ch_pc_partial4 = gsub("\\d.*", "", stringr::str_sub(ch_postcode, 1, 2))
+      ch_pc_partial = stringr::str_sub(.data[["ch_postcode"]], 1,-2),
+      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode"]], 1,-3),
+      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode"]], 1,-5),
+      ch_pc_partial4 = gsub("\\d.*", "", stringr::str_sub(.data[["ch_postcode"]], 1, 2))
     )
   # There are many cases where a patient have many same ch_name and ch_pc, but
   # there is one episode where ch_pc is different while ch_name is the same.
@@ -48,8 +48,7 @@ fill_ch_names <- function(ch_data,
   # Contact: IntelligenceTeam@careinspectorate.gov.scot
   # for an updated lookup list
   ch_name_lookup <- openxlsx::read.xlsx(ch_name_lookup_path,
-    detectDates = TRUE
-  ) %>%
+                                        detectDates = TRUE) %>%
     # Drop any Care Homes that were closed before 2017/18
     dplyr::select(
       ch_postcode = "AccomPostCodeNo",
@@ -58,46 +57,38 @@ fill_ch_names <- function(ch_data,
       ch_date_cancelled = "DateCanx",
       ch_active = tidyselect::contains("ServiceStatusAt")
     ) %>%
-    dplyr::filter(
-      is.na(.data[["ch_date_cancelled"]]) |
-        (.data[["ch_date_cancelled"]] >= start_fy("1718"))
-    ) %>%
+    dplyr::filter(is.na(.data[["ch_date_cancelled"]]) |
+                    (.data[["ch_date_cancelled"]] >= start_fy("1718"))) %>%
     # Standardise the postcode and CH name
     dplyr::mutate(
       ch_postcode = phsmethods::format_postcode(.data[["ch_postcode"]]),
       ch_name_validated = clean_up_free_text(.data[["ch_name_validated"]]),
       ch_date_registered = lubridate::as_date(.data[["ch_date_registered"]]),
       ch_date_cancelled = lubridate::as_date(.data[["ch_date_cancelled"]]),
-      ch_active = dplyr::case_match(
-        ch_active,
-        "Active" ~ TRUE,
-        c("Cancelled", "Inactive") ~ FALSE
-      )
+      ch_active = dplyr::case_match(.data[["ch_active"]],
+                                    "Active" ~ TRUE,
+                                    c("Cancelled", "Inactive") ~ FALSE)
     ) %>%
     # Merge any duplicates, and get the interval each CH name was active
     dplyr::group_by(.data[["ch_postcode"]], .data[["ch_name_validated"]]) %>%
     dplyr::summarise(
       # Find the latest date for each CH name / postcode
-      ch_date_registered = dplyr::first(ch_date_registered),
-      latest_close_date = dplyr::if_else(
-        is.na(max(.data[["ch_date_cancelled"]])),
-        Sys.Date(),
-        max(.data[["ch_date_cancelled"]])
-      ),
-      open_interval = lubridate::interval(
-        min(.data[["ch_date_registered"]]),
-        .data[["latest_close_date"]]
-      ),
-      ch_active = any(ch_active)
+      ch_date_registered = dplyr::first(.data[["ch_date_registered"]]),
+      latest_close_date = dplyr::if_else(is.na(max(.data[["ch_date_cancelled"]])),
+                                         Sys.Date(),
+                                         max(.data[["ch_date_cancelled"]])),
+      open_interval = lubridate::interval(min(.data[["ch_date_registered"]]),
+                                          .data[["latest_close_date"]]),
+      ch_active = any(.data[["ch_active"]])
     ) %>%
     dplyr::ungroup() %>%
-    dplyr::rename(ch_postcode_lookup = ch_postcode) %>%
+    dplyr::rename(ch_postcode_lookup = .data[["ch_postcode"]]) %>%
     dplyr::mutate(
-      ch_pc_partial = stringr::str_sub(ch_postcode_lookup, 1, -2),
-      ch_pc_partial2 = stringr::str_sub(ch_postcode_lookup, 1, -3),
-      ch_pc_partial3 = stringr::str_sub(ch_postcode_lookup, 1, -5),
-      ch_pc_partial4 = gsub("\\d.*", "", stringr::str_sub(ch_postcode_lookup, 1, 2)),
-      ch_name_validated_keyword = ch_name_extract_keyword(ch_name_validated)
+      ch_pc_partial = stringr::str_sub(.data[["ch_postcode_lookup"]], 1,-2),
+      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode_lookup"]], 1,-3),
+      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode_lookup"]], 1,-5),
+      ch_pc_partial4 = gsub("\\d.*", "", stringr::str_sub(.data[["ch_postcode_lookup"]], 1, 2)),
+      ch_name_validated_keyword = ch_name_extract_keyword(.data[["ch_name_validated"]])
     )
 
 
@@ -111,44 +102,36 @@ fill_ch_names <- function(ch_data,
   ## postcode matching process ----
   # Generate some metrics for how the submitted names connect to the valid names
   ch_pc_match <- ch_data %>%
-    dplyr::left_join(ch_name_lookup,
+    dplyr::left_join(
+      ch_name_lookup,
       by = "ch_pc_partial",
       multiple = "all",
       na_matches = "never"
     ) %>%
     # Work out string distances between names for each postcode
     dplyr::mutate(
-      match_distance_jaccard = stringdist::stringdist(
-        .data[["ch_name"]],
-        .data[["ch_name_validated"]],
-        method = "jaccard"
-      ),
-      match_distance_cosine = stringdist::stringdist(
-        .data[["ch_name"]],
-        .data[["ch_name_validated"]],
-        method = "cosine"
-      ),
+      match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
+                                                      .data[["ch_name_validated"]],
+                                                      method = "jaccard"),
+      match_distance_cosine = stringdist::stringdist(.data[["ch_name"]],
+                                                     .data[["ch_name_validated"]],
+                                                     method = "cosine"),
       match_mean = (.data[["match_distance_jaccard"]] +
-        .data[["match_distance_cosine"]]) / 2.0,
+                      .data[["match_distance_cosine"]]) / 2.0,
       # ch_name_keyword distances
-      match_distance_jaccard2 = stringdist::stringdist(
-        .data[["ch_name_keyword"]],
-        .data[["ch_name_validated_keyword"]],
-        method = "jaccard"
-      ),
-      match_distance_cosine2 = stringdist::stringdist(
-        .data[["ch_name_keyword"]],
-        .data[["ch_name_validated_keyword"]],
-        method = "cosine"
-      ),
+      match_distance_jaccard2 = stringdist::stringdist(.data[["ch_name_keyword"]],
+                                                       .data[["ch_name_validated_keyword"]],
+                                                       method = "jaccard"),
+      match_distance_cosine2 = stringdist::stringdist(.data[["ch_name_keyword"]],
+                                                      .data[["ch_name_validated_keyword"]],
+                                                      method = "cosine"),
       match_mean2 = (.data[["match_distance_jaccard2"]] +
-        .data[["match_distance_cosine2"]]) / 2.0
+                       .data[["match_distance_cosine2"]]) / 2.0
     ) %>%
     # ch_admission_date might be inaccurate.
-    # So ch_admission_date >= ch_date_registered is NOT used.
-    dplyr::filter(ch_admission_date <= latest_close_date) %>%
+    dplyr::filter(.data[["ch_admission_date"]] <= .data[["latest_close_date"]]) %>%
     dplyr::mutate(
-      postcode_matching = (ch_postcode == ch_postcode_lookup),
+      postcode_matching = (.data[["ch_postcode"]] == .data[["ch_postcode_lookup"]]),
       ### quality 1L-12L ----
       matching_quality_indicator_overall = dplyr::case_when(
         # 1 to 12, from perfect to ok.
@@ -158,18 +141,32 @@ fill_ch_names <- function(ch_data,
         # even if care home name is NA,
         # we still overwrite the ch_name from ch_name_lookup
 
-        match_mean < 0.001 & postcode_matching ~ 1L,
-        match_mean2 < 0.001 & postcode_matching ~ 2L,
-        match_mean < 0.001 & !postcode_matching ~ 3L,
-        match_mean2 < 0.001 & !postcode_matching ~ 4L,
-        match_mean < 0.1 & postcode_matching ~ 5L,
-        match_mean2 < 0.1 & postcode_matching ~ 6L,
-        match_mean < 0.1 & !postcode_matching ~ 7L,
-        match_mean2 < 0.1 & !postcode_matching ~ 8L,
-        (match_mean < 0.4 | match_mean2 < 0.4) & postcode_matching ~ 9L,
-        (match_mean < 0.4 | match_mean2 < 0.4) & !postcode_matching ~ 10L,
-        is.na(ch_name) & postcode_matching ~ 11L,
-        is.na(ch_name) & !postcode_matching ~ 12L,
+        .data[["match_mean"]] < 0.001 &
+          .data[["postcode_matching"]] ~ 1L,
+        .data[["match_mean2"]] < 0.001 &
+          .data[["postcode_matching"]] ~ 2L,
+        .data[["match_mean"]] < 0.001 &
+          !.data[["postcode_matching"]] ~ 3L,
+        .data[["match_mean2"]] < 0.001 &
+          !.data[["postcode_matching"]] ~ 4L,
+        .data[["match_mean"]] < 0.1 &
+          .data[["postcode_matching"]] ~ 5L,
+        .data[["match_mean2"]]  < 0.1 &
+          .data[["postcode_matching"]] ~ 6L,
+        .data[["match_mean"]]  < 0.1 &
+          !.data[["postcode_matching"]] ~ 7L,
+        .data[["match_mean2"]]  < 0.1 &
+          !.data[["postcode_matching"]] ~ 8L,
+        (.data[["match_mean"]]  < 0.4 |
+           .data[["match_mean2"]] < 0.4) &
+          .data[["postcode_matching"]] ~ 9L,
+        (.data[["match_mean"]] < 0.4 |
+           .data[["match_mean2"]] < 0.4) &
+          !.data[["postcode_matching"]] ~ 10L,
+        is.na(.data[["ch_name"]]) &
+          .data[["postcode_matching"]] ~ 11L,
+        is.na(.data[["ch_name"]]) &
+          !.data[["postcode_matching"]] ~ 12L,
         .default = 100L
         # cases 100L will be improved in the next section
         # 100L means no matching
@@ -195,12 +192,12 @@ fill_ch_names <- function(ch_data,
       "ch_active",
       "postcode_matching",
       "matching_quality_indicator_overall",
-      everything()
+      tidyselect::everything()
     ) %>%
-    dplyr::arrange(unique_identifier, matching_quality_indicator_overall) %>%
+    dplyr::arrange(.data[["unique_identifier"]],
+                   .data[["matching_quality_indicator_overall"]]) %>%
     dplyr::distinct(.data[["unique_identifier"]],
-      .keep_all = TRUE
-    )
+                    .keep_all = TRUE)
 
 
   # fix matching quality being 100, meaning bad
@@ -222,42 +219,46 @@ fill_ch_names <- function(ch_data,
   # and those episodes seem consistent, indicated by good matching quality.
   # Then, overwrite the minority of records with matching quality being 100.
   ch_pc_match <- ch_pc_match %>%
-    dplyr::arrange(chi, ch_name, matching_quality_indicator_overall) %>%
-    dplyr::group_by(chi, ch_name) %>%
+    dplyr::arrange(.data[["chi"]],
+                   .data[["ch_name"]],
+                   .data[["matching_quality_indicator_overall"]]) %>%
+    dplyr::group_by(.data[["chi"]], .data[["ch_name"]]) %>%
     dplyr::mutate(
       # Best_quality_within_group_chi_name is supposed to be minimum within a group.
       # Since we sort matching_quality_indicator_overall, first is ok.
-      best_quality_within_group_chi_name = dplyr::first(matching_quality_indicator_overall),
-      ch_postcode_lookup_best = dplyr::first(ch_postcode_lookup),
-      ch_name_validated_best = dplyr::first(ch_name_validated),
-      ch_name_validated_keyword_best = dplyr::first(ch_name_validated_keyword)
+      best_quality_within_group_chi_name =
+        dplyr::first(.data[["matching_quality_indicator_overall"]]),
+      ch_postcode_lookup_best =
+        dplyr::first(.data[["ch_postcode_lookup"]]),
+      ch_name_validated_best =
+        dplyr::first(.data[["ch_name_validated"]]),
+      ch_name_validated_keyword_best =
+        dplyr::first(.data[["ch_name_validated_keyword"]])
     ) %>%
     dplyr::ungroup() %>%
     dplyr::mutate(
-      overwrite_pc = (
-        matching_quality_indicator_overall == 100L &
-          best_quality_within_group_chi_name <= 10L
-      ),
-      matching_quality_indicator_overall = dplyr::if_else(overwrite_pc,
-        13L,
-        matching_quality_indicator_overall
-      ),
-      ch_postcode_lookup = dplyr::if_else(overwrite_pc,
-        ch_postcode_lookup_best,
-        ch_postcode_lookup
-      ),
-      ch_name_validated = dplyr::if_else(overwrite_pc,
-        ch_name_validated_best,
-        ch_name_validated
-      ),
-      ch_name_validated_keyword = dplyr::if_else(
-        overwrite_pc,
-        ch_name_validated_keyword_best,
-        ch_name_validated_keyword
-      )
+      overwrite_pc = (.data[["matching_quality_indicator_overall"]] == 100L &
+                        .data[["best_quality_within_group_chi_name"]] <= 10L),
+      matching_quality_indicator_overall =
+        dplyr::if_else(.data[["overwrite_pc"]],
+                       13L,
+                       .data[["matching_quality_indicator_overall"]]),
+      ch_postcode_lookup =
+        dplyr::if_else(.data[["overwrite_pc"]],
+                       .data[["ch_postcode_lookup_best"]],
+                       .data[["ch_postcode_lookup"]]),
+
+      ch_name_validated =
+        dplyr::if_else(.data[["overwrite_pc"]],
+                       .data[["ch_name_validated_best"]],
+                       .data[["ch_name_validated"]]),
+      ch_name_validated_keyword =
+        dplyr::if_else(.data[["overwrite_pc"]],
+                       .data[["ch_name_validated_keyword_best"]],
+                       .data[["ch_name_validated_keyword"]])
     )
 
-  ### quality 14L, ch_postcode match ----
+  ### quality 14L ----
   # if ch_postcode perfect match,
   # then we accept ch_name_lookup and overwrite ch_name
 
@@ -296,25 +297,20 @@ fill_ch_names <- function(ch_data,
   )
 
   ch_pc_match <- ch_pc_match %>%
-    dplyr::mutate(
-      matching_quality_indicator_overall = dplyr::if_else(
-        matching_quality_indicator_overall == 100L & postcode_matching,
-        14L,
-        matching_quality_indicator_overall
-      )
-    ) %>%
+    dplyr::mutate(matching_quality_indicator_overall = dplyr::if_else(.data[["matching_quality_indicator_overall"]] == 100L &
+                                                                        .data[["postcode_matching"]],
+                                                                      14L,
+                                                                      .data[["matching_quality_indicator_overall"]])) %>%
     # now remove cases of quality being 100L for the next section:
     # ch_name matching
-    dplyr::filter(matching_quality_indicator_overall != 100L) %>%
+    dplyr::filter(.data[["matching_quality_indicator_overall"]] != 100L) %>%
     dplyr::mutate(
-      ch_name_old = ch_name,
-      ch_postcode_old = ch_postcode,
-      ch_name = ch_name_validated,
-      ch_postcode = ch_postcode_lookup
+      ch_name_old = .data[["ch_name"]],
+      ch_postcode_old = .data[["ch_postcode"]],
+      ch_name = .data[["ch_name_validated"]],
+      ch_postcode = .data[["ch_postcode_lookup"]]
     ) %>%
-    dplyr::select(
-      dplyr::all_of(col_to_select)
-    )
+    dplyr::select(dplyr::all_of(col_to_select))
 
   ## matching by ch_name, quality 15L-21L ----
   ### perfect matching by ch_name, and main part of postcode, quality 15L ----
@@ -325,24 +321,23 @@ fill_ch_names <- function(ch_data,
 
   ch_name_match1 <- ch_data %>%
     dplyr::anti_join(ch_pc_match,
-      by = dplyr::join_by(unique_identifier)
-    ) %>%
+                     by = dplyr::join_by("unique_identifier")) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
-        ch_name == ch_name_validated,
-        ch_admission_date <= latest_close_date,
-        ch_admission_date >= ch_date_registered,
+        x$ch_name == y$ch_name_validated,
+        x$ch_admission_date <= y$latest_close_date,
+        x$ch_admission_date >= y$ch_date_registered,
         # some care homes have same name, so use ch_pc_partial2 to filter
-        ch_pc_partial3
+        "ch_pc_partial3"
       )
     ) %>%
     dplyr::mutate(
-      ch_name_old = ch_name,
-      ch_postcode_old = ch_postcode,
-      ch_name_validated = ch_name,
+      ch_name_old = .data[["ch_name"]],
+      ch_postcode_old = .data[["ch_postcode"]],
+      ch_name_validated = .data[["ch_name"]],
       # ch_name_validated is omitted because of join_by(), add back
-      ch_postcode = ch_postcode_lookup,
+      ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 15L
     ) %>%
     dplyr::select(dplyr::all_of(col_to_select))
@@ -352,118 +347,105 @@ fill_ch_names <- function(ch_data,
 
   ch_name_match2 <- ch_data %>%
     dplyr::anti_join(ch_match,
-      by = dplyr::join_by(unique_identifier)
-    ) %>%
+                     by = dplyr::join_by("unique_identifier")) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
-        ch_name_keyword == ch_name_validated_keyword,
-        ch_admission_date <= latest_close_date,
-        ch_admission_date >= ch_date_registered,
-        ch_pc_partial3
+        x$ch_name_keyword == y$ch_name_validated_keyword,
+        x$ch_admission_date <= y$latest_close_date,
+        x$ch_admission_date >= y$ch_date_registered,
+        "ch_pc_partial3"
       ),
       na_matches = "never"
     ) %>%
     dplyr::mutate(
-      ch_name_old = ch_name,
-      ch_postcode_old = ch_postcode,
-      ch_name = ch_name_validated,
-      ch_postcode = ch_postcode_lookup,
+      ch_name_old = .data[["ch_name"]],
+      ch_postcode_old = .data[["ch_postcode"]],
+      ch_name = .data[["ch_name_validated"]],
+      ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 16L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-        .data[["ch_name_validated"]],
-        method = "jaccard"
-      )
+                                                      .data[["ch_name_validated"]],
+                                                      method = "jaccard")
     ) %>%
-    dplyr::arrange(
-      unique_identifier,
-      match_distance_jaccard
-    ) %>%
-    dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
+    dplyr::arrange(.data[["unique_identifier"]],
+                   .data[["match_distance_jaccard"]]) %>%
+    dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
   ### fizzy matching by ch_name, and same city, quality 17L ----
-  ch_match <- dplyr::bind_rows(ch_pc_match, ch_name_match1, ch_name_match2)
+  ch_match <-
+    dplyr::bind_rows(ch_pc_match, ch_name_match1, ch_name_match2)
 
   ch_name_match3 <- ch_data %>%
     dplyr::anti_join(ch_match,
-      by = dplyr::join_by(unique_identifier)
-    ) %>%
+                     by = dplyr::join_by("unique_identifier")) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
-        ch_name_keyword == ch_name_validated_keyword,
-        ch_admission_date <= latest_close_date,
-        ch_admission_date >= ch_date_registered,
-        ch_pc_partial4
+        x$ch_name_keyword == y$ch_name_validated_keyword,
+        x$ch_admission_date <= y$latest_close_date,
+        x$ch_admission_date >= y$ch_date_registered,
+        "ch_pc_partial4"
       ),
       na_matches = "never"
     ) %>%
     dplyr::mutate(
-      ch_name_old = ch_name,
-      ch_postcode_old = ch_postcode,
-      ch_name = ch_name_validated,
-      ch_postcode = ch_postcode_lookup,
+      ch_name_old = .data[["ch_name"]],
+      ch_postcode_old = .data[["ch_postcode"]],
+      ch_name = .data[["ch_name_validated"]],
+      ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 17L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-        .data[["ch_name_validated"]],
-        method = "jaccard"
-      )
-    ) %>%
-    dplyr::arrange(
-      unique_identifier,
-      match_distance_jaccard
+                                                      .data[["ch_name_validated"]],
+                                                      method = "jaccard")
     ) %>%
-    dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
+    dplyr::arrange(.data[["unique_identifier"]],
+                   .data[["match_distance_jaccard"]]) %>%
+    dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match <- dplyr::bind_rows(
-    ch_pc_match,
-    ch_name_match1,
-    ch_name_match2,
-    ch_name_match3
-  )
+  ch_match <- dplyr::bind_rows(ch_pc_match,
+                               ch_name_match1,
+                               ch_name_match2,
+                               ch_name_match3)
 
   ### ch_postcode and postcode exchange, then matching, quality 18L----
   ch_pc_exchange_match1 <- ch_data %>%
     dplyr::anti_join(ch_match,
-      by = dplyr::join_by(unique_identifier)
-    ) %>%
+                     by = dplyr::join_by("unique_identifier")) %>%
     dplyr::mutate(
-      intermediate_pc = ch_postcode,
-      ch_postcode = postcode,
-      postcode = ch_postcode,
-      ch_pc_partial = stringr::str_sub(ch_postcode, 1, -2),
-      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1, -3),
-      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1, -5)
-    ) %>%
-    dplyr::select(-intermediate_pc) %>%
+      intermediate_pc = .data[["ch_postcode"]],
+      ch_postcode = .data[["postcode"]],
+      postcode = .data[["ch_postcode"]],
+      ch_pc_partial = stringr::str_sub(.data[["ch_postcode"]], 1,-2),
+      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode"]], 1,-3),
+      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode"]], 1,-5)
+    ) %>%
+    dplyr::select(-.data[["intermediate_pc"]]) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
-        ch_name_keyword == ch_name_validated_keyword,
-        ch_admission_date <= latest_close_date,
-        ch_admission_date >= ch_date_registered,
-        ch_pc_partial
+        x$ch_name_keyword == y$ch_name_validated_keyword,
+        x$ch_admission_date <= y$latest_close_date,
+        x$ch_admission_date >= y$ch_date_registered,
+        "ch_pc_partial"
       ),
       na_matches = "never"
     ) %>%
     dplyr::mutate(
-      ch_name_old = ch_name,
-      ch_postcode_old = ch_postcode,
-      ch_name = ch_name_validated,
-      ch_postcode = ch_postcode_lookup,
+      ch_name_old = .data[["ch_name"]],
+      ch_postcode_old = .data[["ch_postcode"]],
+      ch_name = .data[["ch_name_validated"]],
+      ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 18L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-        .data[["ch_name_validated"]],
-        method = "jaccard"
-      )
-    ) %>%
-    dplyr::arrange(
-      unique_identifier,
-      match_distance_jaccard
+                                                      .data[["ch_name_validated"]],
+                                                      method = "jaccard")
     ) %>%
-    dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
+    dplyr::arrange(.data[["unique_identifier"]],
+                   .data[["match_distance_jaccard"]]) %>%
+    dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
   ch_match <- dplyr::bind_rows(
@@ -480,43 +462,39 @@ fill_ch_names <- function(ch_data,
   # then fizzy match ch_name, and matching main part of postcode
   ch_pc_exchange_match2 <- ch_data %>%
     dplyr::anti_join(ch_match,
-      by = dplyr::join_by(unique_identifier)
-    ) %>%
+                     by = dplyr::join_by("unique_identifier")) %>%
     dplyr::mutate(
-      intermediate_pc = ch_postcode,
-      ch_postcode = postcode,
-      postcode = ch_postcode,
-      ch_pc_partial = stringr::str_sub(ch_postcode, 1, -2),
-      ch_pc_partial2 = stringr::str_sub(ch_postcode, 1, -3),
-      ch_pc_partial3 = stringr::str_sub(ch_postcode, 1, -5)
-    ) %>%
-    dplyr::select(-intermediate_pc) %>%
+      intermediate_pc = .data[["ch_postcode"]],
+      ch_postcode = .data[["postcode"]],
+      postcode = .data[["ch_postcode"]],
+      ch_pc_partial = stringr::str_sub(.data[["ch_postcode"]], 1,-2),
+      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode"]], 1,-3),
+      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode"]], 1,-5)
+    ) %>%
+    dplyr::select(-.data[["intermediate_pc"]]) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
-        ch_name_keyword == ch_name_validated_keyword,
-        ch_admission_date <= latest_close_date,
-        ch_admission_date >= ch_date_registered,
-        ch_pc_partial3
+        x$ch_name_keyword == y$ch_name_validated_keyword,
+        x$ch_admission_date <= y$latest_close_date,
+        x$ch_admission_date >= y$ch_date_registered,
+        "ch_pc_partial3"
       ),
       na_matches = "never"
     ) %>%
     dplyr::mutate(
-      ch_name_old = ch_name,
-      ch_postcode_old = ch_postcode,
-      ch_name = ch_name_validated,
-      ch_postcode = ch_postcode_lookup,
+      ch_name_old = .data[["ch_name"]],
+      ch_postcode_old = .data[["ch_postcode"]],
+      ch_name = .data[["ch_name_validated"]],
+      ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 19L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-        .data[["ch_name_validated"]],
-        method = "jaccard"
-      )
+                                                      .data[["ch_name_validated"]],
+                                                      method = "jaccard")
     ) %>%
-    dplyr::arrange(
-      unique_identifier,
-      match_distance_jaccard
-    ) %>%
-    dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
+    dplyr::arrange(.data[["unique_identifier"]],
+                   .data[["match_distance_jaccard"]]) %>%
+    dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
   ch_match <- dplyr::bind_rows(
@@ -536,26 +514,23 @@ fill_ch_names <- function(ch_data,
 
   ch_name_match4 <- ch_data %>%
     dplyr::anti_join(ch_match,
-      by = dplyr::join_by(unique_identifier)
-    ) %>%
+                     by = dplyr::join_by("unique_identifier")) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
-        ch_name == ch_name_validated,
-        ch_admission_date <= latest_close_date,
-        ch_admission_date >= ch_date_registered
+        x$ch_name == y$ch_name_validated,
+        x$ch_admission_date <= y$latest_close_date,
+        x$ch_admission_date >= y$ch_date_registered
       ),
       na_matches = "never"
     ) %>%
-    dplyr::filter(
-      ch_name %in% unique_ch_name
-    ) %>%
+    dplyr::filter(.data[["ch_name"]] %in% unique_ch_name) %>%
     dplyr::mutate(
-      ch_name_old = ch_name,
-      ch_postcode_old = ch_postcode,
+      ch_name_old = .data[["ch_name"]],
+      ch_postcode_old = .data[["ch_postcode"]],
       # add ch_name_validated back since omitted in join_by()
-      ch_name_validated = ch_name,
-      ch_postcode = ch_postcode_lookup,
+      ch_name_validated = .data[["ch_name"]],
+      ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 20L
     ) %>%
     dplyr::select(dplyr::all_of(col_to_select))
@@ -575,36 +550,30 @@ fill_ch_names <- function(ch_data,
   # excluding those duplicated care home names.
   ch_name_match5 <- ch_data %>%
     dplyr::anti_join(ch_match,
-      by = dplyr::join_by(unique_identifier)
-    ) %>%
+                     by = dplyr::join_by("unique_identifier")) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
-        ch_name_keyword == ch_name_validated_keyword,
-        ch_admission_date <= latest_close_date,
-        ch_admission_date >= ch_date_registered
+        x$ch_name_keyword == y$ch_name_validated_keyword,
+        x$ch_admission_date <= y$latest_close_date,
+        x$ch_admission_date >= y$ch_date_registered
       ),
       na_matches = "never"
     ) %>%
-    dplyr::filter(
-      ch_name %in% unique_ch_name
-    ) %>%
+    dplyr::filter(.data[["ch_name"]] %in% unique_ch_name) %>%
     dplyr::mutate(
-      ch_name_old = ch_name,
-      ch_postcode_old = ch_postcode,
-      ch_name = ch_name_validated,
-      ch_postcode = ch_postcode_lookup,
+      ch_name_old = .data[["ch_name"]],
+      ch_postcode_old = .data[["ch_postcode"]],
+      ch_name = .data[["ch_name_validated"]],
+      ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 21L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-        .data[["ch_name_validated"]],
-        method = "jaccard"
-      )
-    ) %>%
-    dplyr::arrange(
-      unique_identifier,
-      match_distance_jaccard
+                                                      .data[["ch_name_validated"]],
+                                                      method = "jaccard")
     ) %>%
-    dplyr::distinct(unique_identifier, .keep_all = TRUE) %>%
+    dplyr::arrange(.data[["unique_identifier"]],
+                   .data[["match_distance_jaccard"]]) %>%
+    dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
   ch_match <- dplyr::bind_rows(
@@ -623,13 +592,12 @@ fill_ch_names <- function(ch_data,
   # add 100L for non-matching episodes
   ch_no_match <- ch_data %>%
     dplyr::anti_join(ch_match,
-      by = dplyr::join_by(unique_identifier)
-    ) %>%
+                     by = dplyr::join_by("unique_identifier")) %>%
     # dplyr::distinct(ch_name, .keep_all = TRUE) %>%
     dplyr::mutate(
       matching_quality_indicator_overall = 100L,
-      ch_name_old = ch_name,
-      ch_postcode_old = ch_postcode,
+      ch_name_old = .data[["ch_name"]],
+      ch_postcode_old = .data[["ch_postcode"]],
       ch_name_validated = NA_character_,
       open_interval = NA,
       latest_close_date = NA,
@@ -641,28 +609,25 @@ fill_ch_names <- function(ch_data,
   # episodes sharing common chi
   # and ch_name with those episodes with good match quality
   ch_data_final <- dplyr::bind_rows(ch_match, ch_no_match) %>%
-    dplyr::arrange(chi, ch_name_keyword, matching_quality_indicator_overall) %>%
-    dplyr::group_by(chi, ch_name_keyword) %>%
+    dplyr::arrange(.data[["chi"]], .data[["ch_name_keyword"]],
+                   .data[["matching_quality_indicator_overall"]]) %>%
+    dplyr::group_by(.data[["chi"]],
+                    .data[["ch_name_keyword"]]) %>%
     dplyr::mutate(
-      same_ch_name = (
-        dplyr::first(matching_quality_indicator_overall) <= 10L &
-          matching_quality_indicator_overall == 100L
-      ),
-      ch_name = dplyr::if_else(same_ch_name,
-        dplyr::first(ch_name),
-        ch_name
-      ),
-      ch_postcode = dplyr::if_else(same_ch_name,
-        dplyr::first(ch_postcode),
-        ch_postcode
-      ),
-      matching_quality_indicator_overall = dplyr::if_else(same_ch_name,
-        30L,
-        matching_quality_indicator_overall
-      )
+      same_ch_name = (dplyr::first(.data[["matching_quality_indicator_overall"]]) <= 10L &
+                        .data[["matching_quality_indicator_overall"]] == 100L),
+      ch_name = dplyr::if_else(.data[["same_ch_name"]],
+                               dplyr::first(.data[["ch_name"]]),
+                               .data[["ch_name"]]),
+      ch_postcode = dplyr::if_else(.data[["same_ch_name"]],
+                                   dplyr::first(.data[["ch_postcode"]]),
+                                   .data[["ch_postcode"]]),
+      matching_quality_indicator_overall = dplyr::if_else(.data[["same_ch_name"]],
+                                                          30L,
+                                                          .data[["matching_quality_indicator_overall"]])
     ) %>%
     dplyr::ungroup() %>%
-    dplyr::arrange(unique_identifier) %>%
+    dplyr::arrange(.data[["unique_identifier"]]) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
   ## For any future amendment or quality check ----
@@ -706,7 +671,7 @@ fill_ch_names <- function(ch_data,
   )
 
   return(ch_data_final %>%
-    dplyr::select(dplyr::all_of(col_output)))
+           dplyr::select(dplyr::all_of(col_output)))
 }
 
 

From e7c8e5e4d4ec1ff823d87a50481b5509c7174083 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Tue, 30 Apr 2024 10:42:05 +0000
Subject: [PATCH 063/186] Style code

---
 R/fill_ch_names.R | 265 +++++++++++++++++++++++++++-------------------
 1 file changed, 159 insertions(+), 106 deletions(-)

diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index 63dacd185..f0cb7d04b 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -12,7 +12,6 @@
 fill_ch_names <- function(ch_data,
                           ch_name_lookup_path = get_slf_ch_name_lookup_path(),
                           spd_path = get_spd_path()) {
-
   # fix the issue "no visible binding for global variable x, y"
   x <- y <- NULL
 
@@ -21,13 +20,17 @@ fill_ch_names <- function(ch_data,
     dplyr::mutate(ch_name = clean_up_free_text(.data[["ch_name"]])) %>%
     # correct postcode formatting
     dplyr::mutate(
-      dplyr::across(dplyr::contains("postcode"),
-                    phsmethods::format_postcode),
+      dplyr::across(
+        dplyr::contains("postcode"),
+        phsmethods::format_postcode
+      ),
       # Replace invalid postcode with NA
       # Get a list of confirmed valid Scottish postcodes from the SPD
       ch_postcode = dplyr::if_else(
-        .data[["ch_postcode"]] %in% dplyr::pull(read_file(spd_path, col_select = "pc7"),
-                                                "pc7"),
+        .data[["ch_postcode"]] %in% dplyr::pull(
+          read_file(spd_path, col_select = "pc7"),
+          "pc7"
+        ),
         .data[["ch_postcode"]],
         NA_character_
       ),
@@ -36,9 +39,9 @@ fill_ch_names <- function(ch_data,
     # add unique identifier
     dplyr::mutate(
       unique_identifier = dplyr::row_number(),
-      ch_pc_partial = stringr::str_sub(.data[["ch_postcode"]], 1,-2),
-      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode"]], 1,-3),
-      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode"]], 1,-5),
+      ch_pc_partial = stringr::str_sub(.data[["ch_postcode"]], 1, -2),
+      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode"]], 1, -3),
+      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode"]], 1, -5),
       ch_pc_partial4 = gsub("\\d.*", "", stringr::str_sub(.data[["ch_postcode"]], 1, 2))
     )
   # There are many cases where a patient have many same ch_name and ch_pc, but
@@ -48,7 +51,8 @@ fill_ch_names <- function(ch_data,
   # Contact: IntelligenceTeam@careinspectorate.gov.scot
   # for an updated lookup list
   ch_name_lookup <- openxlsx::read.xlsx(ch_name_lookup_path,
-                                        detectDates = TRUE) %>%
+    detectDates = TRUE
+  ) %>%
     # Drop any Care Homes that were closed before 2017/18
     dplyr::select(
       ch_postcode = "AccomPostCodeNo",
@@ -58,16 +62,18 @@ fill_ch_names <- function(ch_data,
       ch_active = tidyselect::contains("ServiceStatusAt")
     ) %>%
     dplyr::filter(is.na(.data[["ch_date_cancelled"]]) |
-                    (.data[["ch_date_cancelled"]] >= start_fy("1718"))) %>%
+      (.data[["ch_date_cancelled"]] >= start_fy("1718"))) %>%
     # Standardise the postcode and CH name
     dplyr::mutate(
       ch_postcode = phsmethods::format_postcode(.data[["ch_postcode"]]),
       ch_name_validated = clean_up_free_text(.data[["ch_name_validated"]]),
       ch_date_registered = lubridate::as_date(.data[["ch_date_registered"]]),
       ch_date_cancelled = lubridate::as_date(.data[["ch_date_cancelled"]]),
-      ch_active = dplyr::case_match(.data[["ch_active"]],
-                                    "Active" ~ TRUE,
-                                    c("Cancelled", "Inactive") ~ FALSE)
+      ch_active = dplyr::case_match(
+        .data[["ch_active"]],
+        "Active" ~ TRUE,
+        c("Cancelled", "Inactive") ~ FALSE
+      )
     ) %>%
     # Merge any duplicates, and get the interval each CH name was active
     dplyr::group_by(.data[["ch_postcode"]], .data[["ch_name_validated"]]) %>%
@@ -75,18 +81,21 @@ fill_ch_names <- function(ch_data,
       # Find the latest date for each CH name / postcode
       ch_date_registered = dplyr::first(.data[["ch_date_registered"]]),
       latest_close_date = dplyr::if_else(is.na(max(.data[["ch_date_cancelled"]])),
-                                         Sys.Date(),
-                                         max(.data[["ch_date_cancelled"]])),
-      open_interval = lubridate::interval(min(.data[["ch_date_registered"]]),
-                                          .data[["latest_close_date"]]),
+        Sys.Date(),
+        max(.data[["ch_date_cancelled"]])
+      ),
+      open_interval = lubridate::interval(
+        min(.data[["ch_date_registered"]]),
+        .data[["latest_close_date"]]
+      ),
       ch_active = any(.data[["ch_active"]])
     ) %>%
     dplyr::ungroup() %>%
     dplyr::rename(ch_postcode_lookup = .data[["ch_postcode"]]) %>%
     dplyr::mutate(
-      ch_pc_partial = stringr::str_sub(.data[["ch_postcode_lookup"]], 1,-2),
-      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode_lookup"]], 1,-3),
-      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode_lookup"]], 1,-5),
+      ch_pc_partial = stringr::str_sub(.data[["ch_postcode_lookup"]], 1, -2),
+      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode_lookup"]], 1, -3),
+      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode_lookup"]], 1, -5),
       ch_pc_partial4 = gsub("\\d.*", "", stringr::str_sub(.data[["ch_postcode_lookup"]], 1, 2)),
       ch_name_validated_keyword = ch_name_extract_keyword(.data[["ch_name_validated"]])
     )
@@ -111,22 +120,26 @@ fill_ch_names <- function(ch_data,
     # Work out string distances between names for each postcode
     dplyr::mutate(
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-                                                      .data[["ch_name_validated"]],
-                                                      method = "jaccard"),
+        .data[["ch_name_validated"]],
+        method = "jaccard"
+      ),
       match_distance_cosine = stringdist::stringdist(.data[["ch_name"]],
-                                                     .data[["ch_name_validated"]],
-                                                     method = "cosine"),
+        .data[["ch_name_validated"]],
+        method = "cosine"
+      ),
       match_mean = (.data[["match_distance_jaccard"]] +
-                      .data[["match_distance_cosine"]]) / 2.0,
+        .data[["match_distance_cosine"]]) / 2.0,
       # ch_name_keyword distances
       match_distance_jaccard2 = stringdist::stringdist(.data[["ch_name_keyword"]],
-                                                       .data[["ch_name_validated_keyword"]],
-                                                       method = "jaccard"),
+        .data[["ch_name_validated_keyword"]],
+        method = "jaccard"
+      ),
       match_distance_cosine2 = stringdist::stringdist(.data[["ch_name_keyword"]],
-                                                      .data[["ch_name_validated_keyword"]],
-                                                      method = "cosine"),
+        .data[["ch_name_validated_keyword"]],
+        method = "cosine"
+      ),
       match_mean2 = (.data[["match_distance_jaccard2"]] +
-                       .data[["match_distance_cosine2"]]) / 2.0
+        .data[["match_distance_cosine2"]]) / 2.0
     ) %>%
     # ch_admission_date might be inaccurate.
     dplyr::filter(.data[["ch_admission_date"]] <= .data[["latest_close_date"]]) %>%
@@ -140,7 +153,6 @@ fill_ch_names <- function(ch_data,
         # if care home postcode perfectly match, then
         # even if care home name is NA,
         # we still overwrite the ch_name from ch_name_lookup
-
         .data[["match_mean"]] < 0.001 &
           .data[["postcode_matching"]] ~ 1L,
         .data[["match_mean2"]] < 0.001 &
@@ -151,17 +163,17 @@ fill_ch_names <- function(ch_data,
           !.data[["postcode_matching"]] ~ 4L,
         .data[["match_mean"]] < 0.1 &
           .data[["postcode_matching"]] ~ 5L,
-        .data[["match_mean2"]]  < 0.1 &
+        .data[["match_mean2"]] < 0.1 &
           .data[["postcode_matching"]] ~ 6L,
-        .data[["match_mean"]]  < 0.1 &
+        .data[["match_mean"]] < 0.1 &
           !.data[["postcode_matching"]] ~ 7L,
-        .data[["match_mean2"]]  < 0.1 &
+        .data[["match_mean2"]] < 0.1 &
           !.data[["postcode_matching"]] ~ 8L,
-        (.data[["match_mean"]]  < 0.4 |
-           .data[["match_mean2"]] < 0.4) &
+        (.data[["match_mean"]] < 0.4 |
+          .data[["match_mean2"]] < 0.4) &
           .data[["postcode_matching"]] ~ 9L,
         (.data[["match_mean"]] < 0.4 |
-           .data[["match_mean2"]] < 0.4) &
+          .data[["match_mean2"]] < 0.4) &
           !.data[["postcode_matching"]] ~ 10L,
         is.na(.data[["ch_name"]]) &
           .data[["postcode_matching"]] ~ 11L,
@@ -194,10 +206,13 @@ fill_ch_names <- function(ch_data,
       "matching_quality_indicator_overall",
       tidyselect::everything()
     ) %>%
-    dplyr::arrange(.data[["unique_identifier"]],
-                   .data[["matching_quality_indicator_overall"]]) %>%
+    dplyr::arrange(
+      .data[["unique_identifier"]],
+      .data[["matching_quality_indicator_overall"]]
+    ) %>%
     dplyr::distinct(.data[["unique_identifier"]],
-                    .keep_all = TRUE)
+      .keep_all = TRUE
+    )
 
 
   # fix matching quality being 100, meaning bad
@@ -219,9 +234,11 @@ fill_ch_names <- function(ch_data,
   # and those episodes seem consistent, indicated by good matching quality.
   # Then, overwrite the minority of records with matching quality being 100.
   ch_pc_match <- ch_pc_match %>%
-    dplyr::arrange(.data[["chi"]],
-                   .data[["ch_name"]],
-                   .data[["matching_quality_indicator_overall"]]) %>%
+    dplyr::arrange(
+      .data[["chi"]],
+      .data[["ch_name"]],
+      .data[["matching_quality_indicator_overall"]]
+    ) %>%
     dplyr::group_by(.data[["chi"]], .data[["ch_name"]]) %>%
     dplyr::mutate(
       # Best_quality_within_group_chi_name is supposed to be minimum within a group.
@@ -238,24 +255,27 @@ fill_ch_names <- function(ch_data,
     dplyr::ungroup() %>%
     dplyr::mutate(
       overwrite_pc = (.data[["matching_quality_indicator_overall"]] == 100L &
-                        .data[["best_quality_within_group_chi_name"]] <= 10L),
+        .data[["best_quality_within_group_chi_name"]] <= 10L),
       matching_quality_indicator_overall =
         dplyr::if_else(.data[["overwrite_pc"]],
-                       13L,
-                       .data[["matching_quality_indicator_overall"]]),
+          13L,
+          .data[["matching_quality_indicator_overall"]]
+        ),
       ch_postcode_lookup =
         dplyr::if_else(.data[["overwrite_pc"]],
-                       .data[["ch_postcode_lookup_best"]],
-                       .data[["ch_postcode_lookup"]]),
-
+          .data[["ch_postcode_lookup_best"]],
+          .data[["ch_postcode_lookup"]]
+        ),
       ch_name_validated =
         dplyr::if_else(.data[["overwrite_pc"]],
-                       .data[["ch_name_validated_best"]],
-                       .data[["ch_name_validated"]]),
+          .data[["ch_name_validated_best"]],
+          .data[["ch_name_validated"]]
+        ),
       ch_name_validated_keyword =
         dplyr::if_else(.data[["overwrite_pc"]],
-                       .data[["ch_name_validated_keyword_best"]],
-                       .data[["ch_name_validated_keyword"]])
+          .data[["ch_name_validated_keyword_best"]],
+          .data[["ch_name_validated_keyword"]]
+        )
     )
 
   ### quality 14L ----
@@ -298,9 +318,10 @@ fill_ch_names <- function(ch_data,
 
   ch_pc_match <- ch_pc_match %>%
     dplyr::mutate(matching_quality_indicator_overall = dplyr::if_else(.data[["matching_quality_indicator_overall"]] == 100L &
-                                                                        .data[["postcode_matching"]],
-                                                                      14L,
-                                                                      .data[["matching_quality_indicator_overall"]])) %>%
+      .data[["postcode_matching"]],
+    14L,
+    .data[["matching_quality_indicator_overall"]]
+    )) %>%
     # now remove cases of quality being 100L for the next section:
     # ch_name matching
     dplyr::filter(.data[["matching_quality_indicator_overall"]] != 100L) %>%
@@ -321,7 +342,8 @@ fill_ch_names <- function(ch_data,
 
   ch_name_match1 <- ch_data %>%
     dplyr::anti_join(ch_pc_match,
-                     by = dplyr::join_by("unique_identifier")) %>%
+      by = dplyr::join_by("unique_identifier")
+    ) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
@@ -347,7 +369,8 @@ fill_ch_names <- function(ch_data,
 
   ch_name_match2 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by("unique_identifier")) %>%
+      by = dplyr::join_by("unique_identifier")
+    ) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
@@ -365,11 +388,14 @@ fill_ch_names <- function(ch_data,
       ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 16L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-                                                      .data[["ch_name_validated"]],
-                                                      method = "jaccard")
+        .data[["ch_name_validated"]],
+        method = "jaccard"
+      )
+    ) %>%
+    dplyr::arrange(
+      .data[["unique_identifier"]],
+      .data[["match_distance_jaccard"]]
     ) %>%
-    dplyr::arrange(.data[["unique_identifier"]],
-                   .data[["match_distance_jaccard"]]) %>%
     dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
@@ -379,7 +405,8 @@ fill_ch_names <- function(ch_data,
 
   ch_name_match3 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by("unique_identifier")) %>%
+      by = dplyr::join_by("unique_identifier")
+    ) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
@@ -397,30 +424,36 @@ fill_ch_names <- function(ch_data,
       ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 17L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-                                                      .data[["ch_name_validated"]],
-                                                      method = "jaccard")
+        .data[["ch_name_validated"]],
+        method = "jaccard"
+      )
+    ) %>%
+    dplyr::arrange(
+      .data[["unique_identifier"]],
+      .data[["match_distance_jaccard"]]
     ) %>%
-    dplyr::arrange(.data[["unique_identifier"]],
-                   .data[["match_distance_jaccard"]]) %>%
     dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match <- dplyr::bind_rows(ch_pc_match,
-                               ch_name_match1,
-                               ch_name_match2,
-                               ch_name_match3)
+  ch_match <- dplyr::bind_rows(
+    ch_pc_match,
+    ch_name_match1,
+    ch_name_match2,
+    ch_name_match3
+  )
 
   ### ch_postcode and postcode exchange, then matching, quality 18L----
   ch_pc_exchange_match1 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by("unique_identifier")) %>%
+      by = dplyr::join_by("unique_identifier")
+    ) %>%
     dplyr::mutate(
       intermediate_pc = .data[["ch_postcode"]],
       ch_postcode = .data[["postcode"]],
       postcode = .data[["ch_postcode"]],
-      ch_pc_partial = stringr::str_sub(.data[["ch_postcode"]], 1,-2),
-      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode"]], 1,-3),
-      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode"]], 1,-5)
+      ch_pc_partial = stringr::str_sub(.data[["ch_postcode"]], 1, -2),
+      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode"]], 1, -3),
+      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode"]], 1, -5)
     ) %>%
     dplyr::select(-.data[["intermediate_pc"]]) %>%
     dplyr::inner_join(
@@ -440,11 +473,14 @@ fill_ch_names <- function(ch_data,
       ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 18L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-                                                      .data[["ch_name_validated"]],
-                                                      method = "jaccard")
+        .data[["ch_name_validated"]],
+        method = "jaccard"
+      )
+    ) %>%
+    dplyr::arrange(
+      .data[["unique_identifier"]],
+      .data[["match_distance_jaccard"]]
     ) %>%
-    dplyr::arrange(.data[["unique_identifier"]],
-                   .data[["match_distance_jaccard"]]) %>%
     dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
@@ -462,14 +498,15 @@ fill_ch_names <- function(ch_data,
   # then fizzy match ch_name, and matching main part of postcode
   ch_pc_exchange_match2 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by("unique_identifier")) %>%
+      by = dplyr::join_by("unique_identifier")
+    ) %>%
     dplyr::mutate(
       intermediate_pc = .data[["ch_postcode"]],
       ch_postcode = .data[["postcode"]],
       postcode = .data[["ch_postcode"]],
-      ch_pc_partial = stringr::str_sub(.data[["ch_postcode"]], 1,-2),
-      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode"]], 1,-3),
-      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode"]], 1,-5)
+      ch_pc_partial = stringr::str_sub(.data[["ch_postcode"]], 1, -2),
+      ch_pc_partial2 = stringr::str_sub(.data[["ch_postcode"]], 1, -3),
+      ch_pc_partial3 = stringr::str_sub(.data[["ch_postcode"]], 1, -5)
     ) %>%
     dplyr::select(-.data[["intermediate_pc"]]) %>%
     dplyr::inner_join(
@@ -489,11 +526,14 @@ fill_ch_names <- function(ch_data,
       ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 19L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-                                                      .data[["ch_name_validated"]],
-                                                      method = "jaccard")
+        .data[["ch_name_validated"]],
+        method = "jaccard"
+      )
+    ) %>%
+    dplyr::arrange(
+      .data[["unique_identifier"]],
+      .data[["match_distance_jaccard"]]
     ) %>%
-    dplyr::arrange(.data[["unique_identifier"]],
-                   .data[["match_distance_jaccard"]]) %>%
     dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
@@ -514,7 +554,8 @@ fill_ch_names <- function(ch_data,
 
   ch_name_match4 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by("unique_identifier")) %>%
+      by = dplyr::join_by("unique_identifier")
+    ) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
@@ -550,7 +591,8 @@ fill_ch_names <- function(ch_data,
   # excluding those duplicated care home names.
   ch_name_match5 <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by("unique_identifier")) %>%
+      by = dplyr::join_by("unique_identifier")
+    ) %>%
     dplyr::inner_join(
       ch_name_lookup,
       by = dplyr::join_by(
@@ -568,11 +610,14 @@ fill_ch_names <- function(ch_data,
       ch_postcode = .data[["ch_postcode_lookup"]],
       matching_quality_indicator_overall = 21L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
-                                                      .data[["ch_name_validated"]],
-                                                      method = "jaccard")
+        .data[["ch_name_validated"]],
+        method = "jaccard"
+      )
+    ) %>%
+    dplyr::arrange(
+      .data[["unique_identifier"]],
+      .data[["match_distance_jaccard"]]
     ) %>%
-    dplyr::arrange(.data[["unique_identifier"]],
-                   .data[["match_distance_jaccard"]]) %>%
     dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
@@ -592,7 +637,8 @@ fill_ch_names <- function(ch_data,
   # add 100L for non-matching episodes
   ch_no_match <- ch_data %>%
     dplyr::anti_join(ch_match,
-                     by = dplyr::join_by("unique_identifier")) %>%
+      by = dplyr::join_by("unique_identifier")
+    ) %>%
     # dplyr::distinct(ch_name, .keep_all = TRUE) %>%
     dplyr::mutate(
       matching_quality_indicator_overall = 100L,
@@ -609,22 +655,29 @@ fill_ch_names <- function(ch_data,
   # episodes sharing common chi
   # and ch_name with those episodes with good match quality
   ch_data_final <- dplyr::bind_rows(ch_match, ch_no_match) %>%
-    dplyr::arrange(.data[["chi"]], .data[["ch_name_keyword"]],
-                   .data[["matching_quality_indicator_overall"]]) %>%
-    dplyr::group_by(.data[["chi"]],
-                    .data[["ch_name_keyword"]]) %>%
+    dplyr::arrange(
+      .data[["chi"]], .data[["ch_name_keyword"]],
+      .data[["matching_quality_indicator_overall"]]
+    ) %>%
+    dplyr::group_by(
+      .data[["chi"]],
+      .data[["ch_name_keyword"]]
+    ) %>%
     dplyr::mutate(
       same_ch_name = (dplyr::first(.data[["matching_quality_indicator_overall"]]) <= 10L &
-                        .data[["matching_quality_indicator_overall"]] == 100L),
+        .data[["matching_quality_indicator_overall"]] == 100L),
       ch_name = dplyr::if_else(.data[["same_ch_name"]],
-                               dplyr::first(.data[["ch_name"]]),
-                               .data[["ch_name"]]),
+        dplyr::first(.data[["ch_name"]]),
+        .data[["ch_name"]]
+      ),
       ch_postcode = dplyr::if_else(.data[["same_ch_name"]],
-                                   dplyr::first(.data[["ch_postcode"]]),
-                                   .data[["ch_postcode"]]),
+        dplyr::first(.data[["ch_postcode"]]),
+        .data[["ch_postcode"]]
+      ),
       matching_quality_indicator_overall = dplyr::if_else(.data[["same_ch_name"]],
-                                                          30L,
-                                                          .data[["matching_quality_indicator_overall"]])
+        30L,
+        .data[["matching_quality_indicator_overall"]]
+      )
     ) %>%
     dplyr::ungroup() %>%
     dplyr::arrange(.data[["unique_identifier"]]) %>%
@@ -671,7 +724,7 @@ fill_ch_names <- function(ch_data,
   )
 
   return(ch_data_final %>%
-           dplyr::select(dplyr::all_of(col_output)))
+    dplyr::select(dplyr::all_of(col_output)))
 }
 
 

From d5393b6f4428c2c56483bc45d1c8b2f944494957 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 30 Apr 2024 10:45:01 +0000
Subject: [PATCH 064/186] [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/8893412405/attempts/1
Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/946#issuecomment-2084965857

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
---
 .github/actions/spelling/excludes.txt | 1 +
 .github/actions/spelling/expect.txt   | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/.github/actions/spelling/excludes.txt b/.github/actions/spelling/excludes.txt
index 59fab4259..e508dc7ad 100644
--- a/.github/actions/spelling/excludes.txt
+++ b/.github/actions/spelling/excludes.txt
@@ -57,4 +57,5 @@
 ^\Q.github/workflows/spelling.yml\E$
 ^\Q/tmp/check-spelling/Public-Health-Scotland/source-linkage-files/commits/f13483ca341940e8549dc23c930da2f23dd0ac43.message\E$
 ^\Q/tmp/check-spelling/Public-Health-Scotland/source-linkage-files/pull-request/613/summary.txt\E$
+^\Qinst/WORDLIST\E$
 ignore$
diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 016f2a2d5..5b4a58f5c 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -1,6 +1,6 @@
 acc
-accom
 Accom
+accom
 admloc
 admtype
 adpe
@@ -15,8 +15,8 @@ birthtime
 bodyloc
 boxi
 callr
-canx
 Canx
+canx
 carehome
 careinspectorate
 categorises
@@ -245,6 +245,7 @@ unicode
 updown
 upi
 vline
+WORDLIST
 workflows
 xintercept
 xlsx

From 5d27e64b56ba604954040bf0a7c776cc397340a0 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 30 Apr 2024 11:51:56 +0100
Subject: [PATCH 065/186] spelling check does not seem to recognise case variants

---
 .github/actions/spelling/expect.txt | 2 --
 1 file changed, 2 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 5b4a58f5c..6c894d958 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -1,6 +1,5 @@
 acc
 Accom
-accom
 admloc
 admtype
 adpe
@@ -16,7 +15,6 @@ bodyloc
 boxi
 callr
 Canx
-canx
 carehome
 careinspectorate
 categorises

From 6baad298865ed67955c9d35c0ff685f7ca8ba102 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 30 Apr 2024 16:43:30 +0100
Subject: [PATCH 066/186] only select columns we want in ltc raw data

---
 R/read_lookup_ltc.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/read_lookup_ltc.R b/R/read_lookup_ltc.R
index 7eb83a434..68aa6e1ee 100644
--- a/R/read_lookup_ltc.R
+++ b/R/read_lookup_ltc.R
@@ -34,7 +34,7 @@ read_lookup_ltc <- function(file_path = get_it_ltc_path()) {
     )
   ) %>%
     # Rename variables
-    dplyr::rename(
+    dplyr::select(
       chi = "PATIENT_UPI [C]",
       postcode = "PATIENT_POSTCODE [C]",
       arth_date = "ARTHRITIS_DIAG_DATE",

From 211c8a765bc6bdf8ff9125fea6d1fa6e80bb0df9 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 30 Apr 2024 15:50:18 +0000
Subject: [PATCH 067/186] [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/8897746003/attempts/1
Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/947#issuecomment-2085735144

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
---
 .github/actions/spelling/expect.txt | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index a4a34a58b..906cdff91 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -7,10 +7,13 @@ adtf
 arrivalmode
 arth
 atlassian
+atrialfib
 attendcat
 aut
 bedday
+BFO
 birthtime
+bloodbfo
 bodyloc
 boxi
 callr
@@ -22,6 +25,8 @@ cattend
 ccyy
 cdn
 cennum
+CEREBROVASC
+chd
 chp
 chpstart
 cij
@@ -33,6 +38,7 @@ codecov
 comhairle
 commhosp
 congen
+copd
 costincdnas
 costmonthnum
 costsfy
@@ -40,6 +46,7 @@ covr
 cph
 createslf
 customise
+cvd
 dataframe
 datamart
 datazone
@@ -67,6 +74,7 @@ dplyr
 dsn
 dtplyr
 dvprod
+endomet
 envir
 fcase
 feb
@@ -94,6 +102,7 @@ hbtreatcode
 hbtreatname
 hci
 hcp
+hefailure
 hhg
 hjust
 hms
@@ -154,6 +163,7 @@ openxlsx
 orcid
 outfile
 pandoc
+parkinsons
 patflow
 pattype
 pcec
@@ -184,6 +194,7 @@ readr
 readxl
 reasonwait
 recid
+refailure
 reflectoring
 refsource
 renviron

From 06c63bbc28a897525cfade4817239bea8bc6dd4f Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 30 Apr 2024 17:21:07 +0100
Subject: [PATCH 068/186] fix care home cancelled dates that might be 1900-01-01

---
 R/fill_ch_names.R | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index f0cb7d04b..365e77fdc 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -61,6 +61,15 @@ fill_ch_names <- function(ch_data,
       ch_date_cancelled = "DateCanx",
       ch_active = tidyselect::contains("ServiceStatusAt")
     ) %>%
+    # some care home cancelled dates incorrectly are "1900-01-01"
+    # assume the cancelled dates to be the "current date", or equivalently NA
+    dplyr::mutate(
+      ch_date_cancelled = dplyr::if_else(
+        .data[["ch_date_cancelled"]] == as.Date("1900-01-01"),
+        NA %>% as.Date(),
+        .data[["ch_date_cancelled"]]
+      )
+    ) %>%
     dplyr::filter(is.na(.data[["ch_date_cancelled"]]) |
       (.data[["ch_date_cancelled"]] >= start_fy("1718"))) %>%
     # Standardise the postcode and CH name

From 86efa00a144cff0947003d3273845050991d6e08 Mon Sep 17 00:00:00 2001
From: marjom02 <megan.mcnicol2@nhs.scot>
Date: Fri, 3 May 2024 12:02:26 +0100
Subject: [PATCH 069/186] Restore the latest sc_id code, which was overwritten
 after the March update

---
 R/replace_sc_id_with_latest.R | 30 +++++++++++++++++++-----------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R
index 8e815d46b..db1dc578c 100644
--- a/R/replace_sc_id_with_latest.R
+++ b/R/replace_sc_id_with_latest.R
@@ -7,31 +7,40 @@ replace_sc_id_with_latest <- function(data) {
   # Check for required variables
   check_variables_exist(
     data,
-    c("sending_location", "social_care_id", "chi", "latest_flag")
+    c("sending_location", "social_care_id", "chi", "period")
   )
 
   # select variables we need
   filter_data <- data %>%
     dplyr::select(
-      "sending_location", "social_care_id", "chi", "latest_flag"
+      "sending_location", "social_care_id", "chi", "period"
     ) %>%
-    dplyr::filter(!(is.na(.data$chi))) %>%
-    dplyr::distinct()
+    dplyr::filter(!(is.na(.data$chi)))
 
   change_sc_id <- filter_data %>%
-    dplyr::filter(.data$latest_flag == 1) %>%
+    # Sort (by sending_location, chi and period) for unique chi/sending location
+    dplyr::arrange(
+      .data$sending_location,
+      .data$chi,
+      dplyr::desc(.data$period)
+    ) %>%
+    # Find the latest sc_id for each chi/sending location by keeping latest period
+    dplyr::distinct(
+      .data$sending_location,
+      .data$chi,
+      .keep_all = TRUE
+    ) %>%
     # Rename for latest sc id
     dplyr::rename(latest_sc_id = "social_care_id") %>%
-    # drop latest_flag for matching
-    dplyr::select(-"latest_flag")
+    # drop period for matching
+    dplyr::select(-"period")
 
   return_data <- change_sc_id %>%
     # Match back onto data
     dplyr::right_join(data,
-      by = c("sending_location", "chi"),
-      multiple = "all"
+                      by = c("sending_location", "chi"),
+                      multiple = "all"
     ) %>%
-    dplyr::filter(!(is.na(.data$period))) %>%
     # Overwrite sc id with the latest
     dplyr::mutate(
       social_care_id = dplyr::if_else(
@@ -40,6 +49,5 @@ replace_sc_id_with_latest <- function(data) {
         .data$social_care_id
       )
     )
-
   return(return_data)
 }

From d571cb623e6d2e80864027532f11c66ef65c54a4 Mon Sep 17 00:00:00 2001
From: SwiftySalmon <SwiftySalmon@users.noreply.github.com>
Date: Fri, 3 May 2024 11:15:15 +0000
Subject: [PATCH 070/186] Style code

---
 R/replace_sc_id_with_latest.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R
index db1dc578c..9478ebefe 100644
--- a/R/replace_sc_id_with_latest.R
+++ b/R/replace_sc_id_with_latest.R
@@ -38,8 +38,8 @@ replace_sc_id_with_latest <- function(data) {
   return_data <- change_sc_id %>%
     # Match back onto data
     dplyr::right_join(data,
-                      by = c("sending_location", "chi"),
-                      multiple = "all"
+      by = c("sending_location", "chi"),
+      multiple = "all"
     ) %>%
     # Overwrite sc id with the latest
     dplyr::mutate(

From a2849b956db9527dcf642868966469cb3d3df50b Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 6 May 2024 16:44:26 +0100
Subject: [PATCH 071/186] add checking ch_postcode in England, quality 15

---
 R/fill_ch_names.R        | 298 ++++++++++++++++++++++-----------------
 R/get_slf_lookup_paths.R |  11 ++
 2 files changed, 178 insertions(+), 131 deletions(-)

diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index 365e77fdc..223bfe43c 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -11,29 +11,28 @@
 #' Home names and postcodes, as a [tibble][tibble::tibble-package].
 fill_ch_names <- function(ch_data,
                           ch_name_lookup_path = get_slf_ch_name_lookup_path(),
-                          spd_path = get_spd_path()) {
+                          spd_path = get_spd_path(),
+                          uk_pc_list = get_uk_postcode_path()) {
   # fix the issue "no visible binding for global variable x, y"
   x <- y <- NULL
 
+  spd_list <- dplyr::pull(read_file(spd_path, col_select = "pc7"),
+                          "pc7")
+  uk_pc_list <- dplyr::pull(read_file(uk_pc_list))
+
   ch_data <- ch_data %>%
     # Make the care home name more uniform
     dplyr::mutate(ch_name = clean_up_free_text(.data[["ch_name"]])) %>%
     # correct postcode formatting
     dplyr::mutate(
-      dplyr::across(
-        dplyr::contains("postcode"),
-        phsmethods::format_postcode
-      ),
+      dplyr::across(dplyr::contains("postcode"),
+                    phsmethods::format_postcode),
       # Replace invalid postcode with NA
-      # Get a list of confirmed valid Scottish postcodes from the SPD
-      ch_postcode = dplyr::if_else(
-        .data[["ch_postcode"]] %in% dplyr::pull(
-          read_file(spd_path, col_select = "pc7"),
-          "pc7"
-        ),
-        .data[["ch_postcode"]],
-        NA_character_
-      ),
+      # check nations where ch is located
+      ch_pc_nation = dplyr::case_when(
+        .data[["ch_postcode"]] %in% spd_list ~ "sco",
+        .data[["ch_postcode"]] %in% uk_pc_list ~ "uk", # presumably most in England
+        .default = NA),
       ch_name_keyword = ch_name_extract_keyword(.data[["ch_name"]])
     ) %>%
     # add unique identifier
@@ -119,7 +118,7 @@ fill_ch_names <- function(ch_data,
 
   ## postcode matching process ----
   # Generate some metrics for how the submitted names connect to the valid names
-  ch_pc_match <- ch_data %>%
+  ch_pc_match_quality1to12 <- ch_data %>%
     dplyr::left_join(
       ch_name_lookup,
       by = "ch_pc_partial",
@@ -155,7 +154,7 @@ fill_ch_names <- function(ch_data,
     dplyr::mutate(
       postcode_matching = (.data[["ch_postcode"]] == .data[["ch_postcode_lookup"]]),
       ### quality 1L-12L ----
-      matching_quality_indicator_overall = dplyr::case_when(
+      matching_quality_indicator = dplyr::case_when(
         # 1 to 12, from perfect to ok.
         # 100L, terrible.
 
@@ -212,12 +211,12 @@ fill_ch_names <- function(ch_data,
       "latest_close_date",
       "ch_active",
       "postcode_matching",
-      "matching_quality_indicator_overall",
+      "matching_quality_indicator",
       tidyselect::everything()
     ) %>%
     dplyr::arrange(
       .data[["unique_identifier"]],
-      .data[["matching_quality_indicator_overall"]]
+      .data[["matching_quality_indicator"]]
     ) %>%
     dplyr::distinct(.data[["unique_identifier"]],
       .keep_all = TRUE
@@ -226,7 +225,7 @@ fill_ch_names <- function(ch_data,
 
   # fix matching quality being 100, meaning bad
   # After this great process,
-  # there are around 7.5% with matching_quality_indicator_overall being 100
+  # there are around 7.5% with matching_quality_indicator being 100
   # This means that
   # cases coming from postcode matching does not matching names at all
   # But some of them may vaguely matching name
@@ -240,20 +239,21 @@ fill_ch_names <- function(ch_data,
   # For some ch records, for a chi number,
   # ch_name are consistent while
   # ch_postcode are different,
-  # and those episodes seem consistent, indicated by good matching quality.
+  # and those episodes seem consistent
+  # based on ch_name and indicated by good matching quality.
   # Then, overwrite the minority of records with matching quality being 100.
-  ch_pc_match <- ch_pc_match %>%
+  ch_pc_match_quality1to13 <- ch_pc_match_quality1to12 %>%
     dplyr::arrange(
       .data[["chi"]],
       .data[["ch_name"]],
-      .data[["matching_quality_indicator_overall"]]
+      .data[["matching_quality_indicator"]]
     ) %>%
     dplyr::group_by(.data[["chi"]], .data[["ch_name"]]) %>%
     dplyr::mutate(
       # Best_quality_within_group_chi_name is supposed to be minimum within a group.
-      # Since we sort matching_quality_indicator_overall, first is ok.
+      # Since we sort matching_quality_indicator, first is ok.
       best_quality_within_group_chi_name =
-        dplyr::first(.data[["matching_quality_indicator_overall"]]),
+        dplyr::first(.data[["matching_quality_indicator"]]),
       ch_postcode_lookup_best =
         dplyr::first(.data[["ch_postcode_lookup"]]),
       ch_name_validated_best =
@@ -262,13 +262,15 @@ fill_ch_names <- function(ch_data,
         dplyr::first(.data[["ch_name_validated_keyword"]])
     ) %>%
     dplyr::ungroup() %>%
+    # for those consistent ch episodes for a patient
+    # overwrite ch information based on other episodes with good quality
     dplyr::mutate(
-      overwrite_pc = (.data[["matching_quality_indicator_overall"]] == 100L &
+      overwrite_pc = (.data[["matching_quality_indicator"]] == 100L &
         .data[["best_quality_within_group_chi_name"]] <= 10L),
-      matching_quality_indicator_overall =
+      matching_quality_indicator =
         dplyr::if_else(.data[["overwrite_pc"]],
           13L,
-          .data[["matching_quality_indicator_overall"]]
+          .data[["matching_quality_indicator"]]
         ),
       ch_postcode_lookup =
         dplyr::if_else(.data[["overwrite_pc"]],
@@ -286,6 +288,7 @@ fill_ch_names <- function(ch_data,
           .data[["ch_name_validated_keyword"]]
         )
     )
+  rm(ch_pc_match_quality1to12)
 
   ### quality 14L ----
   # if ch_postcode perfect match,
@@ -293,7 +296,7 @@ fill_ch_names <- function(ch_data,
 
   col_to_select <- c(
     "unique_identifier",
-    "matching_quality_indicator_overall",
+    "matching_quality_indicator",
     "sending_location",
     "latest_sc_id",
     "chi",
@@ -322,18 +325,19 @@ fill_ch_names <- function(ch_data,
     "latest_close_date",
     "ch_name_old",
     "ch_postcode_old",
-    "ch_name_keyword"
+    "ch_name_keyword",
+    "ch_pc_nation"
   )
 
-  ch_pc_match <- ch_pc_match %>%
-    dplyr::mutate(matching_quality_indicator_overall = dplyr::if_else(.data[["matching_quality_indicator_overall"]] == 100L &
+  ch_pc_match_quality1to14 <- ch_pc_match_quality1to13 %>%
+    dplyr::mutate(matching_quality_indicator = dplyr::if_else(.data[["matching_quality_indicator"]] == 100L &
       .data[["postcode_matching"]],
     14L,
-    .data[["matching_quality_indicator_overall"]]
+    .data[["matching_quality_indicator"]]
     )) %>%
-    # now remove cases of quality being 100L for the next section:
-    # ch_name matching
-    dplyr::filter(.data[["matching_quality_indicator_overall"]] != 100L) %>%
+    # now remove cases of quality being 100L for the next sections:
+    # ch_name matching, english ch, others
+    dplyr::filter(.data[["matching_quality_indicator"]] != 100L) %>%
     dplyr::mutate(
       ch_name_old = .data[["ch_name"]],
       ch_postcode_old = .data[["ch_postcode"]],
@@ -341,16 +345,44 @@ fill_ch_names <- function(ch_data,
       ch_postcode = .data[["ch_postcode_lookup"]]
     ) %>%
     dplyr::select(dplyr::all_of(col_to_select))
+  rm(ch_pc_match_quality1to13)
+
+  # fix other matching, matching quality implicit being 100L
+
+  ## English care home ----
+  ### quality 15L ----
+  # patient will have a Scottish postcode
+  # but the ch postcode will be in England.
+  ch_eng_match_quality15 <- ch_data %>%
+    dplyr::anti_join(
+      ch_pc_match_quality1to14,
+      by = dplyr::join_by("unique_identifier")
+    ) %>%
+    dplyr::filter(ch_pc_nation == "uk") %>%
+    # add columns for English care homes
+    dplyr::mutate(
+      matching_quality_indicator = 15L,
+      ch_name_old = .data[["ch_name"]],
+      ch_postcode_old = .data[["ch_postcode"]],
+      ch_name_validated = NA_character_,
+      open_interval = NA,
+      latest_close_date = NA,
+      ch_date_registered = NA
+    ) %>%
+    dplyr::select(dplyr::all_of(col_to_select))
+
 
-  ## matching by ch_name, quality 15L-21L ----
-  ### perfect matching by ch_name, and main part of postcode, quality 15L ----
+  ## matching by ch_name, quality 16L-22L ----
+  ### quality 16L ----
+  # perfect matching by ch_name, and main part of postcode
   # ch_name matching, then overwrite postcode from ch_lookup
-  # 15L means perfect matching name,
+  # 16L means perfect matching name,
   # and relevant dates align,
   # but not the main part of the postcode, say "EH12"
 
-  ch_name_match1 <- ch_data %>%
-    dplyr::anti_join(ch_pc_match,
+  ch_name_match_quality16 <- ch_data %>%
+    dplyr::anti_join(dplyr::bind_rows(ch_pc_match_quality1to14,
+                                      ch_eng_match_quality15),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::inner_join(
@@ -369,15 +401,16 @@ fill_ch_names <- function(ch_data,
       ch_name_validated = .data[["ch_name"]],
       # ch_name_validated is omitted because of join_by(), add back
       ch_postcode = .data[["ch_postcode_lookup"]],
-      matching_quality_indicator_overall = 15L
+      matching_quality_indicator = 16L
     ) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ### fizzy matching by ch_name, and matching main part of postcode, quality 16L ----
-  ch_match <- dplyr::bind_rows(ch_pc_match, ch_name_match1)
-
-  ch_name_match2 <- ch_data %>%
-    dplyr::anti_join(ch_match,
+  ### quality 17L ----
+  # fizzy matching by ch_name, and matching main part of postcode
+  ch_name_match_quality17 <- ch_data %>%
+    dplyr::anti_join(dplyr::bind_rows(ch_pc_match_quality1to14,
+                                      ch_eng_match_quality15,
+                                      ch_name_match_quality16),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::inner_join(
@@ -395,7 +428,7 @@ fill_ch_names <- function(ch_data,
       ch_postcode_old = .data[["ch_postcode"]],
       ch_name = .data[["ch_name_validated"]],
       ch_postcode = .data[["ch_postcode_lookup"]],
-      matching_quality_indicator_overall = 16L,
+      matching_quality_indicator = 17L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
         .data[["ch_name_validated"]],
         method = "jaccard"
@@ -408,12 +441,13 @@ fill_ch_names <- function(ch_data,
     dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ### fizzy matching by ch_name, and same city, quality 17L ----
-  ch_match <-
-    dplyr::bind_rows(ch_pc_match, ch_name_match1, ch_name_match2)
-
-  ch_name_match3 <- ch_data %>%
-    dplyr::anti_join(ch_match,
+  ### quality 18L ----
+  ### fizzy matching by ch_name, and same city
+  ch_name_match_quality18 <- ch_data %>%
+    dplyr::anti_join(dplyr::bind_rows(ch_pc_match_quality1to14,
+                                      ch_eng_match_quality15,
+                                      ch_name_match_quality16,
+                                      ch_name_match_quality17),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::inner_join(
@@ -431,7 +465,7 @@ fill_ch_names <- function(ch_data,
       ch_postcode_old = .data[["ch_postcode"]],
       ch_name = .data[["ch_name_validated"]],
       ch_postcode = .data[["ch_postcode_lookup"]],
-      matching_quality_indicator_overall = 17L,
+      matching_quality_indicator = 18L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
         .data[["ch_name_validated"]],
         method = "jaccard"
@@ -444,16 +478,16 @@ fill_ch_names <- function(ch_data,
     dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match <- dplyr::bind_rows(
-    ch_pc_match,
-    ch_name_match1,
-    ch_name_match2,
-    ch_name_match3
-  )
-
-  ### ch_postcode and postcode exchange, then matching, quality 18L----
-  ch_pc_exchange_match1 <- ch_data %>%
-    dplyr::anti_join(ch_match,
+  ### quality 19L----
+  # ch_postcode and postcode exchange, then matching
+  ch_pc_ex_match_quality19 <- ch_data %>%
+    dplyr::anti_join(dplyr::bind_rows(
+      ch_pc_match_quality1to14,
+      ch_eng_match_quality15,
+      ch_name_match_quality16,
+      ch_name_match_quality17,
+      ch_name_match_quality18
+    ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::mutate(
@@ -480,7 +514,7 @@ fill_ch_names <- function(ch_data,
       ch_postcode_old = .data[["ch_postcode"]],
       ch_name = .data[["ch_name_validated"]],
       ch_postcode = .data[["ch_postcode_lookup"]],
-      matching_quality_indicator_overall = 18L,
+      matching_quality_indicator = 19L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
         .data[["ch_name_validated"]],
         method = "jaccard"
@@ -493,20 +527,19 @@ fill_ch_names <- function(ch_data,
     dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match <- dplyr::bind_rows(
-    ch_pc_match,
-    ch_name_match1,
-    ch_name_match2,
-    ch_name_match3,
-    ch_pc_exchange_match1
-  )
-
   ## Other matching processes ----
-  ### quality 19L----
+  ### quality 20L ----
   # ch_postcode and postcode exchange,
   # then fizzy match ch_name, and matching main part of postcode
-  ch_pc_exchange_match2 <- ch_data %>%
-    dplyr::anti_join(ch_match,
+  ch_pc_ex_match_quality20 <- ch_data %>%
+    dplyr::anti_join(dplyr::bind_rows(
+      ch_pc_match_quality1to14,
+      ch_eng_match_quality15,
+      ch_name_match_quality16,
+      ch_name_match_quality17,
+      ch_name_match_quality18,
+      ch_pc_ex_match_quality19
+    ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::mutate(
@@ -533,7 +566,7 @@ fill_ch_names <- function(ch_data,
       ch_postcode_old = .data[["ch_postcode"]],
       ch_name = .data[["ch_name_validated"]],
       ch_postcode = .data[["ch_postcode_lookup"]],
-      matching_quality_indicator_overall = 19L,
+      matching_quality_indicator = 20L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
         .data[["ch_name_validated"]],
         method = "jaccard"
@@ -546,23 +579,21 @@ fill_ch_names <- function(ch_data,
     dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match <- dplyr::bind_rows(
-    ch_pc_match,
-    ch_name_match1,
-    ch_name_match2,
-    ch_name_match3,
-    ch_pc_exchange_match1,
-    ch_pc_exchange_match2
-  )
-
-
-  ### quality 20L----
+  ### quality 21L----
   # perfect match care home name, regardless of postcode,
   # excluding those duplicated care home names.
   unique_ch_name <- unique(ch_name_lookup$ch_name_validated)
 
-  ch_name_match4 <- ch_data %>%
-    dplyr::anti_join(ch_match,
+  ch_name_match_quality21 <- ch_data %>%
+    dplyr::anti_join(dplyr::bind_rows(
+      ch_pc_match_quality1to14,
+      ch_eng_match_quality15,
+      ch_name_match_quality16,
+      ch_name_match_quality17,
+      ch_name_match_quality18,
+      ch_pc_ex_match_quality19,
+      ch_pc_ex_match_quality20
+    ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::inner_join(
@@ -581,25 +612,24 @@ fill_ch_names <- function(ch_data,
       # add ch_name_validated back since omitted in join_by()
       ch_name_validated = .data[["ch_name"]],
       ch_postcode = .data[["ch_postcode_lookup"]],
-      matching_quality_indicator_overall = 20L
+      matching_quality_indicator = 21L
     ) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match <- dplyr::bind_rows(
-    ch_pc_match,
-    ch_name_match1,
-    ch_name_match2,
-    ch_name_match3,
-    ch_pc_exchange_match1,
-    ch_pc_exchange_match2,
-    ch_name_match4
-  )
-
-  ### quality 21L----
+  ### quality 22L----
   # fizzy match care home name, regardless of postcode,
   # excluding those duplicated care home names.
-  ch_name_match5 <- ch_data %>%
-    dplyr::anti_join(ch_match,
+  ch_name_match_quality22 <- ch_data %>%
+    dplyr::anti_join(dplyr::bind_rows(
+      ch_pc_match_quality1to14,
+      ch_eng_match_quality15,
+      ch_name_match_quality16,
+      ch_name_match_quality17,
+      ch_name_match_quality18,
+      ch_pc_ex_match_quality19,
+      ch_pc_ex_match_quality20,
+      ch_name_match_quality21
+    ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::inner_join(
@@ -617,7 +647,7 @@ fill_ch_names <- function(ch_data,
       ch_postcode_old = .data[["ch_postcode"]],
       ch_name = .data[["ch_name_validated"]],
       ch_postcode = .data[["ch_postcode_lookup"]],
-      matching_quality_indicator_overall = 21L,
+      matching_quality_indicator = 22L,
       match_distance_jaccard = stringdist::stringdist(.data[["ch_name"]],
         .data[["ch_name_validated"]],
         method = "jaccard"
@@ -630,27 +660,24 @@ fill_ch_names <- function(ch_data,
     dplyr::distinct(.data[["unique_identifier"]], .keep_all = TRUE) %>%
     dplyr::select(dplyr::all_of(col_to_select))
 
-  ch_match <- dplyr::bind_rows(
-    ch_pc_match,
-    ch_name_match1,
-    ch_name_match2,
-    ch_name_match3,
-    ch_pc_exchange_match1,
-    ch_pc_exchange_match2,
-    ch_name_match4,
-    ch_name_match5
-  )
-
-
-
   # add 100L for non-matching episodes
-  ch_no_match <- ch_data %>%
-    dplyr::anti_join(ch_match,
+  ch_no_match_quality100 <- ch_data %>%
+    dplyr::anti_join(dplyr::bind_rows(
+      ch_pc_match_quality1to14,
+      ch_eng_match_quality15,
+      ch_name_match_quality16,
+      ch_name_match_quality17,
+      ch_name_match_quality18,
+      ch_pc_ex_match_quality19,
+      ch_pc_ex_match_quality20,
+      ch_name_match_quality21,
+      ch_name_match_quality22
+    ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     # dplyr::distinct(ch_name, .keep_all = TRUE) %>%
     dplyr::mutate(
-      matching_quality_indicator_overall = 100L,
+      matching_quality_indicator = 100L,
       ch_name_old = .data[["ch_name"]],
       ch_postcode_old = .data[["ch_postcode"]],
       ch_name_validated = NA_character_,
@@ -663,18 +690,27 @@ fill_ch_names <- function(ch_data,
   ### quality 30L----
   # episodes sharing common chi
   # and ch_name with those episodes with good match quality
-  ch_data_final <- dplyr::bind_rows(ch_match, ch_no_match) %>%
-    dplyr::arrange(
-      .data[["chi"]], .data[["ch_name_keyword"]],
-      .data[["matching_quality_indicator_overall"]]
-    ) %>%
+  ch_data_final <- dplyr::bind_rows(
+    ch_pc_match_quality1to14,
+    ch_eng_match_quality15,
+    ch_name_match_quality16,
+    ch_name_match_quality17,
+    ch_name_match_quality18,
+    ch_pc_ex_match_quality19,
+    ch_pc_ex_match_quality20,
+    ch_name_match_quality21,
+    ch_name_match_quality22,
+    ch_no_match_quality100
+  ) %>%
+    dplyr::arrange(.data[["chi"]], .data[["ch_name_keyword"]],
+                   .data[["matching_quality_indicator"]]) %>%
     dplyr::group_by(
       .data[["chi"]],
       .data[["ch_name_keyword"]]
     ) %>%
     dplyr::mutate(
-      same_ch_name = (dplyr::first(.data[["matching_quality_indicator_overall"]]) <= 10L &
-        .data[["matching_quality_indicator_overall"]] == 100L),
+      same_ch_name = (dplyr::first(.data[["matching_quality_indicator"]]) <= 10L &
+        .data[["matching_quality_indicator"]] == 100L),
       ch_name = dplyr::if_else(.data[["same_ch_name"]],
         dplyr::first(.data[["ch_name"]]),
         .data[["ch_name"]]
@@ -683,9 +719,9 @@ fill_ch_names <- function(ch_data,
         dplyr::first(.data[["ch_postcode"]]),
         .data[["ch_postcode"]]
       ),
-      matching_quality_indicator_overall = dplyr::if_else(.data[["same_ch_name"]],
+      matching_quality_indicator = dplyr::if_else(.data[["same_ch_name"]],
         30L,
-        .data[["matching_quality_indicator_overall"]]
+        .data[["matching_quality_indicator"]]
       )
     ) %>%
     dplyr::ungroup() %>%
@@ -694,7 +730,7 @@ fill_ch_names <- function(ch_data,
 
   ## For any future amendment or quality check ----
   # ch_data_final %>%
-  #   dplyr::group_by(matching_quality_indicator_overall) %>%
+  #   dplyr::group_by(matching_quality_indicator) %>%
   #   dplyr::summarise(n = dplyr::n()) %>%
   #   dplyr::mutate(pct = n/sum(n)*100) %>%
   #   print(n=100) %>%
diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R
index 1be1c9781..c3d70ddfe 100644
--- a/R/get_slf_lookup_paths.R
+++ b/R/get_slf_lookup_paths.R
@@ -18,6 +18,17 @@ get_slf_postcode_path <- function(update = latest_update(), ...) {
   )
 }
 
+#' get uk postcode list file path
+#' @description get uk postcode list file
+#' @family lookup file paths
+get_uk_postcode_path <- function(){
+  get_file_path(
+    directory = fs::path(get_slf_dir(), "Lookups"),
+    file_name = "uk_postcode_list",
+    ext = "parquet"
+  )
+}
+
 #' SLF GP Lookup File Path
 #'
 #' @description Get the full path to the SLF GP practice lookup

From 7627ed85d1e0d4a07ca154ae828d5c368042586c Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Mon, 6 May 2024 15:46:22 +0000
Subject: [PATCH 072/186] Update documentation

---
 man/fill_ch_names.Rd        |  3 ++-
 man/get_gpprac_ref_path.Rd  |  3 ++-
 man/get_locality_path.Rd    |  3 ++-
 man/get_lookups_dir.Rd      |  3 ++-
 man/get_pop_path.Rd         |  3 ++-
 man/get_simd_path.Rd        |  3 ++-
 man/get_spd_path.Rd         |  3 ++-
 man/get_uk_postcode_path.Rd | 21 +++++++++++++++++++++
 8 files changed, 35 insertions(+), 7 deletions(-)
 create mode 100644 man/get_uk_postcode_path.Rd

diff --git a/man/fill_ch_names.Rd b/man/fill_ch_names.Rd
index 361cdc215..97d706d92 100644
--- a/man/fill_ch_names.Rd
+++ b/man/fill_ch_names.Rd
@@ -7,7 +7,8 @@
 fill_ch_names(
   ch_data,
   ch_name_lookup_path = get_slf_ch_name_lookup_path(),
-  spd_path = get_spd_path()
+  spd_path = get_spd_path(),
+  uk_pc_list = get_uk_postcode_path()
 )
 }
 \arguments{
diff --git a/man/get_gpprac_ref_path.Rd b/man/get_gpprac_ref_path.Rd
index a61a4b560..cc2a8fa92 100644
--- a/man/get_gpprac_ref_path.Rd
+++ b/man/get_gpprac_ref_path.Rd
@@ -21,6 +21,7 @@ Other lookup file paths:
 \code{\link{get_lookups_dir}()},
 \code{\link{get_pop_path}()},
 \code{\link{get_simd_path}()},
-\code{\link{get_spd_path}()}
+\code{\link{get_spd_path}()},
+\code{\link{get_uk_postcode_path}()}
 }
 \concept{lookup file paths}
diff --git a/man/get_locality_path.Rd b/man/get_locality_path.Rd
index 65f809b2f..3d124a236 100644
--- a/man/get_locality_path.Rd
+++ b/man/get_locality_path.Rd
@@ -23,6 +23,7 @@ Other lookup file paths:
 \code{\link{get_lookups_dir}()},
 \code{\link{get_pop_path}()},
 \code{\link{get_simd_path}()},
-\code{\link{get_spd_path}()}
+\code{\link{get_spd_path}()},
+\code{\link{get_uk_postcode_path}()}
 }
 \concept{lookup file paths}
diff --git a/man/get_lookups_dir.Rd b/man/get_lookups_dir.Rd
index 4e90472bf..a6cb794a8 100644
--- a/man/get_lookups_dir.Rd
+++ b/man/get_lookups_dir.Rd
@@ -18,7 +18,8 @@ Other lookup file paths:
 \code{\link{get_locality_path}()},
 \code{\link{get_pop_path}()},
 \code{\link{get_simd_path}()},
-\code{\link{get_spd_path}()}
+\code{\link{get_spd_path}()},
+\code{\link{get_uk_postcode_path}()}
 
 Other directories: 
 \code{\link{get_dev_dir}()},
diff --git a/man/get_pop_path.Rd b/man/get_pop_path.Rd
index 751ab0275..37e69596e 100644
--- a/man/get_pop_path.Rd
+++ b/man/get_pop_path.Rd
@@ -29,6 +29,7 @@ Other lookup file paths:
 \code{\link{get_locality_path}()},
 \code{\link{get_lookups_dir}()},
 \code{\link{get_simd_path}()},
-\code{\link{get_spd_path}()}
+\code{\link{get_spd_path}()},
+\code{\link{get_uk_postcode_path}()}
 }
 \concept{lookup file paths}
diff --git a/man/get_simd_path.Rd b/man/get_simd_path.Rd
index 15e58929e..9e45ea20b 100644
--- a/man/get_simd_path.Rd
+++ b/man/get_simd_path.Rd
@@ -24,6 +24,7 @@ Other lookup file paths:
 \code{\link{get_locality_path}()},
 \code{\link{get_lookups_dir}()},
 \code{\link{get_pop_path}()},
-\code{\link{get_spd_path}()}
+\code{\link{get_spd_path}()},
+\code{\link{get_uk_postcode_path}()}
 }
 \concept{lookup file paths}
diff --git a/man/get_spd_path.Rd b/man/get_spd_path.Rd
index 25d46f114..6085749f6 100644
--- a/man/get_spd_path.Rd
+++ b/man/get_spd_path.Rd
@@ -24,6 +24,7 @@ Other lookup file paths:
 \code{\link{get_locality_path}()},
 \code{\link{get_lookups_dir}()},
 \code{\link{get_pop_path}()},
-\code{\link{get_simd_path}()}
+\code{\link{get_simd_path}()},
+\code{\link{get_uk_postcode_path}()}
 }
 \concept{lookup file paths}
diff --git a/man/get_uk_postcode_path.Rd b/man/get_uk_postcode_path.Rd
new file mode 100644
index 000000000..03fedfc37
--- /dev/null
+++ b/man/get_uk_postcode_path.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_slf_lookup_paths.R
+\name{get_uk_postcode_path}
+\alias{get_uk_postcode_path}
+\title{get uk postcode list file path}
+\usage{
+get_uk_postcode_path()
+}
+\description{
+get uk postcode list file
+}
+\seealso{
+Other lookup file paths: 
+\code{\link{get_gpprac_ref_path}()},
+\code{\link{get_locality_path}()},
+\code{\link{get_lookups_dir}()},
+\code{\link{get_pop_path}()},
+\code{\link{get_simd_path}()},
+\code{\link{get_spd_path}()}
+}
+\concept{lookup file paths}

From e6e820e05ddd884c1935a88941f03155e3d36176 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Mon, 6 May 2024 15:47:35 +0000
Subject: [PATCH 073/186] Style code

---
 R/fill_ch_names.R        | 143 ++++++++++++++++++++++-----------------
 R/get_slf_lookup_paths.R |   2 +-
 2 files changed, 83 insertions(+), 62 deletions(-)

diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index 223bfe43c..16772b1a8 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -16,8 +16,10 @@ fill_ch_names <- function(ch_data,
   # fix the issue "no visible binding for global variable x, y"
   x <- y <- NULL
 
-  spd_list <- dplyr::pull(read_file(spd_path, col_select = "pc7"),
-                          "pc7")
+  spd_list <- dplyr::pull(
+    read_file(spd_path, col_select = "pc7"),
+    "pc7"
+  )
   uk_pc_list <- dplyr::pull(read_file(uk_pc_list))
 
   ch_data <- ch_data %>%
@@ -25,14 +27,17 @@ fill_ch_names <- function(ch_data,
     dplyr::mutate(ch_name = clean_up_free_text(.data[["ch_name"]])) %>%
     # correct postcode formatting
     dplyr::mutate(
-      dplyr::across(dplyr::contains("postcode"),
-                    phsmethods::format_postcode),
+      dplyr::across(
+        dplyr::contains("postcode"),
+        phsmethods::format_postcode
+      ),
       # Replace invalid postcode with NA
       # check nations where ch is located
       ch_pc_nation = dplyr::case_when(
         .data[["ch_postcode"]] %in% spd_list ~ "sco",
         .data[["ch_postcode"]] %in% uk_pc_list ~ "uk", # presumably most in England
-        .default = NA),
+        .default = NA
+      ),
       ch_name_keyword = ch_name_extract_keyword(.data[["ch_name"]])
     ) %>%
     # add unique identifier
@@ -381,8 +386,11 @@ fill_ch_names <- function(ch_data,
   # but not the main part of the postcode, say "EH12"
 
   ch_name_match_quality16 <- ch_data %>%
-    dplyr::anti_join(dplyr::bind_rows(ch_pc_match_quality1to14,
-                                      ch_eng_match_quality15),
+    dplyr::anti_join(
+      dplyr::bind_rows(
+        ch_pc_match_quality1to14,
+        ch_eng_match_quality15
+      ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::inner_join(
@@ -408,9 +416,12 @@ fill_ch_names <- function(ch_data,
   ### quality 17L ----
   # fizzy matching by ch_name, and matching main part of postcode
   ch_name_match_quality17 <- ch_data %>%
-    dplyr::anti_join(dplyr::bind_rows(ch_pc_match_quality1to14,
-                                      ch_eng_match_quality15,
-                                      ch_name_match_quality16),
+    dplyr::anti_join(
+      dplyr::bind_rows(
+        ch_pc_match_quality1to14,
+        ch_eng_match_quality15,
+        ch_name_match_quality16
+      ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::inner_join(
@@ -444,10 +455,13 @@ fill_ch_names <- function(ch_data,
   ### quality 18L ----
   ### fizzy matching by ch_name, and same city
   ch_name_match_quality18 <- ch_data %>%
-    dplyr::anti_join(dplyr::bind_rows(ch_pc_match_quality1to14,
-                                      ch_eng_match_quality15,
-                                      ch_name_match_quality16,
-                                      ch_name_match_quality17),
+    dplyr::anti_join(
+      dplyr::bind_rows(
+        ch_pc_match_quality1to14,
+        ch_eng_match_quality15,
+        ch_name_match_quality16,
+        ch_name_match_quality17
+      ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::inner_join(
@@ -481,13 +495,14 @@ fill_ch_names <- function(ch_data,
   ### quality 19L----
   # ch_postcode and postcode exchange, then matching
   ch_pc_ex_match_quality19 <- ch_data %>%
-    dplyr::anti_join(dplyr::bind_rows(
-      ch_pc_match_quality1to14,
-      ch_eng_match_quality15,
-      ch_name_match_quality16,
-      ch_name_match_quality17,
-      ch_name_match_quality18
-    ),
+    dplyr::anti_join(
+      dplyr::bind_rows(
+        ch_pc_match_quality1to14,
+        ch_eng_match_quality15,
+        ch_name_match_quality16,
+        ch_name_match_quality17,
+        ch_name_match_quality18
+      ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::mutate(
@@ -532,14 +547,15 @@ fill_ch_names <- function(ch_data,
   # ch_postcode and postcode exchange,
   # then fizzy match ch_name, and matching main part of postcode
   ch_pc_ex_match_quality20 <- ch_data %>%
-    dplyr::anti_join(dplyr::bind_rows(
-      ch_pc_match_quality1to14,
-      ch_eng_match_quality15,
-      ch_name_match_quality16,
-      ch_name_match_quality17,
-      ch_name_match_quality18,
-      ch_pc_ex_match_quality19
-    ),
+    dplyr::anti_join(
+      dplyr::bind_rows(
+        ch_pc_match_quality1to14,
+        ch_eng_match_quality15,
+        ch_name_match_quality16,
+        ch_name_match_quality17,
+        ch_name_match_quality18,
+        ch_pc_ex_match_quality19
+      ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::mutate(
@@ -585,15 +601,16 @@ fill_ch_names <- function(ch_data,
   unique_ch_name <- unique(ch_name_lookup$ch_name_validated)
 
   ch_name_match_quality21 <- ch_data %>%
-    dplyr::anti_join(dplyr::bind_rows(
-      ch_pc_match_quality1to14,
-      ch_eng_match_quality15,
-      ch_name_match_quality16,
-      ch_name_match_quality17,
-      ch_name_match_quality18,
-      ch_pc_ex_match_quality19,
-      ch_pc_ex_match_quality20
-    ),
+    dplyr::anti_join(
+      dplyr::bind_rows(
+        ch_pc_match_quality1to14,
+        ch_eng_match_quality15,
+        ch_name_match_quality16,
+        ch_name_match_quality17,
+        ch_name_match_quality18,
+        ch_pc_ex_match_quality19,
+        ch_pc_ex_match_quality20
+      ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::inner_join(
@@ -620,16 +637,17 @@ fill_ch_names <- function(ch_data,
   # fizzy match care home name, regardless of postcode,
   # excluding those duplicated care home names.
   ch_name_match_quality22 <- ch_data %>%
-    dplyr::anti_join(dplyr::bind_rows(
-      ch_pc_match_quality1to14,
-      ch_eng_match_quality15,
-      ch_name_match_quality16,
-      ch_name_match_quality17,
-      ch_name_match_quality18,
-      ch_pc_ex_match_quality19,
-      ch_pc_ex_match_quality20,
-      ch_name_match_quality21
-    ),
+    dplyr::anti_join(
+      dplyr::bind_rows(
+        ch_pc_match_quality1to14,
+        ch_eng_match_quality15,
+        ch_name_match_quality16,
+        ch_name_match_quality17,
+        ch_name_match_quality18,
+        ch_pc_ex_match_quality19,
+        ch_pc_ex_match_quality20,
+        ch_name_match_quality21
+      ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     dplyr::inner_join(
@@ -662,17 +680,18 @@ fill_ch_names <- function(ch_data,
 
   # add 100L for non-matching episodes
   ch_no_match_quality100 <- ch_data %>%
-    dplyr::anti_join(dplyr::bind_rows(
-      ch_pc_match_quality1to14,
-      ch_eng_match_quality15,
-      ch_name_match_quality16,
-      ch_name_match_quality17,
-      ch_name_match_quality18,
-      ch_pc_ex_match_quality19,
-      ch_pc_ex_match_quality20,
-      ch_name_match_quality21,
-      ch_name_match_quality22
-    ),
+    dplyr::anti_join(
+      dplyr::bind_rows(
+        ch_pc_match_quality1to14,
+        ch_eng_match_quality15,
+        ch_name_match_quality16,
+        ch_name_match_quality17,
+        ch_name_match_quality18,
+        ch_pc_ex_match_quality19,
+        ch_pc_ex_match_quality20,
+        ch_name_match_quality21,
+        ch_name_match_quality22
+      ),
       by = dplyr::join_by("unique_identifier")
     ) %>%
     # dplyr::distinct(ch_name, .keep_all = TRUE) %>%
@@ -702,8 +721,10 @@ fill_ch_names <- function(ch_data,
     ch_name_match_quality22,
     ch_no_match_quality100
   ) %>%
-    dplyr::arrange(.data[["chi"]], .data[["ch_name_keyword"]],
-                   .data[["matching_quality_indicator"]]) %>%
+    dplyr::arrange(
+      .data[["chi"]], .data[["ch_name_keyword"]],
+      .data[["matching_quality_indicator"]]
+    ) %>%
     dplyr::group_by(
       .data[["chi"]],
       .data[["ch_name_keyword"]]
diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R
index c3d70ddfe..8e8dda0e5 100644
--- a/R/get_slf_lookup_paths.R
+++ b/R/get_slf_lookup_paths.R
@@ -21,7 +21,7 @@ get_slf_postcode_path <- function(update = latest_update(), ...) {
 #' get uk postcode list file path
 #' @description get uk postcode list file
 #' @family lookup file paths
-get_uk_postcode_path <- function(){
+get_uk_postcode_path <- function() {
   get_file_path(
     directory = fs::path(get_slf_dir(), "Lookups"),
     file_name = "uk_postcode_list",

From 970a45799769d7a199bdaa65aba9226380553689 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 6 May 2024 15:57:45 +0000
Subject: [PATCH 074/186] [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/8971882687/attempts/1
Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/946#issuecomment-2096386627

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
---
 .github/actions/spelling/expect.txt | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 6c894d958..ed83a6625 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -7,10 +7,14 @@ adtf
 arrivalmode
 arth
 atlassian
+atrialfib
 attendcat
 aut
+bba
 bedday
+BFO
 birthtime
+bloodbfo
 bodyloc
 boxi
 callr
@@ -22,6 +26,8 @@ cattend
 ccyy
 cdn
 cennum
+CEREBROVASC
+chd
 chp
 chpstart
 cij
@@ -33,13 +39,16 @@ codecov
 comhairle
 commhosp
 congen
+copd
 costincdnas
 costmonthnum
 costsfy
 covr
 cph
 createslf
+csf
 customise
+cvd
 dataframe
 datamart
 datazone
@@ -68,6 +77,9 @@ dplyr
 dsn
 dtplyr
 dvprod
+eee
+efa
+endomet
 envir
 fcase
 feb
@@ -95,6 +107,7 @@ hbtreatcode
 hbtreatname
 hci
 hcp
+hefailure
 hhg
 hjust
 hms
@@ -157,6 +170,7 @@ openxlsx
 orcid
 outfile
 pandoc
+parkinsons
 patflow
 pattype
 pcec
@@ -187,6 +201,7 @@ readr
 readxl
 reasonwait
 recid
+refailure
 reflectoring
 refsource
 renviron
@@ -199,6 +214,7 @@ rspm
 rstudio
 rstudioapi
 rtype
+sco
 scoial
 scotp
 SDcols
@@ -236,6 +252,7 @@ thom
 tibble
 tidyr
 tidyselect
+TJDX
 todo
 uid
 ungroup
@@ -243,6 +260,7 @@ unicode
 updown
 upi
 vline
+wdbf
 WORDLIST
 workflows
 xintercept
@@ -251,5 +269,6 @@ yearstay
 yml
 yyyyqx
 Zihao
+zihao
 zsav
 zstd

From 6e5a0aa2d6674f79de63090d30e780d82f4280a6 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 6 May 2024 17:04:55 +0100
Subject: [PATCH 075/186] spelling metadata

---
 .github/actions/spelling/expect.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index ed83a6625..2d546a9db 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -268,7 +268,6 @@ xlsx
 yearstay
 yml
 yyyyqx
-Zihao
 zihao
 zsav
 zstd

From a75374ed4494b36ccce1a8514228e310c159b528 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Tue, 7 May 2024 08:52:35 +0100
Subject: [PATCH 076/186] Merge May24 NI update into June update branch (#949)

Collect data before manipulations
---
 R/read_lookup_sc_client.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R
index d2b549671..173c971b6 100644
--- a/R/read_lookup_sc_client.R
+++ b/R/read_lookup_sc_client.R
@@ -41,6 +41,7 @@ read_lookup_sc_client <- function(fyyear,
       "day_care"
     ) %>%
     dplyr::filter(.data$financial_year == year) %>%
+    dplyr::collect() %>%
     dplyr::mutate(
       dplyr::across(
         c(
@@ -74,8 +75,7 @@ read_lookup_sc_client <- function(fyyear,
       .data$social_care_id,
       .data$financial_year,
       .data$financial_quarter
-    ) %>%
-    dplyr::collect()
+    )
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "client", year = fyyear))) {
     client_data %>%

From 809d7d7e42c30bcde04be244fde59b7f6ffadf7b Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 7 May 2024 09:34:14 +0100
Subject: [PATCH 077/186] update metadata for fill_ch_names

---
 R/fill_ch_names.R | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index 16772b1a8..209fb4ee7 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -6,13 +6,15 @@
 #' Workbook, this defaults to [get_slf_ch_name_lookup_path()]
 #' @param spd_path Path to the Scottish Postcode Directory (rds) version, this
 #' defaults to [get_spd_path()]
+#' @param uk_pc_path Path to the UK postcode list. This is defaults to
+#' [get_uk_postcode_path()]
 #'
 #' @return the same data with improved accuracy and completeness of the Care
 #' Home names and postcodes, as a [tibble][tibble::tibble-package].
 fill_ch_names <- function(ch_data,
                           ch_name_lookup_path = get_slf_ch_name_lookup_path(),
                           spd_path = get_spd_path(),
-                          uk_pc_list = get_uk_postcode_path()) {
+                          uk_pc_path = get_uk_postcode_path()) {
   # fix the issue "no visible binding for global variable x, y"
   x <- y <- NULL
 
@@ -20,7 +22,7 @@ fill_ch_names <- function(ch_data,
     read_file(spd_path, col_select = "pc7"),
     "pc7"
   )
-  uk_pc_list <- dplyr::pull(read_file(uk_pc_list))
+  uk_pc_list <- dplyr::pull(read_file(uk_pc_path))
 
   ch_data <- ch_data %>%
     # Make the care home name more uniform
@@ -363,7 +365,7 @@ fill_ch_names <- function(ch_data,
       ch_pc_match_quality1to14,
       by = dplyr::join_by("unique_identifier")
     ) %>%
-    dplyr::filter(ch_pc_nation == "uk") %>%
+    dplyr::filter(.data[["ch_pc_nation"]] == "uk") %>%
     # add columns for English care homes
     dplyr::mutate(
       matching_quality_indicator = 15L,

From 40d696e91c5f88313c4b6dfbe47e881ff018bd3a Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Tue, 7 May 2024 08:36:36 +0000
Subject: [PATCH 078/186] Update documentation

---
 man/fill_ch_names.Rd | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/man/fill_ch_names.Rd b/man/fill_ch_names.Rd
index 97d706d92..9ef2f49cd 100644
--- a/man/fill_ch_names.Rd
+++ b/man/fill_ch_names.Rd
@@ -8,7 +8,7 @@ fill_ch_names(
   ch_data,
   ch_name_lookup_path = get_slf_ch_name_lookup_path(),
   spd_path = get_spd_path(),
-  uk_pc_list = get_uk_postcode_path()
+  uk_pc_path = get_uk_postcode_path()
 )
 }
 \arguments{
@@ -20,6 +20,9 @@ Workbook, this defaults to \code{\link[=get_slf_ch_name_lookup_path]{get_slf_ch_
 
 \item{spd_path}{Path to the Scottish Postcode Directory (rds) version, this
 defaults to \code{\link[=get_spd_path]{get_spd_path()}}}
+
+\item{uk_pc_path}{Path to the UK postcode list. This is defaults to
+\code{\link[=get_uk_postcode_path]{get_uk_postcode_path()}}}
 }
 \value{
 the same data with improved accuracy and completeness of the Care

From 3446a9f0d4626f5b95fdc99ff489422f9ec3ba97 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 10 May 2024 09:46:19 +0100
Subject: [PATCH 079/186] add rounding to one decimal place on percentage

---
 R/process_extract_homelessness.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index 2b35114cc..53ec8ec93 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -145,16 +145,17 @@ process_extract_homelessness <- function(
     sg_pub_path = sg_pub_path
   )
 
-  data <- data %>%
+  hl1_data <- data %>%
     dplyr::left_join(
       completeness_data %>%
         dplyr::select(sending_local_authority_name, pct_complete_all),
       by = dplyr::join_by("sending_local_authority_name")
     ) %>%
-    dplyr::rename(hl1_completeness = pct_complete_all)
+    dplyr::rename(hl1_completeness = pct_complete_all) %>%
+    dplyr::mutate(hl1_completeness = round(hl1_completeness, 1))
 
   # TODO - Include person_id (from client_id)
-  final_data <- data %>%
+  final_data <- hl1_data %>%
     dplyr::select(
       "year",
       "recid",

From fc345e8735b8a12a26cd42212ef29a843c3cec64 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 10 May 2024 15:57:11 +0100
Subject: [PATCH 080/186] Add write to disk

---
 R/process_tests_cross_year.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/R/process_tests_cross_year.R b/R/process_tests_cross_year.R
index 18a5b40cc..af1538b19 100644
--- a/R/process_tests_cross_year.R
+++ b/R/process_tests_cross_year.R
@@ -51,7 +51,8 @@ process_tests_cross_year <- function(year) {
       names_glue = "{year}_qtr_{fy_qtr}",
       values_from = "n"
     ) %>%
-    dplyr::select(-tidyselect::ends_with("NA"))
+    dplyr::select(-tidyselect::ends_with("NA")) %>%
+    write_tests_xlsx(sheet_name = "cross_year", workbook_name = "cross_year")
 
   return(pivot_tests)
 }

From ace26e5f980043d0768c9d452dfb7550bb8bba2a Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 10 May 2024 15:58:35 +0100
Subject: [PATCH 081/186] update `write_tests_xlsx`

---
 R/write_tests_xlsx.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R
index d2e1490f2..039f203c4 100644
--- a/R/write_tests_xlsx.R
+++ b/R/write_tests_xlsx.R
@@ -19,7 +19,8 @@
 write_tests_xlsx <- function(comparison_data,
                              sheet_name,
                              year = NULL,
-                             workbook_name = c("ep_file", "indiv_file", "lookup", "extract", "sandpit")) {
+                             workbook_name = c("ep_file", "indiv_file", "lookup", "extract", "sandpit",
+                                               "cross_year")) {
   # Set up the workbook ----
 
   if (is.null(year)) {
@@ -27,7 +28,8 @@ write_tests_xlsx <- function(comparison_data,
       workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"),
       workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"),
       workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"),
-      workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests")
+      workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"),
+      workbook_name == "cross_year" ~ stringr::str_glue(latest_update(), "_cross_year_tests")
     )
   } else if (workbook_name == "sandpit" & !is.null(year)) {
     tests_workbook_name <- dplyr::case_when(

From 54aa37a9cd81930f1487f757e960fa0b674373eb Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Fri, 10 May 2024 15:09:12 +0000
Subject: [PATCH 082/186] Style code

---
 R/write_tests_xlsx.R | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R
index 039f203c4..aa8527d7f 100644
--- a/R/write_tests_xlsx.R
+++ b/R/write_tests_xlsx.R
@@ -19,8 +19,10 @@
 write_tests_xlsx <- function(comparison_data,
                              sheet_name,
                              year = NULL,
-                             workbook_name = c("ep_file", "indiv_file", "lookup", "extract", "sandpit",
-                                               "cross_year")) {
+                             workbook_name = c(
+                               "ep_file", "indiv_file", "lookup", "extract", "sandpit",
+                               "cross_year"
+                             )) {
   # Set up the workbook ----
 
   if (is.null(year)) {

From d60184136d941e7438700ce2fe6e1dfd892d53d9 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Fri, 10 May 2024 15:09:39 +0000
Subject: [PATCH 083/186] Update documentation

---
 man/write_tests_xlsx.Rd | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/man/write_tests_xlsx.Rd b/man/write_tests_xlsx.Rd
index 0788d0080..716cd3e59 100644
--- a/man/write_tests_xlsx.Rd
+++ b/man/write_tests_xlsx.Rd
@@ -8,7 +8,8 @@ write_tests_xlsx(
   comparison_data,
   sheet_name,
   year = NULL,
-  workbook_name = c("ep_file", "indiv_file", "lookup", "extract", "sandpit")
+  workbook_name = c("ep_file", "indiv_file", "lookup", "extract", "sandpit",
+    "cross_year")
 )
 }
 \arguments{

From 5474fd64c8dc4509d6b82827ded77d9dc7b7a7ea Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 10 May 2024 16:10:56 +0100
Subject: [PATCH 084/186] Add to targets pipeline

---
 _targets.R | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/_targets.R b/_targets.R
index 15d2584bb..74d022698 100644
--- a/_targets.R
+++ b/_targets.R
@@ -591,7 +591,11 @@ list(
         data = episode_file,
         year = year
       )
-    ) # ,
+    ),
+    tar_target(
+      cross_year_tests,
+      process_tests_cross_year(year = year)
+    ), # ,
     # tar_target(
     #   individual_file,
     #   create_individual_file(

From 08b9f8085d0a5d7e989736fd76e42fc838db3286 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Mon, 13 May 2024 12:32:56 +0100
Subject: [PATCH 085/186] Update NEWS.md

---
 NEWS.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 52 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index fdbb64c9c..a2e0aeeea 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,55 @@
-# September 2023 Update - Unreleased
+# June 2024 Update - Unreleased
+* Update of 2017/18 onwards to include bug fixes within the files.
+* Removal of extra variable caused by the LTCs not matching properly.
+* Homelessness improvements:
+   * Removal of filtering the data in SLFs according to completeness levels.
+   * New variables:
+      * `hl1_completeness` - a data quality indicator by percentage compared to SG annual publication. 
+      * `hl1_12_months_pre_app` - date variable
+      * `hl1_12_months_post_app` - date variable 
+* Potential inclusions
+* Activity after death flag?
+* New care home methodology? - potentially this is on hold until September update.
+* Additional Documentation?
+
+# March 2024 Update - Released 20-Mar-2024
+* Update of 2017/18 onwards to include bug fixes within the files.
+* 2023/24 file now includes social care data.
+* Geography files updated - SPD and SIMD
+* Variable `property_type` in homelessness has been updated to include further description
+* Bug fixes:
+  * Service use cohort wrongly assigning Non-Service Users (NSU) as `psychiatry`
+  * Not Applicable (NA) introduced for variable `high_cc` in Demographic cohort
+  * Issue with delayed discharges data not linking to admissions
+  * Person ID available in self-directed support (SDS) data
+  * Issue with Social Care ID - missing sc id were all being set to one sc id.
+ * Improvements to social care methodology
+   * Demographics 
+    * person_id will now be consistent across social care cases for an individual. The social care ID for a CHI will also be consistent across all areas, not just the latest ID used in AT/SDS/CH/HC.
+ * Self-directed Support (SDS) and Alarms Telecare (AT) data
+    * Our tests show this is now in line with the social care team’s publications and therefore, the data may have changed slightly. 
+ * New Social Care methodology 
+    * The new methodology impacts how we match the demographics file and how we select the latest social care ID.
+    * Previously we used the `latest_flag` but this isn’t accurate as some IDs have none flagged, and some have more than one flagged. We now have one social care ID flagged for each CHI. This issue mostly affects Edinburgh, Falkirk, Western Isles, and Renfrewshire.
+    * Previously, in cases where a social care ID had multiple CHIs associated only one of the CHIs was chosen.
+    * The new methodology keeps all CHIs in as there is no way to tell which CHI the activity is for. The new methodology will show duplicate activity but for the different CHIs. The main areas this affects are Midlothian, Western Isles, and Renfrewshire.
+      
+
+# December 2023 Update - Released 20-Dec-2023
+* Update of 2017/18 onwards to include bug fixes within the files.
+* 2023/24 file contain data from 1st April 2023 up to the end of September 2023.
+  * No social care data available.
+* Re-addition of keep population flag.
+* SPARRA update
+* NA's introduced for variable `ch_provider` - now fixed.
+* Future improvements
+  * Activity after death flag
+  * Review of social care methodology.
+* SLFhelper updated to version 10.1.1.
+  * Includes a fix for speeding up function `get_chi()`
+
+
+# September 2023 Update - Released 22-Sep-2023
 * Update of 2017/18 onwards to include bug fixes within the files. 
 * New 2023/24 files.
   *No social care data available for new 2023/24 file.

From e285891618cbb2b7659868099cb79eeabbc5a173 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Tue, 14 May 2024 11:01:49 +0100
Subject: [PATCH 086/186] Update NEWS.md

---
 NEWS.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/NEWS.md b/NEWS.md
index a2e0aeeea..d5aea7364 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,7 @@
 # June 2024 Update - Unreleased
 * Update of 2017/18 onwards to include bug fixes within the files.
 * Removal of extra variable caused by the LTCs not matching properly.
+* New NRS mid-2022 population estimates.
 * Homelessness improvements:
    * Removal of filtering the data in SLFs according to completeness levels.
    * New variables:

From 6c2b6d5b49eabfcfe2326d9d4053e9803b3a1da6 Mon Sep 17 00:00:00 2001
From: rachev04 <rachel.veeravalli@phs.scot>
Date: Fri, 17 May 2024 14:56:51 +0100
Subject: [PATCH 087/186] Added function for get_all_slf_deaths_lookup_path

---
 R/get_slf_lookup_paths.R | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R
index 1be1c9781..d96087347 100644
--- a/R/get_slf_lookup_paths.R
+++ b/R/get_slf_lookup_paths.R
@@ -49,6 +49,7 @@ get_slf_gpprac_path <- function(update = latest_update(), ...) {
 #' @family slf lookup file path
 #' @seealso [get_file_path()] for the generic function.
 get_slf_deaths_lookup_path <- function(year, ...) {
+  # Review the naming convention of this path and file
   slf_deaths_lookup_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Deaths"),
     file_name = stringr::str_glue("slf_deaths_lookup_{year}.parquet"),
@@ -58,6 +59,31 @@ get_slf_deaths_lookup_path <- function(year, ...) {
   return(slf_deaths_lookup_path)
 }
 
+#' SLF death dates File Path
+#'
+#' @description Get the full path to the BOXI NRS Deaths lookup file for all financial years
+#'
+#' @inheritParams get_boxi_extract_path
+#' @param ... additional arguments passed to [get_file_path()]
+#' @param year financial year e.g. "1920"
+#'
+#' @export
+#' @family slf lookup file path
+#' @seealso [get_file_path()] for the generic function.
+
+get_all_slf_deaths_lookup_path <- function(update = latest_update()) {
+  # Note this name is very similar to the existing slf_deaths_lookup_path which returns the path for
+  # the processed BOXI extract for each financial year. This function will return the combined financial
+  # years lookup i.e. all years put together.
+  all_slf_deaths_lookup_path <- get_file_path(
+    directory = fs::path(get_slf_dir(), "Deaths",
+                         file_name = stringr::str_glue("all_slf_deaths_lookup_{update}.parquet"))
+  )
+
+  return(all_slf_deaths_lookup_path)
+
+}
+
 #' SLF CHI Deaths File Path
 #'
 #' @description Get the full path to the CHI deaths file

From d47c0a19b4d37670f2037fbf903f2d7c83ba1a19 Mon Sep 17 00:00:00 2001
From: rchlv <rchlv@users.noreply.github.com>
Date: Fri, 17 May 2024 13:58:49 +0000
Subject: [PATCH 088/186] Update documentation

---
 NAMESPACE                             |  1 +
 man/get_all_slf_deaths_lookup_path.Rd | 27 +++++++++++++++++++++++++++
 man/get_slf_ch_name_lookup_path.Rd    |  1 +
 man/get_slf_chi_deaths_path.Rd        |  1 +
 man/get_slf_deaths_lookup_path.Rd     |  1 +
 man/get_slf_gpprac_path.Rd            |  1 +
 man/get_slf_postcode_path.Rd          |  1 +
 7 files changed, 33 insertions(+)
 create mode 100644 man/get_all_slf_deaths_lookup_path.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 4606cf3f2..1c13740ef 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -25,6 +25,7 @@ export(end_fy_quarter)
 export(end_next_fy_quarter)
 export(find_latest_file)
 export(fy_interval)
+export(get_all_slf_deaths_lookup_path)
 export(get_boxi_extract_path)
 export(get_ch_costs_path)
 export(get_dd_path)
diff --git a/man/get_all_slf_deaths_lookup_path.Rd b/man/get_all_slf_deaths_lookup_path.Rd
new file mode 100644
index 000000000..93c8a2840
--- /dev/null
+++ b/man/get_all_slf_deaths_lookup_path.Rd
@@ -0,0 +1,27 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_slf_lookup_paths.R
+\name{get_all_slf_deaths_lookup_path}
+\alias{get_all_slf_deaths_lookup_path}
+\title{SLF death dates File Path}
+\usage{
+get_all_slf_deaths_lookup_path(update = latest_update())
+}
+\arguments{
+\item{...}{additional arguments passed to \code{\link[=get_file_path]{get_file_path()}}}
+
+\item{year}{financial year e.g. "1920"}
+}
+\description{
+Get the full path to the BOXI NRS Deaths lookup file for all financial years
+}
+\seealso{
+\code{\link[=get_file_path]{get_file_path()}} for the generic function.
+
+Other slf lookup file path: 
+\code{\link{get_slf_ch_name_lookup_path}()},
+\code{\link{get_slf_chi_deaths_path}()},
+\code{\link{get_slf_deaths_lookup_path}()},
+\code{\link{get_slf_gpprac_path}()},
+\code{\link{get_slf_postcode_path}()}
+}
+\concept{slf lookup file path}
diff --git a/man/get_slf_ch_name_lookup_path.Rd b/man/get_slf_ch_name_lookup_path.Rd
index 1f687308a..2660bbeab 100644
--- a/man/get_slf_ch_name_lookup_path.Rd
+++ b/man/get_slf_ch_name_lookup_path.Rd
@@ -22,6 +22,7 @@ has official Care Home names and addresses provided by the Care Inspectorate.
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
 
 Other slf lookup file path: 
+\code{\link{get_all_slf_deaths_lookup_path}()},
 \code{\link{get_slf_chi_deaths_path}()},
 \code{\link{get_slf_deaths_lookup_path}()},
 \code{\link{get_slf_gpprac_path}()},
diff --git a/man/get_slf_chi_deaths_path.Rd b/man/get_slf_chi_deaths_path.Rd
index c598a3c24..0db72d9d3 100644
--- a/man/get_slf_chi_deaths_path.Rd
+++ b/man/get_slf_chi_deaths_path.Rd
@@ -22,6 +22,7 @@ Get the full path to the CHI deaths file
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
 
 Other slf lookup file path: 
+\code{\link{get_all_slf_deaths_lookup_path}()},
 \code{\link{get_slf_ch_name_lookup_path}()},
 \code{\link{get_slf_deaths_lookup_path}()},
 \code{\link{get_slf_gpprac_path}()},
diff --git a/man/get_slf_deaths_lookup_path.Rd b/man/get_slf_deaths_lookup_path.Rd
index 5b1306c91..307c38ad3 100644
--- a/man/get_slf_deaths_lookup_path.Rd
+++ b/man/get_slf_deaths_lookup_path.Rd
@@ -21,6 +21,7 @@ Get the full path to the SLF deaths lookup file
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
 
 Other slf lookup file path: 
+\code{\link{get_all_slf_deaths_lookup_path}()},
 \code{\link{get_slf_ch_name_lookup_path}()},
 \code{\link{get_slf_chi_deaths_path}()},
 \code{\link{get_slf_gpprac_path}()},
diff --git a/man/get_slf_gpprac_path.Rd b/man/get_slf_gpprac_path.Rd
index c17403f7d..1fb23116f 100644
--- a/man/get_slf_gpprac_path.Rd
+++ b/man/get_slf_gpprac_path.Rd
@@ -21,6 +21,7 @@ Get the full path to the SLF GP practice lookup
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
 
 Other slf lookup file path: 
+\code{\link{get_all_slf_deaths_lookup_path}()},
 \code{\link{get_slf_ch_name_lookup_path}()},
 \code{\link{get_slf_chi_deaths_path}()},
 \code{\link{get_slf_deaths_lookup_path}()},
diff --git a/man/get_slf_postcode_path.Rd b/man/get_slf_postcode_path.Rd
index ed007c7c5..f37678695 100644
--- a/man/get_slf_postcode_path.Rd
+++ b/man/get_slf_postcode_path.Rd
@@ -21,6 +21,7 @@ Get the full path to the SLF Postcode lookup
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
 
 Other slf lookup file path: 
+\code{\link{get_all_slf_deaths_lookup_path}()},
 \code{\link{get_slf_ch_name_lookup_path}()},
 \code{\link{get_slf_chi_deaths_path}()},
 \code{\link{get_slf_deaths_lookup_path}()},

From ec0c5ef84b443972f081d7adee032da59874e85f Mon Sep 17 00:00:00 2001
From: rchlv <rchlv@users.noreply.github.com>
Date: Fri, 17 May 2024 14:03:59 +0000
Subject: [PATCH 089/186] Style code

---
 R/get_slf_lookup_paths.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R
index d96087347..898fe4c74 100644
--- a/R/get_slf_lookup_paths.R
+++ b/R/get_slf_lookup_paths.R
@@ -77,11 +77,11 @@ get_all_slf_deaths_lookup_path <- function(update = latest_update()) {
   # years lookup i.e. all years put together.
   all_slf_deaths_lookup_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Deaths",
-                         file_name = stringr::str_glue("all_slf_deaths_lookup_{update}.parquet"))
+      file_name = stringr::str_glue("all_slf_deaths_lookup_{update}.parquet")
+    )
   )
 
   return(all_slf_deaths_lookup_path)
-
 }
 
 #' SLF CHI Deaths File Path

From 8a704e01d71d1a7acf7d43fe7bfce66b7faf04c8 Mon Sep 17 00:00:00 2001
From: rachev04 <rachel.veeravalli@phs.scot>
Date: Mon, 20 May 2024 16:08:40 +0100
Subject: [PATCH 090/186] Add vars for activity after death flag

---
 R/create_episode_file.R | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index f6443291f..53238eac5 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -79,6 +79,8 @@ create_episode_file <- function(
         "hscp",
         "datazone2011",
         "attendance_status",
+        "death_date",
+        "deceased",
         "deathdiag1",
         "deathdiag2",
         "deathdiag3",
@@ -139,6 +141,7 @@ create_episode_file <- function(
       year,
       slf_deaths_lookup
     ) %>%
+    add_activity_after_death_flag(year, deaths_data = read_file(all_slf_deaths_lookup_path())) %>%
     load_ep_file_vars(year)
 
   if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {

From 746d65d0314c2f728044e674c739a86b035bfdf3 Mon Sep 17 00:00:00 2001
From: rachev04 <rachel.veeravalli@phs.scot>
Date: Mon, 20 May 2024 16:11:00 +0100
Subject: [PATCH 091/186] Add activity after death flag

---
 R/add_activity_after_death_flag.R | 166 ++++++++++++++++++++++++++++++
 1 file changed, 166 insertions(+)
 create mode 100644 R/add_activity_after_death_flag.R

diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
new file mode 100644
index 000000000..c82c177e0
--- /dev/null
+++ b/R/add_activity_after_death_flag.R
@@ -0,0 +1,166 @@
+#' Match on BOXI NRS death dates to process activity after death flag
+#'
+#' @description Match on CHI number where available in the episode file, and add date of death from the BOXI NRS lookup.
+#' Create new activity after death flag
+#'
+#' @param data episode files
+#' @param year financial year, e.g. '1920'
+#' @param deaths_data The death data for the year
+#'
+#' @return data flagged if activity after death
+add_activity_after_death_flag <- function(
+    data,
+    year,
+    deaths_data = read_file(all_slf_deaths_lookup_path())) {
+  # Match on BOXI NRS deaths lookup for records with a CHI number
+  data <- data %>%
+    dplyr::filter(!is.na(chi) | chi != "") %>%
+    dplyr::left_join(
+    deaths_data,
+    by = "chi",
+    suffix = c("", "_boxi")
+    )
+
+
+  # Check and print error message for records which already have a death_date in the episode file, but this doesn't match the BOXI death date
+  check_death_date_match <- data %>%
+    dplyr::filter(death_date != death_date_boxi)
+
+  if (nrow(check_death_date_match) != 0)  {
+    warning("There were records in the episode file which already have a death_date, but does not match the BOXI NRS death date.")
+  }
+
+
+  # Check and print error message for records which have a record_keydate1 after their BOXI death date
+  check_keydate1_death_date <- data %>%
+    dplyr::filter(record_keydate1 > death_date_boxi)
+
+  if (nrow(check_death_date_match) != 0)  {
+    warning("There were records in the episode file which have a record_keydate1 after the BOXI NRS death date.")
+  }
+
+
+  flag_data <- data %>%
+    dplyr::mutate(
+      flag_keydate1 = if_else(record_keydate1 > death_date_boxi, 1, 0),
+      flag_keydate2 = if_else(record_keydate2 > death_date_boxi, 1, 0),
+
+      # Next flag records with 'ongoing' activity after date of death (available from BOXI) if keydate2 is missing and the death date occurs in
+      # the current or a previous financial year.
+      flag_keydate2_missing = if_else(((is.na(record_keydate2) | record_keydate2 == "") & (death_date_boxi <= paste0("20", substr(year, 3, 4), "-03-31"))), 1, 0),
+
+      # Also flag records without a death_date in the episode file, but the BOXI death date occurs in the current or a previous financial year.
+      flag_deathdate_missing = if_else(((is.na(death_date) | death_date == "") & (death_date_boxi <= paste0("20", substr(year, 3, 4), "-03-31"))), 1, 0)) %>%
+      # These should be flagged by one of the two lines of code above, but in these cases, we will also fill in the blank death date if appropriate
+
+    # Search all variables beginning with "flag_" for value "1" and create new variable to flag cases where 1 is present
+    # Multiplying by 1 changes flag from true/false to 1/0
+    dplyr::mutate(activity_after_death = purrr::pmap_dbl(select(., contains("flag_")),
+                                                    ~any(grepl("^1$", c(...)),
+                                                         na.rm = TRUE) *1))
+
+
+  # Check and print error message for records which already are TRUE for the deceased variable in the episode file, but this doesn't match the
+  # BOXI deceased variable
+  check_deceased_match <- flag_data %>%
+    dplyr::filter(deceased != deceased_boxi)
+
+  if (nrow(check_deceased_match) != 0)  {
+    warning("There were records in the episode file which have a deceased variable which does not match the BOXI NRS deceased variable")
+  }
+
+
+  # Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried
+  # over from previous financial years
+  flag_data <- flag_data %>%
+    dplyr::mutate(death_date = if_else(((is.na(death_date) | death_date == "") & (death_date_boxi <= paste0("20", substr(year, 1, 2), "-03-31"))), death_date_boxi, death_date)) %>%
+    dplyr::mutate(deceased = if_else(((is.na(deceased) | deceased == "") & (deceased_boxi == TRUE)), deceased_boxi, deceased)) %>%
+
+   # Remove temporary flag variables used to create activity after death flag and fill in missing death_date
+    dplyr::select(-c(death_date_boxi, deceased_boxi, flag_keydate1, flag_keydate2, flag_keydate2_missing, flag_deathdate_missing))
+
+
+  return(flag_data)
+
+}
+
+
+#' Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts
+#'
+#' @description The BOXI NRS deaths extract lookup should be created after the extract files for all years have been processed,
+#' but before an episode file has been produced. Therefore, all BOXI NRS years should be run before running episode files.
+#'
+#' @param file_path Path to the BOXI NRS file for each financial year - may not use this
+#' @param year The year to process, in FY format - may not use this
+#'
+#' @param write_to_disk (optional) Should the data be written to disk default is
+#' `TRUE` i.e. write the data to disk.
+#'
+#' @return the final data as a [tibble][tibble::tibble-package].
+#' @export
+#'
+#'
+#'
+# Read data------------------------------------------------
+process_deaths_lookup <- function(update = latest_update(), ...) {
+  all_boxi_deaths <- read_file(get_slf_deaths_lookup_path("1415")) %>%
+    rbind(read_file(get_slf_deaths_lookup_path("1516"))) %>%
+    rbind(read_file(get_slf_deaths_lookup_path("1617"))) %>%
+    rbind(read_file(get_slf_deaths_lookup_path("1718"))) %>%
+    rbind(read_file(get_slf_deaths_lookup_path("1819"))) %>%
+    rbind(read_file(get_slf_deaths_lookup_path("1920"))) %>%
+    rbind(read_file(get_slf_deaths_lookup_path("2021"))) %>%
+    rbind(read_file(get_slf_deaths_lookup_path("2122"))) %>%
+    rbind(read_file(get_slf_deaths_lookup_path("2223"))) %>%
+    rbind(read_file(get_slf_deaths_lookup_path("2324"))) %>%
+    # Can this be automated to pick up files starting with name "get_slf_deaths_lookup_path"?
+
+    # Remove rows with missing or blank CHI number - could also use na.omit?
+    #na.omit(all_boxi_deaths)
+    dplyr::filter(!is.na(chi) | chi != "")
+
+  # Check all CHI numbers are valid
+  chi_check <- all_boxi_deaths %>%
+    dplyr::pull(.data$chi) %>%
+    phsmethods::chi_check()
+
+  if (!all(chi_check %in% c("Valid CHI", "Missing (Blank)", "Missing (NA)"))) {
+    # There are some Missing (NA) values in the extracts, but I have excluded them above as they cannot be matched to episode file
+    stop("There were bad CHI numbers in the BOXI NRS file")
+  }
+
+  # Check and print error message for chi numbers with more than one death date
+  duplicates <- all_boxi_deaths %>%
+    janitor::get_dupes(.data$chi)
+
+  if (nrow(duplicates) != 0) {
+    # There are some Missing (NA) values in the extracts, but I have excluded them above as they cannot be matched to episode file
+    warning("There were duplicate death dates in the BOXI NRS file.")
+  }
+
+
+  # We decided to include duplicates as unable to determine which is correct date (unless IT can tell us, however, they don't seem to know
+  # the process well enough), and overall impact will be negligible
+  # Get anon_chi and use this to match onto episode file later
+  all_boxi_deaths <- all_boxi_deaths %>%
+    slfhelper::get_anon_chi()
+
+  # Save out duplicates for further investigation if needed (as anon_chi)
+  if (!missing(duplicates)) {
+    write_file(
+      duplicates,
+      fs::path(get_slf_dir(), "Deaths",
+               file_name = stringr::str_glue("slf_deaths_duplicates_{update}.parquet"))
+    )
+  }
+
+  # Maybe save as its own function
+  # Write the all BOXI NRS deaths lookup file to disk, so this can be used to populate activity after death flag in each episode file
+  if (write_to_disk) {
+    all_boxi_deaths %>%
+      write_file(get_all_slf_deaths_lookup_path())
+  }
+
+  return(all_boxi_deaths)
+
+}

From 861bb2b03643ce5feee867d887b9581d7e6c95d6 Mon Sep 17 00:00:00 2001
From: rachev04 <rachel.veeravalli@phs.scot>
Date: Mon, 20 May 2024 16:19:29 +0100
Subject: [PATCH 092/186] Join data back to episode file

---
 R/add_activity_after_death_flag.R | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
index c82c177e0..64930b3d3 100644
--- a/R/add_activity_after_death_flag.R
+++ b/R/add_activity_after_death_flag.R
@@ -79,8 +79,17 @@ add_activity_after_death_flag <- function(
    # Remove temporary flag variables used to create activity after death flag and fill in missing death_date
     dplyr::select(-c(death_date_boxi, deceased_boxi, flag_keydate1, flag_keydate2, flag_keydate2_missing, flag_deathdate_missing))
 
+  # Match activity after death flag back to episode file
+  final_data <- data %>%
+    dplyr::left_join(
+      flag_data,
+      by = "chi",
+      na_matches = "never",
+      relationship = "many-to-one"
+    )
+
 
-  return(flag_data)
+  return(final_data)
 
 }
 

From 8ea7941cf3259b43c1d4a2e8b11c33a74ee11011 Mon Sep 17 00:00:00 2001
From: rchlv <rchlv@users.noreply.github.com>
Date: Mon, 20 May 2024 15:24:20 +0000
Subject: [PATCH 093/186] Style code

---
 R/add_activity_after_death_flag.R | 36 ++++++++++++++++---------------
 1 file changed, 19 insertions(+), 17 deletions(-)

diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
index 64930b3d3..55777859d 100644
--- a/R/add_activity_after_death_flag.R
+++ b/R/add_activity_after_death_flag.R
@@ -16,9 +16,9 @@ add_activity_after_death_flag <- function(
   data <- data %>%
     dplyr::filter(!is.na(chi) | chi != "") %>%
     dplyr::left_join(
-    deaths_data,
-    by = "chi",
-    suffix = c("", "_boxi")
+      deaths_data,
+      by = "chi",
+      suffix = c("", "_boxi")
     )
 
 
@@ -26,7 +26,7 @@ add_activity_after_death_flag <- function(
   check_death_date_match <- data %>%
     dplyr::filter(death_date != death_date_boxi)
 
-  if (nrow(check_death_date_match) != 0)  {
+  if (nrow(check_death_date_match) != 0) {
     warning("There were records in the episode file which already have a death_date, but does not match the BOXI NRS death date.")
   }
 
@@ -35,7 +35,7 @@ add_activity_after_death_flag <- function(
   check_keydate1_death_date <- data %>%
     dplyr::filter(record_keydate1 > death_date_boxi)
 
-  if (nrow(check_death_date_match) != 0)  {
+  if (nrow(check_death_date_match) != 0) {
     warning("There were records in the episode file which have a record_keydate1 after the BOXI NRS death date.")
   }
 
@@ -50,14 +50,18 @@ add_activity_after_death_flag <- function(
       flag_keydate2_missing = if_else(((is.na(record_keydate2) | record_keydate2 == "") & (death_date_boxi <= paste0("20", substr(year, 3, 4), "-03-31"))), 1, 0),
 
       # Also flag records without a death_date in the episode file, but the BOXI death date occurs in the current or a previous financial year.
-      flag_deathdate_missing = if_else(((is.na(death_date) | death_date == "") & (death_date_boxi <= paste0("20", substr(year, 3, 4), "-03-31"))), 1, 0)) %>%
-      # These should be flagged by one of the two lines of code above, but in these cases, we will also fill in the blank death date if appropriate
+      flag_deathdate_missing = if_else(((is.na(death_date) | death_date == "") & (death_date_boxi <= paste0("20", substr(year, 3, 4), "-03-31"))), 1, 0)
+    ) %>%
+    # These should be flagged by one of the two lines of code above, but in these cases, we will also fill in the blank death date if appropriate
 
     # Search all variables beginning with "flag_" for value "1" and create new variable to flag cases where 1 is present
     # Multiplying by 1 changes flag from true/false to 1/0
-    dplyr::mutate(activity_after_death = purrr::pmap_dbl(select(., contains("flag_")),
-                                                    ~any(grepl("^1$", c(...)),
-                                                         na.rm = TRUE) *1))
+    dplyr::mutate(activity_after_death = purrr::pmap_dbl(
+      select(., contains("flag_")),
+      ~ any(grepl("^1$", c(...)),
+        na.rm = TRUE
+      ) * 1
+    ))
 
 
   # Check and print error message for records which already are TRUE for the deceased variable in the episode file, but this doesn't match the
@@ -65,7 +69,7 @@ add_activity_after_death_flag <- function(
   check_deceased_match <- flag_data %>%
     dplyr::filter(deceased != deceased_boxi)
 
-  if (nrow(check_deceased_match) != 0)  {
+  if (nrow(check_deceased_match) != 0) {
     warning("There were records in the episode file which have a deceased variable which does not match the BOXI NRS deceased variable")
   }
 
@@ -75,8 +79,7 @@ add_activity_after_death_flag <- function(
   flag_data <- flag_data %>%
     dplyr::mutate(death_date = if_else(((is.na(death_date) | death_date == "") & (death_date_boxi <= paste0("20", substr(year, 1, 2), "-03-31"))), death_date_boxi, death_date)) %>%
     dplyr::mutate(deceased = if_else(((is.na(deceased) | deceased == "") & (deceased_boxi == TRUE)), deceased_boxi, deceased)) %>%
-
-   # Remove temporary flag variables used to create activity after death flag and fill in missing death_date
+    # Remove temporary flag variables used to create activity after death flag and fill in missing death_date
     dplyr::select(-c(death_date_boxi, deceased_boxi, flag_keydate1, flag_keydate2, flag_keydate2_missing, flag_deathdate_missing))
 
   # Match activity after death flag back to episode file
@@ -90,7 +93,6 @@ add_activity_after_death_flag <- function(
 
 
   return(final_data)
-
 }
 
 
@@ -125,7 +127,7 @@ process_deaths_lookup <- function(update = latest_update(), ...) {
     # Can this be automated to pick up files starting with name "get_slf_deaths_lookup_path"?
 
     # Remove rows with missing or blank CHI number - could also use na.omit?
-    #na.omit(all_boxi_deaths)
+    # na.omit(all_boxi_deaths)
     dplyr::filter(!is.na(chi) | chi != "")
 
   # Check all CHI numbers are valid
@@ -159,7 +161,8 @@ process_deaths_lookup <- function(update = latest_update(), ...) {
     write_file(
       duplicates,
       fs::path(get_slf_dir(), "Deaths",
-               file_name = stringr::str_glue("slf_deaths_duplicates_{update}.parquet"))
+        file_name = stringr::str_glue("slf_deaths_duplicates_{update}.parquet")
+      )
     )
   }
 
@@ -171,5 +174,4 @@ process_deaths_lookup <- function(update = latest_update(), ...) {
   }
 
   return(all_boxi_deaths)
-
 }

From b8589ad83aa81f5cf7e178a328cff357240094c4 Mon Sep 17 00:00:00 2001
From: rchlv <rchlv@users.noreply.github.com>
Date: Mon, 20 May 2024 15:24:37 +0000
Subject: [PATCH 094/186] Update documentation

---
 NAMESPACE                            |  1 +
 man/add_activity_after_death_flag.Rd | 26 ++++++++++++++++++++++++++
 man/process_deaths_lookup.Rd         | 22 ++++++++++++++++++++++
 3 files changed, 49 insertions(+)
 create mode 100644 man/add_activity_after_death_flag.Rd
 create mode 100644 man/process_deaths_lookup.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 1c13740ef..691a056db 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -92,6 +92,7 @@ export(process_costs_ch_rmd)
 export(process_costs_dn_rmd)
 export(process_costs_gp_ooh_rmd)
 export(process_costs_hc_rmd)
+export(process_deaths_lookup)
 export(process_extract_acute)
 export(process_extract_ae)
 export(process_extract_alarms_telecare)
diff --git a/man/add_activity_after_death_flag.Rd b/man/add_activity_after_death_flag.Rd
new file mode 100644
index 000000000..f13a47ddf
--- /dev/null
+++ b/man/add_activity_after_death_flag.Rd
@@ -0,0 +1,26 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add_activity_after_death_flag.R
+\name{add_activity_after_death_flag}
+\alias{add_activity_after_death_flag}
+\title{Match on BOXI NRS death dates to process activity after death flag}
+\usage{
+add_activity_after_death_flag(
+  data,
+  year,
+  deaths_data = read_file(all_slf_deaths_lookup_path())
+)
+}
+\arguments{
+\item{data}{episode files}
+
+\item{year}{financial year, e.g. '1920'}
+
+\item{deaths_data}{The death data for the year}
+}
+\value{
+data flagged if activity after death
+}
+\description{
+Match on CHI number where available in the episode file, and add date of death from the BOXI NRS lookup.
+Create new activity after death flag
+}
diff --git a/man/process_deaths_lookup.Rd b/man/process_deaths_lookup.Rd
new file mode 100644
index 000000000..3780e48a7
--- /dev/null
+++ b/man/process_deaths_lookup.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add_activity_after_death_flag.R
+\name{process_deaths_lookup}
+\alias{process_deaths_lookup}
+\title{Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts}
+\usage{
+process_deaths_lookup(update = latest_update(), ...)
+}
+\arguments{
+\item{file_path}{Path to the BOXI NRS file for each financial year - may not use this}
+
+\item{year}{The year to process, in FY format - may not use this}
+
+\item{write_to_disk}{(optional) Should the data be written to disk default is
+\code{TRUE} i.e. write the data to disk.}
+}
+\value{
+the final data as a \link[tibble:tibble-package]{tibble}.
+}
+\description{
+The BOXI NRS deaths extract lookup should be created after the extract files for all years have been processed.
+}

From a093f600d92c6b55f9d42ef55a519f8754470175 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 21 May 2024 14:20:36 +0100
Subject: [PATCH 095/186] fix a bug for quality 21

---
 R/fill_ch_names.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index 209fb4ee7..1a2966b48 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -600,7 +600,8 @@ fill_ch_names <- function(ch_data,
   ### quality 21L----
   # perfect match care home name, regardless of postcode,
   # excluding those duplicated care home names.
-  unique_ch_name <- unique(ch_name_lookup$ch_name_validated)
+  duplicated_ch_name <-
+    ch_name_lookup$ch_name_validated[duplicated(ch_name_lookup$ch_name_validated)]
 
   ch_name_match_quality21 <- ch_data %>%
     dplyr::anti_join(
@@ -624,7 +625,7 @@ fill_ch_names <- function(ch_data,
       ),
       na_matches = "never"
     ) %>%
-    dplyr::filter(.data[["ch_name"]] %in% unique_ch_name) %>%
+    dplyr::filter(!(.data[["ch_name"]] %in% duplicated_ch_name)) %>%
     dplyr::mutate(
       ch_name_old = .data[["ch_name"]],
       ch_postcode_old = .data[["ch_postcode"]],

From 5f565fc341ba2131221c95878a4a30ffe2aafa40 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Wed, 22 May 2024 09:54:11 +0100
Subject: [PATCH 096/186] Update `00_sort_bi_extracts` to write anon_chi (#952)

* Update `00_sort_BI_extracts`
Save a new file with `anon-` prefix and use slfhelper to get the anon_chi

* remove file copy

* Update `00_sort_bi_extracts` note

* Style code

* Update chi when this is different e.g UPI number or PAT_UPI

* remove storing as a dataframe

* Add condition if CHI exists in data file

* update 00_Sort_BI_Extracts
replace for loop by function to enable parallel computing with lapply

* Style code

* merge similar code

* simplify sort_bi_extracts

---------

Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Zihao Li <lizihao_anu@outlook.com>
Co-authored-by: lizihao-anu <lizihao-anu@users.noreply.github.com>
---
 00_Sort_BI_Extracts.R | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 7cf7d0708..0638123b9 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -1,5 +1,5 @@
 # Define the source directory and financial year pattern
-compress_files <- FALSE
+compress_files <- TRUE
 source_dir <- "/conf/sourcedev/Source_Linkage_File_Updates/Extracts Temp"
 pattern <- "-20(\\d{4})\\.csv"
 
@@ -20,31 +20,49 @@ extract_financial_year <- function(filename) {
   }
 }
 
-# Create directories for each financial year and move files
-for (csv_file in csv_files) {
+# Return TRUE if any column name in the file's header contains "UPI"
+is_chi_in_file <- function(filename) {
+  data <- read.csv(filename, nrow = 1)
+  return(grepl("UPI", names(data)) %>% any())
+}
+
+# function to move files
+move_temps_to_year_extract <- function(csv_file, compress_files = TRUE) {
   financial_year <- extract_financial_year(csv_file)
   # check if year directory exists
   if (!is.null(financial_year)) {
     financial_year_dir <- file.path("/conf/sourcedev/Source_Linkage_File_Updates", financial_year, "Extracts")
-    # if not, create the year directory
+    # if financial_year_dir does not exist, create the year directory
     if (!dir.exists(financial_year_dir)) {
       dir.create(financial_year_dir)
     }
 
+    new_file_path <- file.path(financial_year_dir, paste0("anon-", basename(csv_file)))
+
+    # If the file has a CHI (UPI) column, write an anonymised copy; otherwise copy it unchanged.
+    chi_in_file <- is_chi_in_file(csv_file)
+    if (chi_in_file) {
+      read_file(csv_file) %>%
+        dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
+        slfhelper::get_anon_chi() %>%
+        readr::write_csv(file = new_file_path)
+      cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
+    } else {
+      fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
+      cat("Moved", csv_file, "to", new_file_path, "\n")
+    }
+
     # compress file
     if (compress_files) {
-      cat("Compressing:", basename(csv_file), "\n")
+      cat("Compressing:", basename(new_file_path), "\n")
       system2(
         command = "gzip",
-        args = shQuote(csv_file)
+        args = shQuote(new_file_path)
       )
-      csv_file <- paste0(csv_file, ".gz")
     }
-
-    # move file
-    new_file_path <- file.path(financial_year_dir, basename(csv_file))
-    fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
+    # remove old files
     file.remove(csv_file)
-    cat("Moved:", csv_file, "to", new_file_path, "\n")
   }
 }
+
+lapply(csv_files, move_temps_to_year_extract, compress_files = compress_files)

From a630f0d9b4fba98d03e222dacc7c424696a5488a Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 24 May 2024 14:02:00 +0100
Subject: [PATCH 097/186] Update refs

---
 R/00-update_refs.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/00-update_refs.R b/R/00-update_refs.R
index 2052b938f..6106f17cf 100644
--- a/R/00-update_refs.R
+++ b/R/00-update_refs.R
@@ -7,7 +7,7 @@
 #'
 #' @family initialisation
 latest_update <- function() {
-  "Mar_2024"
+  "Jun_2024"
 }
 
 #' Previous update
@@ -61,7 +61,7 @@ previous_update <- function(months_ago = 3L, override = NULL) {
 #'
 #' @family initialisation
 get_dd_period <- function() {
-  "Jul16_Dec23"
+  "Jul16_Mar24"
 }
 
 #' The latest financial year for Cost uplift setting

From e3a646f2f8a47db94c44462a48e6d9bbbf094642 Mon Sep 17 00:00:00 2001
From: marjom02 <megan.mcnicol2@nhs.scot>
Date: Mon, 27 May 2024 11:45:06 +0100
Subject: [PATCH 098/186] changes to activity after death flag

---
 R/add_activity_after_death_flag.R | 53 +++++++++++++++++--------------
 R/get_slf_lookup_paths.R          | 13 ++++----
 2 files changed, 37 insertions(+), 29 deletions(-)

diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
index 55777859d..6be5c5f73 100644
--- a/R/add_activity_after_death_flag.R
+++ b/R/add_activity_after_death_flag.R
@@ -11,19 +11,22 @@
 add_activity_after_death_flag <- function(
     data,
     year,
-    deaths_data = read_file(all_slf_deaths_lookup_path())) {
-  # Match on BOXI NRS deaths lookup for records without chi
-  data <- data %>%
+    deaths_data = read_file(get_all_slf_deaths_lookup_path())) {
+  death_joined <- data %>%
+    dplyr::select(year, chi, record_keydate1, record_keydate2, death_date, deceased) %>%
     dplyr::filter(!is.na(chi) | chi != "") %>%
     dplyr::left_join(
-      deaths_data,
+      deaths_data %>%
+        slfhelper::get_chi(),
       by = "chi",
       suffix = c("", "_boxi")
-    )
+    ) %>%
+    dplyr::filter(deceased == TRUE | deceased_boxi == TRUE) %>%
+    dplyr::distinct()
 
 
   # Check and print error message for records which already have a death_date in the episode file, but this doesn't match the BOXI death date
-  check_death_date_match <- data %>%
+  check_death_date_match <- death_joined %>%
     dplyr::filter(death_date != death_date_boxi)
 
   if (nrow(check_death_date_match) != 0) {
@@ -32,7 +35,7 @@ add_activity_after_death_flag <- function(
 
 
   # Check and print error message for records which have a record_keydate1 after their BOXI death date
-  check_keydate1_death_date <- data %>%
+  check_keydate1_death_date <- death_joined %>%
     dplyr::filter(record_keydate1 > death_date_boxi)
 
   if (nrow(check_death_date_match) != 0) {
@@ -40,24 +43,24 @@ add_activity_after_death_flag <- function(
   }
 
 
-  flag_data <- data %>%
+  flag_data <- death_joined %>%
     dplyr::mutate(
-      flag_keydate1 = if_else(record_keydate1 > death_date_boxi, 1, 0),
-      flag_keydate2 = if_else(record_keydate2 > death_date_boxi, 1, 0),
+      flag_keydate1 = dplyr::if_else(record_keydate1 > death_date_boxi, 1, 0),
+      flag_keydate2 = dplyr::if_else(record_keydate2 > death_date_boxi, 1, 0),
 
       # Next flag records with 'ongoing' activity after date of death (available from BOXI) if keydate2 is missing and the death date occurs in
       # in the current or a previous financial year.
-      flag_keydate2_missing = if_else(((is.na(record_keydate2) | record_keydate2 == "") & (death_date_boxi <= paste0("20", substr(year, 3, 4), "-03-31"))), 1, 0),
+      flag_keydate2_missing = dplyr::if_else(((is.na(record_keydate2) | record_keydate2 == "") & (death_date_boxi <= paste0("20", substr(year, 3, 4), "-03-31"))), 1, 0),
 
       # Also flag records without a death_date in the episode file, but the BOXI death date occurs in the current or a previous financial year.
-      flag_deathdate_missing = if_else(((is.na(death_date) | death_date == "") & (death_date_boxi <= paste0("20", substr(year, 3, 4), "-03-31"))), 1, 0)
+      flag_deathdate_missing = dplyr::if_else(((is.na(death_date) | death_date == "") & (death_date_boxi <= paste0("20", substr(year, 3, 4), "-03-31"))), 1, 0)
     ) %>%
     # These should be flagged by one of the two lines of code above, but in these cases, we will also fill in the blank death date if appropriate
 
     # Search all variables beginning with "flag_" for value "1" and create new variable to flag cases where 1 is present
     # Multiplying by 1 changes flag from true/false to 1/0
     dplyr::mutate(activity_after_death = purrr::pmap_dbl(
-      select(., contains("flag_")),
+      dplyr::select(., contains("flag_")),
       ~ any(grepl("^1$", c(...)),
         na.rm = TRUE
       ) * 1
@@ -77,18 +80,17 @@ add_activity_after_death_flag <- function(
   # Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried
   # over from previous financial years
   flag_data <- flag_data %>%
-    dplyr::mutate(death_date = if_else(((is.na(death_date) | death_date == "") & (death_date_boxi <= paste0("20", substr(year, 1, 2), "-03-31"))), death_date_boxi, death_date)) %>%
-    dplyr::mutate(deceased = if_else(((is.na(deceased) | deceased == "") & (deceased_boxi == TRUE)), deceased_boxi, deceased)) %>%
+    dplyr::filter(activity_after_death == 1) %>%
     # Remove temporary flag variables used to create activity after death flag and fill in missing death_date
-    dplyr::select(-c(death_date_boxi, deceased_boxi, flag_keydate1, flag_keydate2, flag_keydate2_missing, flag_deathdate_missing))
+    dplyr::select(year, chi, record_keydate1, record_keydate2, activity_after_death) %>%
+    dplyr::distinct()
 
   # Match activity after death flag back to episode file
   final_data <- data %>%
     dplyr::left_join(
       flag_data,
-      by = "chi",
-      na_matches = "never",
-      relationship = "many-to-one"
+      by = c("year", "chi", "record_keydate1", "record_keydate2"),
+      na_matches = "never"
     )
 
 
@@ -113,7 +115,8 @@ add_activity_after_death_flag <- function(
 #'
 #'
 # Read data------------------------------------------------
-process_deaths_lookup <- function(update = latest_update(), ...) {
+process_deaths_lookup <- function(update = latest_update(),
+                                  write_to_disk = TRUE, ...) {
   all_boxi_deaths <- read_file(get_slf_deaths_lookup_path("1415")) %>%
     rbind(read_file(get_slf_deaths_lookup_path("1516"))) %>%
     rbind(read_file(get_slf_deaths_lookup_path("1617"))) %>%
@@ -125,7 +128,7 @@ process_deaths_lookup <- function(update = latest_update(), ...) {
     rbind(read_file(get_slf_deaths_lookup_path("2223"))) %>%
     rbind(read_file(get_slf_deaths_lookup_path("2324"))) %>%
     # Can this be automated to pick up files starting with name "get_slf_deaths_lookup_path"?
-
+    slfhelper::get_chi() %>%
     # Remove rows with missing or blank CHI number - could also use na.omit?
     # na.omit(all_boxi_deaths)
     dplyr::filter(!is.na(chi) | chi != "")
@@ -169,8 +172,12 @@ process_deaths_lookup <- function(update = latest_update(), ...) {
   # Maybe save as its own function
   # Write the all BOXI NRS deaths lookup file to disk, so this can be used to populate activity after death flag in each episode file
   if (write_to_disk) {
-    all_boxi_deaths %>%
-      write_file(get_all_slf_deaths_lookup_path())
+    write_file(
+      all_boxi_deaths,
+      fs::path(get_slf_dir(), "Deaths",
+        file_name = stringr::str_glue("anon-all_slf_deaths_lookup_{update}.parquet")
+      )
+    )
   }
 
   return(all_boxi_deaths)
diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R
index d7e68c494..3a1b932be 100644
--- a/R/get_slf_lookup_paths.R
+++ b/R/get_slf_lookup_paths.R
@@ -63,7 +63,7 @@ get_slf_deaths_lookup_path <- function(year, ...) {
   # Review the naming convention of this path and file
   slf_deaths_lookup_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Deaths"),
-    file_name = stringr::str_glue("slf_deaths_lookup_{year}.parquet"),
+    file_name = stringr::str_glue("anon-slf_deaths_lookup_{year}.parquet"),
     ...
   )
 
@@ -82,19 +82,20 @@ get_slf_deaths_lookup_path <- function(year, ...) {
 #' @family slf lookup file path
 #' @seealso [get_file_path()] for the generic function.
 
-get_all_slf_deaths_lookup_path <- function(update = latest_update()) {
+get_all_slf_deaths_lookup_path <- function(update = latest_update(), ...) {
   # Note this name is very similar to the existing slf_deaths_lookup_path which returnsthe path for
   # the processed BOXI extract for each financial year. This function will return the combined financial
   # years lookup i.e. all years put together.
   all_slf_deaths_lookup_path <- get_file_path(
-    directory = fs::path(get_slf_dir(), "Deaths",
-      file_name = stringr::str_glue("all_slf_deaths_lookup_{update}.parquet")
-    )
+    directory = fs::path(get_slf_dir(), "Deaths"),
+    file_name = stringr::str_glue("anon-all_slf_deaths_lookup_{update}.parquet"),
+    ...
   )
-
   return(all_slf_deaths_lookup_path)
 }
 
+
+
 #' SLF CHI Deaths File Path
 #'
 #' @description Get the full path to the CHI deaths file

From 5e5c71a290e42cdc9a038b52c9ed2e1f3ff14781 Mon Sep 17 00:00:00 2001
From: SwiftySalmon <SwiftySalmon@users.noreply.github.com>
Date: Mon, 27 May 2024 10:46:49 +0000
Subject: [PATCH 099/186] Update documentation

---
 man/add_activity_after_death_flag.Rd  | 2 +-
 man/get_all_slf_deaths_lookup_path.Rd | 2 +-
 man/process_deaths_lookup.Rd          | 8 ++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/man/add_activity_after_death_flag.Rd b/man/add_activity_after_death_flag.Rd
index f13a47ddf..67d3cb1ad 100644
--- a/man/add_activity_after_death_flag.Rd
+++ b/man/add_activity_after_death_flag.Rd
@@ -7,7 +7,7 @@
 add_activity_after_death_flag(
   data,
   year,
-  deaths_data = read_file(all_slf_deaths_lookup_path())
+  deaths_data = read_file(get_all_slf_deaths_lookup_path())
 )
 }
 \arguments{
diff --git a/man/get_all_slf_deaths_lookup_path.Rd b/man/get_all_slf_deaths_lookup_path.Rd
index 93c8a2840..dd5291c2d 100644
--- a/man/get_all_slf_deaths_lookup_path.Rd
+++ b/man/get_all_slf_deaths_lookup_path.Rd
@@ -4,7 +4,7 @@
 \alias{get_all_slf_deaths_lookup_path}
 \title{SLF death dates File Path}
 \usage{
-get_all_slf_deaths_lookup_path(update = latest_update())
+get_all_slf_deaths_lookup_path(update = latest_update(), ...)
 }
 \arguments{
 \item{...}{additional arguments passed to \code{\link[=get_file_path]{get_file_path()}}}
diff --git a/man/process_deaths_lookup.Rd b/man/process_deaths_lookup.Rd
index 3780e48a7..8e9ec1199 100644
--- a/man/process_deaths_lookup.Rd
+++ b/man/process_deaths_lookup.Rd
@@ -4,15 +4,15 @@
 \alias{process_deaths_lookup}
 \title{Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts}
 \usage{
-process_deaths_lookup(update = latest_update(), ...)
+process_deaths_lookup(update = latest_update(), write_to_disk = TRUE, ...)
 }
 \arguments{
+\item{write_to_disk}{(optional) Should the data be written to disk default is
+\code{TRUE} i.e. write the data to disk.}
+
 \item{file_path}{Path to the BOXI NRS file for each financial year - may not use this}
 
 \item{year}{The year to process, in FY format - may not use this}
-
-\item{write_to_disk}{(optional) Should the data be written to disk default is
-\code{TRUE} i.e. write the data to disk.}
 }
 \value{
 the final data as a \link[tibble:tibble-package]{tibble}.

From 5ed7cf35269703b466d2af720aec4f9a85b862f9 Mon Sep 17 00:00:00 2001
From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
Date: Mon, 27 May 2024 15:07:08 +0100
Subject: [PATCH 100/186] Update R/add_activity_after_death_flag.R

Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/add_activity_after_death_flag.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
index 6be5c5f73..e7910b8f1 100644
--- a/R/add_activity_after_death_flag.R
+++ b/R/add_activity_after_death_flag.R
@@ -60,7 +60,7 @@ add_activity_after_death_flag <- function(
     # Search all variables beginning with "flag_" for value "1" and create new variable to flag cases where 1 is present
     # Multiplying by 1 changes flag from true/false to 1/0
     dplyr::mutate(activity_after_death = purrr::pmap_dbl(
-      dplyr::select(., contains("flag_")),
+      dplyr::select(., tidyselect::contains("flag_")),
       ~ any(grepl("^1$", c(...)),
         na.rm = TRUE
       ) * 1

From 8faac6e22ecd291b4af505eaa5308fdfe55d2efd Mon Sep 17 00:00:00 2001
From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
Date: Mon, 27 May 2024 15:07:30 +0100
Subject: [PATCH 101/186] Update R/add_activity_after_death_flag.R

Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/add_activity_after_death_flag.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
index e7910b8f1..43ac1bd73 100644
--- a/R/add_activity_after_death_flag.R
+++ b/R/add_activity_after_death_flag.R
@@ -13,7 +13,7 @@ add_activity_after_death_flag <- function(
     year,
     deaths_data = read_file(get_all_slf_deaths_lookup_path())) {
   death_joined <- data %>%
-    dplyr::select(year, chi, record_keydate1, record_keydate2, death_date, deceased) %>%
+    dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$death_date, .data$deceased) %>%
     dplyr::filter(!is.na(chi) | chi != "") %>%
     dplyr::left_join(
       deaths_data %>%

From 1406ff02a80b1145484ba4634ca4dbf209251e50 Mon Sep 17 00:00:00 2001
From: marjom02 <megan.mcnicol2@nhs.scot>
Date: Mon, 27 May 2024 15:30:04 +0100
Subject: [PATCH 102/186] added .data$ to variables

---
 R/add_activity_after_death_flag.R | 33 +++++++++++++++++--------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
index 43ac1bd73..2acad3bbe 100644
--- a/R/add_activity_after_death_flag.R
+++ b/R/add_activity_after_death_flag.R
@@ -11,23 +11,26 @@
 add_activity_after_death_flag <- function(
     data,
     year,
-    deaths_data = read_file(get_all_slf_deaths_lookup_path())) {
+    deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>%
+      slfhelper::get_chi()
+
+    ) {
+
   death_joined <- data %>%
     dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$death_date, .data$deceased) %>%
-    dplyr::filter(!is.na(chi) | chi != "") %>%
+    dplyr::filter(!is.na(.data$chi) | .data$chi != "") %>%
     dplyr::left_join(
-      deaths_data %>%
-        slfhelper::get_chi(),
+      deaths_data,
       by = "chi",
       suffix = c("", "_boxi")
     ) %>%
-    dplyr::filter(deceased == TRUE | deceased_boxi == TRUE) %>%
+    dplyr::filter(.data$deceased == TRUE | .data$deceased_boxi == TRUE) %>%
     dplyr::distinct()
 
 
   # Check and print error message for records which already have a death_date in the episode file, but this doesn't match the BOXI death date
   check_death_date_match <- death_joined %>%
-    dplyr::filter(death_date != death_date_boxi)
+    dplyr::filter(.data$death_date != .data$death_date_boxi)
 
   if (nrow(check_death_date_match) != 0) {
     warning("There were records in the episode file which already have a death_date, but does not match the BOXI NRS death date.")
@@ -36,7 +39,7 @@ add_activity_after_death_flag <- function(
 
   # Check and print error message for records which have a record_keydate1 after their BOXI death date
   check_keydate1_death_date <- death_joined %>%
-    dplyr::filter(record_keydate1 > death_date_boxi)
+    dplyr::filter(.data$record_keydate1 > .data$death_date_boxi)
 
   if (nrow(check_death_date_match) != 0) {
     warning("There were records in the episode file which have a record_keydate1 after the BOXI NRS death date.")
@@ -45,15 +48,15 @@ add_activity_after_death_flag <- function(
 
   flag_data <- death_joined %>%
     dplyr::mutate(
-      flag_keydate1 = dplyr::if_else(record_keydate1 > death_date_boxi, 1, 0),
-      flag_keydate2 = dplyr::if_else(record_keydate2 > death_date_boxi, 1, 0),
+      flag_keydate1 = dplyr::if_else(.data$record_keydate1 > .data$death_date_boxi, 1, 0),
+      flag_keydate2 = dplyr::if_else(.data$record_keydate2 > .data$death_date_boxi, 1, 0),
 
       # Next flag records with 'ongoing' activity after date of death (available from BOXI) if keydate2 is missing and the death date occurs in
       # in the current or a previous financial year.
-      flag_keydate2_missing = dplyr::if_else(((is.na(record_keydate2) | record_keydate2 == "") & (death_date_boxi <= paste0("20", substr(year, 3, 4), "-03-31"))), 1, 0),
+      flag_keydate2_missing = dplyr::if_else(((is.na(.data$record_keydate2) | .data$record_keydate2 == "") & (.data$death_date_boxi <= paste0("20", substr(.data$year, 3, 4), "-03-31"))), 1, 0),
 
       # Also flag records without a death_date in the episode file, but the BOXI death date occurs in the current or a previous financial year.
-      flag_deathdate_missing = dplyr::if_else(((is.na(death_date) | death_date == "") & (death_date_boxi <= paste0("20", substr(year, 3, 4), "-03-31"))), 1, 0)
+      flag_deathdate_missing = dplyr::if_else(((is.na(.data$death_date) | .data$death_date == "") & (.data$death_date_boxi <= paste0("20", substr(.data$year, 3, 4), "-03-31"))), 1, 0)
     ) %>%
     # These should be flagged by one of the two lines of code above, but in these cases, we will also fill in the blank death date if appropriate
 
@@ -70,7 +73,7 @@ add_activity_after_death_flag <- function(
   # Check and print error message for records which already are TRUE for the deceased variable in the episode file, but this doesn't match the
   # BOXI deceased variable
   check_deceased_match <- flag_data %>%
-    dplyr::filter(deceased != deceased_boxi)
+    dplyr::filter(.data$deceased != .data$deceased_boxi)
 
   if (nrow(check_deceased_match) != 0) {
     warning("There were records in the episode file which have a deceased variable which does not match the BOXI NRS deceased variable")
@@ -80,9 +83,9 @@ add_activity_after_death_flag <- function(
   # Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried
   # over from previous financial years
   flag_data <- flag_data %>%
-    dplyr::filter(activity_after_death == 1) %>%
+    dplyr::filter(.data$activity_after_death == 1) %>%
     # Remove temporary flag variables used to create activity after death flag and fill in missing death_date
-    dplyr::select(year, chi, record_keydate1, record_keydate2, activity_after_death) %>%
+    dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$activity_after_death) %>%
     dplyr::distinct()
 
   # Match activity after death flag back to episode file
@@ -131,7 +134,7 @@ process_deaths_lookup <- function(update = latest_update(),
     slfhelper::get_chi() %>%
     # Remove rows with missing or blank CHI number - could also use na.omit?
     # na.omit(all_boxi_deaths)
-    dplyr::filter(!is.na(chi) | chi != "")
+    dplyr::filter(!is.na(.data$chi) | chi != "")
 
   # Check all CHI numbers are valid
   chi_check <- all_boxi_deaths %>%

From c535165231f5cea2757a0988d8e84ff9cbcda090 Mon Sep 17 00:00:00 2001
From: SwiftySalmon <SwiftySalmon@users.noreply.github.com>
Date: Mon, 27 May 2024 14:33:29 +0000
Subject: [PATCH 103/186] Update documentation

---
 man/add_activity_after_death_flag.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/add_activity_after_death_flag.Rd b/man/add_activity_after_death_flag.Rd
index 67d3cb1ad..36eafe7bd 100644
--- a/man/add_activity_after_death_flag.Rd
+++ b/man/add_activity_after_death_flag.Rd
@@ -7,7 +7,7 @@
 add_activity_after_death_flag(
   data,
   year,
-  deaths_data = read_file(get_all_slf_deaths_lookup_path())
+  deaths_data = read_file(get_all_slf_deaths_lookup_path()) \%>\% slfhelper::get_chi()
 )
 }
 \arguments{

From edb4782badd22438f56cb0389ae72140b20fa883 Mon Sep 17 00:00:00 2001
From: SwiftySalmon <SwiftySalmon@users.noreply.github.com>
Date: Mon, 27 May 2024 14:33:44 +0000
Subject: [PATCH 104/186] Style code

---
 R/add_activity_after_death_flag.R | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
index 2acad3bbe..f68c7c9f4 100644
--- a/R/add_activity_after_death_flag.R
+++ b/R/add_activity_after_death_flag.R
@@ -12,10 +12,7 @@ add_activity_after_death_flag <- function(
     data,
     year,
     deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>%
-      slfhelper::get_chi()
-
-    ) {
-
+      slfhelper::get_chi()) {
   death_joined <- data %>%
     dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$death_date, .data$deceased) %>%
     dplyr::filter(!is.na(.data$chi) | .data$chi != "") %>%

From 56df5d7dfa1e659b5a1c368ef745d79fd6667f38 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 27 May 2024 15:34:28 +0100
Subject: [PATCH 105/186] comment out cross_year_tests for now

---
 _targets.R | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/_targets.R b/_targets.R
index 74d022698..1422c8a9b 100644
--- a/_targets.R
+++ b/_targets.R
@@ -591,11 +591,11 @@ list(
         data = episode_file,
         year = year
       )
-    ),
-    tar_target(
-      cross_year_tests,
-      process_tests_cross_year(year = year)
-    ), # ,
+    ) # ,
+    # tar_target(
+    #   cross_year_tests,
+    #   process_tests_cross_year(year = year)
+    # ), # ,
     # tar_target(
     #   individual_file,
     #   create_individual_file(

From dfdee6f2eb6987e677650fb816fabed097bf7ebd Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 14 May 2024 13:54:40 +0100
Subject: [PATCH 106/186] Update anon_chi for dn and cmh

---
 R/process_extract_cmh.R              | 3 ++-
 R/process_extract_district_nursing.R | 3 ++-
 R/read_extract_cmh.R                 | 7 ++++---
 R/read_extract_district_nursing.R    | 7 ++++---
 4 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/R/process_extract_cmh.R b/R/process_extract_cmh.R
index 418b95b00..a29c9f41c 100644
--- a/R/process_extract_cmh.R
+++ b/R/process_extract_cmh.R
@@ -68,7 +68,8 @@ process_extract_cmh <- function(data,
       "diag4",
       "diag5",
       "diag6"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_extract_district_nursing.R b/R/process_extract_district_nursing.R
index 6254926f0..a621f6a74 100644
--- a/R/process_extract_district_nursing.R
+++ b/R/process_extract_district_nursing.R
@@ -131,7 +131,8 @@ process_extract_district_nursing <- function(
         ~ sum(.x)
       )
     ) %>%
-    dplyr::ungroup()
+    dplyr::ungroup() %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     dn_episodes %>%
diff --git a/R/read_extract_cmh.R b/R/read_extract_cmh.R
index 0beb4ea4a..75cc4c99e 100644
--- a/R/read_extract_cmh.R
+++ b/R/read_extract_cmh.R
@@ -14,7 +14,7 @@ read_extract_cmh <- function(
   # Read BOXI extract
   extract_cmh <- read_file(file_path,
     col_types = readr::cols_only(
-      "UPI Number [C]" = readr::col_character(),
+      "anon_chi" = readr::col_character(),
       "Patient DoB Date [C]" = readr::col_date(format = "%Y/%m/%d %T"),
       "Gender" = readr::col_double(),
       "Patient Postcode [C]" = readr::col_character(),
@@ -35,7 +35,7 @@ read_extract_cmh <- function(
   ) %>%
     # rename
     dplyr::rename(
-      chi = "UPI Number [C]",
+      anon_chi = "anon_chi",
       dob = "Patient DoB Date [C]",
       gender = "Gender",
       postcode = "Patient Postcode [C]",
@@ -52,7 +52,8 @@ read_extract_cmh <- function(
       diag3 = "Other Aim of Contact (2)",
       diag4 = "Other Aim of Contact (3)",
       diag5 = "Other Aim of Contact (4)"
-    )
+    ) %>%
+    slfhelper::get_chi()
 
   return(extract_cmh)
 }
diff --git a/R/read_extract_district_nursing.R b/R/read_extract_district_nursing.R
index 59b1142e5..cd9d08929 100644
--- a/R/read_extract_district_nursing.R
+++ b/R/read_extract_district_nursing.R
@@ -19,7 +19,7 @@ read_extract_district_nursing <- function(
       `Primary Intervention Category` = readr::col_character(),
       `Other Intervention Category (1)` = readr::col_character(),
       `Other Intervention Category (2)` = readr::col_character(),
-      `UPI Number [C]` = readr::col_character(),
+      `anon_chi` = readr::col_character(),
       `Patient DoB Date [C]` = readr::col_date(format = "%Y/%m/%d %T"),
       `Patient Postcode [C] (Contact)` = readr::col_character(),
       `Duration of Contact (measure)` = readr::col_double(),
@@ -46,14 +46,15 @@ read_extract_district_nursing <- function(
       datazone2011 = "Patient Data Zone 2011 (Contact)",
       hbpraccode = "Practice NHS Board Code 9 (Contact)",
       hbtreatcode = "Treatment NHS Board Code 9",
-      chi = "UPI Number [C]",
+      anon_chi = "anon_chi",
       record_keydate1 = "Contact Date",
       primary_intervention = "Primary Intervention Category",
       intervention_1 = "Other Intervention Category (1)",
       intervention_2 = "Other Intervention Category (2)",
       duration_contact = "Duration of Contact (measure)",
       location_contact = "Location of Contact"
-    )
+    ) %>%
+    slfhelper::get_chi()
 
   return(extract_district_nursing)
 }

From 225bc54dee3865d0419ec29c9ab1b7f21bf7aa22 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 17 May 2024 11:16:03 +0100
Subject: [PATCH 107/186] Update boxi filepath ("anon-")

---
 R/get_boxi_extract_path.R | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R
index 3c2b4acdc..2752b634a 100644
--- a/R/get_boxi_extract_path.R
+++ b/R/get_boxi_extract_path.R
@@ -41,19 +41,19 @@ get_boxi_extract_path <- function(
 
   file_name <- dplyr::case_match(
     type,
-    "ae" ~ "A&E-episode-level-extract",
-    "ae_cup" ~ "A&E-UCD-CUP-extract",
-    "acute" ~ "Acute-episode-level-extract",
-    "cmh" ~ "Community-MH-contact-level-extract",
-    "dn" ~ "District-Nursing-contact-level-extract",
-    "gp_ooh-c" ~ "GP-OoH-consultations-extract",
-    "gp_ooh-d" ~ "GP-OoH-diagnosis-extract",
-    "gp_ooh-o" ~ "GP-OoH-outcomes-extract",
-    "homelessness" ~ "Homelessness-extract",
-    "maternity" ~ "Maternity-episode-level-extract",
-    "mh" ~ "Mental-Health-episode-level-extract",
-    "deaths" ~ "NRS-death-registrations-extract",
-    "outpatients" ~ "Outpatients-episode-level-extract"
+    "ae" ~ "anon-A&E-episode-level-extract",
+    "ae_cup" ~ "anon-A&E-UCD-CUP-extract",
+    "acute" ~ "anon-Acute-episode-level-extract",
+    "cmh" ~ "anon-Community-MH-contact-level-extract",
+    "dn" ~ "anon-District-Nursing-contact-level-extract",
+    "gp_ooh-c" ~ "anon-GP-OoH-consultations-extract",
+    "gp_ooh-d" ~ "anon-GP-OoH-diagnosis-extract",
+    "gp_ooh-o" ~ "anon-GP-OoH-outcomes-extract",
+    "homelessness" ~ "anon-Homelessness-extract",
+    "maternity" ~ "anon-Maternity-episode-level-extract",
+    "mh" ~ "anon-Mental-Health-episode-level-extract",
+    "deaths" ~ "anon-NRS-death-registrations-extract",
+    "outpatients" ~ "anon-Outpatients-episode-level-extract"
   )
 
   boxi_extract_path_csv_gz <- fs::path(

From ef46ff62a75322fff3aacabbe7625b2020287851 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 17 May 2024 11:54:04 +0100
Subject: [PATCH 108/186] remove file copy

---
 00_Sort_BI_Extracts.R | 50 +++++++++++++++++--------------------------
 1 file changed, 20 insertions(+), 30 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 0638123b9..5fd7438a6 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -1,5 +1,5 @@
 # Define the source directory and financial year pattern
-compress_files <- TRUE
+compress_files <- FALSE
 source_dir <- "/conf/sourcedev/Source_Linkage_File_Updates/Extracts Temp"
 pattern <- "-20(\\d{4})\\.csv"
 
@@ -20,49 +20,39 @@ extract_financial_year <- function(filename) {
   }
 }
 
-# Create a function to read variable names
-is_chi_in_file <- function(filename) {
-  data <- read.csv(filename, nrow = 1)
-  return(grepl("UPI", names(data)) %>% any())
-}
-
-# function to move files
-move_temps_to_year_extract <- function(csv_file, compress_files = TRUE) {
+# Create directories for each financial year and move files
+for (csv_file in csv_files) {
   financial_year <- extract_financial_year(csv_file)
   # check if year directory exists
   if (!is.null(financial_year)) {
     financial_year_dir <- file.path("/conf/sourcedev/Source_Linkage_File_Updates", financial_year, "Extracts")
-    # if financial_year_dir does not exist, create the year directory
+    # if not, create the year directory
     if (!dir.exists(financial_year_dir)) {
       dir.create(financial_year_dir)
     }
 
-    new_file_path <- file.path(financial_year_dir, paste0("anon-", basename(csv_file)))
-
-    # set up new file path location to move each file to their destination.
-    chi_in_file <- is_chi_in_file(csv_file)
-    if (chi_in_file) {
-      read_file(csv_file) %>%
-        dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
-        slfhelper::get_anon_chi() %>%
-        readr::write_csv(file = new_file_path)
-      cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
-    } else {
-      fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
-      cat("Moved", csv_file, "to", new_file_path, "\n")
-    }
-
     # compress file
     if (compress_files) {
-      cat("Compressing:", basename(new_file_path), "\n")
+      cat("Compressing:", basename(csv_file), "\n")
       system2(
         command = "gzip",
-        args = shQuote(new_file_path)
+        args = shQuote(csv_file)
       )
+      csv_file <- paste0(csv_file, ".gz")
     }
-    # remove old files
+
+    # move file
+    new_file_path <- file.path(financial_year_dir, paste0("anon-",basename(csv_file)))
+
+    # Read in each file and replace chi with anon_chi
+    for (csv_file in csv_files) {
+      hl1<- read_file(csv_file) %>%
+        dplyr::rename(chi = 'UPI Number [C]') %>%
+        slfhelper::get_anon_chi() %>%
+        readr::write_csv(file = new_file_path)
+    }
+
     file.remove(csv_file)
+    cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
   }
 }
-
-lapply(csv_files, move_temps_to_year_extract, compress_files = compress_files)

From 8c389f967284b6a848eca7d92d8093176b94927c Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 17 May 2024 11:54:34 +0100
Subject: [PATCH 109/186] Update `00_sort_bi_extracts` note

---
 00_Sort_BI_Extracts.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 5fd7438a6..65d809887 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -41,8 +41,8 @@ for (csv_file in csv_files) {
       csv_file <- paste0(csv_file, ".gz")
     }
 
-    # move file
-    new_file_path <- file.path(financial_year_dir, paste0("anon-",basename(csv_file)))
+    # set up new file path location to move each file to their destination.
+    new_file_path <- file.path(financial_year_dir, paste0("anon-", basename(csv_file)))
 
     # Read in each file and replace chi with anon_chi
     for (csv_file in csv_files) {

From 979581e02f9e361b445d9b634c3c409ed7fb8830 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Fri, 17 May 2024 10:57:37 +0000
Subject: [PATCH 110/186] Style code

---
 00_Sort_BI_Extracts.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 65d809887..3aca8257e 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -46,8 +46,8 @@ for (csv_file in csv_files) {
 
     # Read in each file and replace chi with anon_chi
     for (csv_file in csv_files) {
-      hl1<- read_file(csv_file) %>%
-        dplyr::rename(chi = 'UPI Number [C]') %>%
+      hl1 <- read_file(csv_file) %>%
+        dplyr::rename(chi = "UPI Number [C]") %>%
         slfhelper::get_anon_chi() %>%
         readr::write_csv(file = new_file_path)
     }

From bd2ad6ca71290103385774de2941ad4908c0de62 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 17 May 2024 12:25:22 +0100
Subject: [PATCH 111/186] Update `get_source_extract_path` (anon- prefix)

---
 R/get_source_extract_path.R | 36 ++++++++++++++++++------------------
 1 file changed, 18 insertions(+), 18 deletions(-)

diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R
index b4ccf4920..3c4a4cb65 100644
--- a/R/get_source_extract_path.R
+++ b/R/get_source_extract_path.R
@@ -45,24 +45,24 @@ get_source_extract_path <- function(year,
 
   file_name <- dplyr::case_match(
     type,
-    "acute" ~ "acute_for_source",
-    "ae" ~ "a_and_e_for_source",
-    "at" ~ "alarms-telecare-for-source",
-    "ch" ~ "care_home_for_source",
-    "cmh" ~ "cmh_for_source",
-    "client" ~ "client_for_source",
-    "dd" ~ "delayed_discharge_for_source",
-    "deaths" ~ "deaths_for_source",
-    "dn" ~ "district_nursing_for_source",
-    "gp_ooh" ~ "gp_ooh_for_source",
-    "hc" ~ "home_care_for_source",
-    "homelessness" ~ "homelessness_for_source",
-    "maternity" ~ "maternity_for_source",
-    "mh" ~ "mental_health_for_source",
-    "dd" ~ "dd_for_source",
-    "outpatients" ~ "outpatients_for_source",
-    "pis" ~ "prescribing_file_for_source",
-    "sds" ~ "sds-for-source"
+    "acute" ~ "anon-acute_for_source",
+    "ae" ~ "anon-a_and_e_for_source",
+    "at" ~ "anon-alarms-telecare-for-source",
+    "ch" ~ "anon-care_home_for_source",
+    "cmh" ~ "anon-cmh_for_source",
+    "client" ~ "anon-client_for_source",
+    "dd" ~ "anon-delayed_discharge_for_source",
+    "deaths" ~ "anon-deaths_for_source",
+    "dn" ~ "anon-district_nursing_for_source",
+    "gp_ooh" ~ "anon-gp_ooh_for_source",
+    "hc" ~ "anon-home_care_for_source",
+    "homelessness" ~ "anon-homelessness_for_source",
+    "maternity" ~ "anon-maternity_for_source",
+    "mh" ~ "anon-mental_health_for_source",
+    "dd" ~ "anon-dd_for_source",
+    "outpatients" ~ "anon-outpatients_for_source",
+    "pis" ~ "anon-prescribing_file_for_source",
+    "sds" ~ "anon-sds-for-source"
   ) %>%
     stringr::str_glue("-20{year}.parquet")
 

From 060ab648f673b5efeeb0aa8b0492ce8951e6462b Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 17 May 2024 15:51:37 +0100
Subject: [PATCH 112/186] Update chi when this is different, e.g. UPI number or
 PAT_UPI

---
 00_Sort_BI_Extracts.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 3aca8257e..368ad12bc 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -47,8 +47,8 @@ for (csv_file in csv_files) {
     # Read in each file and replace chi with anon_chi
     for (csv_file in csv_files) {
       hl1 <- read_file(csv_file) %>%
-        dplyr::rename(chi = "UPI Number [C]") %>%
-        slfhelper::get_anon_chi() %>%
+        dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
+        slfhelper::get_anon_chi(chi = chi) %>%
         readr::write_csv(file = new_file_path)
     }
 

From 47c17ab12969b4e076d9fde0f478c4f694b25c8a Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 17 May 2024 16:03:18 +0100
Subject: [PATCH 113/186] Change `read` functions to read anon_chi

---
 R/read_extract_acute.R              | 7 ++++---
 R/read_extract_ae.R                 | 7 ++++---
 R/read_extract_delayed_discharges.R | 1 +
 R/read_extract_homelessness.R       | 7 ++++---
 R/read_extract_maternity.R          | 7 ++++---
 R/read_extract_mental_health.R      | 7 ++++---
 R/read_extract_nrs_deaths.R         | 7 ++++---
 R/read_extract_outpatients.R        | 7 ++++---
 R/read_extract_prescribing.R        | 7 ++++---
 R/read_it_chi_deaths.R              | 7 ++++---
 10 files changed, 37 insertions(+), 27 deletions(-)

diff --git a/R/read_extract_acute.R b/R/read_extract_acute.R
index 7a227db73..9f649a560 100644
--- a/R/read_extract_acute.R
+++ b/R/read_extract_acute.R
@@ -15,7 +15,7 @@ read_extract_acute <- function(year, file_path = get_boxi_extract_path(year = ye
       "GLS Record" = readr::col_character(),
       "Date of Admission(01)" = readr::col_date(format = "%Y/%m/%d %T"),
       "Date of Discharge(01)" = readr::col_date(format = "%Y/%m/%d %T"),
-      "Pat UPI" = readr::col_character(),
+      "anon_chi" = readr::col_character(),
       "Pat Gender Code" = readr::col_double(),
       "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
       "Practice Location Code" = readr::col_character(),
@@ -127,7 +127,7 @@ read_extract_acute <- function(year, file_path = get_boxi_extract_path(year = ye
       op4a = "Operation 4A Code (4 char)",
       op4b = "Operation 4B Code (4 char)",
       gender = "Pat Gender Code",
-      chi = "Pat UPI",
+      anon_chi = "anon_chi",
       cat = "Patient Category Code",
       gpprac = "Practice Location Code",
       hbpraccode = "Practice NHS Board Code - current",
@@ -154,7 +154,8 @@ read_extract_acute <- function(year, file_path = get_boxi_extract_path(year = ye
     # replace NA in cost_total_net by 0
     dplyr::mutate(
       cost_total_net = tidyr::replace_na(.data[["cost_total_net"]], 0.0)
-    )
+    ) %>%
+    slfhelper::get_chi()
 
   return(extract_acute)
 }
diff --git a/R/read_extract_ae.R b/R/read_extract_ae.R
index e426a167c..a006966e5 100644
--- a/R/read_extract_ae.R
+++ b/R/read_extract_ae.R
@@ -11,7 +11,7 @@ read_extract_ae <- function(
     col_type = readr::cols(
       "Arrival Date" = readr::col_date(format = "%Y/%m/%d %T"),
       "DAT Date" = readr::col_date(format = "%Y/%m/%d %T"),
-      "Pat UPI [C]" = readr::col_character(),
+      "anon_chi" = readr::col_character(),
       "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
       "Pat Gender Code" = readr::col_double(),
       "NHS Board of Residence Code - current" = readr::col_character(),
@@ -71,7 +71,7 @@ read_extract_ae <- function(
       hscp = "HSCP of Residence Code - current",
       hbrescode = "NHS Board of Residence Code - current",
       hbtreatcode = "Treatment NHS Board Code - current",
-      chi = "Pat UPI [C]",
+      anon_chi = "anon_chi",
       gender = "Pat Gender Code",
       ae_patflow = "Patient Flow Code",
       ae_placeinc = "Place of Incident Code",
@@ -83,7 +83,8 @@ read_extract_ae <- function(
       cost_total_net = "Total Net Costs",
       location = "Treatment Location Code",
       case_ref_number = "Case Reference Number"
-    )
+    ) %>%
+    slfhelper::get_chi()
 
   return(extract_ae)
 }
diff --git a/R/read_extract_delayed_discharges.R b/R/read_extract_delayed_discharges.R
index d1b9c62a1..fa46fb615 100644
--- a/R/read_extract_delayed_discharges.R
+++ b/R/read_extract_delayed_discharges.R
@@ -8,6 +8,7 @@
 read_extract_delayed_discharges <- function(file_path = get_dd_path()) {
   extract_delayed_discharges <- read_file(file_path) %>%
     janitor::clean_names() %>%
+    slfhelper::get_chi() %>%
     dplyr::mutate(
       monthflag = lubridate::my(.data[["monthflag"]]),
       delay_end_reason = as.integer(.data[["delay_end_reason"]])
diff --git a/R/read_extract_homelessness.R b/R/read_extract_homelessness.R
index aa6ed7779..63e216483 100644
--- a/R/read_extract_homelessness.R
+++ b/R/read_extract_homelessness.R
@@ -17,7 +17,7 @@ read_extract_homelessness <- function(
       "Case Closed Date" = readr::col_date(format = "%Y/%m/%d %T"),
       "Sending Local Authority Code 9" = readr::col_character(),
       "Client Unique Identifier" = readr::col_character(),
-      "UPI Number [C]" = readr::col_character(),
+      "anon_chi" = readr::col_character(),
       "Client DoB Date [C]" = readr::col_date(format = "%Y/%m/%d %T"),
       "Age at Assessment Decision Date" = readr::col_integer(),
       "Gender Code" = readr::col_integer(),
@@ -43,7 +43,7 @@ read_extract_homelessness <- function(
       case_closed_date = "Case Closed Date",
       sending_local_authority_code_9 = "Sending Local Authority Code 9",
       client_unique_identifier = "Client Unique Identifier",
-      upi_number = "UPI Number [C]",
+      anon_chi = "anon_chi",
       client_dob_date = "Client DoB Date [C]",
       age_at_assessment_decision_date = "Age at Assessment Decision Date",
       gender_code = "Gender Code",
@@ -62,7 +62,8 @@ read_extract_homelessness <- function(
       not_to_do_with_applicant_household = "Not to do with Applicant Household",
       refused = "Refused",
       person_in_receipt_of_universal_credit = "Person in Receipt of Universal Credit"
-    )
+    ) %>%
+    slfhelper::get_chi()
 
   return(extract_homelessness)
 }
diff --git a/R/read_extract_maternity.R b/R/read_extract_maternity.R
index e03b50e12..d9c45366a 100644
--- a/R/read_extract_maternity.R
+++ b/R/read_extract_maternity.R
@@ -12,7 +12,7 @@ read_extract_maternity <- function(
       "Costs Financial Year" = readr::col_double(),
       "Date of Admission Full Date" = readr::col_date(format = "%Y/%m/%d %T"),
       "Date of Discharge Full Date" = readr::col_date(format = "%Y/%m/%d %T"),
-      "Pat UPI [C]" = readr::col_character(),
+      "anon_chi" = readr::col_character(),
       "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
       "Practice Location Code" = readr::col_character(),
       "Practice NHS Board Code - current" = readr::col_character(),
@@ -107,7 +107,7 @@ read_extract_maternity <- function(
       op2a = "Operation 2A Code",
       op3a = "Operation 3A Code",
       op4a = "Operation 4A Code",
-      chi = "Pat UPI [C]",
+      anon_chi = "anon_chi",
       gpprac = "Practice Location Code",
       hbpraccode = "Practice NHS Board Code - current",
       selfharm_adm = "Self Harm Related Admission",
@@ -118,7 +118,8 @@ read_extract_maternity <- function(
       location = "Treatment Location Code",
       hbtreatcode = "Treatment NHS Board Code - current",
       uri = "Maternity Unique Record Identifier [C]"
-    )
+    ) %>%
+    slfhelper::get_chi()
 
   return(extract_maternity)
 }
diff --git a/R/read_extract_mental_health.R b/R/read_extract_mental_health.R
index 687e656d0..0f68ffce3 100644
--- a/R/read_extract_mental_health.R
+++ b/R/read_extract_mental_health.R
@@ -13,7 +13,7 @@ read_extract_mental_health <- function(
       "Costs Financial Month Number (04)" = readr::col_double(),
       "Date of Admission(04)" = readr::col_date(format = "%Y/%m/%d %T"),
       "Date of Discharge(04)" = readr::col_date(format = "%Y/%m/%d %T"),
-      "Pat UPI" = readr::col_character(),
+      "anon_chi" = readr::col_character(),
       "Pat Gender Code" = readr::col_integer(),
       "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
       "Practice Location Code" = readr::col_character(),
@@ -74,7 +74,7 @@ read_extract_mental_health <- function(
       costmonthnum = "Costs Financial Month Number (04)",
       record_keydate1 = "Date of Admission(04)",
       record_keydate2 = "Date of Discharge(04)",
-      chi = "Pat UPI",
+      anon_chi = "anon_chi",
       gender = "Pat Gender Code",
       dob = "Pat Date Of Birth [C]",
       gpprac = "Practice Location Code",
@@ -131,7 +131,8 @@ read_extract_mental_health <- function(
     # replace NA in cost_total_net by 0
     dplyr::mutate(
       cost_total_net = tidyr::replace_na(.data[["cost_total_net"]], 0.0)
-    )
+    ) %>%
+    slfhelper::get_chi()
 
   return(extract_mental_health)
 }
diff --git a/R/read_extract_nrs_deaths.R b/R/read_extract_nrs_deaths.R
index c852748b9..a3bbca43d 100644
--- a/R/read_extract_nrs_deaths.R
+++ b/R/read_extract_nrs_deaths.R
@@ -18,7 +18,7 @@ read_extract_nrs_deaths <- function(
       "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
       "Date of Death(99)" = readr::col_date(format = "%Y/%m/%d %T"),
       "Pat Gender Code" = readr::col_double(),
-      "Pat UPI" = readr::col_character(),
+      "anon_chi" = readr::col_character(),
       "Place Death Occurred Code" = readr::col_character(),
       "Post Mortem Code" = readr::col_character(),
       "Prim Cause of Death Code (6 char)" = readr::col_character(),
@@ -47,7 +47,7 @@ read_extract_nrs_deaths <- function(
       dob = "Pat Date Of Birth [C]",
       record_keydate1 = "Date of Death(99)",
       gender = "Pat Gender Code",
-      chi = "Pat UPI",
+      anon_chi = "anon_chi",
       place_death_occurred = "Place Death Occurred Code",
       post_mortem = "Post Mortem Code",
       deathdiag1 = "Prim Cause of Death Code (6 char)",
@@ -63,7 +63,8 @@ read_extract_nrs_deaths <- function(
       deathdiag11 = "Sec Cause of Death 9 Code (6 char)",
       uri = "Unique Record Identifier",
       gpprac = "GP practice code(99)"
-    )
+    ) %>%
+    slfhelper::get_chi()
 
   return(extract_nrs_deaths)
 }
diff --git a/R/read_extract_outpatients.R b/R/read_extract_outpatients.R
index 9ff60a36f..40c8772de 100644
--- a/R/read_extract_outpatients.R
+++ b/R/read_extract_outpatients.R
@@ -12,7 +12,7 @@ read_extract_outpatients <- function(
       "Clinic Date Fin Year" = readr::col_double(),
       "Clinic Date (00)" = readr::col_date(format = "%Y/%m/%d %T"),
       "Episode Record Key (SMR00) [C]" = readr::col_character(),
-      "Pat UPI" = readr::col_character(),
+      "anon_chi" = readr::col_character(),
       "Pat Gender Code" = readr::col_double(),
       "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
       "Practice Location Code" = readr::col_character(),
@@ -70,7 +70,7 @@ read_extract_outpatients <- function(
       op2b = "Operation 2B Code (4 char)",
       dateop2 = "Date of Operation 2 (00)",
       gender = "Pat Gender Code",
-      chi = "Pat UPI",
+      anon_chi = "anon_chi",
       cat = "Patient Category Code",
       gpprac = "Practice Location Code",
       hbpraccode = "Practice NHS Board Code - current",
@@ -83,7 +83,8 @@ read_extract_outpatients <- function(
       cost_total_net = "Total Net Costs",
       location = "Treatment Location Code",
       hbtreatcode = "Treatment NHS Board Code - current"
-    )
+    ) %>%
+    slfhelper::get_chi()
 
   return(extract_outpatients)
 }
diff --git a/R/read_extract_prescribing.R b/R/read_extract_prescribing.R
index 683484473..147d0b08b 100644
--- a/R/read_extract_prescribing.R
+++ b/R/read_extract_prescribing.R
@@ -6,7 +6,7 @@
 read_extract_prescribing <- function(year, file_path = get_it_prescribing_path(year)) {
   pis_file <- read_file(file_path,
     col_type = readr::cols_only(
-      "Pat UPI [C]" = readr::col_character(),
+      "anon_chi" = readr::col_character(),
       "Pat DoB [C]" = readr::col_date(format = "%d-%m-%Y"),
       "Pat Gender" = readr::col_double(),
       "Pat Postcode [C]" = readr::col_character(),
@@ -17,14 +17,15 @@ read_extract_prescribing <- function(year, file_path = get_it_prescribing_path(y
   ) %>%
     # Rename variables
     dplyr::rename(
-      chi = "Pat UPI [C]",
+      anon_chi = "anon_chi",
       dob = "Pat DoB [C]",
       gender = "Pat Gender",
       postcode = "Pat Postcode [C]",
       gpprac = "Practice Code",
       no_paid_items = "Number of Paid Items",
       cost_total_net = "PD Paid GIC excl. BB"
-    )
+    ) %>%
+    slfhelper::get_chi()
 
   return(pis_file)
 }
diff --git a/R/read_it_chi_deaths.R b/R/read_it_chi_deaths.R
index aab56c86d..a67a9a4ce 100644
--- a/R/read_it_chi_deaths.R
+++ b/R/read_it_chi_deaths.R
@@ -9,16 +9,17 @@
 read_it_chi_deaths <- function(file_path = get_it_deaths_path()) {
   it_chi_deaths <- read_file(file_path,
     col_type = readr::cols(
-      "PATIENT_UPI [C]" = readr::col_character(),
+      "anon_chi" = readr::col_character(),
       "PATIENT DoD DATE (NRS)" = readr::col_date(format = "%d-%m-%Y"),
       "PATIENT DoD DATE (CHI)" = readr::col_date(format = "%d-%m-%Y")
     )
   ) %>%
     dplyr::rename(
-      chi = "PATIENT_UPI [C]",
+      anon_chi = "anon_chi",
       death_date_nrs = "PATIENT DoD DATE (NRS)",
       death_date_chi = "PATIENT DoD DATE (CHI)"
-    )
+    ) %>%
+    slfhelper::get_chi()
 
   return(it_chi_deaths)
 }

From 1f31efacda5e6a4fff15f4534a5a6ffe9c42b7cd Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 17 May 2024 16:12:03 +0100
Subject: [PATCH 114/186] change `process` functions to read `anon_chi`

---
 R/process_extract_acute.R              | 3 ++-
 R/process_extract_ae.R                 | 3 ++-
 R/process_extract_alarms_telecare.R    | 3 ++-
 R/process_extract_care_home.R          | 3 ++-
 R/process_extract_delayed_discharges.R | 3 ++-
 R/process_extract_home_care.R          | 3 ++-
 R/process_extract_homelessness.R       | 3 ++-
 R/process_extract_maternity.R          | 3 ++-
 R/process_extract_mental_health.R      | 3 ++-
 R/process_extract_nrs_deaths.R         | 3 ++-
 R/process_extract_outpatients.R        | 3 ++-
 R/process_extract_prescribing.R        | 3 ++-
 R/process_extract_sds.R                | 3 ++-
 R/process_it_chi_deaths.R              | 3 ++-
 R/process_lookup_deaths.R              | 3 ++-
 R/process_sc_all_alarms_telecare.R     | 3 ++-
 R/process_sc_all_care_home.R           | 3 ++-
 R/process_sc_all_home_care.R           | 3 ++-
 R/process_sc_all_sds.R                 | 3 ++-
 19 files changed, 38 insertions(+), 19 deletions(-)

diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R
index dcfdb47c0..d91a77ab3 100644
--- a/R/process_extract_acute.R
+++ b/R/process_extract_acute.R
@@ -108,7 +108,8 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) {
       tidyselect::ends_with("_cost"),
       "uri"
     ) %>%
-    dplyr::arrange(.data$chi, .data$record_keydate1)
+    dplyr::arrange(.data$chi, .data$record_keydate1) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_extract_ae.R b/R/process_extract_ae.R
index dd3823a36..9a5829cc5 100644
--- a/R/process_extract_ae.R
+++ b/R/process_extract_ae.R
@@ -289,7 +289,8 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
       "mar_cost",
       "cup_marker",
       "cup_pathway"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R
index 4eee7ea16..d7b063ace 100644
--- a/R/process_extract_alarms_telecare.R
+++ b/R/process_extract_alarms_telecare.R
@@ -48,7 +48,8 @@ process_extract_alarms_telecare <- function(
       "record_keydate2",
       "person_id",
       "sc_latest_submission"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     at_data %>%
diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R
index 06305ec0f..54789eac9 100644
--- a/R/process_extract_care_home.R
+++ b/R/process_extract_care_home.R
@@ -129,7 +129,8 @@ process_extract_care_home <- function(
       "cost_total_net",
       dplyr::ends_with("_beddays"),
       dplyr::ends_with("_cost")
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_extract_delayed_discharges.R b/R/process_extract_delayed_discharges.R
index c16748a2d..75f575b66 100644
--- a/R/process_extract_delayed_discharges.R
+++ b/R/process_extract_delayed_discharges.R
@@ -105,7 +105,8 @@ process_extract_delayed_discharges <- function(
       "hbtreatcode",
       "location",
       "spec"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R
index 836c3ac18..831496bd2 100644
--- a/R/process_extract_home_care.R
+++ b/R/process_extract_home_care.R
@@ -97,7 +97,8 @@ process_extract_home_care <- function(
       "hc_provider",
       "hc_reablement",
       "person_id"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index 53ec8ec93..7f89bcbc7 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -172,7 +172,8 @@ process_extract_homelessness <- function(
       hl1_property_type = "property_type_code",
       "hl1_reason_ftm",
       hl1_completeness
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_extract_maternity.R b/R/process_extract_maternity.R
index eab3fb713..59307ea26 100644
--- a/R/process_extract_maternity.R
+++ b/R/process_extract_maternity.R
@@ -107,7 +107,8 @@ process_extract_maternity <- function(data, year, write_to_disk = TRUE) {
       "uri",
       "ipdc"
     ) %>%
-    dplyr::arrange(.data$chi, .data$record_keydate1)
+    dplyr::arrange(.data$chi, .data$record_keydate1) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_extract_mental_health.R b/R/process_extract_mental_health.R
index 4326630fe..5db5e09cb 100644
--- a/R/process_extract_mental_health.R
+++ b/R/process_extract_mental_health.R
@@ -112,7 +112,8 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) {
       tidyselect::ends_with("_beddays"),
       tidyselect::ends_with("_cost"),
       "uri"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_extract_nrs_deaths.R b/R/process_extract_nrs_deaths.R
index e707e74f6..a6fd0eea3 100644
--- a/R/process_extract_nrs_deaths.R
+++ b/R/process_extract_nrs_deaths.R
@@ -23,7 +23,8 @@ process_extract_nrs_deaths <- function(data, year, write_to_disk = TRUE) {
       year = year,
       gpprac = convert_eng_gpprac_to_dummy(.data$gpprac),
       smrtype = add_smrtype(.data$recid)
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     deaths_clean %>%
diff --git a/R/process_extract_outpatients.R b/R/process_extract_outpatients.R
index fdf4ee63d..50200a52e 100644
--- a/R/process_extract_outpatients.R
+++ b/R/process_extract_outpatients.R
@@ -82,7 +82,8 @@ process_extract_outpatients <- function(data, year, write_to_disk = TRUE) {
       "cost_total_net",
       tidyselect::ends_with("_cost"),
       "uri"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_extract_prescribing.R b/R/process_extract_prescribing.R
index c79e0a513..12eac62e7 100644
--- a/R/process_extract_prescribing.R
+++ b/R/process_extract_prescribing.R
@@ -38,7 +38,8 @@ process_extract_prescribing <- function(data, year, write_to_disk = TRUE) {
       record_keydate2 = .data$record_keydate1,
       # Add SMR type variable
       smrtype = add_smrtype(.data$recid)
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   # Issue a warning if rows were removed
   if (nrow(pis_clean) != nrow(data)) {
diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R
index b7b65a7a7..be53f35ff 100644
--- a/R/process_extract_sds.R
+++ b/R/process_extract_sds.R
@@ -46,7 +46,8 @@ process_extract_sds <- function(
       "record_keydate1",
       "record_keydate2",
       "sc_send_lca"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     outfile %>%
diff --git a/R/process_it_chi_deaths.R b/R/process_it_chi_deaths.R
index d30a5cb7b..85354880b 100644
--- a/R/process_it_chi_deaths.R
+++ b/R/process_it_chi_deaths.R
@@ -20,7 +20,8 @@ process_it_chi_deaths <- function(data, write_to_disk = TRUE) {
     # Use the NRS death_date unless it isn't there
     dplyr::mutate(
       death_date = dplyr::coalesce(.data$death_date_nrs, .data$death_date_chi)
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     it_chi_deaths_clean %>%
diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R
index 1150059a7..9a5c21974 100644
--- a/R/process_lookup_deaths.R
+++ b/R/process_lookup_deaths.R
@@ -30,7 +30,8 @@ process_slf_deaths_lookup <- function(
       death_date = .data$record_keydate1,
       deceased = TRUE,
       .keep = "unused"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R
index 3ac1c7774..f7d36c714 100644
--- a/R/process_sc_all_alarms_telecare.R
+++ b/R/process_sc_all_alarms_telecare.R
@@ -136,7 +136,8 @@ process_sc_all_alarms_telecare <- function(
   )]
 
   # Convert back to data.frame if necessary
-  qtr_merge <- as.data.frame(qtr_merge)
+  qtr_merge <- as.data.frame(qtr_merge) %>%
+    slfhelper::get_anon_chi()
 
 
   if (write_to_disk) {
diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R
index f0b6c3db4..c88cf2be5 100644
--- a/R/process_sc_all_care_home.R
+++ b/R/process_sc_all_care_home.R
@@ -330,7 +330,8 @@ process_sc_all_care_home <- function(
       "ch_nursing",
       "ch_adm_reason",
       "sc_latest_submission"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     ch_data_final %>%
diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R
index 331a682e1..f0a663685 100644
--- a/R/process_sc_all_home_care.R
+++ b/R/process_sc_all_home_care.R
@@ -199,7 +199,8 @@ process_sc_all_home_care <- function(
     # compute lca variable from sending_location
     dplyr::mutate(
       sc_send_lca = convert_sc_sending_location_to_lca(.data$sending_location)
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R
index c79eeb6e8..7a72b6a7f 100644
--- a/R/process_sc_all_sds.R
+++ b/R/process_sc_all_sds.R
@@ -182,7 +182,8 @@ process_sc_all_sds <- function(
   rm(sds_full_clean_long)
 
   # Drop episode_counter and convert back to data.frame if needed
-  final_data <- as.data.frame(final_data[, -"episode_counter"])
+  final_data <- as.data.frame(final_data[, -"episode_counter"]) %>%
+    slfhelper::get_anon_chi()
   # final_data now holds the processed data in the format of a data.frame
 
   if (write_to_disk) {

From e901a59a6d976f447868c32b579ae7bab72510d7 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 20 May 2024 08:34:23 +0100
Subject: [PATCH 115/186] remove storing as a dataframe

---
 00_Sort_BI_Extracts.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 368ad12bc..461374a58 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -46,7 +46,7 @@ for (csv_file in csv_files) {
 
     # Read in each file and replace chi with anon_chi
     for (csv_file in csv_files) {
-      hl1 <- read_file(csv_file) %>%
+      read_file(csv_file) %>%
         dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
         slfhelper::get_anon_chi(chi = chi) %>%
         readr::write_csv(file = new_file_path)

From c4c280393140719a9167532cea64f3c2de1a0697 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 20 May 2024 11:58:44 +0100
Subject: [PATCH 116/186] Add condition if CHI exists in data file

---
 00_Sort_BI_Extracts.R | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 461374a58..e04a953a1 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -46,13 +46,17 @@ for (csv_file in csv_files) {
 
     # Read in each file and replace chi with anon_chi
     for (csv_file in csv_files) {
-      read_file(csv_file) %>%
-        dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
-        slfhelper::get_anon_chi(chi = chi) %>%
-        readr::write_csv(file = new_file_path)
+      if (any(grepl("UPI", names(csv_file)))) {
+        read_file(csv_file) %>%
+          dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
+          slfhelper::get_anon_chi(chi = chi) %>%
+          readr::write_csv(file = new_file_path)
+      } else {
+        fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
+      }
+
+      file.remove(csv_file)
+      cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
     }
-
-    file.remove(csv_file)
-    cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
   }
 }

From c5a42da002d90ff4f7db50ed21410ec6d363be15 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 09:02:12 +0100
Subject: [PATCH 117/186] Update dd path

---
 R/get_dd_path.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/get_dd_path.R b/R/get_dd_path.R
index 78796c267..fcbcc1595 100644
--- a/R/get_dd_path.R
+++ b/R/get_dd_path.R
@@ -19,7 +19,7 @@ get_dd_path <- function(..., dd_period = NULL) {
 
   dd_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Delayed_Discharges"),
-    file_name = paste0(dd_period, "DD_LinkageFile.parquet"),
+    file_name = paste0("anon_", dd_period, "_DD_LinkageFile.parquet"),
     ...
   )
 

From d9a70cc19959b6b934dbd184bfabcef3edad8754 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 09:03:23 +0100
Subject: [PATCH 118/186] switch between chi - ooh and dd

---
 R/process_extract_gp_ooh.R          | 3 ++-
 R/read_extract_delayed_discharges.R | 4 ++--
 R/read_extract_ooh_consultations.R  | 5 +++--
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R
index 37cfc8f3f..c889c3db7 100644
--- a/R/process_extract_gp_ooh.R
+++ b/R/process_extract_gp_ooh.R
@@ -123,7 +123,8 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) {
       "cost_total_net",
       tidyselect::ends_with("_cost"),
       "ooh_case_id"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     final_data %>%
diff --git a/R/read_extract_delayed_discharges.R b/R/read_extract_delayed_discharges.R
index fa46fb615..19d7d52ab 100644
--- a/R/read_extract_delayed_discharges.R
+++ b/R/read_extract_delayed_discharges.R
@@ -8,12 +8,12 @@
 read_extract_delayed_discharges <- function(file_path = get_dd_path()) {
   extract_delayed_discharges <- read_file(file_path) %>%
     janitor::clean_names() %>%
-    slfhelper::get_chi() %>%
     dplyr::mutate(
       monthflag = lubridate::my(.data[["monthflag"]]),
       delay_end_reason = as.integer(.data[["delay_end_reason"]])
     ) %>%
-    dplyr::select(-.data[["cennum"]])
+    dplyr::select(-.data[["cennum"]]) %>%
+    slfhelper::get_chi()
 
   return(extract_delayed_discharges)
 }
diff --git a/R/read_extract_ooh_consultations.R b/R/read_extract_ooh_consultations.R
index d6f19c127..4322758e6 100644
--- a/R/read_extract_ooh_consultations.R
+++ b/R/read_extract_ooh_consultations.R
@@ -28,7 +28,7 @@ read_extract_ooh_consultations <- function(
     dplyr::select(!"Practice NHS Board Code 9 - current") %>%
     # rename variables
     dplyr::rename(
-      chi = "UPI Number [C]",
+      anon_chi = "anon_chi",
       dob = "Patient DoB Date [C]",
       gender = "Gender",
       postcode = "Patient Postcode [C]",
@@ -50,7 +50,8 @@ read_extract_ooh_consultations <- function(
     ) %>%
     # Restore CHI leading zero
     dplyr::mutate(chi = phsmethods::chi_pad(.data$chi)) %>%
-    dplyr::distinct()
+    dplyr::distinct() %>%
+    slfhelper::get_chi()
 
   return(consultations_extract)
 }

From c8379959d480e29b8032109f914117cc1b4b31ae Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 17 May 2024 15:51:37 +0100
Subject: [PATCH 119/186] Update chi when this is different e.g UPI number or
 PAT_UPI

---
 00_Sort_BI_Extracts.R | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index e04a953a1..368ad12bc 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -46,17 +46,13 @@ for (csv_file in csv_files) {
 
     # Read in each file and replace chi with anon_chi
     for (csv_file in csv_files) {
-      if (any(grepl("UPI", names(csv_file)))) {
-        read_file(csv_file) %>%
-          dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
-          slfhelper::get_anon_chi(chi = chi) %>%
-          readr::write_csv(file = new_file_path)
-      } else {
-        fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
-      }
-
-      file.remove(csv_file)
-      cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
+      hl1 <- read_file(csv_file) %>%
+        dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
+        slfhelper::get_anon_chi(chi = chi) %>%
+        readr::write_csv(file = new_file_path)
     }
+
+    file.remove(csv_file)
+    cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
   }
 }

From 7945d262466a16f3811c2db0c5ab3c92fbdb9e4d Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 20 May 2024 08:34:23 +0100
Subject: [PATCH 120/186] remove storing as a dataframe

---
 00_Sort_BI_Extracts.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 368ad12bc..461374a58 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -46,7 +46,7 @@ for (csv_file in csv_files) {
 
     # Read in each file and replace chi with anon_chi
     for (csv_file in csv_files) {
-      hl1 <- read_file(csv_file) %>%
+      read_file(csv_file) %>%
         dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
         slfhelper::get_anon_chi(chi = chi) %>%
         readr::write_csv(file = new_file_path)

From 01c7b98db31558d73acf146a5c667d6e1d81fe0d Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 20 May 2024 11:58:44 +0100
Subject: [PATCH 121/186] Add condition if CHI exists in data file

---
 00_Sort_BI_Extracts.R | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 461374a58..e04a953a1 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -46,13 +46,17 @@ for (csv_file in csv_files) {
 
     # Read in each file and replace chi with anon_chi
     for (csv_file in csv_files) {
-      read_file(csv_file) %>%
-        dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
-        slfhelper::get_anon_chi(chi = chi) %>%
-        readr::write_csv(file = new_file_path)
+      if (any(grepl("UPI", names(csv_file)))) {
+        read_file(csv_file) %>%
+          dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
+          slfhelper::get_anon_chi(chi = chi) %>%
+          readr::write_csv(file = new_file_path)
+      } else {
+        fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
+      }
+
+      file.remove(csv_file)
+      cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
     }
-
-    file.remove(csv_file)
-    cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
   }
 }

From bac82266a1258911f6b840252aadb5737120e4d1 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 20 May 2024 15:16:02 +0100
Subject: [PATCH 122/186] update 00_Sort_BI_Extracts replace for loop by
 function to enable parallel computing with lapply

---
 00_Sort_BI_Extracts.R | 68 ++++++++++++++++++++++++++-----------------
 1 file changed, 42 insertions(+), 26 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index e04a953a1..7677c15e4 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -1,5 +1,5 @@
 # Define the source directory and financial year pattern
-compress_files <- FALSE
+compress_files <- TRUE
 source_dir <- "/conf/sourcedev/Source_Linkage_File_Updates/Extracts Temp"
 pattern <- "-20(\\d{4})\\.csv"
 
@@ -20,43 +20,59 @@ extract_financial_year <- function(filename) {
   }
 }
 
-# Create directories for each financial year and move files
-for (csv_file in csv_files) {
+# Create a function to read variable names
+is_chi_in_file <- function(filename){
+  data <- read.csv(filename, nrow = 1)
+  return(grepl("UPI", names(data)) %>% any())
+}
+
+# function to move files
+move_temps_to_year_extract <- function(csv_file, compress_files = TRUE){
   financial_year <- extract_financial_year(csv_file)
   # check if year directory exists
   if (!is.null(financial_year)) {
     financial_year_dir <- file.path("/conf/sourcedev/Source_Linkage_File_Updates", financial_year, "Extracts")
-    # if not, create the year directory
+    # if financial_year_dir does not exist, create the year directory
     if (!dir.exists(financial_year_dir)) {
       dir.create(financial_year_dir)
     }
 
-    # compress file
-    if (compress_files) {
-      cat("Compressing:", basename(csv_file), "\n")
-      system2(
-        command = "gzip",
-        args = shQuote(csv_file)
-      )
-      csv_file <- paste0(csv_file, ".gz")
-    }
-
     # set up new file path location to move each file to their destination.
-    new_file_path <- file.path(financial_year_dir, paste0("anon-", basename(csv_file)))
-
-    # Read in each file and replace chi with anon_chi
-    for (csv_file in csv_files) {
-      if (any(grepl("UPI", names(csv_file)))) {
-        read_file(csv_file) %>%
-          dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
-          slfhelper::get_anon_chi(chi = chi) %>%
-          readr::write_csv(file = new_file_path)
-      } else {
-        fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
+    chi_in_file <- is_chi_in_file(csv_file)
+    if(chi_in_file){
+      new_file_path <- file.path(financial_year_dir,
+                                 paste0("anon-", basename(csv_file)))
+      read_file(csv_file) %>%
+        dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
+        slfhelper::get_anon_chi() %>%
+        readr::write_csv(file = new_file_path)
+      cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
+
+      # compress file
+      if (compress_files) {
+        cat("Compressing:", basename(new_file_path), "\n")
+        system2(
+          command = "gzip",
+          args = shQuote(new_file_path)
+        )
       }
+      file.remove(csv_file)
+    }else{
+      new_file_path <- file.path(financial_year_dir, basename(csv_file))
+      fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
+      cat("Moved", csv_file, "to", new_file_path, "\n")
 
+      # compress file
+      if (compress_files) {
+        cat("Compressing:", basename(new_file_path), "\n")
+        system2(
+          command = "gzip",
+          args = shQuote(new_file_path)
+        )
+      }
       file.remove(csv_file)
-      cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
     }
   }
 }
+
+lapply(csv_files, move_temps_to_year_extract, compress_files = compress_files)

From a6a54534d7b1b9722179c00d04a6fcbad64955ea Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Mon, 20 May 2024 14:17:33 +0000
Subject: [PATCH 123/186] Style code

---
 00_Sort_BI_Extracts.R | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 7677c15e4..859e5b35b 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -21,13 +21,13 @@ extract_financial_year <- function(filename) {
 }
 
 # Create a function to read variable names
-is_chi_in_file <- function(filename){
+is_chi_in_file <- function(filename) {
   data <- read.csv(filename, nrow = 1)
   return(grepl("UPI", names(data)) %>% any())
 }
 
 # function to move files
-move_temps_to_year_extract <- function(csv_file, compress_files = TRUE){
+move_temps_to_year_extract <- function(csv_file, compress_files = TRUE) {
   financial_year <- extract_financial_year(csv_file)
   # check if year directory exists
   if (!is.null(financial_year)) {
@@ -39,9 +39,11 @@ move_temps_to_year_extract <- function(csv_file, compress_files = TRUE){
 
     # set up new file path location to move each file to their destination.
     chi_in_file <- is_chi_in_file(csv_file)
-    if(chi_in_file){
-      new_file_path <- file.path(financial_year_dir,
-                                 paste0("anon-", basename(csv_file)))
+    if (chi_in_file) {
+      new_file_path <- file.path(
+        financial_year_dir,
+        paste0("anon-", basename(csv_file))
+      )
       read_file(csv_file) %>%
         dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
         slfhelper::get_anon_chi() %>%
@@ -57,7 +59,7 @@ move_temps_to_year_extract <- function(csv_file, compress_files = TRUE){
         )
       }
       file.remove(csv_file)
-    }else{
+    } else {
       new_file_path <- file.path(financial_year_dir, basename(csv_file))
       fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
       cat("Moved", csv_file, "to", new_file_path, "\n")

From 905ac4509eb8e90d56453f1d0a2e5280516c5ca8 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 20 May 2024 15:36:00 +0100
Subject: [PATCH 124/186] merge similar code

---
 00_Sort_BI_Extracts.R | 29 ++++++++++-------------------
 1 file changed, 10 insertions(+), 19 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 859e5b35b..49235eddc 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -49,31 +49,22 @@ move_temps_to_year_extract <- function(csv_file, compress_files = TRUE) {
         slfhelper::get_anon_chi() %>%
         readr::write_csv(file = new_file_path)
       cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
-
-      # compress file
-      if (compress_files) {
-        cat("Compressing:", basename(new_file_path), "\n")
-        system2(
-          command = "gzip",
-          args = shQuote(new_file_path)
-        )
-      }
-      file.remove(csv_file)
     } else {
       new_file_path <- file.path(financial_year_dir, basename(csv_file))
       fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
       cat("Moved", csv_file, "to", new_file_path, "\n")
+    }
 
-      # compress file
-      if (compress_files) {
-        cat("Compressing:", basename(new_file_path), "\n")
-        system2(
-          command = "gzip",
-          args = shQuote(new_file_path)
-        )
-      }
-      file.remove(csv_file)
+    # compress file
+    if (compress_files) {
+      cat("Compressing:", basename(new_file_path), "\n")
+      system2(
+        command = "gzip",
+        args = shQuote(new_file_path)
+      )
     }
+    # remove old files
+    file.remove(csv_file)
   }
 }
 

From 9e81e218e0e697ff4f9e02534cb65936faed292c Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 10:26:11 +0100
Subject: [PATCH 125/186] simplify sort_bi_extracts

---
 00_Sort_BI_Extracts.R | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 49235eddc..0638123b9 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -37,20 +37,17 @@ move_temps_to_year_extract <- function(csv_file, compress_files = TRUE) {
       dir.create(financial_year_dir)
     }
 
+    new_file_path <- file.path(financial_year_dir, paste0("anon-", basename(csv_file)))
+
     # set up new file path location to move each file to their destination.
     chi_in_file <- is_chi_in_file(csv_file)
     if (chi_in_file) {
-      new_file_path <- file.path(
-        financial_year_dir,
-        paste0("anon-", basename(csv_file))
-      )
       read_file(csv_file) %>%
         dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
         slfhelper::get_anon_chi() %>%
         readr::write_csv(file = new_file_path)
       cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")
     } else {
-      new_file_path <- file.path(financial_year_dir, basename(csv_file))
       fs::file_copy(csv_file, new_file_path, overwrite = TRUE)
       cat("Moved", csv_file, "to", new_file_path, "\n")
     }

From f154365755d1d7da680505343d9c0437e2c78a43 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 14:11:56 +0100
Subject: [PATCH 126/186] update sparra/hhg paths (anon_chi)

---
 R/get_sparra_hhg_paths.R |  4 ++--
 R/join_sparra_hhg.R      | 12 ++++++++----
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/R/get_sparra_hhg_paths.R b/R/get_sparra_hhg_paths.R
index 66ae9a0bf..58f306556 100644
--- a/R/get_sparra_hhg_paths.R
+++ b/R/get_sparra_hhg_paths.R
@@ -16,7 +16,7 @@ get_hhg_path <- function(year, ...) {
 
   hhg_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "HHG"),
-    file_name = stringr::str_glue("HHG-20{year}.parquet"),
+    file_name = stringr::str_glue("anon-HHG-20{year}.parquet"),
     ...
   )
 
@@ -41,7 +41,7 @@ get_sparra_path <- function(year, ...) {
 
   sparra_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "SPARRA"),
-    file_name = stringr::str_glue("SPARRA-20{year}.parquet"),
+    file_name = stringr::str_glue("anon-SPARRA-20{year}.parquet"),
     ...
   )
 
diff --git a/R/join_sparra_hhg.R b/R/join_sparra_hhg.R
index ec5ed1a32..d946f08c5 100644
--- a/R/join_sparra_hhg.R
+++ b/R/join_sparra_hhg.R
@@ -9,7 +9,8 @@ join_sparra_hhg <- function(data, year) {
     data <- dplyr::left_join(
       data,
       read_file(get_sparra_path(year)) %>%
-        dplyr::rename(sparra_start_fy = "sparra_risk_score"),
+        dplyr::rename(sparra_start_fy = "sparra_risk_score") %>%
+        slfhelper::get_chi(),
       by = c("chi" = "upi_number"),
       na_matches = "never",
       relationship = "many-to-one"
@@ -22,7 +23,8 @@ join_sparra_hhg <- function(data, year) {
     data <- dplyr::left_join(
       data,
       read_file(get_sparra_path(next_fy(year))) %>%
-        dplyr::rename(sparra_end_fy = "sparra_risk_score"),
+        dplyr::rename(sparra_end_fy = "sparra_risk_score") %>%
+        slfhelper::get_chi(),
       by = c("chi" = "upi_number"),
       na_matches = "never",
       relationship = "many-to-one"
@@ -35,7 +37,8 @@ join_sparra_hhg <- function(data, year) {
     data <- dplyr::left_join(
       data,
       read_file(get_hhg_path(year)) %>%
-        dplyr::rename(hhg_start_fy = "hhg_score"),
+        dplyr::rename(hhg_start_fy = "hhg_score") %>%
+        slfhelper::get_chi(),
       by = c("chi" = "upi_number"),
       na_matches = "never",
       relationship = "many-to-one"
@@ -48,7 +51,8 @@ join_sparra_hhg <- function(data, year) {
     data <- dplyr::left_join(
       data,
       read_file(get_hhg_path(next_fy(year))) %>%
-        dplyr::rename(hhg_end_fy = "hhg_score"),
+        dplyr::rename(hhg_end_fy = "hhg_score") %>%
+        slfhelper::get_chi(),
       by = c("chi" = "upi_number"),
       na_matches = "never",
       relationship = "many-to-one"

From ca8c9ae6d2d707f1087bad6fbd6a2999cb399561 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 14:58:41 +0100
Subject: [PATCH 127/186] use anon_chi for sc demogs

---
 R/process_lookup_sc_demographics.R | 2 ++
 R/process_sc_all_alarms_telecare.R | 2 +-
 R/process_sc_all_care_home.R       | 2 +-
 R/process_sc_all_home_care.R       | 2 +-
 R/process_sc_all_sds.R             | 2 +-
 R/read_lookup_sc_demographics.R    | 1 +
 6 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R
index 77418aeba..d6e24c87f 100644
--- a/R/process_lookup_sc_demographics.R
+++ b/R/process_lookup_sc_demographics.R
@@ -146,6 +146,8 @@ process_lookup_sc_demographics <- function(
   dplyr::n_distinct(sc_demog_lookup$chi) # 524810
   dplyr::n_distinct(sc_demog_lookup$social_care_id) # 636404
 
+  sc_demog_lookup <- sc_demog_lookup %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R
index f7d36c714..c583fa8a7 100644
--- a/R/process_sc_all_alarms_telecare.R
+++ b/R/process_sc_all_alarms_telecare.R
@@ -13,7 +13,7 @@
 #'
 process_sc_all_alarms_telecare <- function(
     data,
-    sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+    sc_demog_lookup = read_file(get_sc_demog_lookup_path()) %>% slfhelper::get_chi(),
     write_to_disk = TRUE) {
   # Data Cleaning-----------------------------------------------------
 
diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R
index c88cf2be5..b7c29fbc7 100644
--- a/R/process_sc_all_care_home.R
+++ b/R/process_sc_all_care_home.R
@@ -22,7 +22,7 @@
 #'
 process_sc_all_care_home <- function(
     data,
-    sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+    sc_demog_lookup = read_file(get_sc_demog_lookup_path()) %>% slfhelper::get_chi(),
     it_chi_deaths_data = read_file(get_slf_chi_deaths_path()),
     ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()),
     spd_path = read_file(get_spd_path()),
diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R
index f0a663685..275001c64 100644
--- a/R/process_sc_all_home_care.R
+++ b/R/process_sc_all_home_care.R
@@ -13,7 +13,7 @@
 #'
 process_sc_all_home_care <- function(
     data,
-    sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+    sc_demog_lookup = read_file(get_sc_demog_lookup_path()) %>% slfhelper::get_chi(),
     write_to_disk = TRUE) {
   replaced_dates <- data %>%
     dplyr::filter(.data$hc_start_date_after_period_end_date != 1) %>%
diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R
index 7a72b6a7f..5306c0956 100644
--- a/R/process_sc_all_sds.R
+++ b/R/process_sc_all_sds.R
@@ -12,7 +12,7 @@
 #'
 process_sc_all_sds <- function(
     data,
-    sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+    sc_demog_lookup = read_file(get_sc_demog_lookup_path()) %>% slfhelper::get_chi(),
     write_to_disk = TRUE) {
   # Match on demographics data (chi, gender, dob and postcode)
   matched_sds_data <- data %>%
diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R
index 729f3a445..33873cf4f 100644
--- a/R/read_lookup_sc_demographics.R
+++ b/R/read_lookup_sc_demographics.R
@@ -27,6 +27,7 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "demographics"))) {
     sc_demog %>%
+      slfhelper::get_anon_chi(chi = "chi_upi") %>%
       write_file(get_sandpit_extract_path(type = "demographics"))
 
     sc_demog %>%

From 09e35c06a68949913a92e2ecb1f30bfae8f3c212 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Tue, 21 May 2024 14:00:29 +0000
Subject: [PATCH 128/186] Update documentation

---
 man/process_sc_all_alarms_telecare.Rd | 2 +-
 man/process_sc_all_care_home.Rd       | 2 +-
 man/process_sc_all_home_care.Rd       | 2 +-
 man/process_sc_all_sds.Rd             | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/man/process_sc_all_alarms_telecare.Rd b/man/process_sc_all_alarms_telecare.Rd
index 031fd5028..a2e319cbf 100644
--- a/man/process_sc_all_alarms_telecare.Rd
+++ b/man/process_sc_all_alarms_telecare.Rd
@@ -6,7 +6,7 @@
 \usage{
 process_sc_all_alarms_telecare(
   data,
-  sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+  sc_demog_lookup = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi(),
   write_to_disk = TRUE
 )
 }
diff --git a/man/process_sc_all_care_home.Rd b/man/process_sc_all_care_home.Rd
index a137119b7..792d2200d 100644
--- a/man/process_sc_all_care_home.Rd
+++ b/man/process_sc_all_care_home.Rd
@@ -6,7 +6,7 @@
 \usage{
 process_sc_all_care_home(
   data,
-  sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+  sc_demog_lookup = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi(),
   it_chi_deaths_data = read_file(get_slf_chi_deaths_path()),
   ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()),
   spd_path = read_file(get_spd_path()),
diff --git a/man/process_sc_all_home_care.Rd b/man/process_sc_all_home_care.Rd
index ff18aac6a..c6777889f 100644
--- a/man/process_sc_all_home_care.Rd
+++ b/man/process_sc_all_home_care.Rd
@@ -6,7 +6,7 @@
 \usage{
 process_sc_all_home_care(
   data,
-  sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+  sc_demog_lookup = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi(),
   write_to_disk = TRUE
 )
 }
diff --git a/man/process_sc_all_sds.Rd b/man/process_sc_all_sds.Rd
index 3454ef35b..f91c9dfb9 100644
--- a/man/process_sc_all_sds.Rd
+++ b/man/process_sc_all_sds.Rd
@@ -6,7 +6,7 @@
 \usage{
 process_sc_all_sds(
   data,
-  sc_demog_lookup = read_file(get_sc_demog_lookup_path()),
+  sc_demog_lookup = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi(),
   write_to_disk = TRUE
 )
 }

From 102c90aefe71bba420e5366277793b3dfd940b0a Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 15:01:20 +0100
Subject: [PATCH 129/186] Update `create_episode_file`

---
 R/create_episode_file.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index 53238eac5..b732e465f 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -20,10 +20,10 @@
 create_episode_file <- function(
     processed_data_list,
     year,
-    dd_data = read_file(get_source_extract_path(year, "dd")),
+    dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi(),
     homelessness_lookup = create_homelessness_lookup(year),
-    nsu_cohort = read_file(get_nsu_path(year)),
-    ltc_data = read_file(get_ltcs_path(year)),
+    nsu_cohort = read_file(get_nsu_path(year)) %>% slfhelper::get_chi(),
+    ltc_data = read_file(get_ltcs_path(year)) %>% slfhelper::get_chi() ,
     slf_pc_lookup = read_file(get_slf_postcode_path()),
     slf_gpprac_lookup = read_file(
       get_slf_gpprac_path(),
@@ -36,6 +36,7 @@ create_episode_file <- function(
   processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble()))
 
   episode_file <- dplyr::bind_rows(processed_data_list) %>%
+    slfhelper::get_chi() %>%
     create_cost_inc_dna() %>%
     apply_cost_uplift() %>%
     store_ep_file_vars(

From 709a23efef92b695c2eacdeb12ffea6656576c9c Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 15:03:47 +0100
Subject: [PATCH 130/186] update NSU path

---
 R/get_nsu_paths.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/get_nsu_paths.R b/R/get_nsu_paths.R
index 532056ee6..fad9c2c9a 100644
--- a/R/get_nsu_paths.R
+++ b/R/get_nsu_paths.R
@@ -16,7 +16,7 @@ get_nsu_path <- function(year, ...) {
 
   nsu_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "NSU"),
-    file_name = stringr::str_glue("All_CHIs_20{year}.parquet"),
+    file_name = stringr::str_glue("anon-All_CHIs_20{year}.parquet"),
     ...
   )
 

From a12107107b355b977e9c7510edb966b93396a202 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 15:04:25 +0100
Subject: [PATCH 131/186] Use `get_chi` before phs methods check - ooh

---
 R/read_extract_ooh_consultations.R | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/R/read_extract_ooh_consultations.R b/R/read_extract_ooh_consultations.R
index 4322758e6..3e8edf92b 100644
--- a/R/read_extract_ooh_consultations.R
+++ b/R/read_extract_ooh_consultations.R
@@ -48,10 +48,11 @@ read_extract_ooh_consultations <- function(
       consultation_type = "Consultation Type",
       consultation_type_unmapped = "Consultation Type Unmapped"
     ) %>%
-    # Restore CHI leading zero
-    dplyr::mutate(chi = phsmethods::chi_pad(.data$chi)) %>%
-    dplyr::distinct() %>%
-    slfhelper::get_chi()
+    slfhelper::get_chi() %>%
+  # Restore CHI leading zero
+  dplyr::mutate(chi = phsmethods::chi_pad(.data$chi)) %>%
+    dplyr::distinct()
+
 
   return(consultations_extract)
 }

From 8add12f68465591abe59cd2bf68d30e17322c8dd Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 15:06:00 +0100
Subject: [PATCH 132/186] Update LTCs

---
 R/get_ltcs_path.R      | 2 +-
 R/process_lookup_ltc.R | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/get_ltcs_path.R b/R/get_ltcs_path.R
index 26aff31fc..caf591f72 100644
--- a/R/get_ltcs_path.R
+++ b/R/get_ltcs_path.R
@@ -12,7 +12,7 @@
 get_ltcs_path <- function(year, ...) {
   ltcs_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "LTCs"),
-    file_name = stringr::str_glue("LTCs_patient_reference_file-20{year}.parquet"),
+    file_name = stringr::str_glue("anon-LTCs_patient_reference_file-20{year}.parquet"),
     ...
   )
 
diff --git a/R/process_lookup_ltc.R b/R/process_lookup_ltc.R
index 8ea33da48..5f8f5de83 100644
--- a/R/process_lookup_ltc.R
+++ b/R/process_lookup_ltc.R
@@ -22,7 +22,8 @@ process_lookup_ltc <- function(data, year, write_to_disk = TRUE) {
     dplyr::rename_with(
       .cols = tidyselect::ends_with("flag"),
       .fn = ~ stringr::str_remove(.x, "_date_flag")
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(

From 5e129d499949a39f9a80324d321cdf688d3e5cec Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Tue, 21 May 2024 14:10:25 +0000
Subject: [PATCH 133/186] Style code

---
 R/create_episode_file.R            | 2 +-
 R/read_extract_ooh_consultations.R | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index b732e465f..ef8e75878 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -23,7 +23,7 @@ create_episode_file <- function(
     dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi(),
     homelessness_lookup = create_homelessness_lookup(year),
     nsu_cohort = read_file(get_nsu_path(year)) %>% slfhelper::get_chi(),
-    ltc_data = read_file(get_ltcs_path(year)) %>% slfhelper::get_chi() ,
+    ltc_data = read_file(get_ltcs_path(year)) %>% slfhelper::get_chi(),
     slf_pc_lookup = read_file(get_slf_postcode_path()),
     slf_gpprac_lookup = read_file(
       get_slf_gpprac_path(),
diff --git a/R/read_extract_ooh_consultations.R b/R/read_extract_ooh_consultations.R
index 3e8edf92b..35f660f6e 100644
--- a/R/read_extract_ooh_consultations.R
+++ b/R/read_extract_ooh_consultations.R
@@ -49,8 +49,8 @@ read_extract_ooh_consultations <- function(
       consultation_type_unmapped = "Consultation Type Unmapped"
     ) %>%
     slfhelper::get_chi() %>%
-  # Restore CHI leading zero
-  dplyr::mutate(chi = phsmethods::chi_pad(.data$chi)) %>%
+    # Restore CHI leading zero
+    dplyr::mutate(chi = phsmethods::chi_pad(.data$chi)) %>%
     dplyr::distinct()
 
 

From c5077c28fa96d9917b6c6d968e5c1ff3ffcb4980 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 16:10:43 +0100
Subject: [PATCH 134/186] Update sc paths to `anon-` prefix

---
 R/get_sc_episodes_path.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/R/get_sc_episodes_path.R b/R/get_sc_episodes_path.R
index 230b69727..951dfd4d7 100644
--- a/R/get_sc_episodes_path.R
+++ b/R/get_sc_episodes_path.R
@@ -14,7 +14,7 @@
 get_sc_ch_episodes_path <- function(update = latest_update(), ...) {
   sc_ch_episodes_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_all_care_home"),
-    file_name = stringr::str_glue("all_ch_episodes_{update}.parquet"),
+    file_name = stringr::str_glue("anon-all_ch_episodes_{update}.parquet"),
     ...
   )
 
@@ -34,7 +34,7 @@ get_sc_ch_episodes_path <- function(update = latest_update(), ...) {
 get_sc_at_episodes_path <- function(update = latest_update(), ...) {
   sc_at_episodes_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_all_alarms_telecare"),
-    file_name = stringr::str_glue("all_at_episodes_{update}.parquet"),
+    file_name = stringr::str_glue("anon-all_at_episodes_{update}.parquet"),
     ...
   )
 
@@ -54,7 +54,7 @@ get_sc_at_episodes_path <- function(update = latest_update(), ...) {
 get_sc_hc_episodes_path <- function(update = latest_update(), ...) {
   sc_hc_episodes_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_all_home_care"),
-    file_name = stringr::str_glue("all_hc_episodes_{update}.parquet"),
+    file_name = stringr::str_glue("anon-all_hc_episodes_{update}.parquet"),
     ...
   )
 
@@ -74,7 +74,7 @@ get_sc_hc_episodes_path <- function(update = latest_update(), ...) {
 get_sc_sds_episodes_path <- function(update = latest_update(), ...) {
   sc_sds_episodes_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_all_sds"),
-    file_name = stringr::str_glue("all_sds_episodes_{update}.parquet"),
+    file_name = stringr::str_glue("anon-all_sds_episodes_{update}.parquet"),
     ...
   )
 

From 73b3454e71081e5560f2eab80ae1fa9d5c006baf Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 16:11:50 +0100
Subject: [PATCH 135/186] update cohorts paths

---
 R/create_demographic_lookup.R | 3 ++-
 R/create_service_use_lookup.R | 3 ++-
 R/get_cohorts_paths.R         | 4 ++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/R/create_demographic_lookup.R b/R/create_demographic_lookup.R
index d0e0c9988..7d924f0fb 100644
--- a/R/create_demographic_lookup.R
+++ b/R/create_demographic_lookup.R
@@ -168,7 +168,8 @@ create_demographic_cohorts <- function(
       TRUE ~ "Healthy and Low User"
     )) %>%
     # Reorder variables
-    dplyr::relocate(.data$demographic_cohort, .after = .data$chi)
+    dplyr::relocate(.data$demographic_cohort, .after = .data$chi) %>%
+    slfhelper::get_anon_chi()
 
   # Write to disk
   if (write_to_disk) {
diff --git a/R/create_service_use_lookup.R b/R/create_service_use_lookup.R
index 242e0b351..bfb798e26 100644
--- a/R/create_service_use_lookup.R
+++ b/R/create_service_use_lookup.R
@@ -254,7 +254,8 @@ create_service_use_cohorts <- function(
       "outpatient_cost",
       "prescribing_cost",
       "ae2_cost"
-    )
+    ) %>%
+    slfhelper::get_anon_chi()
 
   if (write_to_disk) {
     write_file(return_data,
diff --git a/R/get_cohorts_paths.R b/R/get_cohorts_paths.R
index c4d2fa60c..f4e1ddeee 100644
--- a/R/get_cohorts_paths.R
+++ b/R/get_cohorts_paths.R
@@ -15,7 +15,7 @@ get_demographic_cohorts_path <- function(year, update = latest_update(), ...) {
   demographic_cohorts_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Cohorts"),
     file_name = stringr::str_glue(
-      "demographic_cohorts_{update}_{year}.parquet"
+      "anon-demographic_cohorts_{update}_{year}.parquet"
     ),
     ...
   )
@@ -38,7 +38,7 @@ get_service_use_cohorts_path <- function(year, update = latest_update(), ...) {
   service_use_cohorts_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Cohorts"),
     file_name = stringr::str_glue(
-      "service_use_cohorts_{update}_{year}.parquet"
+      "anon-service_use_cohorts_{update}_{year}.parquet"
     ),
     ...
   )

From 8654196a82a9562437526ac2164ea25fdeb705ff Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 16:12:37 +0100
Subject: [PATCH 136/186] Update deaths paths with `anon-` prefix

---
 R/get_slf_lookup_paths.R | 2 +-
 R/join_deaths_data.R     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R
index 3a1b932be..c88518094 100644
--- a/R/get_slf_lookup_paths.R
+++ b/R/get_slf_lookup_paths.R
@@ -111,7 +111,7 @@ get_all_slf_deaths_lookup_path <- function(update = latest_update(), ...) {
 get_slf_chi_deaths_path <- function(update = latest_update(), ...) {
   slf_chi_deaths_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Deaths"),
-    file_name = stringr::str_glue("chi_deaths_{update}.parquet"),
+    file_name = stringr::str_glue("anon-chi_deaths_{update}.parquet"),
     ...
   )
 
diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R
index 89bcbbe13..7fbf203d0 100644
--- a/R/join_deaths_data.R
+++ b/R/join_deaths_data.R
@@ -9,7 +9,7 @@
 join_deaths_data <- function(
     data,
     year,
-    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))) {
+    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi()) {
   return(
     data %>%
       dplyr::left_join(

From 73d643ab28b252b303afe8c67b0cfd1d87b4841c Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 16:13:18 +0100
Subject: [PATCH 137/186] sc client anon_chi

---
 R/process_lookup_sc_client.R | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R
index f27229e6d..39c2b8874 100644
--- a/R/process_lookup_sc_client.R
+++ b/R/process_lookup_sc_client.R
@@ -18,8 +18,8 @@ process_lookup_sc_client <-
            year,
            sc_demographics = read_file(
              get_sc_demog_lookup_path(),
-             col_select = c("sending_location", "social_care_id", "chi")
-           ),
+             col_select = c("sending_location", "social_care_id", "anon_chi")
+           ) %>% slfhelper::get_chi(),
            write_to_disk = TRUE) {
     client_clean <- data %>%
       # Replace 'unknown' responses with NA
@@ -157,7 +157,8 @@ process_lookup_sc_client <-
       ) %>%
       dplyr::arrange(.data$chi, .data$count_not_known) %>%
       dplyr::distinct(.data$chi, .keep_all = TRUE) %>%
-      dplyr::select(-.data$sending_location)
+      dplyr::select(-.data$sending_location) %>%
+      slfhelper::get_anon_chi()
 
     if (write_to_disk) {
       write_file(

From 604b452f1f8d58ac7a914623cd518bac1a508945 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 16:14:31 +0100
Subject: [PATCH 138/186] match files with chi

---
 R/add_nsu_cohort.R             | 2 +-
 R/link_delayed_discharge_eps.R | 2 +-
 R/match_on_ltcs.R              | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R
index 46c22cde2..15d5d4e01 100644
--- a/R/add_nsu_cohort.R
+++ b/R/add_nsu_cohort.R
@@ -12,7 +12,7 @@
 add_nsu_cohort <- function(
     data,
     year,
-    nsu_cohort = read_file(get_nsu_path(year))) {
+    nsu_cohort = read_file(get_nsu_path(year)) %>% slfhelper::get_chi()) {
   year_param <- year
 
   if (!check_year_valid(year, "nsu")) {
diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R
index 0d3030a1b..ef4aa4754 100644
--- a/R/link_delayed_discharge_eps.R
+++ b/R/link_delayed_discharge_eps.R
@@ -11,7 +11,7 @@
 link_delayed_discharge_eps <- function(
     episode_file,
     year,
-    dd_data = read_file(get_source_extract_path(year, "dd"))) {
+    dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi()) {
   episode_file <- episode_file %>%
     dplyr::mutate(
       # remember to revoke the cij_end_date with dummy_cij_end
diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R
index f83f31325..3ed052be8 100644
--- a/R/match_on_ltcs.R
+++ b/R/match_on_ltcs.R
@@ -11,7 +11,7 @@
 match_on_ltcs <- function(
     data,
     year,
-    ltc_data = read_file(get_ltcs_path(year))) {
+    ltc_data = read_file(get_ltcs_path(year)) %>% slfhelper::get_chi()) {
   # Match on LTC lookup
   matched <- dplyr::left_join(
     data,

From 9bea997d797ae7f1fdff5cdbce3454ade63f2ab2 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 21 May 2024 16:18:32 +0100
Subject: [PATCH 139/186] Update `create_episode_file` joins

---
 R/create_episode_file.R | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index ef8e75878..f376b22f6 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -412,12 +412,14 @@ join_cohort_lookups <- function(
     update = latest_update(),
     demographic_cohort = read_file(
       get_demographic_cohorts_path(year, update),
-      col_select = c("chi", "demographic_cohort")
-    ),
+      col_select = c("anon_chi", "demographic_cohort")
+    ) %>%
+      slfhelper::get_chi(),
     service_use_cohort = read_file(
       get_service_use_cohorts_path(year, update),
-      col_select = c("chi", "service_use_cohort")
-    )) {
+      col_select = c("anon_chi", "service_use_cohort")
+    ) %>%
+      slfhelper::get_chi()) {
   join_cohort_lookups <- data %>%
     dplyr::left_join(
       demographic_cohort,
@@ -442,7 +444,7 @@ join_cohort_lookups <- function(
 #' @param file_type episode or individual file
 join_sc_client <- function(data,
                            year,
-                           sc_client = read_file(get_sc_client_lookup_path(year)),
+                           sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(),
                            file_type = c("episode", "individual")) {
   if (file_type == "episode") {
     # Match on client variables by chi

From 1dec452254f3cf17ba7fae1648d907e6aec04af4 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Tue, 21 May 2024 15:22:25 +0000
Subject: [PATCH 140/186] Update documentation

---
 man/add_nsu_cohort.Rd             | 6 +++++-
 man/create_episode_file.Rd        | 6 +++---
 man/join_cohort_lookups.Rd        | 4 ++--
 man/join_deaths_data.Rd           | 3 ++-
 man/join_sc_client.Rd             | 2 +-
 man/link_delayed_discharge_eps.Rd | 2 +-
 man/match_on_ltcs.Rd              | 6 +++++-
 man/process_lookup_sc_client.Rd   | 2 +-
 8 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/man/add_nsu_cohort.Rd b/man/add_nsu_cohort.Rd
index b9a988c57..3be8b290e 100644
--- a/man/add_nsu_cohort.Rd
+++ b/man/add_nsu_cohort.Rd
@@ -4,7 +4,11 @@
 \alias{add_nsu_cohort}
 \title{Add NSU cohort to working file}
 \usage{
-add_nsu_cohort(data, year, nsu_cohort = read_file(get_nsu_path(year)))
+add_nsu_cohort(
+  data,
+  year,
+  nsu_cohort = read_file(get_nsu_path(year)) \%>\% slfhelper::get_chi()
+)
 }
 \arguments{
 \item{data}{The input data frame}
diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd
index 1b4c54186..2f35af6a0 100644
--- a/man/create_episode_file.Rd
+++ b/man/create_episode_file.Rd
@@ -7,10 +7,10 @@
 create_episode_file(
   processed_data_list,
   year,
-  dd_data = read_file(get_source_extract_path(year, "dd")),
+  dd_data = read_file(get_source_extract_path(year, "dd")) \%>\% slfhelper::get_chi(),
   homelessness_lookup = create_homelessness_lookup(year),
-  nsu_cohort = read_file(get_nsu_path(year)),
-  ltc_data = read_file(get_ltcs_path(year)),
+  nsu_cohort = read_file(get_nsu_path(year)) \%>\% slfhelper::get_chi(),
+  ltc_data = read_file(get_ltcs_path(year)) \%>\% slfhelper::get_chi(),
   slf_pc_lookup = read_file(get_slf_postcode_path()),
   slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac",
     "cluster", "hbpraccode")),
diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd
index 3ef549cc3..5f8a7ace9 100644
--- a/man/join_cohort_lookups.Rd
+++ b/man/join_cohort_lookups.Rd
@@ -9,9 +9,9 @@ join_cohort_lookups(
   year,
   update = latest_update(),
   demographic_cohort = read_file(get_demographic_cohorts_path(year, update), col_select =
-    c("chi", "demographic_cohort")),
+    c("anon_chi", "demographic_cohort")) \%>\% slfhelper::get_chi(),
   service_use_cohort = read_file(get_service_use_cohorts_path(year, update), col_select =
-    c("chi", "service_use_cohort"))
+    c("anon_chi", "service_use_cohort")) \%>\% slfhelper::get_chi()
 )
 }
 \arguments{
diff --git a/man/join_deaths_data.Rd b/man/join_deaths_data.Rd
index f3b68fe1a..e45b1e714 100644
--- a/man/join_deaths_data.Rd
+++ b/man/join_deaths_data.Rd
@@ -7,7 +7,8 @@
 join_deaths_data(
   data,
   year,
-  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))
+  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) \%>\%
+    slfhelper::get_chi()
 )
 }
 \arguments{
diff --git a/man/join_sc_client.Rd b/man/join_sc_client.Rd
index c79a62782..f2ad946b2 100644
--- a/man/join_sc_client.Rd
+++ b/man/join_sc_client.Rd
@@ -7,7 +7,7 @@
 join_sc_client(
   data,
   year,
-  sc_client = read_file(get_sc_client_lookup_path(year)),
+  sc_client = read_file(get_sc_client_lookup_path(year) \%>\% slfhelper::get_chi()),
   file_type = c("episode", "individual")
 )
 }
diff --git a/man/link_delayed_discharge_eps.Rd b/man/link_delayed_discharge_eps.Rd
index 173fc8706..37f5db262 100644
--- a/man/link_delayed_discharge_eps.Rd
+++ b/man/link_delayed_discharge_eps.Rd
@@ -7,7 +7,7 @@
 link_delayed_discharge_eps(
   episode_file,
   year,
-  dd_data = read_file(get_source_extract_path(year, "dd"))
+  dd_data = read_file(get_source_extract_path(year, "dd")) \%>\% slfhelper::get_chi()
 )
 }
 \arguments{
diff --git a/man/match_on_ltcs.Rd b/man/match_on_ltcs.Rd
index e0def00cc..4dcd991bc 100644
--- a/man/match_on_ltcs.Rd
+++ b/man/match_on_ltcs.Rd
@@ -4,7 +4,11 @@
 \alias{match_on_ltcs}
 \title{Match on LTC DoB and dates of LTC incidence}
 \usage{
-match_on_ltcs(data, year, ltc_data = read_file(get_ltcs_path(year)))
+match_on_ltcs(
+  data,
+  year,
+  ltc_data = read_file(get_ltcs_path(year)) \%>\% slfhelper::get_chi()
+)
 }
 \arguments{
 \item{data}{episode files}
diff --git a/man/process_lookup_sc_client.Rd b/man/process_lookup_sc_client.Rd
index aa9cbed18..464c8e83d 100644
--- a/man/process_lookup_sc_client.Rd
+++ b/man/process_lookup_sc_client.Rd
@@ -8,7 +8,7 @@ process_lookup_sc_client(
   data,
   year,
   sc_demographics = read_file(get_sc_demog_lookup_path(), col_select =
-    c("sending_location", "social_care_id", "chi")),
+    c("sending_location", "social_care_id", "anon_chi")) \%>\% slfhelper::get_chi(),
   write_to_disk = TRUE
 )
 }

From 89fca6ba49535f0d194dc90418fceb2490bc15c9 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Wed, 22 May 2024 12:18:23 +0100
Subject: [PATCH 141/186] update get sandpit extracts

---
 R/get_sandpit_extract_path.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/get_sandpit_extract_path.R b/R/get_sandpit_extract_path.R
index 170de7537..9ff942044 100644
--- a/R/get_sandpit_extract_path.R
+++ b/R/get_sandpit_extract_path.R
@@ -23,12 +23,12 @@ get_sandpit_extract_path <- function(type = c(
 
   file_name <- dplyr::case_match(
     type,
-    "at" ~ "sandpit_at_extract",
-    "ch" ~ "sandpit_ch_extract",
-    "hc" ~ "sandpit_hc_extract",
-    "sds" ~ "sandpit_sds_extract",
-    "client" ~ "sandpit_sc_client_extract",
-    "demographics" ~ "sandpit_sc_demographics_extract"
+    "at" ~ "anon-sandpit_at_extract",
+    "ch" ~ "anon-sandpit_ch_extract",
+    "hc" ~ "anon-sandpit_hc_extract",
+    "sds" ~ "anon-sandpit_sds_extract",
+    "client" ~ "anon-sandpit_sc_client_extract",
+    "demographics" ~ "anon-sandpit_sc_demographics_extract"
   )
 
   if (type == "client") {

From fd8827bd9338ff5d71752cbc46745367cdf8c19f Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Wed, 22 May 2024 16:30:13 +0100
Subject: [PATCH 142/186] update tests to use `chi`

---
 R/process_tests_acute.R               | 4 ++++
 R/process_tests_ae.R                  | 4 ++++
 R/process_tests_alarms_telecare.R     | 4 ++++
 R/process_tests_care_home.R           | 4 ++++
 R/process_tests_cmh.R                 | 3 +++
 R/process_tests_delayed_discharges.R  | 4 ++++
 R/process_tests_district_nursing.R    | 3 +++
 R/process_tests_gp_ooh.R              | 4 ++++
 R/process_tests_home_care.R           | 4 ++++
 R/process_tests_homelessness.R        | 4 ++++
 R/process_tests_it_chi_deaths.R       | 4 ++++
 R/process_tests_ltcs.R                | 4 ++++
 R/process_tests_maternity.R           | 4 ++++
 R/process_tests_mental_health.R       | 4 ++++
 R/process_tests_nrs_deaths.R          | 4 ++++
 R/process_tests_outpatients.R         | 4 ++++
 R/process_tests_prescribing.R         | 4 ++++
 R/process_tests_sc_all_at_episodes.R  | 4 ++++
 R/process_tests_sc_all_ch_episodes.R  | 4 ++++
 R/process_tests_sc_all_hc_episodes.R  | 4 ++++
 R/process_tests_sc_all_sds_episodes.R | 4 ++++
 R/process_tests_sc_demographics.R     | 4 ++++
 R/process_tests_sc_sandpit.R          | 4 ++++
 R/process_tests_sds.R                 | 4 ++++
 R/produce_sc_all_episodes_tests.R     | 4 ++++
 25 files changed, 98 insertions(+)

diff --git a/R/process_tests_acute.R b/R/process_tests_acute.R
index 3e01a463a..bea70cb98 100644
--- a/R/process_tests_acute.R
+++ b/R/process_tests_acute.R
@@ -10,6 +10,10 @@
 #'
 #' @export
 process_tests_acute <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_ae.R b/R/process_tests_ae.R
index 802cc8c6c..99634b412 100644
--- a/R/process_tests_ae.R
+++ b/R/process_tests_ae.R
@@ -7,6 +7,10 @@
 #'
 #' @export
 process_tests_ae <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_alarms_telecare.R b/R/process_tests_alarms_telecare.R
index 0b2524d55..0d8cea7ca 100644
--- a/R/process_tests_alarms_telecare.R
+++ b/R/process_tests_alarms_telecare.R
@@ -8,6 +8,10 @@
 #'
 #' @export
 process_tests_alarms_telecare <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_care_home.R b/R/process_tests_care_home.R
index f75908d69..f504efb95 100644
--- a/R/process_tests_care_home.R
+++ b/R/process_tests_care_home.R
@@ -7,6 +7,10 @@
 #'
 #' @export
 process_tests_care_home <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_cmh.R b/R/process_tests_cmh.R
index 4d246399f..3c650b63a 100644
--- a/R/process_tests_cmh.R
+++ b/R/process_tests_cmh.R
@@ -12,6 +12,9 @@ process_tests_cmh <- function(data, year) {
     return(data)
   }
 
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_delayed_discharges.R b/R/process_tests_delayed_discharges.R
index 0355ad0e2..3546db292 100644
--- a/R/process_tests_delayed_discharges.R
+++ b/R/process_tests_delayed_discharges.R
@@ -10,6 +10,10 @@
 #'
 #' @export
 process_tests_delayed_discharges <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_district_nursing.R b/R/process_tests_district_nursing.R
index 6c890cfb4..b8d119055 100644
--- a/R/process_tests_district_nursing.R
+++ b/R/process_tests_district_nursing.R
@@ -13,6 +13,9 @@ process_tests_district_nursing <- function(data, year) {
     return(data)
   }
 
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
   data <- rename_hscp(data)
 
diff --git a/R/process_tests_gp_ooh.R b/R/process_tests_gp_ooh.R
index e9778711d..ab79d2bd8 100644
--- a/R/process_tests_gp_ooh.R
+++ b/R/process_tests_gp_ooh.R
@@ -7,6 +7,10 @@
 #'
 #' @export
 process_tests_gp_ooh <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_home_care.R b/R/process_tests_home_care.R
index c06d9832f..738644510 100644
--- a/R/process_tests_home_care.R
+++ b/R/process_tests_home_care.R
@@ -7,6 +7,10 @@
 #'
 #' @export
 process_tests_home_care <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_homelessness.R b/R/process_tests_homelessness.R
index db409ac66..e092a77d2 100644
--- a/R/process_tests_homelessness.R
+++ b/R/process_tests_homelessness.R
@@ -8,6 +8,10 @@
 #'
 #' @export
 process_tests_homelessness <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_it_chi_deaths.R b/R/process_tests_it_chi_deaths.R
index 5de2d02c8..6a2f6066e 100644
--- a/R/process_tests_it_chi_deaths.R
+++ b/R/process_tests_it_chi_deaths.R
@@ -4,6 +4,10 @@
 #'
 #' @export
 process_tests_it_chi_deaths <- function(data, update = previous_update()) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   comparison <- produce_test_comparison(
     old_data = produce_it_chi_deaths_tests(
       read_file(get_slf_chi_deaths_path(update = update))
diff --git a/R/process_tests_ltcs.R b/R/process_tests_ltcs.R
index c667ad745..ccd34a458 100644
--- a/R/process_tests_ltcs.R
+++ b/R/process_tests_ltcs.R
@@ -7,6 +7,10 @@
 #'
 #' @export
 process_tests_ltcs <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   # Find and flag any duplicate chis and chi/postcode combinations
   duplicates <- data %>%
     dplyr::summarise(
diff --git a/R/process_tests_maternity.R b/R/process_tests_maternity.R
index 6cc043bcb..11f2c6bbd 100644
--- a/R/process_tests_maternity.R
+++ b/R/process_tests_maternity.R
@@ -7,6 +7,10 @@
 #'
 #' @export
 process_tests_maternity <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_mental_health.R b/R/process_tests_mental_health.R
index 2a3c0f026..07cad8699 100644
--- a/R/process_tests_mental_health.R
+++ b/R/process_tests_mental_health.R
@@ -7,6 +7,10 @@
 #'
 #' @export
 process_tests_mental_health <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_nrs_deaths.R b/R/process_tests_nrs_deaths.R
index f3d47b18d..efbc2c716 100644
--- a/R/process_tests_nrs_deaths.R
+++ b/R/process_tests_nrs_deaths.R
@@ -7,6 +7,10 @@
 #'
 #' @export
 process_tests_nrs_deaths <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R
index c9a7521b7..086aa183b 100644
--- a/R/process_tests_outpatients.R
+++ b/R/process_tests_outpatients.R
@@ -7,6 +7,10 @@
 #'
 #' @export
 process_tests_outpatients <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_prescribing.R b/R/process_tests_prescribing.R
index 415b896f4..50853a6b0 100644
--- a/R/process_tests_prescribing.R
+++ b/R/process_tests_prescribing.R
@@ -7,6 +7,10 @@
 #'
 #' @export
 process_tests_prescribing <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/process_tests_sc_all_at_episodes.R b/R/process_tests_sc_all_at_episodes.R
index 8b5580334..0e89d3883 100644
--- a/R/process_tests_sc_all_at_episodes.R
+++ b/R/process_tests_sc_all_at_episodes.R
@@ -10,6 +10,10 @@
 #'
 #' @export
 process_tests_sc_all_at_episodes <- function(data) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   comparison <- produce_test_comparison(
     old_data = produce_sc_all_episodes_tests(
       read_file(get_sc_at_episodes_path(update = previous_update()))
diff --git a/R/process_tests_sc_all_ch_episodes.R b/R/process_tests_sc_all_ch_episodes.R
index 20b438d96..b6f81d1f8 100644
--- a/R/process_tests_sc_all_ch_episodes.R
+++ b/R/process_tests_sc_all_ch_episodes.R
@@ -10,6 +10,10 @@
 #'
 #' @export
 process_tests_sc_all_ch_episodes <- function(data) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   comparison <- produce_test_comparison(
     old_data = produce_sc_all_episodes_tests(
       read_file(get_sc_ch_episodes_path(update = previous_update()))
diff --git a/R/process_tests_sc_all_hc_episodes.R b/R/process_tests_sc_all_hc_episodes.R
index 7194790c0..e9ef8ea47 100644
--- a/R/process_tests_sc_all_hc_episodes.R
+++ b/R/process_tests_sc_all_hc_episodes.R
@@ -10,6 +10,10 @@
 #'
 #' @export
 process_tests_sc_all_hc_episodes <- function(data) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   comparison <- produce_test_comparison(
     old_data = produce_sc_all_episodes_tests(
       read_file(get_sc_hc_episodes_path(update = previous_update()))
diff --git a/R/process_tests_sc_all_sds_episodes.R b/R/process_tests_sc_all_sds_episodes.R
index cf87a671c..f7f68cacc 100644
--- a/R/process_tests_sc_all_sds_episodes.R
+++ b/R/process_tests_sc_all_sds_episodes.R
@@ -10,6 +10,10 @@
 #'
 #' @export
 process_tests_sc_all_sds_episodes <- function(data) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   comparison <- produce_test_comparison(
     old_data = produce_sc_all_episodes_tests(
       read_file(get_sc_sds_episodes_path(update = previous_update()))
diff --git a/R/process_tests_sc_demographics.R b/R/process_tests_sc_demographics.R
index 6150a4e62..5c1e8a718 100644
--- a/R/process_tests_sc_demographics.R
+++ b/R/process_tests_sc_demographics.R
@@ -10,6 +10,10 @@
 #'
 #' @export
 process_tests_sc_demographics <- function(data) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   comparison <- produce_test_comparison(
     old_data = produce_sc_demog_lookup_tests(
       read_file(get_sc_demog_lookup_path(update = previous_update()))
diff --git a/R/process_tests_sc_sandpit.R b/R/process_tests_sc_sandpit.R
index 089f61aa1..8b6d62b1d 100644
--- a/R/process_tests_sc_sandpit.R
+++ b/R/process_tests_sc_sandpit.R
@@ -6,6 +6,10 @@
 #' @export
 #'
 process_tests_sc_sandpit <- function(type = c("at", "hc", "ch", "sds", "demographics", "client"), year = NULL) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   comparison <- produce_test_comparison(
     old_data = produce_sc_sandpit_tests(
       read_file(get_sandpit_extract_path(type = {{ type }}, year = year, update = previous_update())),
diff --git a/R/process_tests_sds.R b/R/process_tests_sds.R
index 437b137f9..64573b513 100644
--- a/R/process_tests_sds.R
+++ b/R/process_tests_sds.R
@@ -7,6 +7,10 @@
 #'
 #' @export
 process_tests_sds <- function(data, year) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   old_data <- get_existing_data_for_tests(data)
 
   data <- rename_hscp(data)
diff --git a/R/produce_sc_all_episodes_tests.R b/R/produce_sc_all_episodes_tests.R
index c2720a928..13e7b7b9d 100644
--- a/R/produce_sc_all_episodes_tests.R
+++ b/R/produce_sc_all_episodes_tests.R
@@ -8,6 +8,10 @@
 #'
 #' @family social care test functions
 produce_sc_all_episodes_tests <- function(data) {
+
+  data <- data %>%
+    slfhelper::get_chi()
+
   data %>%
     # create test flags
     create_demog_test_flags(chi = .data$chi) %>%

From 60ca854316081b0327f8c3d7bdaf3fdc89729405 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Wed, 22 May 2024 16:29:18 +0000
Subject: [PATCH 143/186] Style code

---
 R/process_tests_acute.R               | 1 -
 R/process_tests_ae.R                  | 1 -
 R/process_tests_alarms_telecare.R     | 1 -
 R/process_tests_care_home.R           | 1 -
 R/process_tests_delayed_discharges.R  | 1 -
 R/process_tests_gp_ooh.R              | 1 -
 R/process_tests_home_care.R           | 1 -
 R/process_tests_homelessness.R        | 1 -
 R/process_tests_it_chi_deaths.R       | 1 -
 R/process_tests_ltcs.R                | 1 -
 R/process_tests_maternity.R           | 1 -
 R/process_tests_mental_health.R       | 1 -
 R/process_tests_nrs_deaths.R          | 1 -
 R/process_tests_outpatients.R         | 1 -
 R/process_tests_prescribing.R         | 1 -
 R/process_tests_sc_all_at_episodes.R  | 1 -
 R/process_tests_sc_all_ch_episodes.R  | 1 -
 R/process_tests_sc_all_hc_episodes.R  | 1 -
 R/process_tests_sc_all_sds_episodes.R | 1 -
 R/process_tests_sc_demographics.R     | 1 -
 R/process_tests_sc_sandpit.R          | 1 -
 R/process_tests_sds.R                 | 1 -
 R/produce_sc_all_episodes_tests.R     | 1 -
 23 files changed, 23 deletions(-)

diff --git a/R/process_tests_acute.R b/R/process_tests_acute.R
index bea70cb98..793012850 100644
--- a/R/process_tests_acute.R
+++ b/R/process_tests_acute.R
@@ -10,7 +10,6 @@
 #'
 #' @export
 process_tests_acute <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_ae.R b/R/process_tests_ae.R
index 99634b412..feee1281d 100644
--- a/R/process_tests_ae.R
+++ b/R/process_tests_ae.R
@@ -7,7 +7,6 @@
 #'
 #' @export
 process_tests_ae <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_alarms_telecare.R b/R/process_tests_alarms_telecare.R
index 0d8cea7ca..59919a7bc 100644
--- a/R/process_tests_alarms_telecare.R
+++ b/R/process_tests_alarms_telecare.R
@@ -8,7 +8,6 @@
 #'
 #' @export
 process_tests_alarms_telecare <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_care_home.R b/R/process_tests_care_home.R
index f504efb95..4c0a20173 100644
--- a/R/process_tests_care_home.R
+++ b/R/process_tests_care_home.R
@@ -7,7 +7,6 @@
 #'
 #' @export
 process_tests_care_home <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_delayed_discharges.R b/R/process_tests_delayed_discharges.R
index 3546db292..70db064fe 100644
--- a/R/process_tests_delayed_discharges.R
+++ b/R/process_tests_delayed_discharges.R
@@ -10,7 +10,6 @@
 #'
 #' @export
 process_tests_delayed_discharges <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_gp_ooh.R b/R/process_tests_gp_ooh.R
index ab79d2bd8..43b151da0 100644
--- a/R/process_tests_gp_ooh.R
+++ b/R/process_tests_gp_ooh.R
@@ -7,7 +7,6 @@
 #'
 #' @export
 process_tests_gp_ooh <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_home_care.R b/R/process_tests_home_care.R
index 738644510..54fb065e9 100644
--- a/R/process_tests_home_care.R
+++ b/R/process_tests_home_care.R
@@ -7,7 +7,6 @@
 #'
 #' @export
 process_tests_home_care <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_homelessness.R b/R/process_tests_homelessness.R
index e092a77d2..2a5ce657e 100644
--- a/R/process_tests_homelessness.R
+++ b/R/process_tests_homelessness.R
@@ -8,7 +8,6 @@
 #'
 #' @export
 process_tests_homelessness <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_it_chi_deaths.R b/R/process_tests_it_chi_deaths.R
index 6a2f6066e..33ef1c5ff 100644
--- a/R/process_tests_it_chi_deaths.R
+++ b/R/process_tests_it_chi_deaths.R
@@ -4,7 +4,6 @@
 #'
 #' @export
 process_tests_it_chi_deaths <- function(data, update = previous_update()) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_ltcs.R b/R/process_tests_ltcs.R
index ccd34a458..0561e70d0 100644
--- a/R/process_tests_ltcs.R
+++ b/R/process_tests_ltcs.R
@@ -7,7 +7,6 @@
 #'
 #' @export
 process_tests_ltcs <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_maternity.R b/R/process_tests_maternity.R
index 11f2c6bbd..5f7954fa1 100644
--- a/R/process_tests_maternity.R
+++ b/R/process_tests_maternity.R
@@ -7,7 +7,6 @@
 #'
 #' @export
 process_tests_maternity <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_mental_health.R b/R/process_tests_mental_health.R
index 07cad8699..882c426cf 100644
--- a/R/process_tests_mental_health.R
+++ b/R/process_tests_mental_health.R
@@ -7,7 +7,6 @@
 #'
 #' @export
 process_tests_mental_health <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_nrs_deaths.R b/R/process_tests_nrs_deaths.R
index efbc2c716..6c34fc42a 100644
--- a/R/process_tests_nrs_deaths.R
+++ b/R/process_tests_nrs_deaths.R
@@ -7,7 +7,6 @@
 #'
 #' @export
 process_tests_nrs_deaths <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R
index 086aa183b..56e53b7a2 100644
--- a/R/process_tests_outpatients.R
+++ b/R/process_tests_outpatients.R
@@ -7,7 +7,6 @@
 #'
 #' @export
 process_tests_outpatients <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_prescribing.R b/R/process_tests_prescribing.R
index 50853a6b0..328051342 100644
--- a/R/process_tests_prescribing.R
+++ b/R/process_tests_prescribing.R
@@ -7,7 +7,6 @@
 #'
 #' @export
 process_tests_prescribing <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_sc_all_at_episodes.R b/R/process_tests_sc_all_at_episodes.R
index 0e89d3883..c23a4f6ed 100644
--- a/R/process_tests_sc_all_at_episodes.R
+++ b/R/process_tests_sc_all_at_episodes.R
@@ -10,7 +10,6 @@
 #'
 #' @export
 process_tests_sc_all_at_episodes <- function(data) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_sc_all_ch_episodes.R b/R/process_tests_sc_all_ch_episodes.R
index b6f81d1f8..6887eb662 100644
--- a/R/process_tests_sc_all_ch_episodes.R
+++ b/R/process_tests_sc_all_ch_episodes.R
@@ -10,7 +10,6 @@
 #'
 #' @export
 process_tests_sc_all_ch_episodes <- function(data) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_sc_all_hc_episodes.R b/R/process_tests_sc_all_hc_episodes.R
index e9ef8ea47..d037e7908 100644
--- a/R/process_tests_sc_all_hc_episodes.R
+++ b/R/process_tests_sc_all_hc_episodes.R
@@ -10,7 +10,6 @@
 #'
 #' @export
 process_tests_sc_all_hc_episodes <- function(data) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_sc_all_sds_episodes.R b/R/process_tests_sc_all_sds_episodes.R
index f7f68cacc..91c32d450 100644
--- a/R/process_tests_sc_all_sds_episodes.R
+++ b/R/process_tests_sc_all_sds_episodes.R
@@ -10,7 +10,6 @@
 #'
 #' @export
 process_tests_sc_all_sds_episodes <- function(data) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_sc_demographics.R b/R/process_tests_sc_demographics.R
index 5c1e8a718..c4a81d776 100644
--- a/R/process_tests_sc_demographics.R
+++ b/R/process_tests_sc_demographics.R
@@ -10,7 +10,6 @@
 #'
 #' @export
 process_tests_sc_demographics <- function(data) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_sc_sandpit.R b/R/process_tests_sc_sandpit.R
index 8b6d62b1d..7540d8686 100644
--- a/R/process_tests_sc_sandpit.R
+++ b/R/process_tests_sc_sandpit.R
@@ -6,7 +6,6 @@
 #' @export
 #'
 process_tests_sc_sandpit <- function(type = c("at", "hc", "ch", "sds", "demographics", "client"), year = NULL) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/process_tests_sds.R b/R/process_tests_sds.R
index 64573b513..b1f164455 100644
--- a/R/process_tests_sds.R
+++ b/R/process_tests_sds.R
@@ -7,7 +7,6 @@
 #'
 #' @export
 process_tests_sds <- function(data, year) {
-
   data <- data %>%
     slfhelper::get_chi()
 
diff --git a/R/produce_sc_all_episodes_tests.R b/R/produce_sc_all_episodes_tests.R
index 13e7b7b9d..bdbe84033 100644
--- a/R/produce_sc_all_episodes_tests.R
+++ b/R/produce_sc_all_episodes_tests.R
@@ -8,7 +8,6 @@
 #'
 #' @family social care test functions
 produce_sc_all_episodes_tests <- function(data) {
-
   data <- data %>%
     slfhelper::get_chi()
 

From 6852db76bd2da69bf9c6f48f27303a2065067726 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 27 May 2024 14:25:22 +0100
Subject: [PATCH 144/186] Update IT extracts to maintain chi

---
 R/read_extract_prescribing.R | 7 +++----
 R/read_it_chi_deaths.R       | 7 +++----
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/R/read_extract_prescribing.R b/R/read_extract_prescribing.R
index 147d0b08b..683484473 100644
--- a/R/read_extract_prescribing.R
+++ b/R/read_extract_prescribing.R
@@ -6,7 +6,7 @@
 read_extract_prescribing <- function(year, file_path = get_it_prescribing_path(year)) {
   pis_file <- read_file(file_path,
     col_type = readr::cols_only(
-      "anon_chi" = readr::col_character(),
+      "Pat UPI [C]" = readr::col_character(),
       "Pat DoB [C]" = readr::col_date(format = "%d-%m-%Y"),
       "Pat Gender" = readr::col_double(),
       "Pat Postcode [C]" = readr::col_character(),
@@ -17,15 +17,14 @@ read_extract_prescribing <- function(year, file_path = get_it_prescribing_path(y
   ) %>%
     # Rename variables
     dplyr::rename(
-      anon_chi = "anon_chi",
+      chi = "Pat UPI [C]",
       dob = "Pat DoB [C]",
       gender = "Pat Gender",
       postcode = "Pat Postcode [C]",
       gpprac = "Practice Code",
       no_paid_items = "Number of Paid Items",
       cost_total_net = "PD Paid GIC excl. BB"
-    ) %>%
-    slfhelper::get_chi()
+    )
 
   return(pis_file)
 }
diff --git a/R/read_it_chi_deaths.R b/R/read_it_chi_deaths.R
index a67a9a4ce..aab56c86d 100644
--- a/R/read_it_chi_deaths.R
+++ b/R/read_it_chi_deaths.R
@@ -9,17 +9,16 @@
 read_it_chi_deaths <- function(file_path = get_it_deaths_path()) {
   it_chi_deaths <- read_file(file_path,
     col_type = readr::cols(
-      "anon_chi" = readr::col_character(),
+      "PATIENT_UPI [C]" = readr::col_character(),
       "PATIENT DoD DATE (NRS)" = readr::col_date(format = "%d-%m-%Y"),
       "PATIENT DoD DATE (CHI)" = readr::col_date(format = "%d-%m-%Y")
     )
   ) %>%
     dplyr::rename(
-      anon_chi = "anon_chi",
+      chi = "PATIENT_UPI [C]",
       death_date_nrs = "PATIENT DoD DATE (NRS)",
       death_date_chi = "PATIENT DoD DATE (CHI)"
-    ) %>%
-    slfhelper::get_chi()
+    )
 
   return(it_chi_deaths)
 }

From ccf3e2cba6313a5b7b53a1e44a8677e7be7b41d6 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Mon, 27 May 2024 14:28:06 +0100
Subject: [PATCH 145/186] Update sort_bi_extracts

---
 00_Sort_BI_Extracts.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/00_Sort_BI_Extracts.R b/00_Sort_BI_Extracts.R
index 0638123b9..395f57784 100644
--- a/00_Sort_BI_Extracts.R
+++ b/00_Sort_BI_Extracts.R
@@ -1,5 +1,5 @@
 # Define the source directory and financial year pattern
-compress_files <- TRUE
+compress_files <- FALSE
 source_dir <- "/conf/sourcedev/Source_Linkage_File_Updates/Extracts Temp"
 pattern <- "-20(\\d{4})\\.csv"
 
@@ -43,7 +43,7 @@ move_temps_to_year_extract <- function(csv_file, compress_files = TRUE) {
     chi_in_file <- is_chi_in_file(csv_file)
     if (chi_in_file) {
       read_file(csv_file) %>%
-        dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI")) %>%
+        dplyr::rename_with(~ paste0("chi"), tidyselect::contains("UPI", ignore.case = FALSE)) %>%
         slfhelper::get_anon_chi() %>%
         readr::write_csv(file = new_file_path)
       cat("Replaced chi with anon chi:", csv_file, "to", new_file_path, "\n")

From e00568404888771db942be49c3aea63c459c70e9 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Mon, 27 May 2024 15:14:43 +0100
Subject: [PATCH 146/186] Update bracket

---
 R/create_episode_file.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index f376b22f6..c0bbc5c74 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -444,7 +444,7 @@ join_cohort_lookups <- function(
 #' @param file_type episode or individual file
 join_sc_client <- function(data,
                            year,
-                           sc_client = read_file(get_sc_client_lookup_path(year) %>% slfhelper::get_chi()),
+                           sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(),
                            file_type = c("episode", "individual")) {
   if (file_type == "episode") {
     # Match on client variables by chi

From 7942dce23a78541e1976431b59e655a553b7d553 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Mon, 27 May 2024 15:15:16 +0100
Subject: [PATCH 147/186] update parameter

---
 R/read_lookup_sc_demographics.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R
index 33873cf4f..08a8c5d8d 100644
--- a/R/read_lookup_sc_demographics.R
+++ b/R/read_lookup_sc_demographics.R
@@ -27,7 +27,7 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "demographics"))) {
     sc_demog %>%
-      slfhelper::get_anon_chi(chi = "chi_upi") %>%
+      slfhelper::get_anon_chi(chi_var = "chi_upi") %>%
       write_file(get_sandpit_extract_path(type = "demographics"))
 
     sc_demog %>%

From eaa13f6f0fa7ddea7d964abb030b54d4a337cf7d Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Mon, 27 May 2024 14:16:18 +0000
Subject: [PATCH 148/186] Update documentation

---
 man/join_sc_client.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/join_sc_client.Rd b/man/join_sc_client.Rd
index f2ad946b2..46ecefb5a 100644
--- a/man/join_sc_client.Rd
+++ b/man/join_sc_client.Rd
@@ -7,7 +7,7 @@
 join_sc_client(
   data,
   year,
-  sc_client = read_file(get_sc_client_lookup_path(year) \%>\% slfhelper::get_chi()),
+  sc_client = read_file(get_sc_client_lookup_path(year)) \%>\% slfhelper::get_chi(),
   file_type = c("episode", "individual")
 )
 }

From 66c8a596cbc2bd71167655e6dbd9bd7e511a774d Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 27 May 2024 15:50:19 +0100
Subject: [PATCH 149/186] bug fixes

---
 R/get_dd_path.R                 | 2 +-
 R/read_sc_all_alarms_telecare.R | 2 +-
 R/read_sc_all_care_home.R       | 2 +-
 R/read_sc_all_home_care.R       | 2 +-
 R/read_sc_all_sds.R             | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/get_dd_path.R b/R/get_dd_path.R
index fcbcc1595..b831c6482 100644
--- a/R/get_dd_path.R
+++ b/R/get_dd_path.R
@@ -19,7 +19,7 @@ get_dd_path <- function(..., dd_period = NULL) {
 
   dd_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Delayed_Discharges"),
-    file_name = paste0("anon_", dd_period, "_DD_LinkageFile.parquet"),
+    file_name = paste0("anon-", dd_period, "_DD_LinkageFile.parquet"),
     ...
   )
 
diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R
index 4af57d857..f6e97397c 100644
--- a/R/read_sc_all_alarms_telecare.R
+++ b/R/read_sc_all_alarms_telecare.R
@@ -33,7 +33,7 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
       write_file(get_sandpit_extract_path(type = "at"))
 
     at_full_data %>%
-      process_tests_sandpit(type = "at")
+      process_tests_sc_sandpit(type = "at")
   } else {
     at_full_data <- at_full_data
   }
diff --git a/R/read_sc_all_care_home.R b/R/read_sc_all_care_home.R
index 0e74d6623..4ff17477d 100644
--- a/R/read_sc_all_care_home.R
+++ b/R/read_sc_all_care_home.R
@@ -35,7 +35,7 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn =
       write_file(get_sandpit_extract_path(type = "ch"))
 
     ch_data %>%
-      process_tests_sandpit(type = "ch")
+      process_tests_sc_sandpit(type = "ch")
   } else {
     ch_data <- ch_data
   }
diff --git a/R/read_sc_all_home_care.R b/R/read_sc_all_home_care.R
index 3741785a7..fa51d24b7 100644
--- a/R/read_sc_all_home_care.R
+++ b/R/read_sc_all_home_care.R
@@ -52,7 +52,7 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn =
       write_file(get_sandpit_extract_path(type = "hc"))
 
     home_care_date %>%
-      process_tests_sandpit(type = "hc")
+      process_tests_sc_sandpit(type = "hc")
   } else {
     home_care_data <- home_care_data
   }
diff --git a/R/read_sc_all_sds.R b/R/read_sc_all_sds.R
index e184ffaeb..75fe77637 100644
--- a/R/read_sc_all_sds.R
+++ b/R/read_sc_all_sds.R
@@ -33,7 +33,7 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
       write_file(get_sandpit_extract_path(type = "sds"))
 
     sds_full_data %>%
-      process_tests_sandpit(type = "sds")
+      process_tests_sc_sandpit(type = "sds")
   } else {
     sds_full_data <- sds_full_data
   }

From 29d7cedd49bdcc20ca0f882046aec0a54185f295 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Mon, 27 May 2024 17:04:53 +0100
Subject: [PATCH 150/186] fix reading data from platform and homelessness chi

---
 R/process_extract_homelessness.R | 2 +-
 R/read_lookup_sc_client.R        | 4 +++-
 R/read_lookup_sc_demographics.R  | 7 +++++--
 R/read_sc_all_alarms_telecare.R  | 5 +++--
 R/read_sc_all_care_home.R        | 5 +++--
 R/read_sc_all_home_care.R        | 5 +++--
 R/read_sc_all_sds.R              | 4 +++-
 7 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index 7f89bcbc7..167ad6b9e 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -160,7 +160,7 @@ process_extract_homelessness <- function(
       "year",
       "recid",
       "smrtype",
-      chi = "upi_number",
+      "chi",
       dob = "client_dob_date",
       age = "age_at_assessment_decision_date",
       gender = "gender_code",
diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R
index 173c971b6..d0c72c859 100644
--- a/R/read_lookup_sc_client.R
+++ b/R/read_lookup_sc_client.R
@@ -16,6 +16,9 @@ read_lookup_sc_client <- function(fyyear,
 
   # read in data - social care 2 client
   client_data <- dplyr::tbl(sc_dvprod_connection, dbplyr::in_schema("social_care_2", "client")) %>%
+    dplyr::collect()
+
+  client_data <- client_data %>%
     dplyr::select(
       "sending_location",
       "social_care_id",
@@ -41,7 +44,6 @@ read_lookup_sc_client <- function(fyyear,
       "day_care"
     ) %>%
     dplyr::filter(.data$financial_year == year) %>%
-    dplyr::collect() %>%
     dplyr::mutate(
       dplyr::across(
         c(
diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R
index 08a8c5d8d..1d97e7c30 100644
--- a/R/read_lookup_sc_demographics.R
+++ b/R/read_lookup_sc_demographics.R
@@ -10,6 +10,9 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection
     sc_dvprod_connection,
     dbplyr::in_schema("social_care_2", "demographic_snapshot")
   ) %>%
+    dplyr::collect()
+
+  sc_demog <- sc_demog %>%
     dplyr::select(
       "latest_record_flag",
       "period",
@@ -22,8 +25,8 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection
       "chi_postcode",
       "submitted_postcode",
       "chi_gender_code"
-    ) %>%
-    dplyr::collect()
+    )
+
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "demographics"))) {
     sc_demog %>%
diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R
index f6e97397c..0106bcbda 100644
--- a/R/read_sc_all_alarms_telecare.R
+++ b/R/read_sc_all_alarms_telecare.R
@@ -13,7 +13,9 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
   at_full_data <- dplyr::tbl(
     sc_dvprod_connection,
     dbplyr::in_schema("social_care_2", "equipment_snapshot")
-  ) %>%
+  ) %>% dplyr::collect()
+
+  at_full_data <- at_full_data %>%
     dplyr::select(
       "sending_location",
       "social_care_id",
@@ -25,7 +27,6 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
       "service_end_date",
       "service_start_date_after_period_end_date"
     ) %>%
-    dplyr::collect() %>%
     dplyr::distinct()
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "at"))) {
diff --git a/R/read_sc_all_care_home.R b/R/read_sc_all_care_home.R
index 4ff17477d..89ef7951b 100644
--- a/R/read_sc_all_care_home.R
+++ b/R/read_sc_all_care_home.R
@@ -10,7 +10,9 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn =
   ch_data <- dplyr::tbl(
     sc_dvprod_connection,
     dbplyr::in_schema("social_care_2", "carehome_snapshot")
-  ) %>%
+  ) %>% dplyr::collect()
+
+  ch_data <- ch_data %>%
     dplyr::select(
       "ch_name",
       "ch_postcode",
@@ -27,7 +29,6 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn =
       "ch_discharge_date",
       "age"
     ) %>%
-    dplyr::collect() %>%
     dplyr::distinct()
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "ch"))) {
diff --git a/R/read_sc_all_home_care.R b/R/read_sc_all_home_care.R
index fa51d24b7..2f4d892e3 100644
--- a/R/read_sc_all_home_care.R
+++ b/R/read_sc_all_home_care.R
@@ -10,7 +10,9 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn =
   home_care_data <- dplyr::tbl(
     sc_dvprod_connection,
     dbplyr::in_schema("social_care_2", "homecare_snapshot")
-  ) %>%
+  ) %>% dplyr::collect()
+
+  home_care_data <- home_care_data %>%
     dplyr::select(
       "sending_location",
       "sending_location_name",
@@ -44,7 +46,6 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn =
       .data$period
     )) %>%
     # drop rows start date after end date
-    dplyr::collect() %>%
     dplyr::distinct()
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "hc"))) {
diff --git a/R/read_sc_all_sds.R b/R/read_sc_all_sds.R
index 75fe77637..54d3d31ed 100644
--- a/R/read_sc_all_sds.R
+++ b/R/read_sc_all_sds.R
@@ -11,6 +11,9 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
     sc_dvprod_connection,
     dbplyr::in_schema("social_care_2", "sds_snapshot")
   ) %>%
+    dplyr::collect()
+
+  sds_full_data <- sds_full_data %>%
     dplyr::select(
       "sending_location",
       "social_care_id",
@@ -25,7 +28,6 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
       "sds_start_date_after_end_date", # get fixed
       "sds_start_date_after_period_end_date" # get removed
     ) %>%
-    dplyr::collect() %>%
     dplyr::distinct()
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "sds"))) {

From bb5aafa879c4899bd3fff2dbebffc96bdcad9e68 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 28 May 2024 10:32:36 +0100
Subject: [PATCH 151/186] update sc demog path

---
 R/get_sc_lookup_paths.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/get_sc_lookup_paths.R b/R/get_sc_lookup_paths.R
index be0fa3eb6..80dd15d5f 100644
--- a/R/get_sc_lookup_paths.R
+++ b/R/get_sc_lookup_paths.R
@@ -15,7 +15,7 @@
 get_sc_demog_lookup_path <- function(update = latest_update(), ...) {
   sc_demog_lookup_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_demographic_lookup"),
-    file_name = stringr::str_glue("sc_demographics_lookup_{update}.parquet"),
+    file_name = stringr::str_glue("anon-sc_demographics_lookup_{update}.parquet"),
     ...
   )
 

From d4b9cd6ccaa0cba0d619320a1c0b86cd0b80d0bb Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 28 May 2024 10:33:20 +0100
Subject: [PATCH 152/186] update homelessness lookup

---
 R/process_lookup_homelessness.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R
index 62199c9e8..b0dc30d51 100644
--- a/R/process_lookup_homelessness.R
+++ b/R/process_lookup_homelessness.R
@@ -12,7 +12,7 @@
 #' @family process extracts
 create_homelessness_lookup <- function(
     year,
-    homelessness_data = read_file(get_source_extract_path(year, "homelessness"))) {
+    homelessness_data = read_file(get_source_extract_path(year, "homelessness")) %>% slfhelper::get_chi()) {
   homelessness_lookup <- homelessness_data %>%
     dplyr::distinct(.data$chi, .data$record_keydate1, .data$record_keydate2) %>%
     tidyr::drop_na(.data$chi) %>%

From 047441cb48be0fb532f48a282c57485cc93ae716 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Tue, 28 May 2024 09:35:09 +0000
Subject: [PATCH 153/186] Update documentation

---
 man/create_homelessness_lookup.Rd | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/man/create_homelessness_lookup.Rd b/man/create_homelessness_lookup.Rd
index 610a96c26..d6a2f2bc8 100644
--- a/man/create_homelessness_lookup.Rd
+++ b/man/create_homelessness_lookup.Rd
@@ -6,7 +6,8 @@
 \usage{
 create_homelessness_lookup(
   year,
-  homelessness_data = read_file(get_source_extract_path(year, "homelessness"))
+  homelessness_data = read_file(get_source_extract_path(year, "homelessness")) \%>\%
+    slfhelper::get_chi()
 )
 }
 \arguments{

From 51d51c15bd55e256dfdc5b534ade0700396774bc Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 28 May 2024 11:24:22 +0100
Subject: [PATCH 154/186] supply get_chi() where needed in targets

---
 R/create_episode_file.R |  2 +-
 _targets.R              | 25 +++++++++++++------------
 2 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index c0bbc5c74..ec4957878 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -29,7 +29,7 @@ create_episode_file <- function(
       get_slf_gpprac_path(),
       col_select = c("gpprac", "cluster", "hbpraccode")
     ),
-    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)),
+    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))%>% slfhelper::get_chi(),
     sc_client = read_file(get_sc_client_lookup_path(year)),
     write_to_disk = TRUE,
     anon_chi_out = TRUE) {
diff --git a/_targets.R b/_targets.R
index 1422c8a9b..3fa08e8cb 100644
--- a/_targets.R
+++ b/_targets.R
@@ -40,8 +40,8 @@ list(
   tar_target(simd_path, get_simd_path(), format = "file"),
   tar_target(spd_path, get_spd_path(), format = "file"),
   tar_file_read(it_chi_deaths_extract,
-    command = get_it_deaths_path(),
-    read = read_it_chi_deaths(!!.x)
+                command = get_it_deaths_path(),
+                read = read_it_chi_deaths(!!.x)
   ),
   tar_file_read(dd_data, get_dd_path(), read_extract_delayed_discharges(!!.x)),
   tar_file_read(ltc_data, get_it_ltc_path(), read_lookup_ltc(!!.x)),
@@ -129,7 +129,7 @@ list(
     all_at,
     process_sc_all_alarms_telecare(
       all_at_extract,
-      sc_demog_lookup = sc_demog_lookup,
+      sc_demog_lookup = sc_demog_lookup %>% slfhelper::get_chi(),
       write_to_disk = write_to_disk
     ),
     priority = 0.5
@@ -150,7 +150,7 @@ list(
     all_home_care,
     process_sc_all_home_care(
       all_home_care_extract,
-      sc_demog_lookup = sc_demog_lookup,
+      sc_demog_lookup = sc_demog_lookup %>% slfhelper::get_chi(),
       write_to_disk = write_to_disk
     ),
     priority = 0.5
@@ -171,7 +171,7 @@ list(
     all_care_home,
     process_sc_all_care_home(
       all_care_home_extract,
-      sc_demog_lookup = sc_demog_lookup,
+      sc_demog_lookup = sc_demog_lookup %>% slfhelper::get_chi(),
       it_chi_deaths_data = it_chi_deaths_data,
       ch_name_lookup_path = slf_ch_name_lookup_path,
       spd_path = spd_path,
@@ -195,7 +195,7 @@ list(
     all_sds,
     process_sc_all_sds(
       all_sds_extract,
-      sc_demog_lookup = sc_demog_lookup,
+      sc_demog_lookup = sc_demog_lookup %>% slfhelper::get_chi(),
       write_to_disk = write_to_disk
     ),
     priority = 0.5
@@ -463,6 +463,7 @@ list(
         data = sc_client_data,
         year = year,
         sc_demographics = sc_demog_lookup %>%
+          slfhelper::get_chi() %>%
           dplyr::select(c("sending_location", "social_care_id", "chi")),
         write_to_disk = write_to_disk
       )
@@ -566,7 +567,7 @@ list(
       homelessness_lookup,
       create_homelessness_lookup(
         year,
-        homelessness_data = source_homelessness_extract
+        homelessness_data = source_homelessness_extract %>% slfhelper::get_chi()
       )
     ),
     tar_target(
@@ -575,13 +576,13 @@ list(
         processed_data_list,
         year,
         homelessness_lookup = homelessness_lookup,
-        dd_data = source_dd_extract,
-        nsu_cohort = nsu_cohort,
-        ltc_data = source_ltc_lookup,
+        dd_data = source_dd_extract %>% slfhelper::get_chi(),
+        nsu_cohort = nsu_cohort %>% slfhelper::get_chi(),
+        ltc_data = source_ltc_lookup %>% slfhelper::get_chi(),
         slf_pc_lookup = source_pc_lookup,
         slf_gpprac_lookup = source_gp_lookup,
-        slf_deaths_lookup = slf_deaths_lookup,
-        sc_client = sc_client_lookup,
+        slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(),
+        sc_client = sc_client_lookup %>% slfhelper::get_chi(),
         write_to_disk
       )
     ),

From 48620a6e834d736f5dd04f49067dc8dd14f68389 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Tue, 28 May 2024 10:25:51 +0000
Subject: [PATCH 155/186] Style code

---
 R/create_episode_file.R | 2 +-
 _targets.R              | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index ec4957878..df1e3b5b5 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -29,7 +29,7 @@ create_episode_file <- function(
       get_slf_gpprac_path(),
       col_select = c("gpprac", "cluster", "hbpraccode")
     ),
-    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))%>% slfhelper::get_chi(),
+    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi(),
     sc_client = read_file(get_sc_client_lookup_path(year)),
     write_to_disk = TRUE,
     anon_chi_out = TRUE) {
diff --git a/_targets.R b/_targets.R
index 3fa08e8cb..fa4047554 100644
--- a/_targets.R
+++ b/_targets.R
@@ -40,8 +40,8 @@ list(
   tar_target(simd_path, get_simd_path(), format = "file"),
   tar_target(spd_path, get_spd_path(), format = "file"),
   tar_file_read(it_chi_deaths_extract,
-                command = get_it_deaths_path(),
-                read = read_it_chi_deaths(!!.x)
+    command = get_it_deaths_path(),
+    read = read_it_chi_deaths(!!.x)
   ),
   tar_file_read(dd_data, get_dd_path(), read_extract_delayed_discharges(!!.x)),
   tar_file_read(ltc_data, get_it_ltc_path(), read_lookup_ltc(!!.x)),

From 597f6c398eda314dce92a0d659c3ef7c484c339d Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Tue, 28 May 2024 10:26:30 +0000
Subject: [PATCH 156/186] Update documentation

---
 man/create_episode_file.Rd | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd
index 2f35af6a0..12ae3a949 100644
--- a/man/create_episode_file.Rd
+++ b/man/create_episode_file.Rd
@@ -14,7 +14,8 @@ create_episode_file(
   slf_pc_lookup = read_file(get_slf_postcode_path()),
   slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac",
     "cluster", "hbpraccode")),
-  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)),
+  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) \%>\%
+    slfhelper::get_chi(),
   sc_client = read_file(get_sc_client_lookup_path(year)),
   write_to_disk = TRUE,
   anon_chi_out = TRUE

From 1e6fd8676d0079f872b90112be6a6e38307c031e Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 28 May 2024 13:31:33 +0100
Subject: [PATCH 157/186] Update targets with get_chi()

---
 _targets.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/_targets.R b/_targets.R
index fa4047554..556fea656 100644
--- a/_targets.R
+++ b/_targets.R
@@ -475,7 +475,7 @@ list(
     tar_target(
       source_sc_alarms_tele,
       process_extract_alarms_telecare(
-        data = all_at,
+        data = all_at %>% slfhelper::get_chi(),
         year = year,
         write_to_disk = write_to_disk
       )
@@ -490,7 +490,7 @@ list(
     tar_target(
       source_sc_care_home,
       process_extract_care_home(
-        data = all_care_home,
+        data = all_care_home %>% slfhelper::get_chi(),
         year = year,
         ch_costs = ch_cost_lookup,
         write_to_disk = write_to_disk
@@ -506,7 +506,7 @@ list(
     tar_target(
       source_sc_home_care,
       process_extract_home_care(
-        data = all_home_care,
+        data = all_home_care %>% slfhelper::get_chi(),
         year = year,
         write_to_disk = write_to_disk
       )
@@ -521,7 +521,7 @@ list(
     tar_target(
       source_sc_sds,
       process_extract_sds(
-        data = all_sds,
+        data = all_sds %>% slfhelper::get_chi(),
         year = year,
         write_to_disk = write_to_disk
       )

From 2defec7609a2953d3aefb0955d8a98e5171e9f3e Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 28 May 2024 13:49:39 +0100
Subject: [PATCH 158/186] Update targets with get_chi()

---
 _targets.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/_targets.R b/_targets.R
index 556fea656..925b5f19f 100644
--- a/_targets.R
+++ b/_targets.R
@@ -537,8 +537,8 @@ list(
       slf_deaths_lookup,
       process_slf_deaths_lookup(
         year = year,
-        nrs_deaths_data = source_nrs_deaths_extract,
-        chi_deaths_data = it_chi_deaths_data,
+        nrs_deaths_data = source_nrs_deaths_extract %>% slfhelper::get_chi(),
+        chi_deaths_data = it_chi_deaths_data %>% slfhelper::get_chi(),
         write_to_disk = write_to_disk
       )
     ),

From ff1910cb58ee7ffb5bf96974b7e34b4c88907917 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 28 May 2024 13:51:31 +0100
Subject: [PATCH 159/186] Update client script

---
 R/process_lookup_sc_client.R | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R
index 39c2b8874..69818def3 100644
--- a/R/process_lookup_sc_client.R
+++ b/R/process_lookup_sc_client.R
@@ -16,10 +16,9 @@
 process_lookup_sc_client <-
   function(data,
            year,
-           sc_demographics = read_file(
-             get_sc_demog_lookup_path(),
-             col_select = c("sending_location", "social_care_id", "anon_chi")
-           ) %>% slfhelper::get_chi(),
+           sc_demographics = read_file(get_sc_demog_lookup_path()) %>%
+             slfhelper::get_chi() %>%
+             dplyr::select(c("sending_location", "social_care_id", "chi")),
            write_to_disk = TRUE) {
     client_clean <- data %>%
       # Replace 'unknown' responses with NA

From 50503b916cceca77c130bbbb09a44c57665c72f5 Mon Sep 17 00:00:00 2001
From: Jennit07 <Jennit07@users.noreply.github.com>
Date: Tue, 28 May 2024 12:54:15 +0000
Subject: [PATCH 160/186] Update documentation

---
 man/process_lookup_sc_client.Rd | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/man/process_lookup_sc_client.Rd b/man/process_lookup_sc_client.Rd
index 464c8e83d..74e402846 100644
--- a/man/process_lookup_sc_client.Rd
+++ b/man/process_lookup_sc_client.Rd
@@ -7,8 +7,8 @@
 process_lookup_sc_client(
   data,
   year,
-  sc_demographics = read_file(get_sc_demog_lookup_path(), col_select =
-    c("sending_location", "social_care_id", "anon_chi")) \%>\% slfhelper::get_chi(),
+  sc_demographics = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi()
+    \%>\% dplyr::select(c("sending_location", "social_care_id", "chi")),
   write_to_disk = TRUE
 )
 }

From 27b5f469f9f09bec84707f9cf83d11c9055c56fb Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 28 May 2024 16:01:53 +0100
Subject: [PATCH 161/186] fix fill_ch_names

---
 R/fill_ch_names.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index 1a2966b48..cd8d18677 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -662,7 +662,7 @@ fill_ch_names <- function(ch_data,
       ),
       na_matches = "never"
     ) %>%
-    dplyr::filter(.data[["ch_name"]] %in% unique_ch_name) %>%
+    dplyr::filter(!(.data[["ch_name"]] %in% duplicated_ch_name)) %>%
     dplyr::mutate(
       ch_name_old = .data[["ch_name"]],
       ch_postcode_old = .data[["ch_postcode"]],

From ae052101369b8cd024ae5d1e750bfdde029f85a9 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 28 May 2024 17:29:29 +0100
Subject: [PATCH 162/186] add anon- and update targets

---
 R/get_sc_lookup_paths.R | 2 +-
 _targets.R              | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/get_sc_lookup_paths.R b/R/get_sc_lookup_paths.R
index 80dd15d5f..d201f416f 100644
--- a/R/get_sc_lookup_paths.R
+++ b/R/get_sc_lookup_paths.R
@@ -40,7 +40,7 @@ get_sc_demog_lookup_path <- function(update = latest_update(), ...) {
 get_sc_client_lookup_path <- function(year, update = latest_update(), ...) {
   sc_client_lookup_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_client_lookup"),
-    file_name = stringr::str_glue("sc_client_lookup_{year}_{update}.parquet"),
+    file_name = stringr::str_glue("anon-sc_client_lookup_{year}_{update}.parquet"),
     ...
   )
 
diff --git a/_targets.R b/_targets.R
index 925b5f19f..4ea32c179 100644
--- a/_targets.R
+++ b/_targets.R
@@ -172,7 +172,7 @@ list(
     process_sc_all_care_home(
       all_care_home_extract,
       sc_demog_lookup = sc_demog_lookup %>% slfhelper::get_chi(),
-      it_chi_deaths_data = it_chi_deaths_data,
+      it_chi_deaths_data = it_chi_deaths_data %>% slfhelper::get_chi(),
       ch_name_lookup_path = slf_ch_name_lookup_path,
       spd_path = spd_path,
       write_to_disk = write_to_disk

From 164bcff1a9d16ef0ee402e292c6757d3de7fb531 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Wed, 29 May 2024 10:26:46 +0100
Subject: [PATCH 163/186] fix add_activity_after_death in create_episode_file

---
 R/create_episode_file.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index df1e3b5b5..f14eede34 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -80,8 +80,6 @@ create_episode_file <- function(
         "hscp",
         "datazone2011",
         "attendance_status",
-        "death_date",
-        "deceased",
         "deathdiag1",
         "deathdiag2",
         "deathdiag3",
@@ -142,7 +140,9 @@ create_episode_file <- function(
       year,
       slf_deaths_lookup
     ) %>%
-    add_activity_after_death_flag(year, deaths_data = read_file(all_slf_deaths_lookup_path())) %>%
+    add_activity_after_death_flag(year,
+                                  deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>%
+                                    slfhelper::get_chi()) %>%
     load_ep_file_vars(year)
 
   if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {

From 17271a3124017bbe4e006fe20d7e3faf0d4f0a76 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Wed, 29 May 2024 09:28:22 +0000
Subject: [PATCH 164/186] Style code

---
 R/create_episode_file.R | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index f14eede34..93d2dc061 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -141,8 +141,9 @@ create_episode_file <- function(
       slf_deaths_lookup
     ) %>%
     add_activity_after_death_flag(year,
-                                  deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>%
-                                    slfhelper::get_chi()) %>%
+      deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>%
+        slfhelper::get_chi()
+    ) %>%
     load_ep_file_vars(year)
 
   if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {

From 1953a0749be3903cc5b639bd28008990a370add8 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Wed, 29 May 2024 10:54:05 +0100
Subject: [PATCH 165/186] process_tests_sc_client_lookup fix

---
 R/process_tests_sc_client_lookup.R |  2 +-
 R/write_tests_xlsx.R               | 27 +++++++++------------------
 2 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/R/process_tests_sc_client_lookup.R b/R/process_tests_sc_client_lookup.R
index 0bd0a7bad..61cb2cff0 100644
--- a/R/process_tests_sc_client_lookup.R
+++ b/R/process_tests_sc_client_lookup.R
@@ -16,7 +16,7 @@ process_tests_sc_client_lookup <- function(data, year) {
   )
 
   comparison %>%
-    write_tests_xlsx(sheet_name = "sc_client", year, workbook_name = "lookup")
+    write_tests_xlsx(sheet_name = "sc_client", year, workbook_name = "extract")
 
   return(comparison)
 }
diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R
index aa8527d7f..f9ae4a1f0 100644
--- a/R/write_tests_xlsx.R
+++ b/R/write_tests_xlsx.R
@@ -24,24 +24,15 @@ write_tests_xlsx <- function(comparison_data,
                                "cross_year"
                              )) {
   # Set up the workbook ----
-
-  if (is.null(year)) {
-    tests_workbook_name <- dplyr::case_when(
-      workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"),
-      workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"),
-      workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"),
-      workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"),
-      workbook_name == "cross_year" ~ stringr::str_glue(latest_update(), "_cross_year_tests")
-    )
-  } else if (workbook_name == "sandpit" & !is.null(year)) {
-    tests_workbook_name <- dplyr::case_when(
-      workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests")
-    )
-  } else {
-    tests_workbook_name <- dplyr::case_when(
-      workbook_name == "extract" ~ stringr::str_glue(latest_update(), "_{year}_extract_tests")
-    )
-  }
+  tests_workbook_name <- dplyr::case_when(
+    is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"),
+    is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"),
+    is.null(year) & workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"),
+    is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"),
+    is.null(year) & workbook_name == "cross_year" ~ stringr::str_glue(latest_update(), "_cross_year_tests"),
+    !is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"),
+    !is.null(year) & workbook_name == "extract" ~ stringr::str_glue(latest_update(), "_{year}_extract_tests")
+  )
 
   tests_workbook_path <- fs::path(
     get_slf_dir(),

From 0942c8e305b31d3c1ba0d62c3258ee9494787528 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Wed, 29 May 2024 16:14:38 +0100
Subject: [PATCH 166/186] fix anon-chi issues in create_episode_file

---
 R/create_episode_file.R | 2 +-
 R/join_sparra_hhg.R     | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index 93d2dc061..aef49f55c 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -30,7 +30,7 @@ create_episode_file <- function(
       col_select = c("gpprac", "cluster", "hbpraccode")
     ),
     slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi(),
-    sc_client = read_file(get_sc_client_lookup_path(year)),
+    sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_anon_chi(),
     write_to_disk = TRUE,
     anon_chi_out = TRUE) {
   processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble()))
diff --git a/R/join_sparra_hhg.R b/R/join_sparra_hhg.R
index d946f08c5..dafaca867 100644
--- a/R/join_sparra_hhg.R
+++ b/R/join_sparra_hhg.R
@@ -11,7 +11,7 @@ join_sparra_hhg <- function(data, year) {
       read_file(get_sparra_path(year)) %>%
         dplyr::rename(sparra_start_fy = "sparra_risk_score") %>%
         slfhelper::get_chi(),
-      by = c("chi" = "upi_number"),
+      by = c("chi"),
       na_matches = "never",
       relationship = "many-to-one"
     )
@@ -25,7 +25,7 @@ join_sparra_hhg <- function(data, year) {
       read_file(get_sparra_path(next_fy(year))) %>%
         dplyr::rename(sparra_end_fy = "sparra_risk_score") %>%
         slfhelper::get_chi(),
-      by = c("chi" = "upi_number"),
+      by = c("chi"),
       na_matches = "never",
       relationship = "many-to-one"
     )
@@ -39,7 +39,7 @@ join_sparra_hhg <- function(data, year) {
       read_file(get_hhg_path(year)) %>%
         dplyr::rename(hhg_start_fy = "hhg_score") %>%
         slfhelper::get_chi(),
-      by = c("chi" = "upi_number"),
+      by = c("chi"),
       na_matches = "never",
       relationship = "many-to-one"
     )
@@ -53,7 +53,7 @@ join_sparra_hhg <- function(data, year) {
       read_file(get_hhg_path(next_fy(year))) %>%
         dplyr::rename(hhg_end_fy = "hhg_score") %>%
         slfhelper::get_chi(),
-      by = c("chi" = "upi_number"),
+      by = c("chi"),
       na_matches = "never",
       relationship = "many-to-one"
     )

From 449155bb4586a3ecdaf427d60d365f50be11b9e8 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Wed, 29 May 2024 15:17:07 +0000
Subject: [PATCH 167/186] Update documentation

---
 man/create_episode_file.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd
index 12ae3a949..58e516b3f 100644
--- a/man/create_episode_file.Rd
+++ b/man/create_episode_file.Rd
@@ -16,7 +16,7 @@ create_episode_file(
     "cluster", "hbpraccode")),
   slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) \%>\%
     slfhelper::get_chi(),
-  sc_client = read_file(get_sc_client_lookup_path(year)),
+  sc_client = read_file(get_sc_client_lookup_path(year)) \%>\% slfhelper::get_anon_chi(),
   write_to_disk = TRUE,
   anon_chi_out = TRUE
 )

From d8e69627c92a0316a50756287a6d532b79a15680 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Thu, 30 May 2024 10:04:42 +0100
Subject: [PATCH 168/186] fix typo

---
 R/create_episode_file.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index aef49f55c..0bb804c7d 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -30,7 +30,7 @@ create_episode_file <- function(
       col_select = c("gpprac", "cluster", "hbpraccode")
     ),
     slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi(),
-    sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_anon_chi(),
+    sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(),
     write_to_disk = TRUE,
     anon_chi_out = TRUE) {
   processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble()))

From f37a45fd7818937b922d5362803d58e40664a9d8 Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Thu, 30 May 2024 09:06:27 +0000
Subject: [PATCH 169/186] Update documentation

---
 man/create_episode_file.Rd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd
index 58e516b3f..d6bd6d526 100644
--- a/man/create_episode_file.Rd
+++ b/man/create_episode_file.Rd
@@ -16,7 +16,7 @@ create_episode_file(
     "cluster", "hbpraccode")),
   slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) \%>\%
     slfhelper::get_chi(),
-  sc_client = read_file(get_sc_client_lookup_path(year)) \%>\% slfhelper::get_anon_chi(),
+  sc_client = read_file(get_sc_client_lookup_path(year)) \%>\% slfhelper::get_chi(),
   write_to_disk = TRUE,
   anon_chi_out = TRUE
 )

From 09fa8bfd8bf7fcc24b623ee1c442fc37b808bd39 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Thu, 30 May 2024 12:07:23 +0100
Subject: [PATCH 170/186] fix write_tests_xlsx path

---
 R/write_tests_xlsx.R | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R
index f9ae4a1f0..e0a763269 100644
--- a/R/write_tests_xlsx.R
+++ b/R/write_tests_xlsx.R
@@ -26,7 +26,9 @@ write_tests_xlsx <- function(comparison_data,
   # Set up the workbook ----
   tests_workbook_name <- dplyr::case_when(
     is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"),
+    !is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "{year}_ep_file_tests"),
     is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"),
+    !is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "{year}_indiv_file_tests"),
     is.null(year) & workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"),
     is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"),
     is.null(year) & workbook_name == "cross_year" ~ stringr::str_glue(latest_update(), "_cross_year_tests"),

From 49f6d8fc60ff7633e1b40e319a662e0ff27a6f05 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Fri, 31 May 2024 12:04:38 +0100
Subject: [PATCH 171/186] minor fix

---
 R/write_tests_xlsx.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R
index e0a763269..ec3cc5705 100644
--- a/R/write_tests_xlsx.R
+++ b/R/write_tests_xlsx.R
@@ -26,9 +26,9 @@ write_tests_xlsx <- function(comparison_data,
   # Set up the workbook ----
   tests_workbook_name <- dplyr::case_when(
     is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"),
-    !is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "{year}_ep_file_tests"),
+    !is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_{year}_ep_file_tests"),
     is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"),
-    !is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "{year}_indiv_file_tests"),
+    !is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_{year}_indiv_file_tests"),
     is.null(year) & workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"),
     is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"),
     is.null(year) & workbook_name == "cross_year" ~ stringr::str_glue(latest_update(), "_cross_year_tests"),

From 49ba5d4106eea34524bf3c7a845503d556ffa7d5 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Fri, 7 Jun 2024 15:52:54 +0100
Subject: [PATCH 172/186] fix R package build warnings

---
 R/add_activity_after_death_flag.R          | 14 +++--
 R/get_slf_lookup_paths.R                   |  3 +-
 R/process_extract_consultations.R          | 21 ++++---
 R/process_extract_homelessness.R           |  8 +--
 R/process_tests_cross_year.R               | 10 ++--
 R/process_tests_sc_sandpit.R               | 69 +++++++++++-----------
 R/read_sc_all_home_care.R                  |  2 +-
 inst/WORDLIST                              |  4 ++
 man/get_all_slf_deaths_lookup_path.Rd      |  4 +-
 man/process_deaths_lookup.Rd               |  8 +--
 man/process_tests_sc_sandpit.Rd            |  2 +
 tests/testthat/_snaps/get_dd_path.md       |  4 +-
 tests/testthat/test-get_dd_path.R          |  8 +--
 tests/testthat/test-get_it_extract_paths.R | 38 ++++++------
 14 files changed, 106 insertions(+), 89 deletions(-)

diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
index f68c7c9f4..e517f243b 100644
--- a/R/add_activity_after_death_flag.R
+++ b/R/add_activity_after_death_flag.R
@@ -13,6 +13,10 @@ add_activity_after_death_flag <- function(
     year,
     deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>%
       slfhelper::get_chi()) {
+
+  # Silence the "no visible binding for global variable '.'" check warning
+  . <- NULL
+
   death_joined <- data %>%
     dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$death_date, .data$deceased) %>%
     dplyr::filter(!is.na(.data$chi) | .data$chi != "") %>%
@@ -100,11 +104,11 @@ add_activity_after_death_flag <- function(
 
 #' Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts
 #'
-#' #' @description The BOXI NRS deaths extract lookup should be created after the extract files for all years have been processed,
+#' @description The BOXI NRS deaths extract lookup should be created after the extract files for all years have been processed,
 # but before an episode file has been produced. Therefore, all BOXI NRS years should be run before running episode files.
 #'
-#' @param file_path Path to the BOXI NRS file for each financial year - may not use this
-#' @param year The year to process, in FY format - may not use this
+#' @param ... additional arguments passed to [get_slf_deaths_lookup_path()]
+#' @param update the update month (defaults to use [latest_update()])
 #'
 #' @param write_to_disk (optional) Should the data be written to disk default is
 #' `TRUE` i.e. write the data to disk.
@@ -127,11 +131,11 @@ process_deaths_lookup <- function(update = latest_update(),
     rbind(read_file(get_slf_deaths_lookup_path("2122"))) %>%
     rbind(read_file(get_slf_deaths_lookup_path("2223"))) %>%
     rbind(read_file(get_slf_deaths_lookup_path("2324"))) %>%
-    # Can this be automated to pick up files starting with name "get_slf_deaths_lookup_path"?
+    # TODO: make this automated to pick up files starting with name "get_slf_deaths_lookup_path"
     slfhelper::get_chi() %>%
     # Remove rows with missing or blank CHI number - could also use na.omit?
     # na.omit(all_boxi_deaths)
-    dplyr::filter(!is.na(.data$chi) | chi != "")
+    dplyr::filter(!is.na(.data$chi) | .data$chi != "")
 
   # Check all CHI numbers are valid
   chi_check <- all_boxi_deaths %>%
diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R
index c88518094..390a27a5a 100644
--- a/R/get_slf_lookup_paths.R
+++ b/R/get_slf_lookup_paths.R
@@ -74,9 +74,8 @@ get_slf_deaths_lookup_path <- function(year, ...) {
 #'
 #' @description Get the full path to the BOXI NRS Deaths lookup file for all financial years
 #'
-#' @inheritParams get_boxi_extract_path
 #' @param ... additional arguments passed to [get_file_path()]
-#' @param year financial year e.g. "1920"
+#' @param update the update month (defaults to use [latest_update()])
 #'
 #' @export
 #' @family slf lookup file path
diff --git a/R/process_extract_consultations.R b/R/process_extract_consultations.R
index 2aafdbdca..d4ebdb713 100644
--- a/R/process_extract_consultations.R
+++ b/R/process_extract_consultations.R
@@ -10,6 +10,13 @@
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @family process extracts
 process_extract_ooh_consultations <- function(data, year) {
+
+  # Silence the "no visible binding for global variable" check warning when using data.table
+  distinct_check <- consultation_type <- location <-
+    record_keydate1 <- record_keydate2 <- chi <-
+    ooh_case_id <- episode_counter <- NULL
+
+
   # Only run for a single year
   stopifnot(length(year) == 1L)
 
@@ -84,21 +91,21 @@ process_extract_ooh_consultations <- function(data, year) {
   consultations_clean <- consultations_covid %>%
     # Sort in reverse order so we can use coalesce which takes the first non-missing value
     dplyr::arrange(
-      chi,
-      ooh_case_id,
-      record_keydate1,
-      record_keydate2
+      .data$chi,
+      .data$ooh_case_id,
+      .data$record_keydate1,
+      .data$record_keydate2
     ) %>%
     data.table::as.data.table()
 
   consultations_clean[, distinct_check := (
     record_keydate1 > data.table::shift(record_keydate2, fill = NA, type = "lag")
   ),
-  by = .(chi, ooh_case_id, consultation_type, location)
+  by = list(chi, ooh_case_id, consultation_type, location)
   ]
   consultations_clean[, distinct_check := tidyr::replace_na(distinct_check, TRUE)]
   consultations_clean[, episode_counter := cumsum(distinct_check),
-    by = .(chi, ooh_case_id, consultation_type, location)
+    by = list(chi, ooh_case_id, consultation_type, location)
   ]
   consultations_clean[,
     c(
@@ -108,7 +115,7 @@ process_extract_ooh_consultations <- function(data, year) {
       min(record_keydate1),
       max(record_keydate2)
     ),
-    by = .(
+    by = list(
       chi,
       ooh_case_id,
       consultation_type,
diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index 167ad6b9e..49bf3935e 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -148,11 +148,11 @@ process_extract_homelessness <- function(
   hl1_data <- data %>%
     dplyr::left_join(
       completeness_data %>%
-        dplyr::select(sending_local_authority_name, pct_complete_all),
+        dplyr::select(.data$sending_local_authority_name, .data$pct_complete_all),
       by = dplyr::join_by("sending_local_authority_name")
     ) %>%
-    dplyr::rename(hl1_completeness = pct_complete_all) %>%
-    dplyr::mutate(hl1_completeness = round(hl1_completeness, 1))
+    dplyr::rename(hl1_completeness = "pct_complete_all") %>%
+    dplyr::mutate(hl1_completeness = round(.data$hl1_completeness, 1))
 
   # TODO - Include person_id (from client_id)
   final_data <- hl1_data %>%
@@ -171,7 +171,7 @@ process_extract_homelessness <- function(
       hl1_sending_lca = "sending_local_authority_code_9",
       hl1_property_type = "property_type_code",
       "hl1_reason_ftm",
-      hl1_completeness
+      "hl1_completeness"
     ) %>%
     slfhelper::get_anon_chi()
 
diff --git a/R/process_tests_cross_year.R b/R/process_tests_cross_year.R
index af1538b19..7a726cf26 100644
--- a/R/process_tests_cross_year.R
+++ b/R/process_tests_cross_year.R
@@ -21,7 +21,7 @@ process_tests_cross_year <- function(year) {
       n_records = 1L
     ) %>%
     dplyr::summarise(
-      n = sum(n_records)
+      n = sum(.data$n_records)
     ) %>%
     dplyr::mutate(
       fy_qtr = "total"
@@ -29,21 +29,21 @@ process_tests_cross_year <- function(year) {
 
   qtr_test <- ep_file %>%
     dplyr::mutate(
-      fy_qtr = dplyr::if_else(recid != "PIS", lubridate::quarter(record_keydate1, fiscal_start = 4), NA)
+      fy_qtr = dplyr::if_else(.data$recid != "PIS", lubridate::quarter(.data$record_keydate1, fiscal_start = 4), NA)
     ) %>%
     dplyr::group_by(.data$year, .data$recid, .data$fy_qtr) %>%
     dplyr::mutate(
       n_records = 1L
     ) %>%
     dplyr::summarise(
-      n = sum(n_records)
+      n = sum(.data$n_records)
     ) %>%
     dplyr::mutate(
-      fy_qtr = as.character(fy_qtr)
+      fy_qtr = as.character(.data$fy_qtr)
     )
 
   join_tests <- dplyr::bind_rows(total_test, qtr_test) %>%
-    dplyr::arrange(year, recid, fy_qtr)
+    dplyr::arrange(.data$year, .data$recid, .data$fy_qtr)
 
   pivot_tests <- join_tests %>%
     tidyr::pivot_wider(
diff --git a/R/process_tests_sc_sandpit.R b/R/process_tests_sc_sandpit.R
index 7540d8686..9d1b35f8a 100644
--- a/R/process_tests_sc_sandpit.R
+++ b/R/process_tests_sc_sandpit.R
@@ -1,6 +1,7 @@
 #' Process tests for the social care sandpit extracts
 #'
 #' @param type Name of sandpit extract.
+#' @param year Year of extract
 #'
 #' @return a [tibble][tibble::tibble-package] containing a test comparison.
 #' @export
@@ -47,7 +48,7 @@ produce_sc_sandpit_tests <- function(data, type = c("demographics", "client", "a
         n_missing_postcode = is_missing(.data$chi_postcode),
         n_missing_gender = is_missing(.data$chi_gender_code)
       ) %>%
-      dplyr::select(n_missing_chi:n_missing_gender) %>%
+      dplyr::select(.data$n_missing_chi:.data$n_missing_gender) %>%
       calculate_measures(measure = "sum")
 
     latest_flag_tests <- data %>%
@@ -78,44 +79,44 @@ produce_sc_sandpit_tests <- function(data, type = c("demographics", "client", "a
       dplyr::group_by(.data$social_care_id, .data$sending_location) %>%
       dplyr::distinct(.data$chi_upi, .keep_all = TRUE) %>%
       dplyr::mutate(distinct_chi_count = dplyr::n_distinct(.data$chi_upi)) %>%
-      dplyr::filter(distinct_chi_count > 1) %>%
+      dplyr::filter(.data$distinct_chi_count > 1) %>%
       dplyr::distinct(.data$social_care_id, .data$sending_location, .keep_all = TRUE) %>%
       dplyr::mutate(sc_id_multi_chi = 1) %>%
       create_sending_location_test_flags(.data$sending_location) %>%
       dplyr::ungroup() %>%
       dplyr::rename(
-        sc_id_multi_chi_Aberdeen_City = Aberdeen_City,
-        sc_id_multi_chi_Aberdeenshire = Aberdeenshire,
-        sc_id_multi_chi_Angus = Angus,
-        sc_id_multi_chi_Argyll_and_Bute = Argyll_and_Bute,
-        sc_id_multi_chi_City_of_Edinburgh = City_of_Edinburgh,
-        sc_id_multi_chi_Clackmannanshire = Clackmannanshire,
-        sc_id_multi_chi_Dumfries_and_Galloway = Dumfries_and_Galloway,
-        sc_id_multi_chi_Dundee_City = Dundee_City,
-        sc_id_multi_chi_East_Ayrshire = East_Ayrshire,
-        sc_id_multi_chi_East_Dunbartonshire = East_Dunbartonshire,
-        sc_id_multi_chi_East_Lothian = East_Lothian,
-        sc_id_multi_chi_East_Renfrewshire = East_Renfrewshire,
-        sc_id_multi_chi_Falkirk = Falkirk,
-        sc_id_multi_chi_Fife = Fife,
-        sc_id_multi_chi_Glasgow_City = Glasgow_City,
-        sc_id_multi_chi_Highland = Highland,
-        sc_id_multi_chi_Inverclyde = Inverclyde,
-        sc_id_multi_chi_Midlothian = Midlothian,
-        sc_id_multi_chi_Moray = Moray,
-        sc_id_multi_chi_Na_h_Eileanan_Siar = Na_h_Eileanan_Siar,
-        sc_id_multi_chi_North_Ayrshire = North_Ayrshire,
-        sc_id_multi_chi_North_Lanarkshire = North_Lanarkshire,
-        sc_id_multi_chi_Orkney_Islands = Orkney_Islands,
-        sc_id_multi_chi_Perth_and_Kinross = Perth_and_Kinross,
-        sc_id_multi_chi_Renfrewshire = Renfrewshire,
-        sc_id_multi_chi_Scottish_Borders = Scottish_Borders,
-        sc_id_multi_chi_Shetland_Islands = Shetland_Islands,
-        sc_id_multi_chi_South_Ayrshire = South_Ayrshire,
-        sc_id_multi_chi_South_Lanarkshire = South_Lanarkshire,
-        sc_id_multi_chi_Stirling = Stirling,
-        sc_id_multi_chi_West_Dunbartonshire = West_Dunbartonshire,
-        sc_id_multi_chi_West_Lothian = West_Lothian
+        sc_id_multi_chi_Aberdeen_City = "Aberdeen_City",
+        sc_id_multi_chi_Aberdeenshire = "Aberdeenshire",
+        sc_id_multi_chi_Angus = "Angus",
+        sc_id_multi_chi_Argyll_and_Bute = "Argyll_and_Bute",
+        sc_id_multi_chi_City_of_Edinburgh = "City_of_Edinburgh",
+        sc_id_multi_chi_Clackmannanshire = "Clackmannanshire",
+        sc_id_multi_chi_Dumfries_and_Galloway = "Dumfries_and_Galloway",
+        sc_id_multi_chi_Dundee_City = "Dundee_City",
+        sc_id_multi_chi_East_Ayrshire = "East_Ayrshire",
+        sc_id_multi_chi_East_Dunbartonshire = "East_Dunbartonshire",
+        sc_id_multi_chi_East_Lothian = "East_Lothian",
+        sc_id_multi_chi_East_Renfrewshire = "East_Renfrewshire",
+        sc_id_multi_chi_Falkirk = "Falkirk",
+        sc_id_multi_chi_Fife = "Fife",
+        sc_id_multi_chi_Glasgow_City = "Glasgow_City",
+        sc_id_multi_chi_Highland = "Highland",
+        sc_id_multi_chi_Inverclyde = "Inverclyde",
+        sc_id_multi_chi_Midlothian = "Midlothian",
+        sc_id_multi_chi_Moray = "Moray",
+        sc_id_multi_chi_Na_h_Eileanan_Siar = "Na_h_Eileanan_Siar",
+        sc_id_multi_chi_North_Ayrshire = "North_Ayrshire",
+        sc_id_multi_chi_North_Lanarkshire = "North_Lanarkshire",
+        sc_id_multi_chi_Orkney_Islands = "Orkney_Islands",
+        sc_id_multi_chi_Perth_and_Kinross = "Perth_and_Kinross",
+        sc_id_multi_chi_Renfrewshire = "Renfrewshire",
+        sc_id_multi_chi_Scottish_Borders = "Scottish_Borders",
+        sc_id_multi_chi_Shetland_Islands = "Shetland_Islands",
+        sc_id_multi_chi_South_Ayrshire = "South_Ayrshire",
+        sc_id_multi_chi_South_Lanarkshire = "South_Lanarkshire",
+        sc_id_multi_chi_Stirling = "Stirling",
+        sc_id_multi_chi_West_Dunbartonshire = "West_Dunbartonshire",
+        sc_id_multi_chi_West_Lothian = "West_Lothian"
       ) %>%
       dplyr::select(.data$sc_id_multi_chi, .data$sc_id_multi_chi_Aberdeen_City:.data$sc_id_multi_chi_West_Lothian) %>%
       calculate_measures(measure = "sum")
diff --git a/R/read_sc_all_home_care.R b/R/read_sc_all_home_care.R
index 2f4d892e3..2349cf1cd 100644
--- a/R/read_sc_all_home_care.R
+++ b/R/read_sc_all_home_care.R
@@ -52,7 +52,7 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn =
     home_care_data %>%
       write_file(get_sandpit_extract_path(type = "hc"))
 
-    home_care_date %>%
+    home_care_data %>%
       process_tests_sc_sandpit(type = "hc")
   } else {
     home_care_data <- home_care_data
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 00f129e64..7edd722c5 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -5,6 +5,7 @@ Beddays
 CH
 CH's
 CHD
+CHIs
 CIJ
 CMD
 CMH
@@ -39,6 +40,7 @@ MH
 MLS
 MMM
 MMMYY
+NA's
 NRS
 NSU
 OoH
@@ -115,7 +117,9 @@ smrtype
 sourcedev
 specialties
 specialty
+st
 sysmis
 telecare
 tibble
+uk
 ️
diff --git a/man/get_all_slf_deaths_lookup_path.Rd b/man/get_all_slf_deaths_lookup_path.Rd
index dd5291c2d..2f06b64d3 100644
--- a/man/get_all_slf_deaths_lookup_path.Rd
+++ b/man/get_all_slf_deaths_lookup_path.Rd
@@ -7,9 +7,9 @@
 get_all_slf_deaths_lookup_path(update = latest_update(), ...)
 }
 \arguments{
-\item{...}{additional arguments passed to \code{\link[=get_file_path]{get_file_path()}}}
+\item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})}
 
-\item{year}{financial year e.g. "1920"}
+\item{...}{additional arguments passed to \code{\link[=get_file_path]{get_file_path()}}}
 }
 \description{
 Get the full path to the BOXI NRS Deaths lookup file for all financial years
diff --git a/man/process_deaths_lookup.Rd b/man/process_deaths_lookup.Rd
index 8e9ec1199..e897e49a2 100644
--- a/man/process_deaths_lookup.Rd
+++ b/man/process_deaths_lookup.Rd
@@ -7,16 +7,16 @@
 process_deaths_lookup(update = latest_update(), write_to_disk = TRUE, ...)
 }
 \arguments{
+\item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})}
+
 \item{write_to_disk}{(optional) Should the data be written to disk default is
 \code{TRUE} i.e. write the data to disk.}
 
-\item{file_path}{Path to the BOXI NRS file for each financial year - may not use this}
-
-\item{year}{The year to process, in FY format - may not use this}
+\item{...}{additional arguments passed to \code{\link[=get_slf_deaths_lookup_path]{get_slf_deaths_lookup_path()}}}
 }
 \value{
 the final data as a \link[tibble:tibble-package]{tibble}.
 }
 \description{
-#' @description The BOXI NRS deaths extract lookup should be created after the extract files for all years have been processed,
+The BOXI NRS deaths extract lookup should be created after the extract files for all years have been processed,
 }
diff --git a/man/process_tests_sc_sandpit.Rd b/man/process_tests_sc_sandpit.Rd
index d3c1f5984..24fb7433d 100644
--- a/man/process_tests_sc_sandpit.Rd
+++ b/man/process_tests_sc_sandpit.Rd
@@ -11,6 +11,8 @@ process_tests_sc_sandpit(
 }
 \arguments{
 \item{type}{Name of sandpit extract.}
+
+\item{year}{Year of extract}
 }
 \value{
 a \link[tibble:tibble-package]{tibble} containing a test comparison.
diff --git a/tests/testthat/_snaps/get_dd_path.md b/tests/testthat/_snaps/get_dd_path.md
index 76de0fb7b..4f7740e79 100644
--- a/tests/testthat/_snaps/get_dd_path.md
+++ b/tests/testthat/_snaps/get_dd_path.md
@@ -3,11 +3,11 @@
     Code
       dplyr::glimpse(latest_dd_file, width = 0)
     Output
-      Rows: 198,751
+      Rows: 206,029
       Columns: 14
       $ cennum                 <dbl> ~
       $ MONTHFLAG              <chr> ~
-      $ chi                    <chr> ~
+      $ anon_chi               <chr> ~
       $ OriginalAdmissionDate  <date> ~
       $ RDD                    <date> ~
       $ Delay_End_Date         <date> ~
diff --git a/tests/testthat/test-get_dd_path.R b/tests/testthat/test-get_dd_path.R
index 0ca999f23..54727be69 100644
--- a/tests/testthat/test-get_dd_path.R
+++ b/tests/testthat/test-get_dd_path.R
@@ -15,10 +15,10 @@ test_that("Delayed discharges file is as expected", {
   expect_gt(n_rows, 150000)
 
   # Expect at least 98% of CHIs to be valid
-  expect_gt(
-    table(phsmethods::chi_check(latest_dd_file$chi))["Valid CHI"],
-    0.98 * n_rows
-  )
+  expect_gt(table(
+    phsmethods::chi_check(latest_dd_file %>% slfhelper::get_chi() %>% dplyr::pull(chi))
+  )["Valid CHI"],
+  0.98 * n_rows)
 
   expect_snapshot(dplyr::glimpse(latest_dd_file, width = 0))
 })
diff --git a/tests/testthat/test-get_it_extract_paths.R b/tests/testthat/test-get_it_extract_paths.R
index 52f9e4181..f8e9a81e2 100644
--- a/tests/testthat/test-get_it_extract_paths.R
+++ b/tests/testthat/test-get_it_extract_paths.R
@@ -33,25 +33,25 @@ test_that("IT extract file paths work", {
     get_it_prescribing_path("1111")
   )
 
-  # Older IT extracts
-  expect_s3_class(
-    get_it_prescribing_path("1213",
-      it_reference = "0182748"
-    ),
-    "fs_path"
-  )
-  expect_s3_class(
-    get_it_prescribing_path("1314",
-      it_reference = "0182748"
-    ),
-    "fs_path"
-  )
-  expect_s3_class(
-    get_it_prescribing_path("1415",
-      it_reference = "0182748"
-    ),
-    "fs_path"
-  )
+  # # Older IT extracts
+  # expect_s3_class(
+  #   get_it_prescribing_path("1213",
+  #     it_reference = "0182748"
+  #   ),
+  #   "fs_path"
+  # )
+  # expect_s3_class(
+  #   get_it_prescribing_path("1314",
+  #     it_reference = "0182748"
+  #   ),
+  #   "fs_path"
+  # )
+  # expect_s3_class(
+  #   get_it_prescribing_path("1415",
+  #     it_reference = "0182748"
+  #   ),
+  #   "fs_path"
+  # )
   expect_error(
     get_it_prescribing_path("1415",
       it_reference = "0000000"

From 127bf4a5a95caf886328ca5df79f5c722f461a6c Mon Sep 17 00:00:00 2001
From: lizihao-anu <lizihao-anu@users.noreply.github.com>
Date: Fri, 7 Jun 2024 15:00:37 +0000
Subject: [PATCH 173/186] Style code

---
 R/add_activity_after_death_flag.R |  1 -
 R/process_extract_consultations.R |  1 -
 tests/testthat/test-get_dd_path.R | 10 ++++++----
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
index e517f243b..0842b47ec 100644
--- a/R/add_activity_after_death_flag.R
+++ b/R/add_activity_after_death_flag.R
@@ -13,7 +13,6 @@ add_activity_after_death_flag <- function(
     year,
     deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>%
       slfhelper::get_chi()) {
-
   # to skip warnings no visible binding for global variable ‘.’
   . <- NULL
 
diff --git a/R/process_extract_consultations.R b/R/process_extract_consultations.R
index d4ebdb713..4c4a18c0e 100644
--- a/R/process_extract_consultations.R
+++ b/R/process_extract_consultations.R
@@ -10,7 +10,6 @@
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @family process extracts
 process_extract_ooh_consultations <- function(data, year) {
-
   # to skip warning no visible bingding for global variable when using data.table
   distinct_check <- consultation_type <- location <-
     record_keydate1 <- record_keydate2 <- chi <-
diff --git a/tests/testthat/test-get_dd_path.R b/tests/testthat/test-get_dd_path.R
index 54727be69..62c878eb1 100644
--- a/tests/testthat/test-get_dd_path.R
+++ b/tests/testthat/test-get_dd_path.R
@@ -15,10 +15,12 @@ test_that("Delayed discharges file is as expected", {
   expect_gt(n_rows, 150000)
 
   # Expect at least 98% of CHIs to be valid
-  expect_gt(table(
-    phsmethods::chi_check(latest_dd_file %>% slfhelper::get_chi() %>% dplyr::pull(chi))
-  )["Valid CHI"],
-  0.98 * n_rows)
+  expect_gt(
+    table(
+      phsmethods::chi_check(latest_dd_file %>% slfhelper::get_chi() %>% dplyr::pull(chi))
+    )["Valid CHI"],
+    0.98 * n_rows
+  )
 
   expect_snapshot(dplyr::glimpse(latest_dd_file, width = 0))
 })

From c24339d060b689b28dbca743d00bdd66bbd6b332 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Fri, 7 Jun 2024 16:15:27 +0100
Subject: [PATCH 174/186] aligning

---
 R/process_extract_consultations.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/process_extract_consultations.R b/R/process_extract_consultations.R
index 4c4a18c0e..b342a5af8 100644
--- a/R/process_extract_consultations.R
+++ b/R/process_extract_consultations.R
@@ -10,7 +10,7 @@
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @family process extracts
 process_extract_ooh_consultations <- function(data, year) {
-  # to skip warning no visible bingding for global variable when using data.table
+  # to skip warning no visible binding for global variable when using data.table
   distinct_check <- consultation_type <- location <-
     record_keydate1 <- record_keydate2 <- chi <-
     ooh_case_id <- episode_counter <- NULL

From 85e4c204b318a6f4dc74b00c3595b8f3a6c82632 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Fri, 7 Jun 2024 15:35:58 +0000
Subject: [PATCH 175/186] [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/9419296266/attempts/1
Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/962#issuecomment-2155082190

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
---
 .github/actions/spelling/expect.txt | 44 +++++++++++++++++++++++++++++
 1 file changed, 44 insertions(+)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 2d546a9db..207ab31c9 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -1,9 +1,13 @@
+ACa
 acc
 Accom
+Admissio
 admloc
 admtype
+ADPE
 adpe
 adtf
+anu
 arrivalmode
 arth
 atlassian
@@ -21,6 +25,7 @@ callr
 Canx
 carehome
 careinspectorate
+casewhen
 categorises
 cattend
 ccyy
@@ -31,15 +36,19 @@ chd
 chp
 chpstart
 cij
+Classificat
 classificat
 cls
 cmh
+CNWs
 cnws
 codecov
 comhairle
 commhosp
+commiting
 congen
 copd
+costinc
 costincdnas
 costmonthnum
 costsfy
@@ -59,6 +68,7 @@ datetime
 daycase
 dbconnect
 dbplyr
+deathdate
 deathdiag
 demog
 devhints
@@ -67,13 +77,16 @@ diagramme
 disch
 dischloc
 dischto
+discondition
 disdest
+DMe
 dminutes
 dna
 docx
 dontrun
 downup
 dplyr
+Drugsand
 dsn
 dtplyr
 dvprod
@@ -81,8 +94,10 @@ eee
 efa
 endomet
 envir
+eol
 fcase
 feb
+Fhelper
 fifelse
 fileext
 finalise
@@ -94,9 +109,12 @@ fyyear
 geogs
 ggplot
 github
+GLS
 gls
+glsrecord
 gms
 gpoo
+gpooh
 gpprac
 gss
 hbnames
@@ -106,6 +124,7 @@ hbrescode
 hbtreatcode
 hbtreatname
 hci
+HCP
 hcp
 hefailure
 hhg
@@ -114,11 +133,15 @@ hms
 homecare
 homev
 hri
+HSCDIIP
 hscdiip
 hscp
 hscpnames
 htmlwidgets
+IDPC
 idpc
+incdn
+incdnas
 infyyear
 intzone
 ipdc
@@ -137,26 +160,32 @@ keytimex
 kis
 lazydt
 lcap
+LCHO
 lcho
 lgl
 lintr
+lizihao
 los
 ltc
 ltd
 lubridate
 magrittr
 markdownguide
+Matern
 matern
 mcbride
 mcmahon
 MIU
+MMMYY
 mmmyy
+momths
 monthflag
 mpat
 multiday
 multisession
 multistaff
 na
+NAs
 newcons
 nhs
 nhshosp
@@ -169,6 +198,7 @@ openxl
 openxlsx
 orcid
 outfile
+overcounting
 pandoc
 parkinsons
 patflow
@@ -177,8 +207,11 @@ pcec
 phs
 phsmethods
 phsopendata
+Physicaland
 pkgdown
 placeinc
+plateform
+PLICS
 plics
 pms
 popluation
@@ -193,6 +226,7 @@ rankdir
 rbuildignore
 rcmdcheck
 rdd
+rdname
 rds
 reabl
 reablement
@@ -204,7 +238,9 @@ recid
 refailure
 reflectoring
 refsource
+reftype
 renviron
+returnsthe
 rlang
 rmarkdown
 roxygen
@@ -214,9 +250,11 @@ rspm
 rstudio
 rstudioapi
 rtype
+scid
 sco
 scoial
 scotp
+SCTASK
 SDcols
 seealso
 selfharm
@@ -235,6 +273,7 @@ smrtype
 sourcedev
 sparra
 spd
+SPSS
 spss
 stadm
 starwars
@@ -243,6 +282,7 @@ stopwords
 stringdist
 stringr
 submis
+sysmis
 tadm
 tarchetypes
 tbl
@@ -252,13 +292,17 @@ thom
 tibble
 tidyr
 tidyselect
+tidyverse
 TJDX
 todo
+totalnodncontacts
 uid
 ungroup
 unicode
 updown
 upi
+URx
+visualisations
 vline
 wdbf
 WORDLIST

From d7ed238694f9b50c22a8e395a65877d49ceca95b Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 11 Jun 2024 09:35:07 +0100
Subject: [PATCH 176/186] remove version 3.6; arrow package requires 4.0 or
 newer

---
 .github/workflows/R-CMD-check.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index babd1de81..298d28e3c 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -17,7 +17,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        r_version: ['3.6', '4.0.2', '4.1.2', 'release']
+        r_version: ['4.0.2', '4.1.2', 'release']
 
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}

From 1df8bc419cb62c3c30c1af6a4cc7963f29c77fd2 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 11 Jun 2024 10:32:59 +0100
Subject: [PATCH 177/186] spelling checking fix trial

---
 .github/workflows/spelling.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/spelling.yml b/.github/workflows/spelling.yml
index d78d79255..429d4a85b 100644
--- a/.github/workflows/spelling.yml
+++ b/.github/workflows/spelling.yml
@@ -78,7 +78,7 @@ jobs:
         use_magic_file: 1
         experimental_apply_changes_via_bot: 1
         use_sarif: 0
-        only_check_changed_files: 1
+        only_check_changed_files: 0
         extra_dictionary_limit: 10
         extra_dictionaries:
            cspell:r/src/r.txt

From dbf33e801bda463dd949784d9ddb6eb35c2ab2e8 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Tue, 11 Jun 2024 10:45:31 +0100
Subject: [PATCH 178/186] Revert "spelling checking fix trial"

This reverts commit 1df8bc419cb62c3c30c1af6a4cc7963f29c77fd2.
---
 .github/workflows/spelling.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/spelling.yml b/.github/workflows/spelling.yml
index 429d4a85b..d78d79255 100644
--- a/.github/workflows/spelling.yml
+++ b/.github/workflows/spelling.yml
@@ -78,7 +78,7 @@ jobs:
         use_magic_file: 1
         experimental_apply_changes_via_bot: 1
         use_sarif: 0
-        only_check_changed_files: 0
+        only_check_changed_files: 1
         extra_dictionary_limit: 10
         extra_dictionaries:
            cspell:r/src/r.txt

From a35dc658b88f325b8f8d698350738ccc3db6ca86 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 11 Jun 2024 10:49:24 +0100
Subject: [PATCH 179/186] new github spell check workflows

---
 .github/workflows/spelling.yml | 59 ++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/spelling.yml b/.github/workflows/spelling.yml
index d78d79255..38dece487 100644
--- a/.github/workflows/spelling.yml
+++ b/.github/workflows/spelling.yml
@@ -33,10 +33,35 @@ name: Check Spelling
 #
 #   For background, see: https://github.com/check-spelling/check-spelling/wiki/Feature:-Update-with-deploy-key
 
+# Sarif reporting
+#
+# Access to Sarif reports is generally restricted (by GitHub) to members of the repository.
+#
+# Requires enabling `security-events: write`
+# and configuring the action with `use_sarif: 1`
+#
+#   For information on the feature, see: https://github.com/check-spelling/check-spelling/wiki/Feature:-Sarif-output
+
+# Minimal workflow structure:
+#
+# on:
+#   push:
+#     ...
+#   pull_request_target:
+#     ...
+# jobs:
+#   # you only want the spelling job, all others should be omitted
+#   spelling:
+#     # remove `security-events: write` and `use_sarif: 1`
+#     # remove `experimental_apply_changes_via_bot: 1`
+#     ... otherwise adjust the `with:` as you wish
+
 on:
   push:
     branches:
     - "**"
+    tags-ignore:
+    - "**"
   pull_request_target:
     branches:
     - "**"
@@ -44,7 +69,6 @@ on:
     - 'opened'
     - 'reopened'
     - 'synchronize'
-    - 'ready_for_review'
   issue_comment:
     types:
     - 'created'
@@ -60,7 +84,7 @@ jobs:
     outputs:
       followup: ${{ steps.spelling.outputs.followup }}
     runs-on: ubuntu-latest
-    if: "contains(github.event_name, 'pull_request') || github.event_name == 'push'"
+    if: ${{ contains(github.event_name, 'pull_request') || github.event_name == 'push' }}
     concurrency:
       group: spelling-${{ github.event.pull_request.number || github.ref }}
       # note: If you use only_check_changed_files, you do not want cancel-in-progress
@@ -70,21 +94,35 @@ jobs:
       id: spelling
       uses: check-spelling/check-spelling@main
       with:
-        suppress_push_for_open_pull_request: 1
+        suppress_push_for_open_pull_request: ${{ github.actor != 'dependabot[bot]' && 1 }}
         checkout: true
         check_file_names: 1
         spell_check_this: check-spelling/spell-check-this@prerelease
         post_comment: 0
         use_magic_file: 1
+        report-timing: 1
+        warnings: bad-regex,binary-file,deprecated-feature,large-file,limited-references,no-newline-at-eof,noisy-file,non-alpha-in-dictionary,token-is-substring,unexpected-line-ending,whitespace-in-dictionary,minified-file,unsupported-configuration,no-files-to-check
         experimental_apply_changes_via_bot: 1
-        use_sarif: 0
-        only_check_changed_files: 1
-        extra_dictionary_limit: 10
+        use_sarif: ${{ (!github.event.pull_request || (github.event.pull_request.head.repo.full_name == github.repository)) && 1 }}
+        extra_dictionary_limit: 20
         extra_dictionaries:
-           cspell:r/src/r.txt
-           cspell:cpp/src/cpp.txt
-           cspell:software-terms/src/software-terms.txt
-        check_commit_messages: commits title description
+          cspell:software-terms/dict/softwareTerms.txt
+
+  comment-push:
+    name: Report (Push)
+    # If your workflow isn't running on push, you can remove this job
+    runs-on: ubuntu-latest
+    needs: spelling
+    permissions:
+      contents: write
+    if: (success() || failure()) && needs.spelling.outputs.followup && github.event_name == 'push'
+    steps:
+    - name: comment
+      uses: check-spelling/check-spelling@main
+      with:
+        checkout: true
+        spell_check_this: check-spelling/spell-check-this@prerelease
+        task: ${{ needs.spelling.outputs.followup }}
 
   comment-pr:
     name: Report (PR)
@@ -92,6 +130,7 @@ jobs:
     runs-on: ubuntu-latest
     needs: spelling
     permissions:
+      contents: read
       pull-requests: write
     if: (success() || failure()) && needs.spelling.outputs.followup && contains(github.event_name, 'pull_request')
     steps:

From c3e5d4d8fd5829277af28a1078e1f488275acca7 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Tue, 11 Jun 2024 10:52:37 +0100
Subject: [PATCH 180/186] Revert "new github spell check workflows"

This reverts commit a35dc658b88f325b8f8d698350738ccc3db6ca86.
---
 .github/workflows/spelling.yml | 59 ++++++----------------------------
 1 file changed, 10 insertions(+), 49 deletions(-)

diff --git a/.github/workflows/spelling.yml b/.github/workflows/spelling.yml
index 38dece487..d78d79255 100644
--- a/.github/workflows/spelling.yml
+++ b/.github/workflows/spelling.yml
@@ -33,35 +33,10 @@ name: Check Spelling
 #
 #   For background, see: https://github.com/check-spelling/check-spelling/wiki/Feature:-Update-with-deploy-key
 
-# Sarif reporting
-#
-# Access to Sarif reports is generally restricted (by GitHub) to members of the repository.
-#
-# Requires enabling `security-events: write`
-# and configuring the action with `use_sarif: 1`
-#
-#   For information on the feature, see: https://github.com/check-spelling/check-spelling/wiki/Feature:-Sarif-output
-
-# Minimal workflow structure:
-#
-# on:
-#   push:
-#     ...
-#   pull_request_target:
-#     ...
-# jobs:
-#   # you only want the spelling job, all others should be omitted
-#   spelling:
-#     # remove `security-events: write` and `use_sarif: 1`
-#     # remove `experimental_apply_changes_via_bot: 1`
-#     ... otherwise adjust the `with:` as you wish
-
 on:
   push:
     branches:
     - "**"
-    tags-ignore:
-    - "**"
   pull_request_target:
     branches:
     - "**"
@@ -69,6 +44,7 @@ on:
     - 'opened'
     - 'reopened'
     - 'synchronize'
+    - 'ready_for_review'
   issue_comment:
     types:
     - 'created'
@@ -84,7 +60,7 @@ jobs:
     outputs:
       followup: ${{ steps.spelling.outputs.followup }}
     runs-on: ubuntu-latest
-    if: ${{ contains(github.event_name, 'pull_request') || github.event_name == 'push' }}
+    if: "contains(github.event_name, 'pull_request') || github.event_name == 'push'"
     concurrency:
       group: spelling-${{ github.event.pull_request.number || github.ref }}
       # note: If you use only_check_changed_files, you do not want cancel-in-progress
@@ -94,35 +70,21 @@ jobs:
       id: spelling
       uses: check-spelling/check-spelling@main
       with:
-        suppress_push_for_open_pull_request: ${{ github.actor != 'dependabot[bot]' && 1 }}
+        suppress_push_for_open_pull_request: 1
         checkout: true
         check_file_names: 1
         spell_check_this: check-spelling/spell-check-this@prerelease
         post_comment: 0
         use_magic_file: 1
-        report-timing: 1
-        warnings: bad-regex,binary-file,deprecated-feature,large-file,limited-references,no-newline-at-eof,noisy-file,non-alpha-in-dictionary,token-is-substring,unexpected-line-ending,whitespace-in-dictionary,minified-file,unsupported-configuration,no-files-to-check
         experimental_apply_changes_via_bot: 1
-        use_sarif: ${{ (!github.event.pull_request || (github.event.pull_request.head.repo.full_name == github.repository)) && 1 }}
-        extra_dictionary_limit: 20
+        use_sarif: 0
+        only_check_changed_files: 1
+        extra_dictionary_limit: 10
         extra_dictionaries:
-          cspell:software-terms/dict/softwareTerms.txt
-
-  comment-push:
-    name: Report (Push)
-    # If your workflow isn't running on push, you can remove this job
-    runs-on: ubuntu-latest
-    needs: spelling
-    permissions:
-      contents: write
-    if: (success() || failure()) && needs.spelling.outputs.followup && github.event_name == 'push'
-    steps:
-    - name: comment
-      uses: check-spelling/check-spelling@main
-      with:
-        checkout: true
-        spell_check_this: check-spelling/spell-check-this@prerelease
-        task: ${{ needs.spelling.outputs.followup }}
+           cspell:r/src/r.txt
+           cspell:cpp/src/cpp.txt
+           cspell:software-terms/src/software-terms.txt
+        check_commit_messages: commits title description
 
   comment-pr:
     name: Report (PR)
@@ -130,7 +92,6 @@ jobs:
     runs-on: ubuntu-latest
     needs: spelling
     permissions:
-      contents: read
       pull-requests: write
     if: (success() || failure()) && needs.spelling.outputs.followup && contains(github.event_name, 'pull_request')
     steps:

From 9d066724e41c0eec5d399df028a60b1c69bbc4bb Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 11 Jun 2024 11:19:36 +0100
Subject: [PATCH 181/186] trial spell checking

---
 .github/actions/spelling/allow.txt    | 1 +
 .github/actions/spelling/excludes.txt | 1 +
 .github/actions/spelling/patterns.txt | 5 +++++
 .github/workflows/spelling.yml        | 4 ++--
 4 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt
index f23cd6eac..8b1906ece 100644
--- a/.github/actions/spelling/allow.txt
+++ b/.github/actions/spelling/allow.txt
@@ -9,3 +9,4 @@ Moohan
 Scougal
 Tayside
 Zihao
+ubuntu
diff --git a/.github/actions/spelling/excludes.txt b/.github/actions/spelling/excludes.txt
index e508dc7ad..0bfb34118 100644
--- a/.github/actions/spelling/excludes.txt
+++ b/.github/actions/spelling/excludes.txt
@@ -43,6 +43,7 @@
 \.psd$
 \.pyc$
 \.s$
+\.sps$
 \.svgz?$
 \.tar$
 \.tiff?$
diff --git a/.github/actions/spelling/patterns.txt b/.github/actions/spelling/patterns.txt
index 885237064..1b0cfa6ec 100644
--- a/.github/actions/spelling/patterns.txt
+++ b/.github/actions/spelling/patterns.txt
@@ -32,3 +32,8 @@
 
 # uuid:
 \b[0-9a-fA-F]{8}-(?:[0-9a-fA-F]{4}-){3}[0-9a-fA-F]{12}\b
+
+# Automatically suggested patterns
+# hit-count: 27 file-count: 17
+# version suffix <word>v#
+(?:(?<=[A-Z]{2})V|(?<=[a-z]{2}|[A-Z]{2})v)\d+(?:\b|(?=[a-zA-Z_]))
diff --git a/.github/workflows/spelling.yml b/.github/workflows/spelling.yml
index d78d79255..b36502866 100644
--- a/.github/workflows/spelling.yml
+++ b/.github/workflows/spelling.yml
@@ -72,13 +72,13 @@ jobs:
       with:
         suppress_push_for_open_pull_request: 1
         checkout: true
-        check_file_names: 1
+        check_file_names: 0
         spell_check_this: check-spelling/spell-check-this@prerelease
         post_comment: 0
         use_magic_file: 1
         experimental_apply_changes_via_bot: 1
         use_sarif: 0
-        only_check_changed_files: 1
+        only_check_changed_files: 0
         extra_dictionary_limit: 10
         extra_dictionaries:
            cspell:r/src/r.txt

From 9a349ecc6c9d2dc48c47a416cb7deeee7f35df97 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 11 Jun 2024 11:32:50 +0100
Subject: [PATCH 182/186] update expected word list

---
 .github/actions/spelling/expect.txt | 54 +++++++++++++++++++++++++----
 1 file changed, 47 insertions(+), 7 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 207ab31c9..c632a6b79 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -7,6 +7,7 @@ admtype
 ADPE
 adpe
 adtf
+alstr
 anu
 arrivalmode
 arth
@@ -23,25 +24,26 @@ bodyloc
 boxi
 callr
 Canx
+CAK
 carehome
 careinspectorate
 casewhen
 categorises
 cattend
 ccyy
+CCYY
 cdn
 cennum
 CEREBROVASC
 chd
 chp
 chpstart
+CHC
 cij
 Classificat
-classificat
 cls
 cmh
 CNWs
-cnws
 codecov
 comhairle
 commhosp
@@ -71,8 +73,9 @@ dbplyr
 deathdate
 deathdiag
 demog
+dependant
 devhints
-dfc
+Diagramme
 diagramme
 disch
 dischloc
@@ -84,12 +87,14 @@ dminutes
 dna
 docx
 dontrun
+dotdash
 downup
 dplyr
 Drugsand
 dsn
 dtplyr
 dvprod
+Dunkeld
 eee
 efa
 endomet
@@ -97,26 +102,32 @@ envir
 eol
 fcase
 feb
+Finalise
+finalise
 Fhelper
 fifelse
 fileext
-finalise
 fnc
 fst
 ftm
 fyear
 fyyear
+Fraility
+furrr
 geogs
 ggplot
+GIC
 github
 GLS
 gls
 glsrecord
 gms
 gpoo
+GPOoH
 gpooh
 gpprac
 gss
+gtsave
 hbnames
 hbp
 hbpraccode
@@ -128,6 +139,7 @@ HCP
 hcp
 hefailure
 hhg
+hiw
 hjust
 hms
 homecare
@@ -137,32 +149,42 @@ HSCDIIP
 hscdiip
 hscp
 hscpnames
+htmltools
 htmlwidgets
+httr
 IDPC
 idpc
+img
+improvementservice
 incdn
 incdnas
 infyyear
+integerish
 intzone
 ipdc
 Isdsf
+isdscotland
 issuenumber
 itle
 iwalk
 jaccard
 jan
+javascript
 jennifer
 jul
+JXz
 keydate
 keyring
 keytime
 keytimex
 kis
+knitr
 lazydt
 lcap
 LCHO
 lcho
 lgl
+linetype
 lintr
 lizihao
 los
@@ -172,25 +194,30 @@ lubridate
 magrittr
 markdownguide
 Matern
-matern
 mcbride
 mcmahon
+microsoft
 MIU
 MMMYY
 mmmyy
 momths
 monthflag
 mpat
+multiarch
 multiday
 multisession
 multistaff
 na
 NAs
+netlify
 newcons
 nhs
 nhshosp
+noptions
+noreply
 nrs
 nsu
+nwtgck
 odbc
 oldtadm
 opendata
@@ -203,7 +230,8 @@ pandoc
 parkinsons
 patflow
 pattype
-pcec
+PCEC
+PERTH
 phs
 phsmethods
 phsopendata
@@ -216,7 +244,9 @@ plics
 pms
 popluation
 postcodes
+Posix
 ppas
+PPAs
 prac
 praccode
 ptypes
@@ -239,13 +269,18 @@ refailure
 reflectoring
 refsource
 reftype
+relaint
 renviron
 returnsthe
 rlang
 rmarkdown
+Rnw
 roxygen
-rprofile
+roxygenise
+Roxygenize
+Rprofile
 rscript
+Rscript
 rspm
 rstudio
 rstudioapi
@@ -269,6 +304,7 @@ slf
 slfhelper
 smr
 smra
+SMRA
 smrtype
 sourcedev
 sparra
@@ -283,6 +319,7 @@ stringdist
 stringr
 submis
 sysmis
+tac
 tadm
 tarchetypes
 tbl
@@ -299,9 +336,11 @@ totalnodncontacts
 uid
 ungroup
 unicode
+unrecognised
 updown
 upi
 URx
+usethis
 visualisations
 vline
 wdbf
@@ -311,6 +350,7 @@ xintercept
 xlsx
 yearstay
 yml
+YYYYQX
 yyyyqx
 zihao
 zsav

From 949955cee610f8fe84a9a95c04dbad947b1be4c0 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 11 Jun 2024 11:39:44 +0100
Subject: [PATCH 183/186] update word list

---
 .github/actions/spelling/expect.txt | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index c632a6b79..ea5fbbeca 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -4,7 +4,6 @@ Accom
 Admissio
 admloc
 admtype
-ADPE
 adpe
 adtf
 alstr
@@ -31,7 +30,6 @@ casewhen
 categorises
 cattend
 ccyy
-CCYY
 cdn
 cennum
 CEREBROVASC
@@ -75,7 +73,6 @@ deathdiag
 demog
 dependant
 devhints
-Diagramme
 diagramme
 disch
 dischloc
@@ -102,7 +99,6 @@ envir
 eol
 fcase
 feb
-Finalise
 finalise
 Fhelper
 fifelse
@@ -118,12 +114,10 @@ geogs
 ggplot
 GIC
 github
-GLS
 gls
 glsrecord
 gms
 gpoo
-GPOoH
 gpooh
 gpprac
 gss
@@ -135,7 +129,6 @@ hbrescode
 hbtreatcode
 hbtreatname
 hci
-HCP
 hcp
 hefailure
 hhg
@@ -145,14 +138,12 @@ hms
 homecare
 homev
 hri
-HSCDIIP
 hscdiip
 hscp
 hscpnames
 htmltools
 htmlwidgets
 httr
-IDPC
 idpc
 img
 improvementservice
@@ -181,7 +172,6 @@ kis
 knitr
 lazydt
 lcap
-LCHO
 lcho
 lgl
 linetype

From fe25eeefd34e6ed2265c520ec1921921af03f61d Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 11 Jun 2024 10:40:43 +0000
Subject: [PATCH 184/186] Update metadata

check-spelling run (push) for 966-github-action-spell-checking-issues-cannot-properly-recognize-variants

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
---
 .github/actions/spelling/expect.txt | 78 ++++++++---------------------
 1 file changed, 20 insertions(+), 58 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index ea5fbbeca..febce72b9 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -1,61 +1,54 @@
 ACa
-acc
 Accom
 Admissio
 admloc
 admtype
-adpe
+ADPE
 adtf
 alstr
-anu
 arrivalmode
 arth
 atlassian
 atrialfib
 attendcat
 aut
-bba
 bedday
 BFO
 birthtime
 bloodbfo
 bodyloc
 boxi
+CAK
 callr
 Canx
-CAK
 carehome
 careinspectorate
-casewhen
 categorises
 cattend
-ccyy
+CCYY
 cdn
 cennum
 CEREBROVASC
+CHC
 chd
 chp
 chpstart
-CHC
 cij
 Classificat
-cls
 cmh
 CNWs
 codecov
-comhairle
+Comhairle
 commhosp
-commiting
 congen
 copd
 costinc
-costincdnas
 costmonthnum
 costsfy
 covr
 cph
+CPN
 createslf
-csf
 customise
 cvd
 dataframe
@@ -73,7 +66,7 @@ deathdiag
 demog
 dependant
 devhints
-diagramme
+Diagramme
 disch
 dischloc
 dischto
@@ -82,7 +75,6 @@ disdest
 DMe
 dminutes
 dna
-docx
 dontrun
 dotdash
 downup
@@ -90,26 +82,24 @@ dplyr
 Drugsand
 dsn
 dtplyr
-dvprod
 Dunkeld
-eee
-efa
+dvprod
 endomet
 envir
 eol
 fcase
 feb
-finalise
 Fhelper
 fifelse
 fileext
+Finalise
 fnc
+Fraility
 fst
 ftm
+furrr
 fyear
 fyyear
-Fraility
-furrr
 geogs
 ggplot
 GIC
@@ -117,7 +107,7 @@ github
 gls
 glsrecord
 gms
-gpoo
+GPOo
 gpooh
 gpprac
 gss
@@ -129,7 +119,7 @@ hbrescode
 hbtreatcode
 hbtreatname
 hci
-hcp
+HCP
 hefailure
 hhg
 hiw
@@ -144,20 +134,16 @@ hscpnames
 htmltools
 htmlwidgets
 httr
-idpc
+IDPC
 img
 improvementservice
 incdn
 incdnas
-infyyear
 integerish
 intzone
 ipdc
-Isdsf
 isdscotland
 issuenumber
-itle
-iwalk
 jaccard
 jan
 javascript
@@ -167,16 +153,13 @@ JXz
 keydate
 keyring
 keytime
-keytimex
 kis
 knitr
-lazydt
 lcap
-lcho
+LCHO
 lgl
 linetype
 lintr
-lizihao
 los
 ltc
 ltd
@@ -184,12 +167,11 @@ lubridate
 magrittr
 markdownguide
 Matern
-mcbride
+Mcbride
 mcmahon
 microsoft
 MIU
 MMMYY
-mmmyy
 momths
 monthflag
 mpat
@@ -211,10 +193,8 @@ nwtgck
 odbc
 oldtadm
 opendata
-openxl
 openxlsx
-orcid
-outfile
+ORCID
 overcounting
 pandoc
 parkinsons
@@ -222,20 +202,17 @@ patflow
 pattype
 PCEC
 PERTH
+PHIBCS
 phs
 phsmethods
 phsopendata
 Physicaland
 pkgdown
 placeinc
-plateform
 PLICS
-plics
-pms
 popluation
-postcodes
 Posix
-ppas
+postcodes
 PPAs
 prac
 praccode
@@ -243,11 +220,9 @@ ptypes
 purrr
 quickstart
 rankdir
-rbuildignore
 rcmdcheck
 rdd
 rdname
-rds
 reabl
 reablement
 readcode
@@ -265,17 +240,15 @@ returnsthe
 rlang
 rmarkdown
 Rnw
+ROBERM
 roxygen
 roxygenise
 Roxygenize
 Rprofile
-rscript
 Rscript
 rspm
 rstudio
 rstudioapi
-rtype
-scid
 sco
 scoial
 scotp
@@ -293,7 +266,6 @@ simd
 slf
 slfhelper
 smr
-smra
 SMRA
 smrtype
 sourcedev
@@ -312,7 +284,6 @@ sysmis
 tac
 tadm
 tarchetypes
-tbl
 telecare
 testthat
 thom
@@ -320,28 +291,19 @@ tibble
 tidyr
 tidyselect
 tidyverse
-TJDX
-todo
 totalnodncontacts
 uid
 ungroup
-unicode
 unrecognised
 updown
 upi
 URx
 usethis
-visualisations
-vline
-wdbf
-WORDLIST
 workflows
 xintercept
 xlsx
 yearstay
-yml
 YYYYQX
-yyyyqx
 zihao
 zsav
 zstd

From 662e03acbfd490c8f8960412855cd58043a05cf6 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 11 Jun 2024 12:36:23 +0100
Subject: [PATCH 185/186] spell checking update

---
 .github/actions/spelling/expect.txt     | 1 -
 R/process_tests_sc_all_ch_episodes.R    | 2 +-
 man/process_tests_sc_all_ch_episodes.Rd | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index febce72b9..d1430d6bb 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -271,7 +271,6 @@ smrtype
 sourcedev
 sparra
 spd
-SPSS
 spss
 stadm
 starwars
diff --git a/R/process_tests_sc_all_ch_episodes.R b/R/process_tests_sc_all_ch_episodes.R
index 6887eb662..d42eca2c7 100644
--- a/R/process_tests_sc_all_ch_episodes.R
+++ b/R/process_tests_sc_all_ch_episodes.R
@@ -1,4 +1,4 @@
-#' Process Social Care Care Home all episodes tests
+#' Process Social Care, Care Home all episodes tests
 #'
 #' @param data The processed Care Home all episode data produced by
 #' [process_extract_care_home()].
diff --git a/man/process_tests_sc_all_ch_episodes.Rd b/man/process_tests_sc_all_ch_episodes.Rd
index c4ba45751..f37fe1e0d 100644
--- a/man/process_tests_sc_all_ch_episodes.Rd
+++ b/man/process_tests_sc_all_ch_episodes.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/process_tests_sc_all_ch_episodes.R
 \name{process_tests_sc_all_ch_episodes}
 \alias{process_tests_sc_all_ch_episodes}
-\title{Process Social Care Care Home all episodes tests}
+\title{Process Social Care, Care Home all episodes tests}
 \usage{
 process_tests_sc_all_ch_episodes(data)
 }

From 2b3ef52ca2d2f46ab821ecdf64bb2ff2727ffb82 Mon Sep 17 00:00:00 2001
From: Zihao Li <lizihao_anu@outlook.com>
Date: Tue, 11 Jun 2024 12:34:51 +0000
Subject: [PATCH 186/186] Update metadata

check-spelling run (pull_request_target) for June-24-update

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
on-behalf-of: @check-spelling <check-spelling-bot@check-spelling.dev>
---
 .github/actions/spelling/expect.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index d1430d6bb..7b21f93d1 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -104,6 +104,7 @@ geogs
 ggplot
 GIC
 github
+GLS
 gls
 glsrecord
 gms
@@ -128,6 +129,7 @@ hms
 homecare
 homev
 hri
+HSCDIIP
 hscdiip
 hscp
 hscpnames
@@ -271,6 +273,7 @@ smrtype
 sourcedev
 sparra
 spd
+SPSS
 spss
 stadm
 starwars