From 3820c19861e01309af8ce5c6067bd6be148bd3c2 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 14 Aug 2023 10:16:38 +0100
Subject: [PATCH 01/19] Fix locality (#802)

Tiny error and a simple fix.

Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/process_lookup_postcode.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/process_lookup_postcode.R b/R/process_lookup_postcode.R
index 878c51f37..69cc13bd8 100644
--- a/R/process_lookup_postcode.R
+++ b/R/process_lookup_postcode.R
@@ -53,7 +53,7 @@ process_lookup_postcode <- function(spd_path = get_spd_path(),
       tidyselect::matches("datazone\\d{4}$")
     ) %>%
     dplyr::mutate(
-      locality = tidyr::replace_na("locality", "No Locality Information")
+      locality = tidyr::replace_na(.data$locality, "No Locality Information")
     )
 
 

From 8ea15c0f742994f4863d3fe49a50cff14469dbbe Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 14 Aug 2023 15:01:35 +0100
Subject: [PATCH 02/19] Add simple scripts for running targets as a workbench
 job (#767)

---
 .Rbuildignore      | 1 +
 run_targets_1718.R | 4 ++++
 run_targets_1819.R | 4 ++++
 run_targets_1920.R | 4 ++++
 run_targets_2021.R | 4 ++++
 run_targets_2122.R | 4 ++++
 run_targets_2223.R | 4 ++++
 7 files changed, 25 insertions(+)
 create mode 100644 run_targets_1718.R
 create mode 100644 run_targets_1819.R
 create mode 100644 run_targets_1920.R
 create mode 100644 run_targets_2021.R
 create mode 100644 run_targets_2122.R
 create mode 100644 run_targets_2223.R

diff --git a/.Rbuildignore b/.Rbuildignore
index 168a3e006..2cab1bda6 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -22,3 +22,4 @@
 ^_targets\.R$
 ^_targets\.yaml$
 ^_SPSS_archived$
+^run_targets_
diff --git a/run_targets_1718.R b/run_targets_1718.R
new file mode 100644
index 000000000..ebc58895f
--- /dev/null
+++ b/run_targets_1718.R
@@ -0,0 +1,4 @@
+library(targets)
+tar_make_future(
+  names = (targets::contains("1718"))
+)
diff --git a/run_targets_1819.R b/run_targets_1819.R
new file mode 100644
index 000000000..83bbcedef
--- /dev/null
+++ b/run_targets_1819.R
@@ -0,0 +1,4 @@
+library(targets)
+tar_make_future(
+  names = (targets::contains("1819"))
+)
diff --git a/run_targets_1920.R b/run_targets_1920.R
new file mode 100644
index 000000000..1640d1900
--- /dev/null
+++ b/run_targets_1920.R
@@ -0,0 +1,4 @@
+library(targets)
+tar_make_future(
+  names = (targets::contains("1920"))
+)
diff --git a/run_targets_2021.R b/run_targets_2021.R
new file mode 100644
index 000000000..80749e81a
--- /dev/null
+++ b/run_targets_2021.R
@@ -0,0 +1,4 @@
+library(targets)
+tar_make_future(
+  names = (targets::contains("2021"))
+)
diff --git a/run_targets_2122.R b/run_targets_2122.R
new file mode 100644
index 000000000..aa95d7b24
--- /dev/null
+++ b/run_targets_2122.R
@@ -0,0 +1,4 @@
+library(targets)
+tar_make_future(
+  names = (targets::contains("2122"))
+)
diff --git a/run_targets_2223.R b/run_targets_2223.R
new file mode 100644
index 000000000..2ded7d5fd
--- /dev/null
+++ b/run_targets_2223.R
@@ -0,0 +1,4 @@
+library(targets)
+tar_make_future(
+  names = (targets::contains("2223"))
+)

From 80799a2838b8b2bad133e310091dd6b3434cf477 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Mon, 14 Aug 2023 15:14:14 +0100
Subject: [PATCH 03/19] Fix duplicated CHIs in individual file (#791)

* fix duplicated matches in chi in sc data.

* Update R/create_individual_file.R

* update on join_sc_client

* Create a test checking if individual files have duplicated chi

* add duplicated chi number to the tests in process_tests_individual_file

---------

Co-authored-by: lizihao-anu <lizihao-anu@users.noreply.github.com>
Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 R/create_individual_file.R        | 18 +++++++++++++++---
 R/process_tests_individual_file.R | 16 ++++++++++++----
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index 84dbd28ee..f0e6bcdfc 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -61,7 +61,7 @@ create_individual_file <- function(
     remove_blank_chi() %>%
     add_cij_columns() %>%
     add_all_columns() %>%
-    aggregate_ch_episodes_zihao() %>%
+    aggregate_ch_episodes() %>%
     clean_up_ch(year) %>%
     recode_gender() %>%
     aggregate_by_chi() %>%
@@ -741,13 +741,25 @@ join_sc_client <- function(
       sc_demographics %>%
         dplyr::select("sending_location", "social_care_id", "chi"),
       by = c("sending_location", "social_care_id")
-    )
+    ) %>%
+    dplyr::mutate(count_not_known = rowSums(dplyr::select(., all_of(
+      c(
+        "sc_living_alone",
+        "sc_support_from_unpaid_carer",
+        "sc_social_worker",
+        "sc_meals",
+        "sc_day_care"
+      )
+    )) == "Not Known")) %>%
+    dplyr::arrange(chi, count_not_known) %>%
+    dplyr::distinct(chi, .keep_all = TRUE)
 
   # Match on client variables by chi
   individual_file <- individual_file %>%
     dplyr::left_join(
       join_client_demog,
-      by = "chi"
+      by = "chi",
+      relationship = "one-to-one"
     ) %>%
     dplyr::select(!c("sending_location", "social_care_id", "sc_latest_submission"))
 
diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R
index 2c93f243e..a9d193465 100644
--- a/R/process_tests_individual_file.R
+++ b/R/process_tests_individual_file.R
@@ -64,9 +64,8 @@ produce_individual_file_tests <- function(data) {
     create_demog_test_flags() %>%
     create_hb_test_flags(.data$hbrescode) %>%
     create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>%
-    create_hscp_test_flags(.data$hscp2018) %>%
     # keep variables for comparison
-    dplyr::select("valid_chi":dplyr::last_col()) %>%
+    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
@@ -86,7 +85,9 @@ produce_individual_file_tests <- function(data) {
 
   min_max_measures <- data %>%
     calculate_measures(
-      vars = "health_net_cost",
+      vars = c(
+        "health_net_cost"
+      ),
       measure = "min-max"
     )
 
@@ -99,11 +100,18 @@ produce_individual_file_tests <- function(data) {
       measure = "sum"
     )
 
+  dup_chi <- data.frame(
+    measure = "duplicated chi number",
+    value = duplicated(data$chi) %>%
+      sum() %>% as.integer()
+  )
+
   join_output <- list(
     test_flags,
     all_measures,
     min_max_measures,
-    sum_measures
+    sum_measures,
+    dup_chi
   ) %>%
     purrr::reduce(dplyr::full_join, by = c("measure", "value"))
 

From 19779e3fd6c4e9265661f617103e7f8dda044444 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Mon, 14 Aug 2023 15:17:35 +0100
Subject: [PATCH 04/19] Update NSU code for new 22/23 cohort (#784)

Update `check_year_valid` for NSUs
---
 R/check_year_valid.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/check_year_valid.R b/R/check_year_valid.R
index d170cd5b5..1361eb47e 100644
--- a/R/check_year_valid.R
+++ b/R/check_year_valid.R
@@ -42,7 +42,7 @@ check_year_valid <- function(
     return(FALSE)
   } else if (year >= "2122" && type %in% c("CMH", "DN")) {
     return(FALSE)
-  } else if (year >= "2223" && type %in% "NSU") {
+  } else if (year >= "2324" && type %in% "NSU") {
     return(FALSE)
   } else if (year >= "2324" && type %in% c("SPARRA", "HHG")) {
     return(FALSE)

From 7e3215da42b2c92f395de458500deb23b1952d54 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Mon, 14 Aug 2023 15:21:05 +0100
Subject: [PATCH 05/19] Amend `get_boxi_extract_path` function for archiving DN
 and CMH data  (#785)

* Update `get_boxi_extract_path` for DN/CMH data

* Remove extra function

* [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/5856792420/attempts/1
Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/785#issuecomment-1677400900

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>

---------

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 .github/actions/spelling/expect.txt |  1 +
 R/get_boxi_extract_path.R           | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 87300a6a1..51c0a6c6b 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -91,6 +91,7 @@ hjust
 hms
 homecare
 homev
+hscdiip
 hscp
 hscpnames
 IDPC
diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R
index 60dd7857a..6096525e5 100644
--- a/R/get_boxi_extract_path.R
+++ b/R/get_boxi_extract_path.R
@@ -29,7 +29,11 @@ get_boxi_extract_path <- function(
     )) {
   type <- match.arg(type)
 
-  year_dir <- get_year_dir(year, extracts_dir = TRUE)
+  if (type %in% c("DN", "CMH")) {
+    dir <- fs::path(get_slf_dir(), "Archived_data")
+  } else {
+    dir <- get_year_dir(year, extracts_dir = TRUE)
+  }
 
   if (!check_year_valid(year, type)) {
     return(get_dummy_boxi_extract_path())
@@ -53,11 +57,12 @@ get_boxi_extract_path <- function(
   )
 
   boxi_extract_path_csv_gz <- fs::path(
-    year_dir,
+    dir,
     stringr::str_glue("{file_name}-20{year}.csv.gz")
   )
+
   boxi_extract_path_csv <- fs::path(
-    year_dir,
+    dir,
     stringr::str_glue("{file_name}-20{year}.csv")
   )
 

From 612e0698cc2401faa040a9607062f97cb5d9207b Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Mon, 14 Aug 2023 15:21:58 +0100
Subject: [PATCH 06/19] Fix increase in total preventable beddays (#779)

* further obsolete code change

* fix the preventable_beddays

Co-authored-by: James McMahon <james.mcmahon@phs.scot>

---------

Co-authored-by: James McMahon <james.mcmahon@phs.scot>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/aggregate_by_chi.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
index 99da03ba8..5e7ebc7c0 100644
--- a/R/aggregate_by_chi.R
+++ b/R/aggregate_by_chi.R
@@ -126,9 +126,9 @@ aggregate_by_chi <- function(episode_file) {
   individual_file_cols6 <- episode_file[,
     .(
       preventable_beddays = ifelse(
-        max(cij_ppa, na.rm = TRUE),
-        max(cij_end_date) - min(cij_start_date),
-        NA_real_
+        any(cij_ppa, na.rm = TRUE),
+        as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))),
+        NA_integer_
       )
     ),
     # cij_marker has been renamed as cij_total

From 51a0b0590a554613c6e56001eb326cf6600977c3 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Tue, 15 Aug 2023 15:17:33 +0100
Subject: [PATCH 07/19] fix warning on `:=` (#797)

* fix warning on `:=`

* Update R/aggregate_by_chi.R

Co-authored-by: James McMahon <james.mcmahon@phs.scot>

* Style code

---------

Co-authored-by: James McMahon <james.mcmahon@phs.scot>
Co-authored-by: lizihao-anu <lizihao-anu@users.noreply.github.com>
---
 R/aggregate_by_chi.R | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
index 5e7ebc7c0..db12f7a9e 100644
--- a/R/aggregate_by_chi.R
+++ b/R/aggregate_by_chi.R
@@ -203,12 +203,19 @@ aggregate_ch_episodes <- function(episode_file) {
   data.table::setDT(episode_file)
 
   # Perform grouping and aggregation
-  episode_file <- episode_file[, `:=`(
-    ch_no_cost = max(ch_no_cost),
-    ch_ep_start = min(record_keydate1),
-    ch_ep_end = max(ch_ep_end),
-    ch_cost_per_day = mean(ch_cost_per_day)
-  ), by = c("chi", "ch_chi_cis")]
+  episode_file[, c(
+    "ch_no_cost",
+    "ch_ep_start",
+    "ch_ep_end",
+    "ch_cost_per_day"
+  ) := list(
+    max(ch_no_cost),
+    min(record_keydate1),
+    max(ch_ep_end),
+    mean(ch_cost_per_day)
+  ),
+  by = c("chi", "ch_chi_cis")
+  ]
 
   # Convert back to tibble if needed
   episode_file <- tibble::as_tibble(episode_file)

From 0f25195e234fac4fe33d677e240d798dc3e7a76c Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 14 Aug 2023 15:28:00 +0100
Subject: [PATCH 08/19] Add 2324 targets/workbench job file

---
 run_targets_2324.R | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 run_targets_2324.R

diff --git a/run_targets_2324.R b/run_targets_2324.R
new file mode 100644
index 000000000..b875984f4
--- /dev/null
+++ b/run_targets_2324.R
@@ -0,0 +1,4 @@
+library(targets)
+tar_make_future(
+  names = (targets::contains("2324"))
+)

From c4a54f84a95e2a691085c98e366b225d330bac18 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 14 Aug 2023 15:39:00 +0100
Subject: [PATCH 09/19] Use `get_source_extract_path` in homelessness (#796)

This was already set up, just not used for some reason. Note that this will switch from using a `.rds` to `.parquet` (unless you do `get_source_extract_path(year, "Homelessness", ext = "rds")`).

Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/get_source_extract_path.R      | 49 ++++++++++++++++++--------------
 R/process_extract_homelessness.R | 13 +++++----
 2 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R
index 1816ceb25..4cb5eef44 100644
--- a/R/get_source_extract_path.R
+++ b/R/get_source_extract_path.R
@@ -10,27 +10,34 @@
 #' @export
 #'
 #' @family extract file paths
-get_source_extract_path <- function(year,
-                                    type = c(
-                                      "Acute",
-                                      "AE",
-                                      "AT",
-                                      "CH",
-                                      "Client",
-                                      "CMH",
-                                      "DD",
-                                      "Deaths",
-                                      "DN",
-                                      "GPOoH",
-                                      "HC",
-                                      "Homelessness",
-                                      "Maternity",
-                                      "MH",
-                                      "Outpatients",
-                                      "PIS",
-                                      "SDS"
-                                    ),
-                                    ...) {
+get_source_extract_path <- function(
+    year,
+    type = c(
+      "Acute",
+      "AE",
+      "AT",
+      "CH",
+      "Client",
+      "CMH",
+      "DD",
+      "Deaths",
+      "DN",
+      "GPOoH",
+      "HC",
+      "Homelessness",
+      "Maternity",
+      "MH",
+      "Outpatients",
+      "PIS",
+      "SDS"
+    ),
+    ...) {
+  if (year %in% type) {
+    cli::cli_abort("{.val {year}} was supplied to the {.arg year} argument.")
+  }
+
+  year <- check_year_format(year)
+
   type <- match.arg(type)
 
   if (!check_year_valid(year, type)) {
diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index a16c9a57b..a900cff9a 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -146,13 +146,14 @@ process_extract_homelessness <- function(
     )
 
   if (write_to_disk) {
-    final_data %>%
-      write_file(get_file_path(
-        get_year_dir(year),
-        stringr::str_glue("homelessness_for_source-20{year}"),
-        ext = "rds",
+    write_file(
+      final_data,
+      get_source_extract_path(
+        year = year,
+        type = "Homelessness",
         check_mode = "write"
-      ))
+      )
+    )
   }
 
   return(final_data)

From e36c97c17a711510605c15a8b7a21cd045bb8fdc Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 14 Aug 2023 15:41:56 +0100
Subject: [PATCH 10/19] Correct tests for NSU

---
 tests/testthat/test-check_year_valid.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/testthat/test-check_year_valid.R b/tests/testthat/test-check_year_valid.R
index ca0738c89..eda74dbdf 100644
--- a/tests/testthat/test-check_year_valid.R
+++ b/tests/testthat/test-check_year_valid.R
@@ -49,7 +49,8 @@ test_that("Check year valid works for specific datasets ", {
   expect_true(check_year_valid("1920", "NSU"))
   expect_true(check_year_valid("2021", "NSU"))
   expect_true(check_year_valid("2122", "NSU"))
-  expect_false(check_year_valid("2223", "NSU"))
+  expect_true(check_year_valid("2223", "NSU"))
+  expect_false(check_year_valid("2324", "NSU"))
 
   # SPARRA
   expect_false(check_year_valid("1415", "SPARRA"))

From 62a41740b01c6bc266b3842684dc5b77608aa6a5 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 15 Aug 2023 15:41:40 +0100
Subject: [PATCH 11/19] Update script for extracting NSU from SMRA space

---
 .../All_years/02-Lookups/99_extract_NSU_data.R       | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
index d33dfbe49..54cc316e8 100644
--- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
+++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
@@ -6,7 +6,7 @@ library(glue)
 nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU")
 
 # Change the year
-fin_year <- "1516"
+fin_year <- "2324"
 
 db_connection <- odbc::dbConnect(
   odbc::odbc(),
@@ -16,7 +16,7 @@ db_connection <- odbc::dbConnect(
 )
 
 # Check the table name and change if required.
-table <- dbplyr::in_schema("ROBERM18", "FINAL_2")
+table <- dbplyr::in_schema("ROBERM18", "FINAL_1")
 
 # Read NSU data
 nsu_data <-
@@ -35,9 +35,11 @@ nsu_data <-
   collect()
 
 # Write out the data
-file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.zsav"))
+file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.parquet"))
 # This will archive the existing file for later comparison
 if (file_exists(file_path)) {
-  file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.zsav")))
+  file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.parquet")))
 }
-write_sav(nsu_data, file_path, compress = TRUE)
+
+nsu_data %>%
+arrow::write_parquet(file_path, compression = "zstd", compression_level = 10)

From d310dfd3bc1f586001b4d09bcec358d5b458ca09 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 15 Aug 2023 15:47:29 +0100
Subject: [PATCH 12/19] Update year in 99_NSU extract script

---
 _SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
index 54cc316e8..8bbd0513c 100644
--- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
+++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
@@ -6,7 +6,7 @@ library(glue)
 nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU")
 
 # Change the year
-fin_year <- "2324"
+fin_year <- "2223"
 
 db_connection <- odbc::dbConnect(
   odbc::odbc(),

From 51c4a637d7701c1061af67964adda0d5519cfa47 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Tue, 15 Aug 2023 16:04:23 +0100
Subject: [PATCH 13/19] Update news for September 23 update (#811)

* Update News for March and June updates

* Update release date

* WIP - update news for Sep update

* Update NEWS.md

Fix some typos / grammar

---------

Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 NEWS.md | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index cbcb62079..2a3453eea 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,48 @@
-# March 2023 Update - Unreleased
+# September 2023 Update - Unreleased
+* Update of 2017/18 onwards to include bug fixes within the files. 
+* New 2023/24 files. 
+* New NSU cohort for 2022/23 file. 
+* Re-addition of:
+  * HRIs in individual file.
+  * Homelessness Flags.
+* Bug fixes: 
+  * Blank `datazone` in A&E. This was due to the PC8 postcode format being matched onto the SLF postcode lookup and has now been fixed.
+  * Large increase in preventable beddays. This was caused by a difference between the SPSS and R logic. The R code now uses the SPSS logic, which
+    brings the difference down to `3.3%`.
+  * Issue with `locality`, where every row showed the literal text `locality` instead of its true locality value. This has now been fixed.
+  * Duplicated CHI in the individual file. The issue was identified when trying to include HRIs. This has now been corrected. 
+* Internal changes to SLF development: 
+  * `DN` and `CMH` data are now archived in an HSCDIIP folder as the BOXI datamart is now closed down for these. Function `get_boxi_extract_path` has been updated to reflect this. 
+  * Tests updated to include `HSCP` count.
+  * Tests created for `Delayed Discharges` extract and `Social care Client lookup`.
 
 
+# June 2023 Update - Released 24-Jul-2023
+* 2011/12 -> 2013/14 – These files have not been altered, other than to make them available in a new file type (parquet).
+* 2017/18 – These files have been recreated using our new R pipeline, but the data has not changed. We did this so that we would have a good comparator file.
+* 2018/19 -> 2022/23 – These files have been recreated using the R pipeline and are also using updated data (as in a ‘normal’ update).
+* Files changed into parquet format. 
+* SLFhelper updated. 
+* Removal of `keydate1_dateformat` and `keydate2_dateformat`.
+* `dd_responsible_lca` – This variable now uses CA2019 codes instead of the 2-digit ‘old’ LCA code.
+* Preventable beddays - not able to calculate these correctly. Death fixes not included.
+* Variables not ordered in R like they used to be in SPSS.
+* End of HHG.
+* New variable `ch_postcode`.
+* Renamed variables `cost_total_net_incdnas`, `ooh_outcome.1`, `ooh_outcome.2`, `ooh_outcome.3`, `ooh_outcome.4`, `totalnodncontacts`.
+* HRIs not included.
+* Homelessness flags not included. 
+* Keep_population flag not included. 
+
+
+# March 2023 Update - Released 10-Mar-2023
+* 2021/22 episode and individual files refreshed with updated activity.
+* 2022/23 file updated and contains data up to the end of Q3. 
+* Social care data is available for 2022/23. 
+* Typo in the variable name `ooh_covid_assessment`
+* Next update in May as a test run in R but won't be released. 
+* Next release in June. 
+
 # December 2022 Update - Released 07-Dec-2022
 * Now using the 2022v2 Scottish Postcode Directory.
 * Now using the 2020 Urban Rural classifications (instead of the older 2016 ones), this means variables such as `URx_2016` will now be called `URx_2020`.

From ee3943ffcc7ec9cb5932de6f4bb8f9ca5ba7423c Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Wed, 16 Aug 2023 09:01:09 +0100
Subject: [PATCH 14/19] Apply styling

---
 _SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
index 8bbd0513c..ea6f81bfc 100644
--- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
+++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
@@ -42,4 +42,4 @@ if (file_exists(file_path)) {
 }
 
 nsu_data %>%
-arrow::write_parquet(file_path, compression = "zstd", compression_level = 10)
+  arrow::write_parquet(file_path, compression = "zstd", compression_level = 10)

From 61283cc69f072a17cdad8f09d053626b2f2ad31f Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Wed, 16 Aug 2023 09:16:29 +0100
Subject: [PATCH 15/19] Fix issue with `case_match` types (#810)

* Fix issue with `case_match` types

It seems that `case_match()` is stricter about types than `case_when()`. See the below code:

```r
library(dplyr)
# Breaks
mutate(starwars,
  new_height = case_when(
    height == "172" ~ "170"),
  new_height2 = case_match(
    height,
    "172" ~ "170"
  ),
  .after = "height"
)

# Works
mutate(starwars,
  new_height = case_when(
      height == "172" ~ "170"),
  new_height2 = case_match(
    height,
    172L ~ "170"
  ),
  .after = "height"
)
```

Since `sending_location` is an integer, the LHS of `case_match` must be numeric. It was slightly incorrect previously but `case_when` let us get away with it!

I also updated and added to the tests.

* Style code

* Style code

---------

Co-authored-by: Moohan <Moohan@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
---
 R/convert_sending_location_to_lca.R           | 66 ++++++++--------
 .../_snaps/convert_sending_location_to_lca.md |  8 +-
 .../test-convert_sending_location_to_lca.R    | 79 +++++++++++--------
 3 files changed, 82 insertions(+), 71 deletions(-)

diff --git a/R/convert_sending_location_to_lca.R b/R/convert_sending_location_to_lca.R
index 6e9c577c0..d0d79dd39 100644
--- a/R/convert_sending_location_to_lca.R
+++ b/R/convert_sending_location_to_lca.R
@@ -18,38 +18,40 @@
 convert_sending_location_to_lca <- function(sending_location) {
   lca <- dplyr::case_match(
     sending_location,
-    "100" ~ "01", # Aberdeen City
-    "110" ~ "02", # Aberdeenshire
-    "120" ~ "03", # Angus
-    "130" ~ "04", # Argyll and Bute
-    "355" ~ "05", # Scottish Borders
-    "150" ~ "06", # Clackmannanshire
-    "395" ~ "07", # West Dumbartonshire
-    "170" ~ "08", # Dumfries and Galloway
-    "180" ~ "09", # Dundee City
-    "190" ~ "10", # East Ayrshire
-    "200" ~ "11", # East Dunbartonshire
-    "210" ~ "12", # East Lothian
-    "220" ~ "13", # East Renfrewshire
-    "230" ~ "14", # City of Edinburgh
-    "240" ~ "15", # Falkirk
-    "250" ~ "16", # Fife
-    "260" ~ "17", # Glasgow City
-    "270" ~ "18", # Highland
-    "280" ~ "19", # Inverclyde
-    "290" ~ "20", # Midlothian
-    "300" ~ "21", # Moray
-    "310" ~ "22", # North Ayrshire
-    "320" ~ "23", # North Lanarkshire
-    "330" ~ "24", # Orkney Islands
-    "340" ~ "25", # Perth and Kinross
-    "350" ~ "26", # Renfrewshire
-    "360" ~ "27", # Shetland Islands
-    "370" ~ "28", # South Ayrshire
-    "380" ~ "29", # South Lanarkshire
-    "390" ~ "30", # Stirling
-    "400" ~ "31", # West Lothian
-    "235" ~ "32" # Na_h_Eileanan_Siar
+    100L ~ "01", # Aberdeen City
+    110L ~ "02", # Aberdeenshire
+    120L ~ "03", # Angus
+    130L ~ "04", # Argyll and Bute
+    355L ~ "05", # Scottish Borders
+    150L ~ "06", # Clackmannanshire
+    395L ~ "07", # West Dunbartonshire
+    170L ~ "08", # Dumfries and Galloway
+    180L ~ "09", # Dundee City
+    190L ~ "10", # East Ayrshire
+    200L ~ "11", # East Dunbartonshire
+    210L ~ "12", # East Lothian
+    220L ~ "13", # East Renfrewshire
+    230L ~ "14", # City of Edinburgh
+    240L ~ "15", # Falkirk
+    250L ~ "16", # Fife
+    260L ~ "17", # Glasgow City
+    270L ~ "18", # Highland
+    280L ~ "19", # Inverclyde
+    290L ~ "20", # Midlothian
+    300L ~ "21", # Moray
+    310L ~ "22", # North Ayrshire
+    320L ~ "23", # North Lanarkshire
+    330L ~ "24", # Orkney Islands
+    340L ~ "25", # Perth and Kinross
+    350L ~ "26", # Renfrewshire
+    360L ~ "27", # Shetland Islands
+    370L ~ "28", # South Ayrshire
+    380L ~ "29", # South Lanarkshire
+    390L ~ "30", # Stirling
+    400L ~ "31", # West Lothian
+    235L ~ "32", # Na_h_Eileanan_Siar
+    .default = NA_character_
   )
+
   return(lca)
 }
diff --git a/tests/testthat/_snaps/convert_sending_location_to_lca.md b/tests/testthat/_snaps/convert_sending_location_to_lca.md
index 464ff2d37..1fa02dc14 100644
--- a/tests/testthat/_snaps/convert_sending_location_to_lca.md
+++ b/tests/testthat/_snaps/convert_sending_location_to_lca.md
@@ -1,10 +1,10 @@
 # Can convert a SC sending location to lca code
 
     Code
-      convert_sending_location_to_lca(c("100", "110", "120", "130", "355", "150",
-        "395", "170", "180", "190", "200", "210", "220", "230", "240", "250", "260",
-        "270", "280", "290", "300", "310", "320", "330", "340", "350", "360", "370",
-        "380", "390", "400", "235", "999", "0", NA))
+      convert_sending_location_to_lca(c(100L, 110L, 120L, 130L, 355L, 150L, 395L,
+        170L, 180L, 190L, 200L, 210L, 220L, 230L, 240L, 250L, 260L, 270L, 280L, 290L,
+        300L, 310L, 320L, 330L, 340L, 350L, 360L, 370L, 380L, 390L, 400L, 235L, 999L,
+        0L, NA_integer_))
     Output
        [1] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15"
       [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
diff --git a/tests/testthat/test-convert_sending_location_to_lca.R b/tests/testthat/test-convert_sending_location_to_lca.R
index 0bc67668e..eb66802a6 100644
--- a/tests/testthat/test-convert_sending_location_to_lca.R
+++ b/tests/testthat/test-convert_sending_location_to_lca.R
@@ -2,42 +2,51 @@ test_that("Can convert a SC sending location to lca code", {
   expect_snapshot(
     convert_sending_location_to_lca(
       c(
-        "100",
-        "110",
-        "120",
-        "130",
-        "355",
-        "150",
-        "395",
-        "170",
-        "180",
-        "190",
-        "200",
-        "210",
-        "220",
-        "230",
-        "240",
-        "250",
-        "260",
-        "270",
-        "280",
-        "290",
-        "300",
-        "310",
-        "320",
-        "330",
-        "340",
-        "350",
-        "360",
-        "370",
-        "380",
-        "390",
-        "400",
-        "235",
-        "999",
-        "0",
-        NA
+        100L,
+        110L,
+        120L,
+        130L,
+        355L,
+        150L,
+        395L,
+        170L,
+        180L,
+        190L,
+        200L,
+        210L,
+        220L,
+        230L,
+        240L,
+        250L,
+        260L,
+        270L,
+        280L,
+        290L,
+        300L,
+        310L,
+        320L,
+        330L,
+        340L,
+        350L,
+        360L,
+        370L,
+        380L,
+        390L,
+        400L,
+        235L,
+        999L,
+        0L,
+        NA_integer_
       )
     )
   )
 })
+
+test_that("Errors on unexpected input", {
+  expect_error(
+    convert_sending_location_to_lca("100")
+  )
+  expect_error(
+    convert_sending_location_to_lca(c("100", 99L))
+  )
+})

From c88562cad41b619a7cc071523d5038f53ff6c57d Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Wed, 16 Aug 2023 10:32:52 +0100
Subject: [PATCH 16/19] Bug - Outpatients tests failing due to missing HSCP
 (#816)

* Update `produce_source_extract_tests`

* Update outpatients tests with hscp_var = FALSE

* Revert "Style code"

This reverts commit 8e73d4abc042986a76754c2acc1d197292a1c245.

* Style code

* simplify code

* Update documentation

* Rename `hscp_var` to `add_hscp_count`

* Update documentation

---------

Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: James McMahon <james.mcmahon@phs.scot>
Co-authored-by: Moohan <Moohan@users.noreply.github.com>
---
 R/process_tests_outpatients.R       |  6 ++++--
 R/produce_source_extract_tests.R    | 13 ++++++++++---
 man/produce_source_extract_tests.Rd |  5 ++++-
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R
index f8a7a6a2e..5ab3e82db 100644
--- a/R/process_tests_outpatients.R
+++ b/R/process_tests_outpatients.R
@@ -12,11 +12,13 @@ process_tests_outpatients <- function(data, year) {
   comparison <- produce_test_comparison(
     old_data = produce_source_extract_tests(old_data,
       sum_mean_vars = "cost",
-      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net")
+      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"),
+      add_hscp_count = FALSE
     ),
     new_data = produce_source_extract_tests(data,
       sum_mean_vars = "cost",
-      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net")
+      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"),
+      add_hscp_count = FALSE
     )
   ) %>%
     write_tests_xlsx(sheet_name = "00B", year)
diff --git a/R/produce_source_extract_tests.R b/R/produce_source_extract_tests.R
index 10f842fc6..7f8feda92 100644
--- a/R/produce_source_extract_tests.R
+++ b/R/produce_source_extract_tests.R
@@ -13,6 +13,7 @@
 #' (data is from [get_source_extract_path()])
 #' @param sum_mean_vars variables used when selecting 'all' measures from [calculate_measures()]
 #' @param max_min_vars variables used when selecting 'min-max' from [calculate_measures()]
+#' @param add_hscp_count Whether to add the HSCP test flags (default `TRUE`). Set to `FALSE` when the `hscp` variable is not available.
 #'
 #' @return a dataframe with a count of each flag
 #' from [calculate_measures()]
@@ -28,13 +29,19 @@ produce_source_extract_tests <- function(data,
                                          max_min_vars = c(
                                            "record_keydate1", "record_keydate2",
                                            "cost_total_net", "yearstay"
-                                         )) {
+                                         ),
+                                         add_hscp_count = TRUE) {
   test_flags <- data %>%
     # use functions to create HB and partnership flags
     create_demog_test_flags() %>%
     create_hb_test_flags(.data$hbtreatcode) %>%
-    create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>%
-    create_hscp_test_flags(.data$hscp) %>%
+    create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net)
+
+  if (add_hscp_count) {
+    test_flags <- create_hscp_test_flags(test_flags, .data$hscp)
+  }
+
+  test_flags <- test_flags %>%
     # keep variables for comparison
     dplyr::select("valid_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
diff --git a/man/produce_source_extract_tests.Rd b/man/produce_source_extract_tests.Rd
index 679132127..97984103a 100644
--- a/man/produce_source_extract_tests.Rd
+++ b/man/produce_source_extract_tests.Rd
@@ -7,7 +7,8 @@
 produce_source_extract_tests(
   data,
   sum_mean_vars = c("beddays", "cost", "yearstay"),
-  max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay")
+  max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay"),
+  add_hscp_count = TRUE
 )
 }
 \arguments{
@@ -17,6 +18,8 @@ produce_source_extract_tests(
 \item{sum_mean_vars}{variables used when selecting 'all' measures from \code{\link[=calculate_measures]{calculate_measures()}}}
 
 \item{max_min_vars}{variables used when selecting 'min-max' from \code{\link[=calculate_measures]{calculate_measures()}}}
+
+\item{add_hscp_count}{Whether to add the HSCP test flags (default \code{TRUE}). Set to \code{FALSE} when the \code{hscp} variable is not available.}
 }
 \value{
 a dataframe with a count of each flag

From 1e06921a5a0c484fb84f416a28b5a1b9c37cee66 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Wed, 16 Aug 2023 10:53:57 +0100
Subject: [PATCH 17/19] fix read_sc_all_alarms_telecare with incorrect format
 in period (#814)

* fix read_sc_all_alarms_telecare with the incorrect format in period

---------

Co-authored-by: lizihao-anu <lizihao-anu@users.noreply.github.com>
Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 .github/actions/spelling/expect.txt |  1 +
 R/read_sc_all_alarms_telecare.R     | 16 +++++++---------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 51c0a6c6b..464adca0e 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -108,6 +108,7 @@ keyring
 keytime
 keytimex
 kis
+lazydt
 lgl
 los
 ltc
diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R
index ac3ac206d..2c7bd03db 100644
--- a/R/read_sc_all_alarms_telecare.R
+++ b/R/read_sc_all_alarms_telecare.R
@@ -22,21 +22,19 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
       "service_start_date",
       "service_end_date"
     ) %>%
-    # fix bad period (2017, 2020 & 2021)
+    dplyr::collect() %>%
+    # fix bad period: a bare year such as "2017" becomes "2017Q4"
     dplyr::mutate(
-      period = dplyr::case_match(
-        .data$period,
-        "2017" ~ "2017Q4",
-        "2020" ~ "2020Q4",
-        "2021" ~ "2021Q4",
-        .default = .data$period
+      period = dplyr::if_else(
+        grepl("^\\d{4}$", .data$period),
+        paste0(.data$period, "Q4"),
+        .data$period
       )
     ) %>%
     dplyr::mutate(
       dplyr::across(c("sending_location", "service_type"), ~ as.integer(.x))
     ) %>%
-    dplyr::arrange(.data$sending_location, .data$social_care_id) %>%
-    dplyr::collect()
+    dplyr::arrange(.data$sending_location, .data$social_care_id)
 
   return(at_full_data)
 }

From 2c6853c255b3584830660cd37d0d23f617349d17 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Wed, 16 Aug 2023 11:14:25 +0100
Subject: [PATCH 18/19] Fix `convert_sending_location_to_lca` example

---
 R/convert_sending_location_to_lca.R    | 2 +-
 man/convert_sending_location_to_lca.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/convert_sending_location_to_lca.R b/R/convert_sending_location_to_lca.R
index d0d79dd39..ff7e51db1 100644
--- a/R/convert_sending_location_to_lca.R
+++ b/R/convert_sending_location_to_lca.R
@@ -9,7 +9,7 @@
 #' @export
 #'
 #' @examples
-#' sending_location <- c("100", "120")
+#' sending_location <- c(100, 120)
 #' convert_sending_location_to_lca(sending_location)
 #'
 #' @family code functions
diff --git a/man/convert_sending_location_to_lca.Rd b/man/convert_sending_location_to_lca.Rd
index 8c7a29088..78bf475ba 100644
--- a/man/convert_sending_location_to_lca.Rd
+++ b/man/convert_sending_location_to_lca.Rd
@@ -17,7 +17,7 @@ Convert Social Care Sending Location Codes into the
 Local Council Authority Codes.
 }
 \examples{
-sending_location <- c("100", "120")
+sending_location <- c(100, 120)
 convert_sending_location_to_lca(sending_location)
 
 }

From ff4d35f48c8c2076a98d748912492573e88caad2 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 1 Sep 2023 12:07:43 +0100
Subject: [PATCH 19/19] Use `col_select` instead of `columns` in tests

---
 R/get_existing_data_for_tests.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/get_existing_data_for_tests.R b/R/get_existing_data_for_tests.R
index 91fa2293e..1dd0b350e 100644
--- a/R/get_existing_data_for_tests.R
+++ b/R/get_existing_data_for_tests.R
@@ -46,7 +46,7 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") {
     slf_data <- suppressMessages(slfhelper::read_slf_episode(
       year = year,
       recids = recids,
-      columns = variable_names
+      col_select = variable_names
     ))
     if ("hscp2018" %in% variable_names) {
       slf_data <- dplyr::rename(slf_data, "hscp" = "hscp2018")
@@ -54,7 +54,7 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") {
   } else {
     slf_data <- suppressMessages(slfhelper::read_slf_individual(
       year = year,
-      columns = variable_names
+      col_select = variable_names
     ))
   }