From 80799a2838b8b2bad133e310091dd6b3434cf477 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Mon, 14 Aug 2023 15:14:14 +0100
Subject: [PATCH 01/26] Fix CHI duplicates of chi in individual file (#791)

* fix duplicated matches in chi in sc data.

* Update R/create_individual_file.R

* update on join_sc_client

* Create a test checking if individual files have duplicated chi

* add duplicated chi number to the tests in process_tests_individual_file

---------

Co-authored-by: lizihao-anu <lizihao-anu@users.noreply.github.com>
Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 R/create_individual_file.R        | 18 +++++++++++++++---
 R/process_tests_individual_file.R | 16 ++++++++++++----
 2 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index 84dbd28ee..f0e6bcdfc 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -61,7 +61,7 @@ create_individual_file <- function(
     remove_blank_chi() %>%
     add_cij_columns() %>%
     add_all_columns() %>%
-    aggregate_ch_episodes_zihao() %>%
+    aggregate_ch_episodes() %>%
     clean_up_ch(year) %>%
     recode_gender() %>%
     aggregate_by_chi() %>%
@@ -741,13 +741,25 @@ join_sc_client <- function(
       sc_demographics %>%
         dplyr::select("sending_location", "social_care_id", "chi"),
       by = c("sending_location", "social_care_id")
-    )
+    ) %>%
+    dplyr::mutate(count_not_known = rowSums(dplyr::select(., all_of(
+      c(
+        "sc_living_alone",
+        "sc_support_from_unpaid_carer",
+        "sc_social_worker",
+        "sc_meals",
+        "sc_day_care"
+      )
+    )) == "Not Known")) %>%
+    dplyr::arrange(chi, count_not_known) %>%
+    dplyr::distinct(chi, .keep_all = TRUE)
 
   # Match on client variables by chi
   individual_file <- individual_file %>%
     dplyr::left_join(
       join_client_demog,
-      by = "chi"
+      by = "chi",
+      relationship = "one-to-one"
     ) %>%
     dplyr::select(!c("sending_location", "social_care_id", "sc_latest_submission"))
 
diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R
index 2c93f243e..a9d193465 100644
--- a/R/process_tests_individual_file.R
+++ b/R/process_tests_individual_file.R
@@ -64,9 +64,8 @@ produce_individual_file_tests <- function(data) {
     create_demog_test_flags() %>%
     create_hb_test_flags(.data$hbrescode) %>%
     create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>%
-    create_hscp_test_flags(.data$hscp2018) %>%
     # keep variables for comparison
-    dplyr::select("valid_chi":dplyr::last_col()) %>%
+    dplyr::select(c("valid_chi":dplyr::last_col())) %>%
     # use function to sum new test flags
     calculate_measures(measure = "sum")
 
@@ -86,7 +85,9 @@ produce_individual_file_tests <- function(data) {
 
   min_max_measures <- data %>%
     calculate_measures(
-      vars = "health_net_cost",
+      vars = c(
+        "health_net_cost"
+      ),
       measure = "min-max"
     )
 
@@ -99,11 +100,18 @@ produce_individual_file_tests <- function(data) {
       measure = "sum"
     )
 
+  dup_chi <- data.frame(
+    measure = "duplicated chi number",
+    value = duplicated(data$chi) %>%
+      sum() %>% as.integer()
+  )
+
   join_output <- list(
     test_flags,
     all_measures,
     min_max_measures,
-    sum_measures
+    sum_measures,
+    dup_chi
   ) %>%
     purrr::reduce(dplyr::full_join, by = c("measure", "value"))
 

From 19779e3fd6c4e9265661f617103e7f8dda044444 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Mon, 14 Aug 2023 15:17:35 +0100
Subject: [PATCH 02/26] Update NSU code for new 22/23 cohort (#784)

Update `check_year_valid` for NSUs
---
 R/check_year_valid.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/check_year_valid.R b/R/check_year_valid.R
index d170cd5b5..1361eb47e 100644
--- a/R/check_year_valid.R
+++ b/R/check_year_valid.R
@@ -42,7 +42,7 @@ check_year_valid <- function(
     return(FALSE)
   } else if (year >= "2122" && type %in% c("CMH", "DN")) {
     return(FALSE)
-  } else if (year >= "2223" && type %in% "NSU") {
+  } else if (year >= "2324" && type %in% "NSU") {
     return(FALSE)
   } else if (year >= "2324" && type %in% c("SPARRA", "HHG")) {
     return(FALSE)

From 7e3215da42b2c92f395de458500deb23b1952d54 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Mon, 14 Aug 2023 15:21:05 +0100
Subject: [PATCH 03/26] Amend `get_boxi_extract_path` function for archiving DN
 and CMH data  (#785)

* Update `get_boxi_extract_path` for DN/CMH data

* Remove extra function

* [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/5856792420/attempts/1
Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/785#issuecomment-1677400900

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>

---------

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 .github/actions/spelling/expect.txt |  1 +
 R/get_boxi_extract_path.R           | 11 ++++++++---
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 87300a6a1..51c0a6c6b 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -91,6 +91,7 @@ hjust
 hms
 homecare
 homev
+hscdiip
 hscp
 hscpnames
 IDPC
diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R
index 60dd7857a..6096525e5 100644
--- a/R/get_boxi_extract_path.R
+++ b/R/get_boxi_extract_path.R
@@ -29,7 +29,11 @@ get_boxi_extract_path <- function(
     )) {
   type <- match.arg(type)
 
-  year_dir <- get_year_dir(year, extracts_dir = TRUE)
+  if (type %in% c("DN", "CMH")) {
+    dir <- fs::path(get_slf_dir(), "Archived_data")
+  } else {
+    dir <- get_year_dir(year, extracts_dir = TRUE)
+  }
 
   if (!check_year_valid(year, type)) {
     return(get_dummy_boxi_extract_path())
@@ -53,11 +57,12 @@ get_boxi_extract_path <- function(
   )
 
   boxi_extract_path_csv_gz <- fs::path(
-    year_dir,
+    dir,
     stringr::str_glue("{file_name}-20{year}.csv.gz")
   )
+
   boxi_extract_path_csv <- fs::path(
-    year_dir,
+    dir,
     stringr::str_glue("{file_name}-20{year}.csv")
   )
 

From 612e0698cc2401faa040a9607062f97cb5d9207b Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Mon, 14 Aug 2023 15:21:58 +0100
Subject: [PATCH 04/26] Fix increase in total preventable beddays (#779)

* further obsolete code change

* fix the preventable_beddays

Co-authored-by: James McMahon <james.mcmahon@phs.scot>

---------

Co-authored-by: James McMahon <james.mcmahon@phs.scot>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/aggregate_by_chi.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
index 99da03ba8..5e7ebc7c0 100644
--- a/R/aggregate_by_chi.R
+++ b/R/aggregate_by_chi.R
@@ -126,9 +126,9 @@ aggregate_by_chi <- function(episode_file) {
   individual_file_cols6 <- episode_file[,
     .(
       preventable_beddays = ifelse(
-        max(cij_ppa, na.rm = TRUE),
-        max(cij_end_date) - min(cij_start_date),
-        NA_real_
+        any(cij_ppa, na.rm = TRUE),
+        as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))),
+        NA_integer_
       )
     ),
     # cij_marker has been renamed as cij_total

From 51a0b0590a554613c6e56001eb326cf6600977c3 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Tue, 15 Aug 2023 15:17:33 +0100
Subject: [PATCH 05/26] fix warning on `:=` (#797)

* fix warning on `:=`

* Update R/aggregate_by_chi.R

Co-authored-by: James McMahon <james.mcmahon@phs.scot>

* Style code

---------

Co-authored-by: James McMahon <james.mcmahon@phs.scot>
Co-authored-by: lizihao-anu <lizihao-anu@users.noreply.github.com>
---
 R/aggregate_by_chi.R | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
index 5e7ebc7c0..db12f7a9e 100644
--- a/R/aggregate_by_chi.R
+++ b/R/aggregate_by_chi.R
@@ -203,12 +203,19 @@ aggregate_ch_episodes <- function(episode_file) {
   data.table::setDT(episode_file)
 
   # Perform grouping and aggregation
-  episode_file <- episode_file[, `:=`(
-    ch_no_cost = max(ch_no_cost),
-    ch_ep_start = min(record_keydate1),
-    ch_ep_end = max(ch_ep_end),
-    ch_cost_per_day = mean(ch_cost_per_day)
-  ), by = c("chi", "ch_chi_cis")]
+  episode_file[, c(
+    "ch_no_cost",
+    "ch_ep_start",
+    "ch_ep_end",
+    "ch_cost_per_day"
+  ) := list(
+    max(ch_no_cost),
+    min(record_keydate1),
+    max(ch_ep_end),
+    mean(ch_cost_per_day)
+  ),
+  by = c("chi", "ch_chi_cis")
+  ]
 
   # Convert back to tibble if needed
   episode_file <- tibble::as_tibble(episode_file)

From 0f25195e234fac4fe33d677e240d798dc3e7a76c Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 14 Aug 2023 15:28:00 +0100
Subject: [PATCH 06/26] Add 2324 targets/workbench job file

---
 run_targets_2324.R | 4 ++++
 1 file changed, 4 insertions(+)
 create mode 100644 run_targets_2324.R

diff --git a/run_targets_2324.R b/run_targets_2324.R
new file mode 100644
index 000000000..b875984f4
--- /dev/null
+++ b/run_targets_2324.R
@@ -0,0 +1,4 @@
+library(targets)
+tar_make_future(
+  names = (targets::contains("2324"))
+)

From c4a54f84a95e2a691085c98e366b225d330bac18 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 14 Aug 2023 15:39:00 +0100
Subject: [PATCH 07/26] Use `get_source_extract_path` in homelessness (#796)

This was already set up, just not used for some reason. Note that this will switch from using a `.rds` to `.parquet` (unless you do `get_source_extract_path(year, "Homelessness", ext = "rds")`).

Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/get_source_extract_path.R      | 49 ++++++++++++++++++--------------
 R/process_extract_homelessness.R | 13 +++++----
 2 files changed, 35 insertions(+), 27 deletions(-)

diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R
index 1816ceb25..4cb5eef44 100644
--- a/R/get_source_extract_path.R
+++ b/R/get_source_extract_path.R
@@ -10,27 +10,34 @@
 #' @export
 #'
 #' @family extract file paths
-get_source_extract_path <- function(year,
-                                    type = c(
-                                      "Acute",
-                                      "AE",
-                                      "AT",
-                                      "CH",
-                                      "Client",
-                                      "CMH",
-                                      "DD",
-                                      "Deaths",
-                                      "DN",
-                                      "GPOoH",
-                                      "HC",
-                                      "Homelessness",
-                                      "Maternity",
-                                      "MH",
-                                      "Outpatients",
-                                      "PIS",
-                                      "SDS"
-                                    ),
-                                    ...) {
+get_source_extract_path <- function(
+    year,
+    type = c(
+      "Acute",
+      "AE",
+      "AT",
+      "CH",
+      "Client",
+      "CMH",
+      "DD",
+      "Deaths",
+      "DN",
+      "GPOoH",
+      "HC",
+      "Homelessness",
+      "Maternity",
+      "MH",
+      "Outpatients",
+      "PIS",
+      "SDS"
+    ),
+    ...) {
+  if (year %in% type) {
+    cli::cli_abort("{.val {year}} was supplied to the {.arg year} argument.")
+  }
+
+  year <- check_year_format(year)
+
   type <- match.arg(type)
 
   if (!check_year_valid(year, type)) {
diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index a16c9a57b..a900cff9a 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -146,13 +146,14 @@ process_extract_homelessness <- function(
     )
 
   if (write_to_disk) {
-    final_data %>%
-      write_file(get_file_path(
-        get_year_dir(year),
-        stringr::str_glue("homelessness_for_source-20{year}"),
-        ext = "rds",
+    write_file(
+      final_data,
+      get_source_extract_path(
+        year = year,
+        type = "Homelessness",
         check_mode = "write"
-      ))
+      )
+    )
   }
 
   return(final_data)

From e36c97c17a711510605c15a8b7a21cd045bb8fdc Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 14 Aug 2023 15:41:56 +0100
Subject: [PATCH 08/26] Correct tests for NSU

---
 tests/testthat/test-check_year_valid.R | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/testthat/test-check_year_valid.R b/tests/testthat/test-check_year_valid.R
index ca0738c89..eda74dbdf 100644
--- a/tests/testthat/test-check_year_valid.R
+++ b/tests/testthat/test-check_year_valid.R
@@ -49,7 +49,8 @@ test_that("Check year valid works for specific datasets ", {
   expect_true(check_year_valid("1920", "NSU"))
   expect_true(check_year_valid("2021", "NSU"))
   expect_true(check_year_valid("2122", "NSU"))
-  expect_false(check_year_valid("2223", "NSU"))
+  expect_true(check_year_valid("2223", "NSU"))
+  expect_false(check_year_valid("2324", "NSU"))
 
   # SPARRA
   expect_false(check_year_valid("1415", "SPARRA"))

From 62a41740b01c6bc266b3842684dc5b77608aa6a5 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 15 Aug 2023 15:41:40 +0100
Subject: [PATCH 09/26] Update script for extracting NSU from SMRA space

---
 .../All_years/02-Lookups/99_extract_NSU_data.R       | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
index d33dfbe49..54cc316e8 100644
--- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
+++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
@@ -6,7 +6,7 @@ library(glue)
 nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU")
 
 # Change the year
-fin_year <- "1516"
+fin_year <- "2324"
 
 db_connection <- odbc::dbConnect(
   odbc::odbc(),
@@ -16,7 +16,7 @@ db_connection <- odbc::dbConnect(
 )
 
 # Check the table name and change if required.
-table <- dbplyr::in_schema("ROBERM18", "FINAL_2")
+table <- dbplyr::in_schema("ROBERM18", "FINAL_1")
 
 # Read NSU data
 nsu_data <-
@@ -35,9 +35,11 @@ nsu_data <-
   collect()
 
 # Write out the data
-file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.zsav"))
+file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.parquet"))
 # This will archive the existing file for later comparison
 if (file_exists(file_path)) {
-  file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.zsav")))
+  file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.parquet")))
 }
-write_sav(nsu_data, file_path, compress = TRUE)
+
+nsu_data %>%
+arrow::write_parquet(file_path, compression = "zstd", compression_level = 10)

From d310dfd3bc1f586001b4d09bcec358d5b458ca09 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Tue, 15 Aug 2023 15:47:29 +0100
Subject: [PATCH 10/26] Update year in 99_NSU extract script

---
 _SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
index 54cc316e8..8bbd0513c 100644
--- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
+++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
@@ -6,7 +6,7 @@ library(glue)
 nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU")
 
 # Change the year
-fin_year <- "2324"
+fin_year <- "2223"
 
 db_connection <- odbc::dbConnect(
   odbc::odbc(),

From 51c4a637d7701c1061af67964adda0d5519cfa47 Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Tue, 15 Aug 2023 16:04:23 +0100
Subject: [PATCH 11/26] Update news for September 23 update (#811)

* Update News for March and June updates

* Update release date

* WIP - update news for Sep update

* Update NEWS.md

Fix some typos / grammar

---------

Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 NEWS.md | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index cbcb62079..2a3453eea 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,6 +1,48 @@
-# March 2023 Update - Unreleased
+# September 2023 Update - Unreleased
+* Update of 2017/18 onwards to include bug fixes within the files. 
+* New 2023/24 files. 
+* New NSU cohort for 2022/23 file. 
+* Re addition of:
+  * HRIs in individual file.
+  * Homelessness Flags.
+* Bug fixes: 
+  * Blank `datazone` in A&E. This has been fixed and was due to PC8 postcode format matching onto SLF pc lookup. 
+  * Large increase in preventable beddays. This was caused due to an SPSS vs R logic difference. Uses SPSS logic which 
+    brings the difference down to `3.3%`. 
+  * Issue with `locality` which showed `locality` in each row instead of its true `locality`. This has now been fixed. 
+  * Duplicated CHI in the individual file. The issue was identified when trying to include HRIs. This has now been corrected. 
+* Internal changes to SLF development: 
+  * `DN` and `CMH` data are now archived in an HSCDIIP folder as the BOXI datamart is now closed down for these. Function `get_boxi_extract_path` has been updated to reflect this. 
+  * Tests updated to include `HSCP`count. 
+  * Tests created for `Delayed Discharges` extract and `Social care Client lookup`.
 
 
+# June 2023 Update - Released 24-Jul-2023
+* 2011/12 -> 2013/14 – These files have not been altered, other than to make them available in a new file type (parquet).
+* 2017/18 – These files have been recreated using our new R pipeline, but the data has not changed. We did this so that we would have a good comparator file.
+* 2018/19 -> 2022/23 – These files have been recreated using the R pipeline and are also using updated data (as in a ‘normal’ update).
+* Files changed into parquet format. 
+* SLFhelper updated. 
+* Removal of `keydate1_dateformat` and `keydate2_dateformat`.
+* `dd_responsible_lca` – This variable now uses CA2019 codes instead of the 2-digit ‘old’ LCA code.
+* Preventable beddays - not able to calculate these correctly. * Death fixes not included.
+* Variables not ordered in R like they used to be in SPSS.
+* End of HHG.
+* New variable `ch_postcode`.
+* rename of variables `cost_total_net_incdnas`, `ooh_outcome.1`, `ooh_outcome.2`, `ooh_outcome.3`, `ooh_outcome.4`, `totalnodncontacts`. 
+* HRI's not included. 
+* Homelessness flags not included. 
+* Keep_population flag not included. 
+
+
+# March 2023 Update - Released 10-Mar-2023
+* 2021/22 episode and individual files refreshed with updated activity.
+* 2022/23 file updated and contains data up to the end of Q3. 
+* Social care data is available for 2022/23. 
+* Typo in the variable name `ooh_covid_assessment`
+* Next update in May as a test run in R but won't be released. 
+* Next release in June. 
+
 # December 2022 Update - Released 07-Dec-2022
 * Now using the 2022v2 Scottish Postcode Directory.
 * Now using the 2020 Urban Rural classifications (instead of the older 2016 ones), this means variables such as `URx_2016` will now be called `URx_2020`.

From ee3943ffcc7ec9cb5932de6f4bb8f9ca5ba7423c Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Wed, 16 Aug 2023 09:01:09 +0100
Subject: [PATCH 12/26] Apply styling

---
 _SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
index 8bbd0513c..ea6f81bfc 100644
--- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
+++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R
@@ -42,4 +42,4 @@ if (file_exists(file_path)) {
 }
 
 nsu_data %>%
-arrow::write_parquet(file_path, compression = "zstd", compression_level = 10)
+  arrow::write_parquet(file_path, compression = "zstd", compression_level = 10)

From 61283cc69f072a17cdad8f09d053626b2f2ad31f Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Wed, 16 Aug 2023 09:16:29 +0100
Subject: [PATCH 13/26] Fix issue with `case_match` types (#810)

* Fix issue with `case_match` types

It seems that `case_match()` is stricter about types than `case_when()`. See the below code:

```r
library(dplyr)
# Breaks
mutate(starwars,
  new_height = case_when(
    height == "172" ~ "170"),
  new_height2 = case_match(
    height,
    "172" ~ "170"
  ),
  .after = "height"
)

# Works
mutate(starwars,
  new_height = case_when(
      height == "172" ~ "170"),
  new_height2 = case_match(
    height,
    172L ~ "170"
  ),
  .after = "height"
)
```

Since `sending_location` is an integer, the LHS of `case_match` must be numeric. It was slightly incorrect previously but `case_when` let us get away with it!

I also updated and added to the tests.

* Style code

* Style code

---------

Co-authored-by: Moohan <Moohan@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
---
 R/convert_sending_location_to_lca.R           | 66 ++++++++--------
 .../_snaps/convert_sending_location_to_lca.md |  8 +-
 .../test-convert_sending_location_to_lca.R    | 79 +++++++++++--------
 3 files changed, 82 insertions(+), 71 deletions(-)

diff --git a/R/convert_sending_location_to_lca.R b/R/convert_sending_location_to_lca.R
index 6e9c577c0..d0d79dd39 100644
--- a/R/convert_sending_location_to_lca.R
+++ b/R/convert_sending_location_to_lca.R
@@ -18,38 +18,40 @@
 convert_sending_location_to_lca <- function(sending_location) {
   lca <- dplyr::case_match(
     sending_location,
-    "100" ~ "01", # Aberdeen City
-    "110" ~ "02", # Aberdeenshire
-    "120" ~ "03", # Angus
-    "130" ~ "04", # Argyll and Bute
-    "355" ~ "05", # Scottish Borders
-    "150" ~ "06", # Clackmannanshire
-    "395" ~ "07", # West Dumbartonshire
-    "170" ~ "08", # Dumfries and Galloway
-    "180" ~ "09", # Dundee City
-    "190" ~ "10", # East Ayrshire
-    "200" ~ "11", # East Dunbartonshire
-    "210" ~ "12", # East Lothian
-    "220" ~ "13", # East Renfrewshire
-    "230" ~ "14", # City of Edinburgh
-    "240" ~ "15", # Falkirk
-    "250" ~ "16", # Fife
-    "260" ~ "17", # Glasgow City
-    "270" ~ "18", # Highland
-    "280" ~ "19", # Inverclyde
-    "290" ~ "20", # Midlothian
-    "300" ~ "21", # Moray
-    "310" ~ "22", # North Ayrshire
-    "320" ~ "23", # North Lanarkshire
-    "330" ~ "24", # Orkney Islands
-    "340" ~ "25", # Perth and Kinross
-    "350" ~ "26", # Renfrewshire
-    "360" ~ "27", # Shetland Islands
-    "370" ~ "28", # South Ayrshire
-    "380" ~ "29", # South Lanarkshire
-    "390" ~ "30", # Stirling
-    "400" ~ "31", # West Lothian
-    "235" ~ "32" # Na_h_Eileanan_Siar
+    100L ~ "01", # Aberdeen City
+    110L ~ "02", # Aberdeenshire
+    120L ~ "03", # Angus
+    130L ~ "04", # Argyll and Bute
+    355L ~ "05", # Scottish Borders
+    150L ~ "06", # Clackmannanshire
+    395L ~ "07", # West Dunbartonshire
+    170L ~ "08", # Dumfries and Galloway
+    180L ~ "09", # Dundee City
+    190L ~ "10", # East Ayrshire
+    200L ~ "11", # East Dunbartonshire
+    210L ~ "12", # East Lothian
+    220L ~ "13", # East Renfrewshire
+    230L ~ "14", # City of Edinburgh
+    240L ~ "15", # Falkirk
+    250L ~ "16", # Fife
+    260L ~ "17", # Glasgow City
+    270L ~ "18", # Highland
+    280L ~ "19", # Inverclyde
+    290L ~ "20", # Midlothian
+    300L ~ "21", # Moray
+    310L ~ "22", # North Ayrshire
+    320L ~ "23", # North Lanarkshire
+    330L ~ "24", # Orkney Islands
+    340L ~ "25", # Perth and Kinross
+    350L ~ "26", # Renfrewshire
+    360L ~ "27", # Shetland Islands
+    370L ~ "28", # South Ayrshire
+    380L ~ "29", # South Lanarkshire
+    390L ~ "30", # Stirling
+    400L ~ "31", # West Lothian
+    235L ~ "32", # Na_h_Eileanan_Siar
+    .default = NA_character_
   )
+
   return(lca)
 }
diff --git a/tests/testthat/_snaps/convert_sending_location_to_lca.md b/tests/testthat/_snaps/convert_sending_location_to_lca.md
index 464ff2d37..1fa02dc14 100644
--- a/tests/testthat/_snaps/convert_sending_location_to_lca.md
+++ b/tests/testthat/_snaps/convert_sending_location_to_lca.md
@@ -1,10 +1,10 @@
 # Can convert a SC sending location to lca code
 
     Code
-      convert_sending_location_to_lca(c("100", "110", "120", "130", "355", "150",
-        "395", "170", "180", "190", "200", "210", "220", "230", "240", "250", "260",
-        "270", "280", "290", "300", "310", "320", "330", "340", "350", "360", "370",
-        "380", "390", "400", "235", "999", "0", NA))
+      convert_sending_location_to_lca(c(100L, 110L, 120L, 130L, 355L, 150L, 395L,
+        170L, 180L, 190L, 200L, 210L, 220L, 230L, 240L, 250L, 260L, 270L, 280L, 290L,
+        300L, 310L, 320L, 330L, 340L, 350L, 360L, 370L, 380L, 390L, 400L, 235L, 999L,
+        0L, NA_integer_))
     Output
        [1] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15"
       [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30"
diff --git a/tests/testthat/test-convert_sending_location_to_lca.R b/tests/testthat/test-convert_sending_location_to_lca.R
index 0bc67668e..eb66802a6 100644
--- a/tests/testthat/test-convert_sending_location_to_lca.R
+++ b/tests/testthat/test-convert_sending_location_to_lca.R
@@ -2,42 +2,51 @@ test_that("Can convert a SC sending location to lca code", {
   expect_snapshot(
     convert_sending_location_to_lca(
       c(
-        "100",
-        "110",
-        "120",
-        "130",
-        "355",
-        "150",
-        "395",
-        "170",
-        "180",
-        "190",
-        "200",
-        "210",
-        "220",
-        "230",
-        "240",
-        "250",
-        "260",
-        "270",
-        "280",
-        "290",
-        "300",
-        "310",
-        "320",
-        "330",
-        "340",
-        "350",
-        "360",
-        "370",
-        "380",
-        "390",
-        "400",
-        "235",
-        "999",
-        "0",
-        NA
+        100L,
+        110L,
+        120L,
+        130L,
+        355L,
+        150L,
+        395L,
+        170L,
+        180L,
+        190L,
+        200L,
+        210L,
+        220L,
+        230L,
+        240L,
+        250L,
+        260L,
+        270L,
+        280L,
+        290L,
+        300L,
+        310L,
+        320L,
+        330L,
+        340L,
+        350L,
+        360L,
+        370L,
+        380L,
+        390L,
+        400L,
+        235L,
+        999L,
+        0L,
+        NA_integer_
       )
     )
   )
 })
+
+test_that("Errors on unexpected input", {
+  expect_error(
+    convert_sending_location_to_lca("100")
+  )
+  expect_error(
+    convert_sending_location_to_lca(c("100", 99L))
+  )
+})

From c88562cad41b619a7cc071523d5038f53ff6c57d Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Wed, 16 Aug 2023 10:32:52 +0100
Subject: [PATCH 14/26] Bug - Outpatients tests failing due to missing HSCP
 (#816)

* Update `produce_source_extract_tests`

* Update outpatients tests with hscp_var = FALSE

* Revert "Style code"

This reverts commit 8e73d4abc042986a76754c2acc1d197292a1c245.

* Style code

* simplify code

* Update documentation

* Rename `hscp_var` to `add_hscp_count`

* Update documentation

---------

Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: James McMahon <james.mcmahon@phs.scot>
Co-authored-by: Moohan <Moohan@users.noreply.github.com>
---
 R/process_tests_outpatients.R       |  6 ++++--
 R/produce_source_extract_tests.R    | 13 ++++++++++---
 man/produce_source_extract_tests.Rd |  5 ++++-
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R
index f8a7a6a2e..5ab3e82db 100644
--- a/R/process_tests_outpatients.R
+++ b/R/process_tests_outpatients.R
@@ -12,11 +12,13 @@ process_tests_outpatients <- function(data, year) {
   comparison <- produce_test_comparison(
     old_data = produce_source_extract_tests(old_data,
       sum_mean_vars = "cost",
-      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net")
+      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"),
+      add_hscp_count = FALSE
     ),
     new_data = produce_source_extract_tests(data,
       sum_mean_vars = "cost",
-      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net")
+      max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"),
+      add_hscp_count = FALSE
     )
   ) %>%
     write_tests_xlsx(sheet_name = "00B", year)
diff --git a/R/produce_source_extract_tests.R b/R/produce_source_extract_tests.R
index 10f842fc6..7f8feda92 100644
--- a/R/produce_source_extract_tests.R
+++ b/R/produce_source_extract_tests.R
@@ -13,6 +13,7 @@
 #' (data is from [get_source_extract_path()])
 #' @param sum_mean_vars variables used when selecting 'all' measures from [calculate_measures()]
 #' @param max_min_vars variables used when selecting 'min-max' from [calculate_measures()]
+#' @param add_hscp_count  Default set to TRUE. For use where `hscp variable` is not available, specify FALSE.
 #'
 #' @return a dataframe with a count of each flag
 #' from [calculate_measures()]
@@ -28,13 +29,19 @@ produce_source_extract_tests <- function(data,
                                          max_min_vars = c(
                                            "record_keydate1", "record_keydate2",
                                            "cost_total_net", "yearstay"
-                                         )) {
+                                         ),
+                                         add_hscp_count = TRUE) {
   test_flags <- data %>%
     # use functions to create HB and partnership flags
     create_demog_test_flags() %>%
     create_hb_test_flags(.data$hbtreatcode) %>%
-    create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>%
-    create_hscp_test_flags(.data$hscp) %>%
+    create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net)
+
+  if (add_hscp_count) {
+    test_flags <- create_hscp_test_flags(test_flags, .data$hscp)
+  }
+
+  test_flags <- test_flags %>%
     # keep variables for comparison
     dplyr::select("valid_chi":dplyr::last_col()) %>%
     # use function to sum new test flags
diff --git a/man/produce_source_extract_tests.Rd b/man/produce_source_extract_tests.Rd
index 679132127..97984103a 100644
--- a/man/produce_source_extract_tests.Rd
+++ b/man/produce_source_extract_tests.Rd
@@ -7,7 +7,8 @@
 produce_source_extract_tests(
   data,
   sum_mean_vars = c("beddays", "cost", "yearstay"),
-  max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay")
+  max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay"),
+  add_hscp_count = TRUE
 )
 }
 \arguments{
@@ -17,6 +18,8 @@ produce_source_extract_tests(
 \item{sum_mean_vars}{variables used when selecting 'all' measures from \code{\link[=calculate_measures]{calculate_measures()}}}
 
 \item{max_min_vars}{variables used when selecting 'min-max' from \code{\link[=calculate_measures]{calculate_measures()}}}
+
+\item{add_hscp_count}{Default set to TRUE. For use where \verb{hscp variable} is not available, specify FALSE.}
 }
 \value{
 a dataframe with a count of each flag

From 1e06921a5a0c484fb84f416a28b5a1b9c37cee66 Mon Sep 17 00:00:00 2001
From: Zihao Li <zihao.li@phs.scot>
Date: Wed, 16 Aug 2023 10:53:57 +0100
Subject: [PATCH 15/26] fix read_sc_all_alarms_telecare with incorrect format
 in period (#814)

* fix read_sc_all_alarms_telecare with the incorrect format in period

---------

Co-authored-by: lizihao-anu <lizihao-anu@users.noreply.github.com>
Co-authored-by: James McMahon <james.mcmahon@phs.scot>
---
 .github/actions/spelling/expect.txt |  1 +
 R/read_sc_all_alarms_telecare.R     | 16 +++++++---------
 2 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 51c0a6c6b..464adca0e 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -108,6 +108,7 @@ keyring
 keytime
 keytimex
 kis
+lazydt
 lgl
 los
 ltc
diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R
index ac3ac206d..2c7bd03db 100644
--- a/R/read_sc_all_alarms_telecare.R
+++ b/R/read_sc_all_alarms_telecare.R
@@ -22,21 +22,19 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
       "service_start_date",
       "service_end_date"
     ) %>%
-    # fix bad period (2017, 2020 & 2021)
+    dplyr::collect() %>%
+    # fix bad period (2017, 2020, 2021, and so on)
     dplyr::mutate(
-      period = dplyr::case_match(
-        .data$period,
-        "2017" ~ "2017Q4",
-        "2020" ~ "2020Q4",
-        "2021" ~ "2021Q4",
-        .default = .data$period
+      period = dplyr::if_else(
+        grepl("\\d{4}$", .data$period),
+        paste0(.data$period, "Q4"),
+        .data$period
       )
     ) %>%
     dplyr::mutate(
       dplyr::across(c("sending_location", "service_type"), ~ as.integer(.x))
     ) %>%
-    dplyr::arrange(.data$sending_location, .data$social_care_id) %>%
-    dplyr::collect()
+    dplyr::arrange(.data$sending_location, .data$social_care_id)
 
   return(at_full_data)
 }

From 2c6853c255b3584830660cd37d0d23f617349d17 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Wed, 16 Aug 2023 11:14:25 +0100
Subject: [PATCH 16/26] Fix `convert_sending_location_to_lca` example

---
 R/convert_sending_location_to_lca.R    | 2 +-
 man/convert_sending_location_to_lca.Rd | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/convert_sending_location_to_lca.R b/R/convert_sending_location_to_lca.R
index d0d79dd39..ff7e51db1 100644
--- a/R/convert_sending_location_to_lca.R
+++ b/R/convert_sending_location_to_lca.R
@@ -9,7 +9,7 @@
 #' @export
 #'
 #' @examples
-#' sending_location <- c("100", "120")
+#' sending_location <- c(100, 120)
 #' convert_sending_location_to_lca(sending_location)
 #'
 #' @family code functions
diff --git a/man/convert_sending_location_to_lca.Rd b/man/convert_sending_location_to_lca.Rd
index 8c7a29088..78bf475ba 100644
--- a/man/convert_sending_location_to_lca.Rd
+++ b/man/convert_sending_location_to_lca.Rd
@@ -17,7 +17,7 @@ Convert Social Care Sending Location Codes into the
 Local Council Authority Codes.
 }
 \examples{
-sending_location <- c("100", "120")
+sending_location <- c(100, 120)
 convert_sending_location_to_lca(sending_location)
 
 }

From ff4d35f48c8c2076a98d748912492573e88caad2 Mon Sep 17 00:00:00 2001
From: Jennifer Thom <jennifer.thom@phs.scot>
Date: Fri, 1 Sep 2023 12:07:43 +0100
Subject: [PATCH 17/26] Use `col_select` instead of `columns` in tests

---
 R/get_existing_data_for_tests.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/get_existing_data_for_tests.R b/R/get_existing_data_for_tests.R
index 91fa2293e..1dd0b350e 100644
--- a/R/get_existing_data_for_tests.R
+++ b/R/get_existing_data_for_tests.R
@@ -46,7 +46,7 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") {
     slf_data <- suppressMessages(slfhelper::read_slf_episode(
       year = year,
       recids = recids,
-      columns = variable_names
+      col_select = variable_names
     ))
     if ("hscp2018" %in% variable_names) {
       slf_data <- dplyr::rename(slf_data, "hscp" = "hscp2018")
@@ -54,7 +54,7 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") {
   } else {
     slf_data <- suppressMessages(slfhelper::read_slf_individual(
       year = year,
-      columns = variable_names
+      col_select = variable_names
     ))
   }
 

From 53ede24805c28010621d178f5ee7b5261625c1f7 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 11:26:10 +0100
Subject: [PATCH 18/26] Add tests for `compute_mid_year_age` (#809)

* Add tests for `compute_mid_year_age`

* Remove redundant code

* Update documentation

---------

Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/compute_mid_year_age.R                   |  2 +-
 man/read_file.Rd                           |  2 +-
 tests/testthat/test-compute_mid_year_age.R | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 tests/testthat/test-compute_mid_year_age.R

diff --git a/R/compute_mid_year_age.R b/R/compute_mid_year_age.R
index 01bfaf5d5..0e2483cf7 100644
--- a/R/compute_mid_year_age.R
+++ b/R/compute_mid_year_age.R
@@ -20,7 +20,7 @@
 compute_mid_year_age <- function(fyyear, dob) {
   age_intervals <- lubridate::interval(
     start = dob,
-    end = as.Date(midpoint_fy(fyyear))
+    end = midpoint_fy(fyyear)
   )
 
   ages <- lubridate::as.period(age_intervals)$year
diff --git a/man/read_file.Rd b/man/read_file.Rd
index b8231218f..1ef351342 100644
--- a/man/read_file.Rd
+++ b/man/read_file.Rd
@@ -14,7 +14,7 @@ read_file(path, col_select = NULL, as_data_frame = TRUE, ...)
 \link[tidyselect:eval_select]{tidy selection specification}
 of columns, as used in \code{dplyr::select()}.}
 
-\item{as_data_frame}{Should the function return a \code{data.frame} (default) or
+\item{as_data_frame}{Should the function return a \code{tibble} (default) or
 an Arrow \link[arrow]{Table}?}
 
 \item{...}{Addition arguments passed to the relevant function.}
diff --git a/tests/testthat/test-compute_mid_year_age.R b/tests/testthat/test-compute_mid_year_age.R
new file mode 100644
index 000000000..a4a542b9e
--- /dev/null
+++ b/tests/testthat/test-compute_mid_year_age.R
@@ -0,0 +1,16 @@
+test_that("Accurately compute mid year age", {
+  expect_equal(
+    compute_mid_year_age("1718", lubridate::make_date("2000")),
+    phsmethods::age_calculate(
+      lubridate::make_date("2000"),
+      lubridate::make_date("2017", 9L, 30L)
+    )
+  )
+  expect_equal(
+    compute_mid_year_age("2021", lubridate::make_date("1999") + 1:1000),
+    phsmethods::age_calculate(
+      lubridate::make_date("1999") + 1:1000,
+      lubridate::make_date("2020", 9L, 30L)
+    )
+  )
+})

From 04c2685d25855a1941f7dfc7d0209fd2a4ee1300 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 11:37:04 +0100
Subject: [PATCH 19/26] Add a new function to set up keyring (#800)

* Add a new function to set up keyring

I've tested this by deleting my `.Renviron` and deleting my keyring `keyring::keyring_delete("createslf")` and it seems to work. Would be great to have someone with an existing set-up (Jen) test it, and to have someone who doesn't have it set up to test it.

The code looks complicated but I've just tried to catch every scenario, so the process should be smooth and clear (from the user's point of view).

I've also expanded the code relating to the username, which will now hopefully work in more cases.

* [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/5824423711/attempts/1
Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/800#issuecomment-1673658357

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>

* Update documentation

---------

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 .github/actions/spelling/expect.txt |   1 +
 NAMESPACE                           |   1 +
 R/get_connection_PHS_database.R     | 247 ++++++++++++++++++++++++----
 man/phs_db_connection.Rd            |  18 +-
 man/setup_keyring.Rd                |  44 +++++
 5 files changed, 272 insertions(+), 39 deletions(-)
 create mode 100644 man/setup_keyring.Rd

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 464adca0e..473e0304d 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -168,6 +168,7 @@ readxl
 reasonwait
 recid
 refsource
+renviron
 rlang
 rmarkdown
 roxygen
diff --git a/NAMESPACE b/NAMESPACE
index 6c4f3cd52..678d7a53f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -156,6 +156,7 @@ export(read_sc_all_care_home)
 export(read_sc_all_home_care)
 export(read_sc_all_sds)
 export(run_episode_file)
+export(setup_keyring)
 export(start_fy)
 export(start_fy_quarter)
 export(start_next_fy_quarter)
diff --git a/R/get_connection_PHS_database.R b/R/get_connection_PHS_database.R
index 5973e0003..0a528f45b 100644
--- a/R/get_connection_PHS_database.R
+++ b/R/get_connection_PHS_database.R
@@ -1,22 +1,32 @@
 #' Open a connection to a PHS database
 #'
-#' @description Opens a connection to PHS database to allow data to be collected
+#' @description Opens a connection to PHS database given a Data Source Name
+#' (DSN) it will try to get the username, asking for input if in an interactive
+#' session. It will also use [keyring][keyring::keyring-package] to find
+#' an existing keyring called 'createslf' which should contain a `db_password`
+#' key with the users database password.
 #'
-#' @param dsn The Data Source Name passed on to `odbc::dbconnect`
-#' the dsn must be setup first. e.g. SMRA or DVPROD
+#' @param dsn The Data Source Name (DSN) passed on to [odbc::dbConnect()]
+#' the DSN must be set up first. e.g. `SMRA` or `DVPROD`
 #' @param username The username to use for authentication,
-#' if not supplied it first will check the environment variable
-#' and finally ask the user for input.
+#' if not supplied it will try to find it automatically and if possible ask the
+#' user for input.
 #'
-#' @return a connection to the specified dsn
+#' @return a connection to the specified Data Source.
 #' @export
-#'
-phs_db_connection <- function(dsn, username = Sys.getenv("USER")) {
-  # Collect username from the environment
-  username <- Sys.getenv("USER")
+phs_db_connection <- function(dsn, username) {
+  if (missing(username)) {
+    # Collect username if possible
+    username <- dplyr::case_when(
+      Sys.info()["USER"] != "unknown" ~ Sys.info()["USER"],
+      Sys.getenv("USER") != "" ~ Sys.getenv("USER"),
+      system2("whoami", stdout = TRUE) != "" ~ system2("whoami", stdout = TRUE),
+      .default = NA
+    )
+  }
 
-  # Check the username is not empty and take input if not
-  if (is.na(username) || username == "") {
+  # If the username is missing try to get input from the user
+  if (is.na(username)) {
     if (rlang::is_interactive()) {
       username <- rstudioapi::showPrompt(
         title = "Username",
@@ -24,46 +34,219 @@ phs_db_connection <- function(dsn, username = Sys.getenv("USER")) {
         default = ""
       )
     } else {
-      cli::cli_abort("No username found, you should supply one with {.arg username}")
+      cli::cli_abort(
+        c(
+          "x" = "No username found, you can use the {.arg username} argument.",
+          "i" = "Alternatively, add {.code USER = \"<your username>\"} to your
+          {.file .Renviron} file."
+        )
+      )
     }
   }
 
-  # TODO improve error messages and provide instructions for setting up keyring
-  # Add the following code to R profile.
-  # Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf"),
-  # keyring_create("createslf", password = Sys.getenv("CREATESLF_KEYRING_PASS")),
-  # key_set(keyring = "createslf", service = "db_password")
+  # Check the status of keyring
+  # Does the 'createslf' keyring exist
+  keyring_exists <- "createslf" %in% keyring::keyring_list()[["keyring"]]
 
-  if (!("createslf" %in% keyring::keyring_list()[["keyring"]])) {
-    cli::cli_abort("The {.val createslf} keyring does not exist.")
+  # Does the 'db_password' key exist in the 'createslf' keyring
+  if (keyring_exists) {
+    key_exists <- "db_password" %in% keyring::key_list(keyring = "createslf")[["service"]]
+  } else {
+    key_exists <- FALSE
   }
 
-  if (!("db_password" %in% keyring::key_list(keyring = "createslf")[["service"]])) {
-    cli::cli_abort("{.val db_password} is missing from the {.val createslf} keyring.")
-  }
+  # Does the 'CREATESLF_KEYRING_PASS' environment variable exist
+  env_var_pass_exists <- Sys.getenv("CREATESLF_KEYRING_PASS") != ""
 
-  if (Sys.getenv("CREATESLF_KEYRING_PASS") == "") {
-    cli::cli_abort("You must have the password to unlock the {.val createslf} keyring in your environment as
-                   {.envvar CREATESLF_KEYRING_PASS}. Please set this up in your {.file .Renviron} or {.file .Rprofile}")
+  if (!all(keyring_exists, key_exists, env_var_pass_exists)) {
+    if (rlang::is_interactive()) {
+      setup_keyring(
+        keyring = "createslf",
+        key = "db_password",
+        keyring_exists = keyring_exists,
+        key_exists = key_exists,
+        env_var_pass_exists = env_var_pass_exists
+      )
+    } else {
+      if (any(keyring_exists, key_exists, env_var_pass_exists)) {
+        cli::cli_abort(
+          c(
+            "x" = "Your keyring needs to be set up, run:",
+            "{.code setup_keyring(keyring = \"createslf\", key = \"db_password\",
+  keyring_exists = {keyring_exists}, key_exists = {key_exists},
+  env_var_pass_exists = {env_var_pass_exists})}"
+          )
+        )
+      } else {
+        cli::cli_abort(
+          c(
+            "x" = "Your keyring needs to be set up, run:",
+            "{.code setup_keyring(keyring = \"createslf\",
+            key = \"db_password\")}"
+          )
+        )
+      }
+    }
   }
 
-  keyring::keyring_unlock(keyring = "createslf", password = Sys.getenv("CREATESLF_KEYRING_PASS"))
-
-  if (keyring::keyring_is_locked(keyring = "createslf")) {
-    cli::cli_abort("Keyring is locked. To unlock createslf keyring, please use {.fun keyring::keyring_unlock}")
+  if (env_var_pass_exists) {
+    keyring::keyring_unlock(
+      keyring = "createslf",
+      password = Sys.getenv("CREATESLF_KEYRING_PASS")
+    )
+  } else {
+    keyring::keyring_unlock(
+      keyring = "createslf",
+      password = rstudioapi::askForPassword(
+        prompt = "Enter the password for the keyring you just created."
+      )
+    )
   }
 
 
   # Create the connection
-  password_text <- stringr::str_glue("{dsn} password for user: {username}")
   db_connection <- odbc::dbConnect(
     odbc::odbc(),
     dsn = dsn,
     uid = username,
-    pwd = keyring::key_get(keyring = "createslf", service = "db_password")
+    pwd = keyring::key_get(
+      keyring = "createslf",
+      service = "db_password"
+    )
   )
 
   keyring::keyring_lock(keyring = "createslf")
 
   return(db_connection)
 }
+
+#' Interactively set up the keyring
+#'
+#' @description
+#' This is meant to be used with [phs_db_connection()], it can only be used
+#' interactively i.e. not in targets or in a workbench job.
+#'
+#' With the default options it will go through the steps to set up a keyring
+#' which can be used to supply passwords to [odbc::dbConnect()] (or others) in a
+#' secure and seamless way.
+#'
+#'  1. Create an .Renviron file in the project and add a password (for the
+#'  keyring) to it.
+#'  2. Create a keyring with the password - Since we have saved the password as
+#'  an environment variable it can be picked unlocked and used automatically.
+#'  3. Add the database password to the keyring.
+#'
+#'
+#' @param keyring Name of the keyring
+#' @param key Name of the key
+#' @param keyring_exists Does the keyring already exist
+#' @param key_exists Does the key already exist
+#' @param env_var_pass_exists Does the password for the keyring already exist
+#' in the environment.
+#'
+#' @return NULL (invisibly)
+#' @export
+setup_keyring <- function(
+    keyring = "createslf",
+    key = "db_password",
+    keyring_exists = FALSE,
+    key_exists = FALSE,
+    env_var_pass_exists = FALSE) {
+  # First we need the password as an environment variable
+  if (!env_var_pass_exists) {
+    if (Sys.getenv("CREATESLF_KEYRING_PASS") != "") {
+      cli::cli_alert_warning(
+        "{.env CREATESLF_KEYRING_PASS} already exists in the environment, you
+        will need to clean this up manually if it's not correct."
+      )
+      keyring_password <- Sys.getenv("CREATESLF_KEYRING_PASS")
+    } else if (
+      any(stringr::str_detect(
+        readr::read_lines(".Renviron"),
+        "^CREATESLF_KEYRING_PASS\\s*?=\\s*?['\"].+?['\"]$"
+      ))
+
+    ) {
+      cli::cli_abort(
+        "Your {.file .Renviron} file looks ok, try restarting your session."
+      )
+    } else {
+      keyring_password <- rstudioapi::askForPassword(
+        prompt = stringr::str_glue(
+          "Enter a password for the '{keyring}' keyring, this should
+        not be your LDAP / database password."
+        )
+      )
+      if (is.null(keyring_password)) {
+        cli::cli_abort("No keyring password entered.")
+      }
+      if (!fs::file_exists(".Renviron")) {
+        cli::cli_alert_success("Creating an {.file .Renviron} file.")
+      }
+
+      renviron_text <- stringr::str_glue(
+        "CREATESLF_KEYRING_PASS = \"{keyring_password}\""
+      )
+
+      readr::write_lines(
+        x = renviron_text,
+        file = ".Renviron",
+        append = TRUE
+      )
+
+      cli::cli_alert_success(
+        "Added {.code {renviron_text}} to the {.file .Renviron} file."
+      )
+
+      cli::cli_alert_info("You will need to restart your R session.")
+    }
+  } else {
+    keyring_password <- Sys.getenv("CREATESLF_KEYRING_PASS")
+  }
+
+  # If the keyring doesn't exist create it now.
+  if (!keyring_exists) {
+    if (keyring %in% keyring::keyring_list()[["keyring"]]) {
+      cli::cli_alert_warning(
+        "The {keyring} keyring already exists, you will be asked to
+        overwrite it."
+      )
+    }
+    keyring::keyring_create(
+      keyring = keyring,
+      password = keyring_password
+    )
+
+    cli::cli_alert_success(
+      "Created the '{keyring}' keyring with {.fun keyring::keyring_create}."
+    )
+  }
+
+  # If we just created the keyring it will already be unlocked
+  keyring::keyring_unlock(
+    keyring = keyring,
+    password = keyring_password
+  )
+
+  # Now add the password to the keyring
+  if (!key_exists) {
+    keyring::key_set(
+      keyring = keyring,
+      service = key,
+      prompt = "Enter you LDAP password for database connections."
+    )
+
+    cli::cli_alert_success(
+      "Added the '{key}' key to the '{keyring}' keyring with
+      {.fun keyring::keyring_set}."
+    )
+  }
+
+  keyring::keyring_lock(keyring = keyring)
+
+  cli::cli_alert_success(
+    "The keyring should now be set up correctly."
+  )
+
+  return(invisible(NULL))
+}
diff --git a/man/phs_db_connection.Rd b/man/phs_db_connection.Rd
index 93e73ee55..8ff9d0a32 100644
--- a/man/phs_db_connection.Rd
+++ b/man/phs_db_connection.Rd
@@ -4,19 +4,23 @@
 \alias{phs_db_connection}
 \title{Open a connection to a PHS database}
 \usage{
-phs_db_connection(dsn, username = Sys.getenv("USER"))
+phs_db_connection(dsn, username)
 }
 \arguments{
-\item{dsn}{The Data Source Name passed on to \code{odbc::dbconnect}
-the dsn must be setup first. e.g. SMRA or DVPROD}
+\item{dsn}{The Data Source Name (DSN) passed on to \code{\link[odbc:dbConnect-OdbcDriver-method]{odbc::dbConnect()}}
+the DSN must be set up first. e.g. \code{SMRA} or \code{DVPROD}}
 
 \item{username}{The username to use for authentication,
-if not supplied it first will check the environment variable
-and finally ask the user for input.}
+if not supplied it will try to find it automatically and if possible ask the
+user for input.}
 }
 \value{
-a connection to the specified dsn
+a connection to the specified Data Source.
 }
 \description{
-Opens a connection to PHS database to allow data to be collected
+Opens a connection to PHS database given a Data Source Name
+(DSN) it will try to get the username, asking for input if in an interactive
+session. It will also use \link[keyring:keyring-package]{keyring} to find
+an existing keyring called 'createslf' which should contain a \code{db_password}
+key with the users database password.
 }
diff --git a/man/setup_keyring.Rd b/man/setup_keyring.Rd
new file mode 100644
index 000000000..c40ef31c1
--- /dev/null
+++ b/man/setup_keyring.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_connection_PHS_database.R
+\name{setup_keyring}
+\alias{setup_keyring}
+\title{Interactively set up the keyring}
+\usage{
+setup_keyring(
+  keyring = "createslf",
+  key = "db_password",
+  keyring_exists = FALSE,
+  key_exists = FALSE,
+  env_var_pass_exists = FALSE
+)
+}
+\arguments{
+\item{keyring}{Name of the keyring}
+
+\item{key}{Name of the key}
+
+\item{keyring_exists}{Does the keyring already exist}
+
+\item{key_exists}{Does the key already exist}
+
+\item{env_var_pass_exists}{Does the password for the keyring already exist
+in the environment.}
+}
+\value{
+NULL (invisibly)
+}
+\description{
+This is meant to be used with \code{\link[=phs_db_connection]{phs_db_connection()}}, it can only be used
+interactively i.e. not in targets or in a workbench job.
+
+With the default options it will go through the steps to set up a keyring
+which can be used to supply passwords to \code{\link[odbc:dbConnect-OdbcDriver-method]{odbc::dbConnect()}} (or others) in a
+secure and seamless way.
+\enumerate{
+\item Create an .Renviron file in the project and add a password (for the
+keyring) to it.
+\item Create a keyring with the password - Since we have saved the password as
+an environment variable it can be picked unlocked and used automatically.
+\item Add the database password to the keyring.
+}
+}

From 9652c691fc45d9626b145f1156a7a3647df24e63 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 12:02:44 +0100
Subject: [PATCH 20/26] Add additional tests for `get_file_path` (#808)

* Add additional tests for `get_file_path`

* Style code

---------

Co-authored-by: Moohan <Moohan@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 tests/testthat/test-get_file_paths.R | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests/testthat/test-get_file_paths.R b/tests/testthat/test-get_file_paths.R
index 2bec746f7..a3b29a290 100644
--- a/tests/testthat/test-get_file_paths.R
+++ b/tests/testthat/test-get_file_paths.R
@@ -1,3 +1,28 @@
+test_that("Errors properly", {
+  expect_error(
+    get_file_path(directory = "foo", file_name = "bar"),
+    "The directory .+? does not exist\\."
+  )
+
+  expect_error(
+    get_file_path(
+      directory = ".",
+      file_name_regexp = "targets",
+      check_mode = "write"
+    ),
+    "`check_mode = \"write\"` can't be used"
+  )
+})
+
+test_that("Can do check exists", {
+  expect_false(get_file_path(
+    directory = ".",
+    file_name = "foo.R",
+    check_mode = "exists"
+  ))
+})
+
+
 skip_on_ci()
 
 slf_updates_dir <- fs::path(

From bc2e4b3d5ded339ce938227c94a705fa4d8bcfff Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 15:15:03 +0100
Subject: [PATCH 21/26] Rename `run_episode_file()` -> `create_episode_file()`
 (#803)

* Rename `run_episode_file()` -> `create_episode_file()`

This improves consistency! When speaking to Megan we noted that having the two 'main' functions with different names was needlessly confusing!

* Delete run_targets_tests.R

* Update documentation

---------

Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
---
 NAMESPACE                                        |  2 +-
 R/{run_episode_file.R => create_episode_file.R}  | 13 ++++++-------
 R/create_individual_file.R                       | 10 +++++-----
 _targets.R                                       |  2 +-
 man/add_acute_columns.Rd                         |  2 +-
 man/add_ae_columns.Rd                            |  2 +-
 man/add_all_columns.Rd                           |  2 +-
 man/add_at_columns.Rd                            |  2 +-
 man/add_ch_columns.Rd                            |  2 +-
 man/add_cij_columns.Rd                           |  2 +-
 man/add_cmh_columns.Rd                           |  2 +-
 man/add_dd_columns.Rd                            |  2 +-
 man/add_dn_columns.Rd                            |  2 +-
 man/add_gls_columns.Rd                           |  2 +-
 man/add_hc_columns.Rd                            |  2 +-
 man/add_hl1_columns.Rd                           |  2 +-
 man/add_ipdc_cols.Rd                             |  2 +-
 man/add_mat_columns.Rd                           |  2 +-
 man/add_mh_columns.Rd                            |  2 +-
 man/add_nrs_columns.Rd                           |  2 +-
 man/add_nsu_columns.Rd                           |  2 +-
 man/add_ooh_columns.Rd                           |  2 +-
 man/add_op_columns.Rd                            |  2 +-
 man/add_pis_columns.Rd                           |  2 +-
 man/add_sds_columns.Rd                           |  2 +-
 man/add_standard_cols.Rd                         |  2 +-
 man/aggregate_by_chi.Rd                          |  2 +-
 man/aggregate_ch_episodes.Rd                     |  2 +-
 man/clean_up_ch.Rd                               |  2 +-
 man/correct_cij_vars.Rd                          |  2 +-
 man/create_cohort_lookups.Rd                     |  2 +-
 man/create_cost_inc_dna.Rd                       |  2 +-
 ...un_episode_file.Rd => create_episode_file.Rd} | 16 ++++++++--------
 man/create_individual_file.Rd                    | 10 +++++-----
 man/fill_missing_cij_markers.Rd                  |  2 +-
 man/join_cohort_lookups.Rd                       |  2 +-
 man/load_ep_file_vars.Rd                         |  2 +-
 man/recode_gender.Rd                             |  2 +-
 man/remove_blank_chi.Rd                          |  2 +-
 man/store_ep_file_vars.Rd                        |  2 +-
 40 files changed, 60 insertions(+), 61 deletions(-)
 rename R/{run_episode_file.R => create_episode_file.R} (97%)
 rename man/{run_episode_file.Rd => create_episode_file.Rd} (63%)

diff --git a/NAMESPACE b/NAMESPACE
index 678d7a53f..d0323b8c4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,6 +13,7 @@ export(convert_hscp_to_hscpnames)
 export(convert_numeric_to_date)
 export(convert_sending_location_to_lca)
 export(convert_year_to_fyyear)
+export(create_episode_file)
 export(create_individual_file)
 export(create_service_use_cohorts)
 export(end_fy)
@@ -155,7 +156,6 @@ export(read_sc_all_alarms_telecare)
 export(read_sc_all_care_home)
 export(read_sc_all_home_care)
 export(read_sc_all_sds)
-export(run_episode_file)
 export(setup_keyring)
 export(start_fy)
 export(start_fy_quarter)
diff --git a/R/run_episode_file.R b/R/create_episode_file.R
similarity index 97%
rename from R/run_episode_file.R
rename to R/create_episode_file.R
index 19c2481f2..bad42be5e 100644
--- a/R/run_episode_file.R
+++ b/R/create_episode_file.R
@@ -1,16 +1,15 @@
-#' Produce the Source Episode file
+#' Create the Source Episode file
 #'
 #' @param processed_data_list containing data from processed extracts.
 #' @param year The year to process, in FY format.
 #' @param write_to_disk (optional) Should the data be written to disk default is
 #' `TRUE` i.e. write the data to disk.
 #' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output
-#' (instead of chi)
+#' (instead of chi).
 #'
-#' @return a [tibble][tibble::tibble-package] containing the episode file
+#' @return the Source Episode file as a [tibble][tibble::tibble-package].
 #' @export
-#'
-run_episode_file <- function(
+create_episode_file <- function(
     processed_data_list,
     year,
     write_to_disk = TRUE,
@@ -134,7 +133,7 @@ run_episode_file <- function(
 #' Store the unneeded episode file variables
 #'
 #' @param data The in-progress episode file data.
-#' @inheritParams run_episode_file
+#' @inheritParams create_episode_file
 #' @param vars_to_keep a character vector of the variables to keep, all others
 #' will be stored.
 #'
@@ -172,7 +171,7 @@ store_ep_file_vars <- function(data, year, vars_to_keep) {
 
 #' Load the unneeded episode file variables
 #'
-#' @inheritParams run_episode_file
+#' @inheritParams create_episode_file
 #' @inheritParams store_ep_file_vars
 #'
 #' @return The full SLF data.
diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index f0e6bcdfc..436f1c8d7 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -1,11 +1,11 @@
-#' Create individual file
+#' Create the Source Individual file
 #'
-#' @description Creates individual file from episode file
+#' @description Creates the individual file from the episode file.
 #'
-#' @param episode_file Tibble containing episodic data
+#' @param episode_file Tibble containing episodic data.
 #' @param anon_chi_in (Default:TRUE) Is `anon_chi` used in the input
-#' (instead of chi)
-#' @inheritParams run_episode_file
+#' (instead of chi).
+#' @inheritParams create_episode_file
 #'
 #' @return The processed individual file
 #' @export
diff --git a/_targets.R b/_targets.R
index 9698cf27e..db26477ef 100644
--- a/_targets.R
+++ b/_targets.R
@@ -545,7 +545,7 @@ list(
     ),
     tar_target(
       episode_file,
-      run_episode_file(
+      create_episode_file(
         processed_data_list,
         year,
         write_to_disk
diff --git a/man/add_acute_columns.Rd b/man/add_acute_columns.Rd
index 52ba071b6..c2659f821 100644
--- a/man/add_acute_columns.Rd
+++ b/man/add_acute_columns.Rd
@@ -7,7 +7,7 @@
 add_acute_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_ae_columns.Rd b/man/add_ae_columns.Rd
index 9b7099513..fdc31b7ff 100644
--- a/man/add_ae_columns.Rd
+++ b/man/add_ae_columns.Rd
@@ -7,7 +7,7 @@
 add_ae_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_all_columns.Rd b/man/add_all_columns.Rd
index d502e95c3..1d2e587db 100644
--- a/man/add_all_columns.Rd
+++ b/man/add_all_columns.Rd
@@ -7,7 +7,7 @@
 add_all_columns(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Add new columns based on SMRType and recid which follow a pattern
diff --git a/man/add_at_columns.Rd b/man/add_at_columns.Rd
index e05ea9101..af978530a 100644
--- a/man/add_at_columns.Rd
+++ b/man/add_at_columns.Rd
@@ -7,7 +7,7 @@
 add_at_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_ch_columns.Rd b/man/add_ch_columns.Rd
index 4938f7690..a036a257e 100644
--- a/man/add_ch_columns.Rd
+++ b/man/add_ch_columns.Rd
@@ -7,7 +7,7 @@
 add_ch_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_cij_columns.Rd b/man/add_cij_columns.Rd
index 7d00e6299..c48c1a3ef 100644
--- a/man/add_cij_columns.Rd
+++ b/man/add_cij_columns.Rd
@@ -7,7 +7,7 @@
 add_cij_columns(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Add new columns related to CIJ
diff --git a/man/add_cmh_columns.Rd b/man/add_cmh_columns.Rd
index a1d82cba6..a1cb74abb 100644
--- a/man/add_cmh_columns.Rd
+++ b/man/add_cmh_columns.Rd
@@ -7,7 +7,7 @@
 add_cmh_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_dd_columns.Rd b/man/add_dd_columns.Rd
index 08d9c0fe4..11e85fdc7 100644
--- a/man/add_dd_columns.Rd
+++ b/man/add_dd_columns.Rd
@@ -7,7 +7,7 @@
 add_dd_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_dn_columns.Rd b/man/add_dn_columns.Rd
index bf6af008f..ffdf59a82 100644
--- a/man/add_dn_columns.Rd
+++ b/man/add_dn_columns.Rd
@@ -7,7 +7,7 @@
 add_dn_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_gls_columns.Rd b/man/add_gls_columns.Rd
index e71dc755b..6ab7e9645 100644
--- a/man/add_gls_columns.Rd
+++ b/man/add_gls_columns.Rd
@@ -7,7 +7,7 @@
 add_gls_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_hc_columns.Rd b/man/add_hc_columns.Rd
index 95d8f1d3b..a58f226ec 100644
--- a/man/add_hc_columns.Rd
+++ b/man/add_hc_columns.Rd
@@ -7,7 +7,7 @@
 add_hc_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_hl1_columns.Rd b/man/add_hl1_columns.Rd
index 7600db5e9..24fc714e9 100644
--- a/man/add_hl1_columns.Rd
+++ b/man/add_hl1_columns.Rd
@@ -7,7 +7,7 @@
 add_hl1_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_ipdc_cols.Rd b/man/add_ipdc_cols.Rd
index 0f91cbd90..bd630b9d3 100644
--- a/man/add_ipdc_cols.Rd
+++ b/man/add_ipdc_cols.Rd
@@ -7,7 +7,7 @@
 add_ipdc_cols(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_mat_columns.Rd b/man/add_mat_columns.Rd
index aae729323..5faab0dc1 100644
--- a/man/add_mat_columns.Rd
+++ b/man/add_mat_columns.Rd
@@ -7,7 +7,7 @@
 add_mat_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_mh_columns.Rd b/man/add_mh_columns.Rd
index 3c50c6cb8..c587c490a 100644
--- a/man/add_mh_columns.Rd
+++ b/man/add_mh_columns.Rd
@@ -7,7 +7,7 @@
 add_mh_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_nrs_columns.Rd b/man/add_nrs_columns.Rd
index 9d7b3f8bf..b41201a57 100644
--- a/man/add_nrs_columns.Rd
+++ b/man/add_nrs_columns.Rd
@@ -7,7 +7,7 @@
 add_nrs_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_nsu_columns.Rd b/man/add_nsu_columns.Rd
index 6a54bbcbf..5aed481f0 100644
--- a/man/add_nsu_columns.Rd
+++ b/man/add_nsu_columns.Rd
@@ -7,7 +7,7 @@
 add_nsu_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_ooh_columns.Rd b/man/add_ooh_columns.Rd
index 01814ab6d..f1e6b63f5 100644
--- a/man/add_ooh_columns.Rd
+++ b/man/add_ooh_columns.Rd
@@ -7,7 +7,7 @@
 add_ooh_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_op_columns.Rd b/man/add_op_columns.Rd
index 08c4419e2..9fb8bc158 100644
--- a/man/add_op_columns.Rd
+++ b/man/add_op_columns.Rd
@@ -7,7 +7,7 @@
 add_op_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_pis_columns.Rd b/man/add_pis_columns.Rd
index b582acf2e..836218da0 100644
--- a/man/add_pis_columns.Rd
+++ b/man/add_pis_columns.Rd
@@ -7,7 +7,7 @@
 add_pis_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_sds_columns.Rd b/man/add_sds_columns.Rd
index d5a5fb2cf..c06b88527 100644
--- a/man/add_sds_columns.Rd
+++ b/man/add_sds_columns.Rd
@@ -7,7 +7,7 @@
 add_sds_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_standard_cols.Rd b/man/add_standard_cols.Rd
index 744aa49de..4392157d2 100644
--- a/man/add_standard_cols.Rd
+++ b/man/add_standard_cols.Rd
@@ -13,7 +13,7 @@ add_standard_cols(
 )
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd
index 013123902..1585accbb 100644
--- a/man/aggregate_by_chi.Rd
+++ b/man/aggregate_by_chi.Rd
@@ -7,7 +7,7 @@
 aggregate_by_chi(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Aggregate episode file by CHI to convert into
diff --git a/man/aggregate_ch_episodes.Rd b/man/aggregate_ch_episodes.Rd
index 1c955d666..3223e6d25 100644
--- a/man/aggregate_ch_episodes.Rd
+++ b/man/aggregate_ch_episodes.Rd
@@ -7,7 +7,7 @@
 aggregate_ch_episodes(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Aggregate CH variables by CHI and CIS.
diff --git a/man/clean_up_ch.Rd b/man/clean_up_ch.Rd
index 0182c84e8..c0c61966d 100644
--- a/man/clean_up_ch.Rd
+++ b/man/clean_up_ch.Rd
@@ -7,7 +7,7 @@
 clean_up_ch(episode_file, year)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{year}{The year to process, in FY format.}
 }
diff --git a/man/correct_cij_vars.Rd b/man/correct_cij_vars.Rd
index 97a7f046f..558514dc6 100644
--- a/man/correct_cij_vars.Rd
+++ b/man/correct_cij_vars.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{correct_cij_vars}
 \alias{correct_cij_vars}
 \title{Correct the CIJ variables}
diff --git a/man/create_cohort_lookups.Rd b/man/create_cohort_lookups.Rd
index f0ad267aa..109869074 100644
--- a/man/create_cohort_lookups.Rd
+++ b/man/create_cohort_lookups.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{create_cohort_lookups}
 \alias{create_cohort_lookups}
 \title{Create the cohort lookups}
diff --git a/man/create_cost_inc_dna.Rd b/man/create_cost_inc_dna.Rd
index 69e7e37b5..47c38b176 100644
--- a/man/create_cost_inc_dna.Rd
+++ b/man/create_cost_inc_dna.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{create_cost_inc_dna}
 \alias{create_cost_inc_dna}
 \title{Create cost total net inc DNA}
diff --git a/man/run_episode_file.Rd b/man/create_episode_file.Rd
similarity index 63%
rename from man/run_episode_file.Rd
rename to man/create_episode_file.Rd
index 59d5fea1d..e1bda32b9 100644
--- a/man/run_episode_file.Rd
+++ b/man/create_episode_file.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
-\name{run_episode_file}
-\alias{run_episode_file}
-\title{Produce the Source Episode file}
+% Please edit documentation in R/create_episode_file.R
+\name{create_episode_file}
+\alias{create_episode_file}
+\title{Create the Source Episode file}
 \usage{
-run_episode_file(
+create_episode_file(
   processed_data_list,
   year,
   write_to_disk = TRUE,
@@ -20,11 +20,11 @@ run_episode_file(
 \code{TRUE} i.e. write the data to disk.}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
-(instead of chi)}
+(instead of chi).}
 }
 \value{
-a \link[tibble:tibble-package]{tibble} containing the episode file
+the Source Episode file as a \link[tibble:tibble-package]{tibble}.
 }
 \description{
-Produce the Source Episode file
+Create the Source Episode file
 }
diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd
index fa759e7b1..4fd9a4a53 100644
--- a/man/create_individual_file.Rd
+++ b/man/create_individual_file.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/create_individual_file.R
 \name{create_individual_file}
 \alias{create_individual_file}
-\title{Create individual file}
+\title{Create the Source Individual file}
 \usage{
 create_individual_file(
   episode_file,
@@ -13,7 +13,7 @@ create_individual_file(
 )
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{year}{The year to process, in FY format.}
 
@@ -21,14 +21,14 @@ create_individual_file(
 \code{TRUE} i.e. write the data to disk.}
 
 \item{anon_chi_in}{(Default:TRUE) Is \code{anon_chi} used in the input
-(instead of chi)}
+(instead of chi).}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
-(instead of chi)}
+(instead of chi).}
 }
 \value{
 The processed individual file
 }
 \description{
-Creates individual file from episode file
+Creates the individual file from the episode file.
 }
diff --git a/man/fill_missing_cij_markers.Rd b/man/fill_missing_cij_markers.Rd
index 03b64217e..4795eed7a 100644
--- a/man/fill_missing_cij_markers.Rd
+++ b/man/fill_missing_cij_markers.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{fill_missing_cij_markers}
 \alias{fill_missing_cij_markers}
 \title{Fill any missing CIJ markers for records that should have them}
diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd
index 445dcd7c0..15a860a36 100644
--- a/man/join_cohort_lookups.Rd
+++ b/man/join_cohort_lookups.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{join_cohort_lookups}
 \alias{join_cohort_lookups}
 \title{Join cohort lookups}
diff --git a/man/load_ep_file_vars.Rd b/man/load_ep_file_vars.Rd
index cee9cc440..509b0e00c 100644
--- a/man/load_ep_file_vars.Rd
+++ b/man/load_ep_file_vars.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{load_ep_file_vars}
 \alias{load_ep_file_vars}
 \title{Load the unneeded episode file variables}
diff --git a/man/recode_gender.Rd b/man/recode_gender.Rd
index 526d2829d..aaa28e6eb 100644
--- a/man/recode_gender.Rd
+++ b/man/recode_gender.Rd
@@ -7,7 +7,7 @@
 recode_gender(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Recode gender to 1.5 if 0 or 9.
diff --git a/man/remove_blank_chi.Rd b/man/remove_blank_chi.Rd
index 9cba40a8f..b290dd1e7 100644
--- a/man/remove_blank_chi.Rd
+++ b/man/remove_blank_chi.Rd
@@ -7,7 +7,7 @@
 remove_blank_chi(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Convert blank strings to NA and remove NAs from CHI column
diff --git a/man/store_ep_file_vars.Rd b/man/store_ep_file_vars.Rd
index 06316aac1..880266d58 100644
--- a/man/store_ep_file_vars.Rd
+++ b/man/store_ep_file_vars.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{store_ep_file_vars}
 \alias{store_ep_file_vars}
 \title{Store the unneeded episode file variables}

From 854868e6b9ae6b01d4b1af62c890f7390ce8a85f Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 15:57:22 +0100
Subject: [PATCH 22/26] Remove incorrect references to rds (#798)

* Remove incorrect references to rds

Since we (mostly) don't use rds anymore these references are incorrect and potentially confusing.

I've updated lots of documentation to remove the reference to rds.

I've also updated many comments that mentioned rds (these were probably the most confusing).

* Update documentation

---------

Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
---
 R/process_costs_rmd.R                     | 18 ++++++++-----
 R/process_extract_acute.R                 | 15 +++++------
 R/process_extract_ae.R                    | 15 +++++------
 R/process_extract_alarms_telecare.R       |  2 +-
 R/process_extract_care_home.R             |  2 +-
 R/process_extract_cmh.R                   | 15 +++++------
 R/process_extract_consultations.R         |  2 +-
 R/process_extract_delayed_discharges.R    |  2 +-
 R/process_extract_district_nursing.R      |  2 +-
 R/process_extract_gp_ooh.R                |  2 +-
 R/process_extract_home_care.R             |  2 +-
 R/process_extract_homelessness.R          |  2 +-
 R/process_extract_maternity.R             | 18 +++++--------
 R/process_extract_mental_health.R         | 16 +++++-------
 R/process_extract_ooh_diagnosis.R         |  2 +-
 R/process_extract_ooh_outcomes.R          |  2 +-
 R/process_extract_outpatients.R           | 19 +++++---------
 R/process_extract_prescribing.R           |  9 ++++---
 R/process_extract_sds.R                   |  2 +-
 R/process_lookup_ltc.R                    | 10 +++----
 R/process_lookup_postcode.R               | 19 ++++++--------
 R/process_lookup_sc_client.R              | 16 +++++-------
 R/process_lookup_sc_demographics.R        | 32 +++++++++--------------
 R/process_sc_all_alarms_telecare.R        |  9 ++++---
 R/process_sc_all_care_home.R              |  2 +-
 R/process_sc_all_home_care.R              |  2 +-
 R/process_sc_all_sds.R                    | 11 ++++----
 R/read_extract_gp_ooh.R                   |  2 +-
 man/process_costs_ch_rmd.Rd               |  2 +-
 man/process_costs_dn_rmd.Rd               |  2 +-
 man/process_costs_gp_ooh_rmd.Rd           |  2 +-
 man/process_costs_hc_rmd.Rd               |  2 +-
 man/process_extract_acute.Rd              |  2 +-
 man/process_extract_ae.Rd                 |  2 +-
 man/process_extract_alarms_telecare.Rd    |  2 +-
 man/process_extract_care_home.Rd          |  2 +-
 man/process_extract_cmh.Rd                |  2 +-
 man/process_extract_delayed_discharges.Rd |  2 +-
 man/process_extract_district_nursing.Rd   |  2 +-
 man/process_extract_gp_ooh.Rd             |  2 +-
 man/process_extract_home_care.Rd          |  2 +-
 man/process_extract_homelessness.Rd       |  2 +-
 man/process_extract_maternity.Rd          |  2 +-
 man/process_extract_mental_health.Rd      |  2 +-
 man/process_extract_ooh_consultations.Rd  |  2 +-
 man/process_extract_ooh_diagnosis.Rd      |  2 +-
 man/process_extract_ooh_outcomes.Rd       |  2 +-
 man/process_extract_outpatients.Rd        |  2 +-
 man/process_extract_prescribing.Rd        |  2 +-
 man/process_extract_sds.Rd                |  2 +-
 man/process_lookup_postcode.Rd            |  2 +-
 man/process_lookup_sc_client.Rd           |  2 +-
 man/process_lookup_sc_demographics.Rd     |  2 +-
 man/process_sc_all_alarms_telecare.Rd     |  2 +-
 man/process_sc_all_care_home.Rd           |  2 +-
 man/process_sc_all_home_care.Rd           |  2 +-
 man/process_sc_all_sds.Rd                 |  2 +-
 man/read_extract_gp_ooh.Rd                |  2 +-
 58 files changed, 145 insertions(+), 165 deletions(-)

diff --git a/R/process_costs_rmd.R b/R/process_costs_rmd.R
index 2efc25dd8..5d97d705f 100644
--- a/R/process_costs_rmd.R
+++ b/R/process_costs_rmd.R
@@ -14,7 +14,8 @@ process_costs_rmd <- function(file_name) {
     stringr::fixed("Rmd", ignore_case = TRUE)
   )) {
     cli::cli_abort(
-      "{.arg file_name} must be an {.code .Rmd} not a {.code .{fs::path_ext(file_name)}}."
+      "{.arg file_name} must be an {.code .Rmd} not a
+      {.code .{fs::path_ext(file_name)}}."
     )
   }
 
@@ -34,7 +35,12 @@ process_costs_rmd <- function(file_name) {
 
   output_file <- get_file_path(
     directory = output_dir,
-    file_name = fs::path_ext_set(stringr::str_glue("{fs::path_ext_remove(file_name)}-{latest_update()}-{date_today}"), "html"),
+    file_name = fs::path_ext_set(
+      stringr::str_glue(
+        "{fs::path_ext_remove(file_name)}-{latest_update()}-{date_today}"
+      ),
+      "html"
+    ),
     check_mode = "write"
   )
 
@@ -55,7 +61,7 @@ process_costs_rmd <- function(file_name) {
 #'
 #' @description This will read and process the
 #' District Nursing cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @param file_path Path to the cost lookup.
 #'
@@ -73,7 +79,7 @@ process_costs_dn_rmd <- function(file_path = get_dn_costs_path()) {
 #'
 #' @description This will read and process the
 #' care homes cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @inheritParams process_costs_dn_rmd
 #'
@@ -91,7 +97,7 @@ process_costs_ch_rmd <- function(file_path = get_ch_costs_path()) {
 #'
 #' @description This will read and process the
 #' GP ooh cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @inheritParams process_costs_dn_rmd
 #'
@@ -109,7 +115,7 @@ process_costs_gp_ooh_rmd <- function(file_path = get_gp_ooh_costs_path()) {
 #'
 #' @description This will read and process the
 #' Home Care cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @inheritParams process_costs_dn_rmd
 #'
diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R
index 107b346c9..70ff29370 100644
--- a/R/process_extract_acute.R
+++ b/R/process_extract_acute.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' acute extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -61,9 +61,7 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) {
       levels = 0L:8L
     ))
 
-
-  ## save outfile ---------------------------------------
-  outfile <- acute_clean %>%
+  acute_processed <- acute_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -113,10 +111,11 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) {
     dplyr::arrange(.data$chi, .data$record_keydate1)
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "Acute", check_mode = "write"))
+    write_file(
+      acute_processed,
+      get_source_extract_path(year, "Acute", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(acute_processed)
 }
diff --git a/R/process_extract_ae.R b/R/process_extract_ae.R
index 7e61db018..95dfd99be 100644
--- a/R/process_extract_ae.R
+++ b/R/process_extract_ae.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' A&E extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -237,9 +237,7 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
       .data$keytime2
     )
 
-
-  # Save outfile----------------------------------------
-  outfile <- matched_ae_data %>%
+  ae_processed <- matched_ae_data %>%
     dplyr::select(
       "year",
       "recid",
@@ -294,10 +292,11 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "AE", check_mode = "write"))
+    write_file(
+      ae_processed,
+      get_source_extract_path(year, "AE", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(ae_processed)
 }
diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R
index 15cd79809..9a0745a04 100644
--- a/R/process_extract_alarms_telecare.R
+++ b/R/process_extract_alarms_telecare.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) Alarms Telecare extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_extract_care_home
 #'
diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R
index 757e47f6c..cbf6d417c 100644
--- a/R/process_extract_care_home.R
+++ b/R/process_extract_care_home.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) Care Home extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The full processed data which will be selected from to create
 #' the year specific data.
diff --git a/R/process_extract_cmh.R b/R/process_extract_cmh.R
index 0b1ba0f19..a2adad75e 100644
--- a/R/process_extract_cmh.R
+++ b/R/process_extract_cmh.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' CMH extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -44,9 +44,7 @@ process_extract_cmh <- function(data,
     # create blank diag 6
     dplyr::mutate(diag6 = NA)
 
-  # Outfile --------------------------------------------
-
-  outfile <- cmh_clean %>%
+  cmh_processed <- cmh_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -73,10 +71,11 @@ process_extract_cmh <- function(data,
     )
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "CMH", check_mode = "write"))
+    write_file(
+      cmh_processed,
+      get_source_extract_path(year, "CMH", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(cmh_processed)
 }
diff --git a/R/process_extract_consultations.R b/R/process_extract_consultations.R
index 7262c1df6..6dc175cb8 100644
--- a/R/process_extract_consultations.R
+++ b/R/process_extract_consultations.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OOH Consultations extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_delayed_discharges.R b/R/process_extract_delayed_discharges.R
index 29f37eb29..3c56807f9 100644
--- a/R/process_extract_delayed_discharges.R
+++ b/R/process_extract_delayed_discharges.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' delayed discharges extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_district_nursing.R b/R/process_extract_district_nursing.R
index a1b3bf816..9d1df62a6 100644
--- a/R/process_extract_district_nursing.R
+++ b/R/process_extract_district_nursing.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' District Nursing extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R
index 4add41cfa..2b536878a 100644
--- a/R/process_extract_gp_ooh.R
+++ b/R/process_extract_gp_ooh.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OoH extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param year The year to process, in FY format.
 #' @param data_list A list containing the extracts.
diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R
index 3fcf009eb..874ad899c 100644
--- a/R/process_extract_home_care.R
+++ b/R/process_extract_home_care.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) Home Care extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_extract_care_home
 #'
diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index a900cff9a..f4fb7d3e5 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' homelessness extract, it will return the final data
-#' and optionally write it out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process from [read_extract_homelessness()].
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_maternity.R b/R/process_extract_maternity.R
index 04fa46ced..64fa4e205 100644
--- a/R/process_extract_maternity.R
+++ b/R/process_extract_maternity.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' maternity extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -63,10 +63,7 @@ process_extract_maternity <- function(data, year, write_to_disk = TRUE) {
       )
     )
 
-
-  # Save outfile------------------------------------------------
-
-  outfile <- maternity_clean %>%
+  maternity_processed <- maternity_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -113,12 +110,11 @@ process_extract_maternity <- function(data, year, write_to_disk = TRUE) {
     dplyr::arrange(.data$chi, .data$record_keydate1)
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(
-        get_source_extract_path(year, "Maternity", check_mode = "write")
-      )
+    write_file(
+      maternity_processed,
+      get_source_extract_path(year, "Maternity", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(maternity_processed)
 }
diff --git a/R/process_extract_mental_health.R b/R/process_extract_mental_health.R
index 76e7157e3..ffea63d28 100644
--- a/R/process_extract_mental_health.R
+++ b/R/process_extract_mental_health.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' mental health extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -67,10 +67,7 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) {
       smrtype = add_smr_type(.data$recid)
     )
 
-
-  # Outfile  ---------------------------------------
-
-  outfile <- mh_clean %>%
+  mh_processed <- mh_clean %>%
     dplyr::arrange(.data$chi, .data$record_keydate1) %>%
     dplyr::select(
       "year",
@@ -118,10 +115,11 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    outfile %>%
-      # Save as rds file
-      write_file(get_source_extract_path(year, "MH", check_mode = "write"))
+    write_file(
+      mh_processed,
+      get_source_extract_path(year, "MH", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(mh_processed)
 }
diff --git a/R/process_extract_ooh_diagnosis.R b/R/process_extract_ooh_diagnosis.R
index 128c6c772..f2afd634e 100644
--- a/R/process_extract_ooh_diagnosis.R
+++ b/R/process_extract_ooh_diagnosis.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OOH Diagnosis extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_ooh_outcomes.R b/R/process_extract_ooh_outcomes.R
index 6a14bced5..f188e6de1 100644
--- a/R/process_extract_ooh_outcomes.R
+++ b/R/process_extract_ooh_outcomes.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OOH Outcomes extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_outpatients.R b/R/process_extract_outpatients.R
index 39b421ab4..341ee0f1a 100644
--- a/R/process_extract_outpatients.R
+++ b/R/process_extract_outpatients.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' outpatients extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -49,11 +49,7 @@ process_extract_outpatients <- function(data, year, write_to_disk = TRUE) {
       )
     )
 
-
-  ## save outfile ---------------------------------------
-
-  outfile <-
-    outpatients_clean %>%
+  outpatients_processed <- outpatients_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -89,12 +85,11 @@ process_extract_outpatients <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(
-        get_source_extract_path(year, "Outpatients", check_mode = "write")
-      )
+    write_file(
+      outpatients_processed,
+      get_source_extract_path(year, "Outpatients", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(outpatients_processed)
 }
diff --git a/R/process_extract_prescribing.R b/R/process_extract_prescribing.R
index 776299d47..68c388b83 100644
--- a/R/process_extract_prescribing.R
+++ b/R/process_extract_prescribing.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' prescribing extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -50,9 +50,10 @@ process_extract_prescribing <- function(data, year, write_to_disk = TRUE) {
   }
 
   if (write_to_disk) {
-    # Save as rds file
-    pis_clean %>%
-      write_file(get_source_extract_path(year, "PIS", check_mode = "write"))
+    write_file(
+      pis_clean,
+      get_source_extract_path(year, "PIS", check_mode = "write")
+    )
   }
 
   return(pis_clean)
diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R
index a58651749..bd9e93a3f 100644
--- a/R/process_extract_sds.R
+++ b/R/process_extract_sds.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) SDS extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_extract_care_home
 #'
diff --git a/R/process_lookup_ltc.R b/R/process_lookup_ltc.R
index 5a80deaff..8ea33da48 100644
--- a/R/process_lookup_ltc.R
+++ b/R/process_lookup_ltc.R
@@ -24,13 +24,11 @@ process_lookup_ltc <- function(data, year, write_to_disk = TRUE) {
       .fn = ~ stringr::str_remove(.x, "_date_flag")
     )
 
-  # Save Outfile---------------------------------------------
-
   if (write_to_disk) {
-    # Save .rds file
-    ltc_flags %>%
-      dplyr::arrange(.data$chi) %>%
-      write_file(get_ltcs_path(year, check_mode = "write"))
+    write_file(
+      ltc_flags,
+      get_ltcs_path(year, check_mode = "write")
+    )
   }
 
   return(ltc_flags)
diff --git a/R/process_lookup_postcode.R b/R/process_lookup_postcode.R
index 69cc13bd8..f9f1d47f4 100644
--- a/R/process_lookup_postcode.R
+++ b/R/process_lookup_postcode.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' postcode lookup, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param simd_path Path to SIMD lookup.
 #' @param locality_path Path to locality lookup.
@@ -58,16 +58,14 @@ process_lookup_postcode <- function(spd_path = get_spd_path(),
 
 
   # Join data together  -----------------------------------------------------
-  data <-
-    dplyr::left_join(spd_file, simd_file, by = "pc7") %>%
+  data <- dplyr::left_join(spd_file, simd_file, by = "pc7") %>%
     dplyr::rename(postcode = "pc7") %>%
     dplyr::left_join(locality_file, by = "datazone2011")
 
 
   # Finalise output -----------------------------------------------------
 
-  outfile <-
-    data %>%
+  slf_pc_lookup <- data %>%
     dplyr::select(
       "postcode",
       "lca",
@@ -89,13 +87,12 @@ process_lookup_postcode <- function(spd_path = get_spd_path(),
       tidyselect::matches("ur2_\\d{4}$")
     )
 
-
-  # Save out ----------------------------------------------------------------
   if (write_to_disk) {
-    outfile %>%
-      # Save .rds file
-      write_file(get_slf_postcode_path(check_mode = "write"))
+    write_file(
+      slf_pc_lookup,
+      get_slf_postcode_path(check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(slf_pc_lookup)
 }
diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R
index 1874c2b5a..87e6b107d 100644
--- a/R/process_lookup_sc_client.R
+++ b/R/process_lookup_sc_client.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' social care client lookup, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process
@@ -108,10 +108,7 @@ process_lookup_sc_client <- function(data, year, write_to_disk = TRUE) {
       .fn = ~ paste0("sc_", .x)
     )
 
-
-  ## save outfile ---------------------------------------
-  outfile <-
-    client_clean %>%
+  sc_client_lookup <- client_clean %>%
     # reorder
     dplyr::select(
       "sending_location",
@@ -125,10 +122,11 @@ process_lookup_sc_client <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    # Save .rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "Client", check_mode = "write"))
+    write_file(
+      sc_client_lookup,
+      get_source_extract_path(year, "Client", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(sc_client_lookup)
 }
diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R
index 4b0f7500f..8c363f547 100644
--- a/R/process_lookup_sc_demographics.R
+++ b/R/process_lookup_sc_demographics.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' social care demographic lookup, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process.
 #' @param spd_path Path to the Scottish Postcode Directory.
@@ -12,7 +12,10 @@
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @export
 #' @family process extracts
-process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), write_to_disk = TRUE) {
+process_lookup_sc_demographics <- function(
+    data,
+    spd_path = get_spd_path(),
+    write_to_disk = TRUE) {
   # Deal with postcodes ---------------------------------------
 
   # UK postcode regex - see https://ideal-postcodes.co.uk/guides/postcode-validation
@@ -51,8 +54,7 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ
     ))
 
   # count number of na postcodes
-  na_postcodes <-
-    sc_demog %>%
+  na_postcodes <- sc_demog %>%
     dplyr::count(dplyr::across(tidyselect::contains("postcode"), ~ is.na(.x)))
 
   sc_demog <- sc_demog %>%
@@ -97,17 +99,11 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ
     dplyr::count(.data$postcode_type)
 
   # count number of replaced postcode - compare with count above
-  na_replaced_postcodes <-
-    sc_demog %>%
+  na_replaced_postcodes <- sc_demog %>%
     dplyr::count(dplyr::across(tidyselect::ends_with("_postcode"), ~ is.na(.x)))
 
-  na_replaced_postcodes
-  na_postcodes
 
-
-  ## save outfile ---------------------------------------
-  outfile <-
-    sc_demog %>%
+  sc_demog_lookup <- sc_demog %>%
     # group by sending location and ID
     dplyr::group_by(.data$sending_location, .data$social_care_id) %>%
     # arrange so latest submissions are last
@@ -126,14 +122,12 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ
     ) %>%
     dplyr::ungroup()
 
-
-  ## save file ##
-
   if (write_to_disk) {
-    # Save .rds file
-    outfile %>%
-      write_file(get_sc_demog_lookup_path(check_mode = "write"))
+    write_file(
+      sc_demog_lookup,
+      get_sc_demog_lookup_path(check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(sc_demog_lookup)
 }
diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R
index 61bce41b6..620b14cee 100644
--- a/R/process_sc_all_alarms_telecare.R
+++ b/R/process_sc_all_alarms_telecare.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' all Alarms Telecare extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_sc_all_care_home
 #'
@@ -121,9 +121,10 @@ process_sc_all_alarms_telecare <- function(
     tibble::as_tibble()
 
   if (write_to_disk) {
-    # Save .rds file ----
-    qtr_merge %>%
-      write_file(get_sc_at_episodes_path(check_mode = "write"))
+    write_file(
+      qtr_merge,
+      get_sc_at_episodes_path(check_mode = "write")
+    )
   }
 
   return(qtr_merge)
diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R
index a11f275e8..c41e1a1d5 100644
--- a/R/process_sc_all_care_home.R
+++ b/R/process_sc_all_care_home.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' all Care Home extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param sc_demog_lookup The Social Care Demographics lookup produced by
diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R
index b812d492b..5f2b4db49 100644
--- a/R/process_sc_all_home_care.R
+++ b/R/process_sc_all_home_care.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' all home care extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_sc_all_care_home
 #'
diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R
index 068215a28..c17f74f28 100644
--- a/R/process_sc_all_sds.R
+++ b/R/process_sc_all_sds.R
@@ -1,7 +1,7 @@
 #' Process the all SDS extract
 #' @description This will read and process the
 #' all SDS extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_sc_all_care_home
 #'
@@ -119,12 +119,11 @@ process_sc_all_sds <- function(
     # change the data format from data.table to data.frame
     tibble::as_tibble()
 
-
-  # Save outfile------------------------------------------------
   if (write_to_disk) {
-    # Save .rds file
-    final_data %>%
-      write_file(get_sc_sds_episodes_path(check_mode = "write"))
+    write_file(
+      final_data,
+      get_sc_sds_episodes_path(check_mode = "write")
+    )
   }
 
   return(final_data)
diff --git a/R/read_extract_gp_ooh.R b/R/read_extract_gp_ooh.R
index 98606eb8a..3a711c2f8 100644
--- a/R/read_extract_gp_ooh.R
+++ b/R/read_extract_gp_ooh.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OoH extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param year The year to process, in FY format.
 #' @param diagnosis_path Path to diagnosis BOXI extract location.
diff --git a/man/process_costs_ch_rmd.Rd b/man/process_costs_ch_rmd.Rd
index 520898c9e..b990564b7 100644
--- a/man/process_costs_ch_rmd.Rd
+++ b/man/process_costs_ch_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 care homes cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_costs_dn_rmd.Rd b/man/process_costs_dn_rmd.Rd
index bde475d5a..46bcd93dd 100644
--- a/man/process_costs_dn_rmd.Rd
+++ b/man/process_costs_dn_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 District Nursing cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_costs_gp_ooh_rmd.Rd b/man/process_costs_gp_ooh_rmd.Rd
index fd71066c0..f5c611f11 100644
--- a/man/process_costs_gp_ooh_rmd.Rd
+++ b/man/process_costs_gp_ooh_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 GP ooh cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_costs_hc_rmd.Rd b/man/process_costs_hc_rmd.Rd
index b15c311da..c3448bcbc 100644
--- a/man/process_costs_hc_rmd.Rd
+++ b/man/process_costs_hc_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 Home Care cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_extract_acute.Rd b/man/process_extract_acute.Rd
index af6b85bfe..88264cf3d 100644
--- a/man/process_extract_acute.Rd
+++ b/man/process_extract_acute.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 acute extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_ae.Rd b/man/process_extract_ae.Rd
index 58878e689..b2f6954b7 100644
--- a/man/process_extract_ae.Rd
+++ b/man/process_extract_ae.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 A&E extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_alarms_telecare.Rd b/man/process_extract_alarms_telecare.Rd
index a6e61365d..5e37847b3 100644
--- a/man/process_extract_alarms_telecare.Rd
+++ b/man/process_extract_alarms_telecare.Rd
@@ -29,7 +29,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) Alarms Telecare extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_care_home.Rd b/man/process_extract_care_home.Rd
index f058ca787..03c1cd705 100644
--- a/man/process_extract_care_home.Rd
+++ b/man/process_extract_care_home.Rd
@@ -32,7 +32,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) Care Home extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_cmh.Rd b/man/process_extract_cmh.Rd
index 147651f37..52086848a 100644
--- a/man/process_extract_cmh.Rd
+++ b/man/process_extract_cmh.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 CMH extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_delayed_discharges.Rd b/man/process_extract_delayed_discharges.Rd
index ddc41ec46..f9a6b7439 100644
--- a/man/process_extract_delayed_discharges.Rd
+++ b/man/process_extract_delayed_discharges.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 delayed discharges extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_district_nursing.Rd b/man/process_extract_district_nursing.Rd
index 4d9383c2e..f2f466440 100644
--- a/man/process_extract_district_nursing.Rd
+++ b/man/process_extract_district_nursing.Rd
@@ -27,7 +27,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 District Nursing extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_gp_ooh.Rd b/man/process_extract_gp_ooh.Rd
index 8217f0d6f..f96e1dcf2 100644
--- a/man/process_extract_gp_ooh.Rd
+++ b/man/process_extract_gp_ooh.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OoH extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_home_care.Rd b/man/process_extract_home_care.Rd
index 4fef5ac14..8dc1ec6a7 100644
--- a/man/process_extract_home_care.Rd
+++ b/man/process_extract_home_care.Rd
@@ -24,7 +24,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) Home Care extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_homelessness.Rd b/man/process_extract_homelessness.Rd
index 7531f8f22..9b6eb9463 100644
--- a/man/process_extract_homelessness.Rd
+++ b/man/process_extract_homelessness.Rd
@@ -31,7 +31,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 homelessness extract, it will return the final data
-and optionally write it out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_maternity.Rd b/man/process_extract_maternity.Rd
index cd01e6931..8ff8d7014 100644
--- a/man/process_extract_maternity.Rd
+++ b/man/process_extract_maternity.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 maternity extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_mental_health.Rd b/man/process_extract_mental_health.Rd
index 7159aae8b..4a7519d2d 100644
--- a/man/process_extract_mental_health.Rd
+++ b/man/process_extract_mental_health.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 mental health extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_ooh_consultations.Rd b/man/process_extract_ooh_consultations.Rd
index d682197ca..029d951cb 100644
--- a/man/process_extract_ooh_consultations.Rd
+++ b/man/process_extract_ooh_consultations.Rd
@@ -17,7 +17,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OOH Consultations extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_ooh_diagnosis.Rd b/man/process_extract_ooh_diagnosis.Rd
index 2a962989a..864d4029f 100644
--- a/man/process_extract_ooh_diagnosis.Rd
+++ b/man/process_extract_ooh_diagnosis.Rd
@@ -17,7 +17,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OOH Diagnosis extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_ooh_outcomes.Rd b/man/process_extract_ooh_outcomes.Rd
index 5b220e04a..186525ca3 100644
--- a/man/process_extract_ooh_outcomes.Rd
+++ b/man/process_extract_ooh_outcomes.Rd
@@ -17,7 +17,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OOH Outcomes extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_outpatients.Rd b/man/process_extract_outpatients.Rd
index c5e10abc8..721809c1e 100644
--- a/man/process_extract_outpatients.Rd
+++ b/man/process_extract_outpatients.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 outpatients extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_prescribing.Rd b/man/process_extract_prescribing.Rd
index cf294d95c..55b9eb242 100644
--- a/man/process_extract_prescribing.Rd
+++ b/man/process_extract_prescribing.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 prescribing extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_sds.Rd b/man/process_extract_sds.Rd
index 7e8e44a38..14fe9cd0b 100644
--- a/man/process_extract_sds.Rd
+++ b/man/process_extract_sds.Rd
@@ -24,7 +24,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) SDS extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_lookup_postcode.Rd b/man/process_lookup_postcode.Rd
index 6ad56e5b3..19520b4e8 100644
--- a/man/process_lookup_postcode.Rd
+++ b/man/process_lookup_postcode.Rd
@@ -27,7 +27,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 postcode lookup, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_lookup_sc_client.Rd b/man/process_lookup_sc_client.Rd
index e48426419..5daa569b3 100644
--- a/man/process_lookup_sc_client.Rd
+++ b/man/process_lookup_sc_client.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 social care client lookup, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_lookup_sc_demographics.Rd b/man/process_lookup_sc_demographics.Rd
index 6c00b4352..60478ab18 100644
--- a/man/process_lookup_sc_demographics.Rd
+++ b/man/process_lookup_sc_demographics.Rd
@@ -24,7 +24,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 social care demographic lookup, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_sc_all_alarms_telecare.Rd b/man/process_sc_all_alarms_telecare.Rd
index 7e21407f9..8bbc1dce0 100644
--- a/man/process_sc_all_alarms_telecare.Rd
+++ b/man/process_sc_all_alarms_telecare.Rd
@@ -21,7 +21,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all Alarms Telecare extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_sc_all_care_home.Rd b/man/process_sc_all_care_home.Rd
index 691fe51db..2c5d2c4ce 100644
--- a/man/process_sc_all_care_home.Rd
+++ b/man/process_sc_all_care_home.Rd
@@ -36,7 +36,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all Care Home extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_sc_all_home_care.Rd b/man/process_sc_all_home_care.Rd
index d498514db..9a0b999af 100644
--- a/man/process_sc_all_home_care.Rd
+++ b/man/process_sc_all_home_care.Rd
@@ -21,7 +21,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all home care extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_sc_all_sds.Rd b/man/process_sc_all_sds.Rd
index f2d6d8a1c..cca9d0fe5 100644
--- a/man/process_sc_all_sds.Rd
+++ b/man/process_sc_all_sds.Rd
@@ -21,7 +21,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all SDS extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/read_extract_gp_ooh.Rd b/man/read_extract_gp_ooh.Rd
index eae6c52dc..73e6672d9 100644
--- a/man/read_extract_gp_ooh.Rd
+++ b/man/read_extract_gp_ooh.Rd
@@ -26,7 +26,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OoH extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 

From ad98c3cc4a1226072ec012333925eb8b4af20bbb Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 16:16:25 +0100
Subject: [PATCH 23/26] Make targets and tarchetypes required packages (#799)

Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4f1cec425..5123289dd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -53,6 +53,8 @@ Imports:
     slfhelper (>= 0.10.0),
     stringdist (>= 0.9.10),
     stringr (>= 1.5.0),
+    tarchetypes (>= 0.7.6),
+    targets (>= 1.2.0),
     tibble (>= 3.2.1),
     tidyr (>= 1.3.0),
     tidyselect (>= 1.2.0),
@@ -61,8 +63,6 @@ Suggests:
     covr (>= 3.6.1),
     roxygen2 (>= 7.2.3),
     spelling (>= 2.2),
-    tarchetypes (>= 0.7.5),
-    targets (>= 0.14.3),
     testthat (>= 3.1.7)
 Remotes: 
     Public-Health-Scotland/phsmethods,

From a5bc79e3b55de2476d1d460ef58defa461019004 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Tue, 26 Sep 2023 11:39:58 +0100
Subject: [PATCH 24/26] Update episode file functions to pass data through
 (#754)

* Update `read_file` to return an empty tibble if passed the dummy path

This is needed for some other bits, notably NSUs

* Update SPARRA and HHG paths to return dummy if the year is invalid

* Extract all data as a parameter

* Style code

* Update documentation

* Style code

* Update documentation

* rename `run` to `create_episode_file`

* Update documentation

---------

Co-authored-by: Moohan <Moohan@users.noreply.github.com>
Co-authored-by: Jennifer Thom <jennifer.thom@phs.scot>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
---
 R/add_nsu_cohort.R            | 13 +++++---
 R/create_episode_file.R       | 61 +++++++++++++++++++++++++----------
 R/fill_geographies.R          | 51 +++++++++++++++++++++--------
 R/get_source_extract_path.R   | 26 +++++++--------
 R/get_sparra_hhg_paths.R      |  8 +++++
 R/join_deaths_data.R          |  6 ++--
 R/match_on_ltcs.R             |  8 +++--
 R/read_file.R                 |  5 +++
 _targets.R                    |  7 ++++
 man/add_nsu_cohort.Rd         |  4 ++-
 man/create_episode_file.Rd    | 27 +++++++++++++---
 man/create_individual_file.Rd |  2 +-
 man/fill_geographies.Rd       | 11 ++++++-
 man/join_cohort_lookups.Rd    | 12 ++++++-
 man/join_deaths_data.Rd       |  4 +--
 man/match_on_ltcs.Rd          |  4 ++-
 16 files changed, 183 insertions(+), 66 deletions(-)

diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R
index c5a26da12..00260bb8e 100644
--- a/R/add_nsu_cohort.R
+++ b/R/add_nsu_cohort.R
@@ -2,13 +2,17 @@
 #'
 #' @param data The input data frame
 #' @param year The year being processed
+#' @param nsu_cohort The NSU data for the year
 #'
 #' @return A data frame containing the Non-Service Users as additional rows
 #' @export
 #'
 #' @family episode file
 #' @seealso [get_nsu_path()]
-add_nsu_cohort <- function(data, year) {
+add_nsu_cohort <- function(
+    data,
+    year,
+    nsu_cohort = read_file(get_nsu_path(year))) {
   year_param <- year
 
   if (!check_year_valid(year, "NSU")) {
@@ -29,9 +33,9 @@ add_nsu_cohort <- function(data, year) {
     )
   )
 
-  matched <- dplyr::full_join(data,
-    # NSU cohort file
-    read_file(get_nsu_path(year)) %>%
+  matched <- dplyr::full_join(
+    data,
+    nsu_cohort %>%
       dplyr::mutate(
         dob = as.Date(.data[["dob"]]),
         gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]])
@@ -110,7 +114,6 @@ add_nsu_cohort <- function(data, year) {
         .data[["chi"]]
       )
     ) %>%
-    # Remove the additional columns
     dplyr::select(-dplyr::contains("_nsu"), -"has_chi")
 
   return(return_df)
diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index bad42be5e..3dc33e193 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -1,17 +1,32 @@
-#' Create the Source Episode file
+#' Produce the Source Episode file
 #'
 #' @param processed_data_list containing data from processed extracts.
 #' @param year The year to process, in FY format.
 #' @param write_to_disk (optional) Should the data be written to disk default is
 #' `TRUE` i.e. write the data to disk.
+#' @inheritParams add_nsu_cohort
+#' @inheritParams fill_geographies
+#' @inheritParams join_cohort_lookups
+#' @inheritParams join_deaths_data
+#' @inheritParams match_on_ltcs
+#' @inheritParams link_delayed_discharge_eps
 #' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output
-#' (instead of chi).
+#' (instead of chi)
 #'
-#' @return the Source Episode file as a [tibble][tibble::tibble-package].
+#' @return a [tibble][tibble::tibble-package] containing the episode file
 #' @export
 create_episode_file <- function(
     processed_data_list,
     year,
+    dd_data = read_file(get_source_extract_path(year, "DD")),
+    nsu_cohort = read_file(get_nsu_path(year)),
+    ltc_data = read_file(get_ltcs_path(year)),
+    slf_pc_lookup = read_file(get_slf_postcode_path()),
+    slf_gpprac_lookup = read_file(
+      get_slf_gpprac_path(),
+      col_select = c("gpprac", "cluster", "hbpraccode")
+    ),
+    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)),
     write_to_disk = TRUE,
     anon_chi_out = TRUE) {
   episode_file <- dplyr::bind_rows(processed_data_list) %>%
@@ -99,15 +114,21 @@ create_episode_file <- function(
     correct_cij_vars() %>%
     fill_missing_cij_markers() %>%
     add_ppa_flag() %>%
-    link_delayed_discharge_eps(year) %>%
-    add_nsu_cohort(year) %>%
-    match_on_ltcs(year) %>%
+    link_delayed_discharge_eps(year, dd_data) %>%
+    add_nsu_cohort(year, nsu_cohort) %>%
+    match_on_ltcs(year, ltc_data) %>%
     correct_demographics(year) %>%
     create_cohort_lookups(year) %>%
     join_cohort_lookups(year) %>%
     join_sparra_hhg(year) %>%
-    fill_geographies() %>%
-    join_deaths_data(year) %>%
+    fill_geographies(
+      slf_pc_lookup,
+      slf_gpprac_lookup
+    ) %>%
+    join_deaths_data(
+      year,
+      slf_deaths_lookup
+    ) %>%
     load_ep_file_vars(year)
 
   if (anon_chi_out) {
@@ -354,22 +375,28 @@ create_cohort_lookups <- function(data, year, update = latest_update()) {
 #'
 #' @inheritParams store_ep_file_vars
 #' @inheritParams get_demographic_cohorts_path
+#' @param demographic_cohort,service_use_cohort The cohort data
 #'
 #' @return The data including the Demographic and Service Use lookups.
-join_cohort_lookups <- function(data, year, update = latest_update()) {
+join_cohort_lookups <- function(
+    data,
+    year,
+    update = latest_update(),
+    demographic_cohort = read_file(
+      get_demographic_cohorts_path(year, update),
+      col_select = c("chi", "demographic_cohort")
+    ),
+    service_use_cohort = read_file(
+      get_service_use_cohorts_path(year, update),
+      col_select = c("chi", "service_use_cohort")
+    )) {
   join_cohort_lookups <- data %>%
     dplyr::left_join(
-      read_file(
-        get_demographic_cohorts_path(year, update),
-        col_select = c("chi", "demographic_cohort")
-      ),
+      demographic_cohort,
       by = "chi"
     ) %>%
     dplyr::left_join(
-      read_file(
-        get_service_use_cohorts_path(year, update),
-        col_select = c("chi", "service_use_cohort")
-      ),
+      service_use_cohort,
       by = "chi"
     )
 
diff --git a/R/fill_geographies.R b/R/fill_geographies.R
index 8f4a470e8..c9aee6355 100644
--- a/R/fill_geographies.R
+++ b/R/fill_geographies.R
@@ -4,10 +4,18 @@
 #' then use the lookups to match on additional variables.
 #'
 #' @param data the SLF
+#' @param slf_pc_lookup The SLF Postcode lookup
+#' @param slf_gpprac_lookup The SLF GP Practice lookup
 #'
 #' @return a [tibble][tibble::tibble-package] of the SLF with improved
 #' Postcode and GP Practice details.
-fill_geographies <- function(data) {
+fill_geographies <- function(
+    data,
+    slf_pc_lookup = read_file(get_slf_postcode_path()),
+    slf_gpprac_lookup = read_file(
+      get_slf_gpprac_path(),
+      col_select = c("gpprac", "cluster", "hbpraccode")
+    )) {
   check_variables_exist(data, c(
     "chi",
     "postcode",
@@ -21,8 +29,15 @@ fill_geographies <- function(data) {
   ))
 
   data %>%
-    fill_postcode_geogs() %>%
-    fill_gpprac_geographies()
+    fill_postcode_geogs(
+      slf_pc_lookup = read_file(get_slf_postcode_path())
+    ) %>%
+    fill_gpprac_geographies(
+      slf_gpprac_lookup = read_file(
+        get_slf_gpprac_path(),
+        col_select = c("gpprac", "cluster", "hbpraccode")
+      )
+    )
 }
 
 #' Make a postcode lookup for filling to most recent postcodes based on CHI
@@ -86,9 +101,9 @@ make_gpprac_lookup <- function(data) {
   return(gpprac_lookup)
 }
 
-fill_postcode_geogs <- function(data) {
-  slf_pc_lookup <- read_file(get_slf_postcode_path())
-
+fill_postcode_geogs <- function(
+    data,
+    slf_pc_lookup) {
   filled_postcodes <- dplyr::left_join(
     data,
     make_postcode_lookup(data),
@@ -123,17 +138,20 @@ fill_postcode_geogs <- function(data) {
       lca = dplyr::coalesce(.data$lca, .data$lca_old),
       datazone2011 = dplyr::coalesce(.data$datazone2011, .data$datazone2011_old)
     ) %>%
-    dplyr::select(!c("hb2018", "hscp", "lca_old", "datazone2011_old", "most_recent_postcode"))
+    dplyr::select(!c(
+      "hb2018",
+      "hscp",
+      "lca_old",
+      "datazone2011_old",
+      "most_recent_postcode"
+    ))
 
   return(filled_postcodes)
 }
 
-fill_gpprac_geographies <- function(data) {
-  gpprac_ref <- read_file(
-    get_slf_gpprac_path(),
-    col_select = c("gpprac", "cluster", "hbpraccode")
-  )
-
+fill_gpprac_geographies <- function(
+    data,
+    slf_gpprac_lookup) {
   filled_gpprac <- dplyr::left_join(
     data,
     make_gpprac_lookup(data),
@@ -147,7 +165,12 @@ fill_gpprac_geographies <- function(data) {
         .data$gpprac
       )
     ) %>%
-    dplyr::left_join(gpprac_ref, by = "gpprac", suffix = c("_old", "")) %>%
+    dplyr::left_join(
+      slf_gpprac_lookup %>%
+        dplyr::select("gpprac", "cluster", "hbpraccode"),
+      by = "gpprac",
+      suffix = c("_old", "")
+    ) %>%
     dplyr::mutate(
       hbpraccode = dplyr::coalesce(.data$hbpraccode, .data$hbpraccode_old)
     ) %>%
diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R
index 4cb5eef44..37ed545cf 100644
--- a/R/get_source_extract_path.R
+++ b/R/get_source_extract_path.R
@@ -41,34 +41,34 @@ get_source_extract_path <- function(
   type <- match.arg(type)
 
   if (!check_year_valid(year, type)) {
-    return(NA)
+    return(get_dummy_boxi_extract_path())
   }
 
   file_name <- dplyr::case_match(
     type,
     "Acute" ~ "acute_for_source",
-    "AE" ~ "a&e_for_source",
-    "AT" ~ "Alarms-Telecare-for-source",
+    "AE" ~ "a_and_e_for_source",
+    "AT" ~ "alarms-telecare-for-source",
     "CH" ~ "care_home_for_source",
-    "CMH" ~ "CMH_for_source",
+    "CMH" ~ "cmh_for_source",
     "Client" ~ "client_for_source",
-    "DD" ~ "DD_for_source",
+    "DD" ~ "delayed_discharge_for_source",
     "Deaths" ~ "deaths_for_source",
-    "DN" ~ "DN_for_source",
-    "GPOoH" ~ "GP_OOH_for_source",
-    "HC" ~ "Home_Care_for_source",
+    "DN" ~ "district_nursing_for_source",
+    "GPOoH" ~ "gp_ooh_for_source",
+    "HC" ~ "home_care_for_source",
     "Homelessness" ~ "homelessness_for_source",
     "Maternity" ~ "maternity_for_source",
     "MH" ~ "mental_health_for_source",
-    "DD" ~ "DD_for_source",
     "Outpatients" ~ "outpatients_for_source",
-    "PIS" ~ "prescribing_file_for_source",
-    "SDS" ~ "SDS-for-source"
-  )
+    "PIS" ~ "prescribing_for_source",
+    "SDS" ~ "sds_for_source"
+  ) %>%
+    stringr::str_glue("-{year}.parquet")
 
   source_extract_path <- get_file_path(
     directory = get_year_dir(year),
-    file_name = stringr::str_glue("{file_name}-20{year}.parquet"),
+    file_name = file_name,
     ...
   )
 
diff --git a/R/get_sparra_hhg_paths.R b/R/get_sparra_hhg_paths.R
index 2fd1a69f9..157160ed4 100644
--- a/R/get_sparra_hhg_paths.R
+++ b/R/get_sparra_hhg_paths.R
@@ -10,6 +10,10 @@
 #' @family extract file paths
 #' @seealso [get_file_path()] for the generic function.
 get_hhg_path <- function(year, ...) {
+  if (!check_year_valid(year, "HHG")) {
+    return(get_dummy_boxi_extract_path())
+  }
+
   hhg_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "HHG"),
     file_name = stringr::str_glue("HHG-20{year}.parquet"),
@@ -31,6 +35,10 @@ get_hhg_path <- function(year, ...) {
 #' @family extract file paths
 #' @seealso [get_file_path()] for the generic function.
 get_sparra_path <- function(year, ...) {
+  if (!check_year_valid(year, "SPARRA")) {
+    return(get_dummy_boxi_extract_path())
+  }
+
   sparra_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "SPARRA"),
     file_name = stringr::str_glue("SPARRA-20{year}.parquet"),
diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R
index 694d2e2b9..89bcbbe13 100644
--- a/R/join_deaths_data.R
+++ b/R/join_deaths_data.R
@@ -2,16 +2,14 @@
 #'
 #' @param data Episode file data
 #' @param year financial year, e.g. '1920'
-#' @param slf_deaths_lookup_path Path to slf deaths lookup.
+#' @param slf_deaths_lookup The SLF deaths lookup.
 #'
 #' @return The data including the deaths lookup matched
 #'         on to the episode file.
 join_deaths_data <- function(
     data,
     year,
-    slf_deaths_lookup_path = get_slf_deaths_lookup_path(year)) {
-  slf_deaths_lookup <- read_file(slf_deaths_lookup_path)
-
+    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))) {
   return(
     data %>%
       dplyr::left_join(
diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R
index 42345655a..f83f31325 100644
--- a/R/match_on_ltcs.R
+++ b/R/match_on_ltcs.R
@@ -5,13 +5,17 @@
 #'
 #' @param data episode files
 #' @param year financial year, e.g. '1920'
+#' @param ltc_data The LTC data for the year
 #'
 #' @return data matched with long term conditions
-match_on_ltcs <- function(data, year) {
+match_on_ltcs <- function(
+    data,
+    year,
+    ltc_data = read_file(get_ltcs_path(year))) {
   # Match on LTC lookup
   matched <- dplyr::left_join(
     data,
-    read_file(get_ltcs_path(year)),
+    ltc_data,
     by = "chi",
     suffix = c("", "_ltc")
   ) %>%
diff --git a/R/read_file.R b/R/read_file.R
index 2941b62ed..be0a6fc65 100644
--- a/R/read_file.R
+++ b/R/read_file.R
@@ -27,6 +27,11 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) {
     "parquet"
   )
 
+  # Return an empty tibble if trying to read the dummy path
+  if (path == get_dummy_boxi_extract_path()) {
+    return(tibble::tibble())
+  }
+
   ext <- fs::path_ext(path)
 
   if (ext == "gz") {
diff --git a/_targets.R b/_targets.R
index db26477ef..a9fa80d7a 100644
--- a/_targets.R
+++ b/_targets.R
@@ -543,11 +543,18 @@ list(
         source_sc_alarms_tele
       )
     ),
+    tar_file_read(nsu_cohort, get_nsu_path(year), read_file(!!.x)),
     tar_target(
       episode_file,
       create_episode_file(
         processed_data_list,
         year,
+        dd_data = source_dd_extract,
+        nsu_cohort = nsu_cohort,
+        ltc_data = source_ltc_lookup,
+        slf_pc_lookup = source_pc_lookup,
+        slf_gpprac_lookup = source_gp_lookup,
+        slf_deaths_lookup = slf_deaths_lookup,
         write_to_disk
       )
     ),
diff --git a/man/add_nsu_cohort.Rd b/man/add_nsu_cohort.Rd
index 723c105e1..4ea9324e0 100644
--- a/man/add_nsu_cohort.Rd
+++ b/man/add_nsu_cohort.Rd
@@ -4,12 +4,14 @@
 \alias{add_nsu_cohort}
 \title{Add NSU cohort to working file}
 \usage{
-add_nsu_cohort(data, year)
+add_nsu_cohort(data, year, nsu_cohort = read_file(get_nsu_path(year)))
 }
 \arguments{
 \item{data}{The input data frame}
 
 \item{year}{The year being processed}
+
+\item{nsu_cohort}{The NSU data for the year}
 }
 \value{
 A data frame containing the Non-Service Users as additional rows
diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd
index e1bda32b9..c1ce0e063 100644
--- a/man/create_episode_file.Rd
+++ b/man/create_episode_file.Rd
@@ -2,11 +2,18 @@
 % Please edit documentation in R/create_episode_file.R
 \name{create_episode_file}
 \alias{create_episode_file}
-\title{Create the Source Episode file}
+\title{Produce the Source Episode file}
 \usage{
 create_episode_file(
   processed_data_list,
   year,
+  dd_data = read_file(get_source_extract_path(year, "DD")),
+  nsu_cohort = read_file(get_nsu_path(year)),
+  ltc_data = read_file(get_ltcs_path(year)),
+  slf_pc_lookup = read_file(get_slf_postcode_path()),
+  slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac",
+    "cluster", "hbpraccode")),
+  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)),
   write_to_disk = TRUE,
   anon_chi_out = TRUE
 )
@@ -16,15 +23,27 @@ create_episode_file(
 
 \item{year}{The year to process, in FY format.}
 
+\item{dd_data}{The processed DD extract}
+
+\item{nsu_cohort}{The NSU data for the year}
+
+\item{ltc_data}{The LTC data for the year}
+
+\item{slf_pc_lookup}{The SLF Postcode lookup}
+
+\item{slf_gpprac_lookup}{The SLF GP Practice lookup}
+
+\item{slf_deaths_lookup}{The SLF deaths lookup.}
+
 \item{write_to_disk}{(optional) Should the data be written to disk default is
 \code{TRUE} i.e. write the data to disk.}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
-(instead of chi).}
+(instead of chi)}
 }
 \value{
-the Source Episode file as a \link[tibble:tibble-package]{tibble}.
+a \link[tibble:tibble-package]{tibble} containing the episode file
 }
 \description{
-Create the Source Episode file
+Produce the Source Episode file
 }
diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd
index 4fd9a4a53..c4502e5ae 100644
--- a/man/create_individual_file.Rd
+++ b/man/create_individual_file.Rd
@@ -24,7 +24,7 @@ create_individual_file(
 (instead of chi).}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
-(instead of chi).}
+(instead of chi)}
 }
 \value{
 The processed individual file
diff --git a/man/fill_geographies.Rd b/man/fill_geographies.Rd
index 5308fd8d0..bb619405b 100644
--- a/man/fill_geographies.Rd
+++ b/man/fill_geographies.Rd
@@ -4,10 +4,19 @@
 \alias{fill_geographies}
 \title{Fill postcode and GP practice geographies}
 \usage{
-fill_geographies(data)
+fill_geographies(
+  data,
+  slf_pc_lookup = read_file(get_slf_postcode_path()),
+  slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac",
+    "cluster", "hbpraccode"))
+)
 }
 \arguments{
 \item{data}{the SLF}
+
+\item{slf_pc_lookup}{The SLF Postcode lookup}
+
+\item{slf_gpprac_lookup}{The SLF GP Practice lookup}
 }
 \value{
 a \link[tibble:tibble-package]{tibble} of the SLF with improved
diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd
index 15a860a36..3ef549cc3 100644
--- a/man/join_cohort_lookups.Rd
+++ b/man/join_cohort_lookups.Rd
@@ -4,7 +4,15 @@
 \alias{join_cohort_lookups}
 \title{Join cohort lookups}
 \usage{
-join_cohort_lookups(data, year, update = latest_update())
+join_cohort_lookups(
+  data,
+  year,
+  update = latest_update(),
+  demographic_cohort = read_file(get_demographic_cohorts_path(year, update), col_select =
+    c("chi", "demographic_cohort")),
+  service_use_cohort = read_file(get_service_use_cohorts_path(year, update), col_select =
+    c("chi", "service_use_cohort"))
+)
 }
 \arguments{
 \item{data}{The in-progress episode file data.}
@@ -12,6 +20,8 @@ join_cohort_lookups(data, year, update = latest_update())
 \item{year}{The year to process, in FY format.}
 
 \item{update}{The update to use}
+
+\item{demographic_cohort, service_use_cohort}{The cohort data}
 }
 \value{
 The data including the Demographic and Service Use lookups.
diff --git a/man/join_deaths_data.Rd b/man/join_deaths_data.Rd
index 6508d7893..f3b68fe1a 100644
--- a/man/join_deaths_data.Rd
+++ b/man/join_deaths_data.Rd
@@ -7,7 +7,7 @@
 join_deaths_data(
   data,
   year,
-  slf_deaths_lookup_path = get_slf_deaths_lookup_path(year)
+  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))
 )
 }
 \arguments{
@@ -15,7 +15,7 @@ join_deaths_data(
 
 \item{year}{financial year, e.g. '1920'}
 
-\item{slf_deaths_lookup_path}{Path to slf deaths lookup.}
+\item{slf_deaths_lookup}{The SLF deaths lookup.}
 }
 \value{
 The data including the deaths lookup matched
diff --git a/man/match_on_ltcs.Rd b/man/match_on_ltcs.Rd
index 0c7e7fb53..e0def00cc 100644
--- a/man/match_on_ltcs.Rd
+++ b/man/match_on_ltcs.Rd
@@ -4,12 +4,14 @@
 \alias{match_on_ltcs}
 \title{Match on LTC DoB and dates of LTC incidence}
 \usage{
-match_on_ltcs(data, year)
+match_on_ltcs(data, year, ltc_data = read_file(get_ltcs_path(year)))
 }
 \arguments{
 \item{data}{episode files}
 
 \item{year}{financial year, e.g. '1920'}
+
+\item{ltc_data}{The LTC data for the year}
 }
 \value{
 data matched with long term conditions

From b7ce1b158e27d1407571b1404ff4c6ea295033ad Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Tue, 26 Sep 2023 11:53:44 +0100
Subject: [PATCH 25/26] Tests/it extract path (#807)

* Add additional tests for `check_it_reference()`

* Make the check on the IT reference stricter

* Update documentation

---------

Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/get_it_extract_paths.R                   |  2 +-
 tests/testthat/test-get_it_extract_paths.R | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/R/get_it_extract_paths.R b/R/get_it_extract_paths.R
index 2c587e93b..3c4dc54c3 100644
--- a/R/get_it_extract_paths.R
+++ b/R/get_it_extract_paths.R
@@ -107,7 +107,7 @@ check_it_reference <- function(it_reference) {
     it_reference <- stringr::str_sub(it_reference, start = 7L, end = 14L)
   }
 
-  if (stringr::str_detect(it_reference, "[0-9]{7}", negate = TRUE)) {
+  if (stringr::str_detect(it_reference, "^[0-9]{7}$", negate = TRUE)) {
     cli::cli_abort(
       c("x" = "{.arg it_reference} must be exactly 7 numbers."),
       call = rlang::caller_env()
diff --git a/tests/testthat/test-get_it_extract_paths.R b/tests/testthat/test-get_it_extract_paths.R
index baaad52a5..52f9e4181 100644
--- a/tests/testthat/test-get_it_extract_paths.R
+++ b/tests/testthat/test-get_it_extract_paths.R
@@ -1,3 +1,25 @@
+test_that("IT reference cleanup works", {
+  expect_equal(check_it_reference("SCTASK0439133"), "0439133")
+  expect_equal(check_it_reference("0439133"), "0439133")
+
+  expect_error(
+    check_it_reference("123456789"),
+    "`it_reference` must be exactly 7 numbers\\."
+  )
+  expect_error(
+    check_it_reference("1234567890"),
+    "`it_reference` must be exactly 7 numbers\\."
+  )
+  expect_error(
+    check_it_reference("SCTASK123456789"),
+    "`it_reference` must be exactly 7 numbers\\."
+  )
+  expect_error(
+    check_it_reference("ABCDEF123456789"),
+    "`it_reference` must be exactly 7 numbers\\."
+  )
+})
+
 skip_on_ci()
 
 test_that("IT extract file paths work", {

From 2f6f25c9a319874bd83ff5122dcbd38bc5ee1815 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Tue, 26 Sep 2023 11:56:42 +0100
Subject: [PATCH 26/26] Update workflow to run against the development branch
 (#795)

* Make test-coverage.yaml run against development

* Make lint-changed-files.yaml run against development

---------

Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 .github/workflows/lint-changed-files.yaml | 2 +-
 .github/workflows/test-coverage.yaml      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/lint-changed-files.yaml b/.github/workflows/lint-changed-files.yaml
index a5074e3b6..96f1673a1 100644
--- a/.github/workflows/lint-changed-files.yaml
+++ b/.github/workflows/lint-changed-files.yaml
@@ -2,7 +2,7 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   pull_request:
-    branches: [main-R, master, main]
+    branches: [master, main, development]
 
 name: lint-changed-files
 
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index 2405bcc47..f7096264e 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -2,9 +2,9 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: [master, main, main-R]
+    branches: [master, main, development]
   pull_request:
-    branches: [master, main, main-R]
+    branches: [master, main, development]
 
 name: test-coverage