From 53ede24805c28010621d178f5ee7b5261625c1f7 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 11:26:10 +0100
Subject: [PATCH 01/11] Add tests for `compute_mid_year_age` (#809)

* Add tests for `compute_mid_year_age`

* Remove redundant code

* Update documentation

---------

Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/compute_mid_year_age.R                   |  2 +-
 man/read_file.Rd                           |  2 +-
 tests/testthat/test-compute_mid_year_age.R | 16 ++++++++++++++++
 3 files changed, 18 insertions(+), 2 deletions(-)
 create mode 100644 tests/testthat/test-compute_mid_year_age.R

diff --git a/R/compute_mid_year_age.R b/R/compute_mid_year_age.R
index 01bfaf5d5..0e2483cf7 100644
--- a/R/compute_mid_year_age.R
+++ b/R/compute_mid_year_age.R
@@ -20,7 +20,7 @@
 compute_mid_year_age <- function(fyyear, dob) {
   age_intervals <- lubridate::interval(
     start = dob,
-    end = as.Date(midpoint_fy(fyyear))
+    end = midpoint_fy(fyyear)
   )
 
   ages <- lubridate::as.period(age_intervals)$year
diff --git a/man/read_file.Rd b/man/read_file.Rd
index b8231218f..1ef351342 100644
--- a/man/read_file.Rd
+++ b/man/read_file.Rd
@@ -14,7 +14,7 @@ read_file(path, col_select = NULL, as_data_frame = TRUE, ...)
 \link[tidyselect:eval_select]{tidy selection specification}
 of columns, as used in \code{dplyr::select()}.}
 
-\item{as_data_frame}{Should the function return a \code{data.frame} (default) or
+\item{as_data_frame}{Should the function return a \code{tibble} (default) or
 an Arrow \link[arrow]{Table}?}
 
 \item{...}{Addition arguments passed to the relevant function.}
diff --git a/tests/testthat/test-compute_mid_year_age.R b/tests/testthat/test-compute_mid_year_age.R
new file mode 100644
index 000000000..a4a542b9e
--- /dev/null
+++ b/tests/testthat/test-compute_mid_year_age.R
@@ -0,0 +1,16 @@
+test_that("Accurately compute mid year age", {
+  # Compare against phsmethods::age_calculate() at 30 September of each year.
+  single_dob <- lubridate::make_date("2000")
+  many_dobs <- lubridate::make_date("1999") + 1:1000
+  expect_equal(
+    compute_mid_year_age("1718", single_dob),
+    phsmethods::age_calculate(
+      single_dob,
+      lubridate::make_date("2017", 9L, 30L)
+    )
+  )
+  expect_equal(
+    compute_mid_year_age("2021", many_dobs),
+    phsmethods::age_calculate(many_dobs, lubridate::make_date("2020", 9L, 30L))
+  )
+})

From 04c2685d25855a1941f7dfc7d0209fd2a4ee1300 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 11:37:04 +0100
Subject: [PATCH 02/11] Add a new function to set up keyring (#800)

* Add a new function to set up keyring

I've tested this by deleting my `.Renviron` and deleting my keyring `keyring::keyring_delete("createslf")` and it seems to work. Would be great to have someone with an existing set-up (Jen) test it, and to have someone who doesn't have it set up to test it.

The code looks complicated but I've just tried to catch every scenario, so the process should be smooth and clear (from the user's point of view).

I've also expanded the code relating to the username, which will now hopefully work in more cases.

* [check-spelling] Update metadata

Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/5824423711/attempts/1
Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/800#issuecomment-1673658357

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>

* Update documentation

---------

Signed-off-by: check-spelling-bot <check-spelling-bot@users.noreply.github.com>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 .github/actions/spelling/expect.txt |   1 +
 NAMESPACE                           |   1 +
 R/get_connection_PHS_database.R     | 247 ++++++++++++++++++++++++----
 man/phs_db_connection.Rd            |  18 +-
 man/setup_keyring.Rd                |  44 +++++
 5 files changed, 272 insertions(+), 39 deletions(-)
 create mode 100644 man/setup_keyring.Rd

diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 464adca0e..473e0304d 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -168,6 +168,7 @@ readxl
 reasonwait
 recid
 refsource
+renviron
 rlang
 rmarkdown
 roxygen
diff --git a/NAMESPACE b/NAMESPACE
index 6c4f3cd52..678d7a53f 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -156,6 +156,7 @@ export(read_sc_all_care_home)
 export(read_sc_all_home_care)
 export(read_sc_all_sds)
 export(run_episode_file)
+export(setup_keyring)
 export(start_fy)
 export(start_fy_quarter)
 export(start_next_fy_quarter)
diff --git a/R/get_connection_PHS_database.R b/R/get_connection_PHS_database.R
index 5973e0003..0a528f45b 100644
--- a/R/get_connection_PHS_database.R
+++ b/R/get_connection_PHS_database.R
@@ -1,22 +1,32 @@
 #' Open a connection to a PHS database
 #'
-#' @description Opens a connection to PHS database to allow data to be collected
+#' @description Opens a connection to a PHS database given a Data Source Name
+#' (DSN). It will try to get the username, asking for input if in an interactive
+#' session. It will also use [keyring][keyring::keyring-package] to find
+#' an existing keyring called 'createslf' which should contain a `db_password`
+#' key with the user's database password.
 #'
-#' @param dsn The Data Source Name passed on to `odbc::dbconnect`
-#' the dsn must be setup first. e.g. SMRA or DVPROD
+#' @param dsn The Data Source Name (DSN) passed on to [odbc::dbConnect()]
+#' the DSN must be set up first. e.g. `SMRA` or `DVPROD`
 #' @param username The username to use for authentication,
-#' if not supplied it first will check the environment variable
-#' and finally ask the user for input.
+#' if not supplied it will try to find it automatically and if possible ask the
+#' user for input.
 #'
-#' @return a connection to the specified dsn
+#' @return a connection to the specified Data Source.
 #' @export
-#'
-phs_db_connection <- function(dsn, username = Sys.getenv("USER")) {
-  # Collect username from the environment
-  username <- Sys.getenv("USER")
+phs_db_connection <- function(dsn, username) {
+  if (missing(username)) {
+    # Collect username if possible
+    username <- dplyr::case_when(
+      Sys.info()["USER"] != "unknown" ~ Sys.info()["USER"],
+      Sys.getenv("USER") != "" ~ Sys.getenv("USER"),
+      system2("whoami", stdout = TRUE) != "" ~ system2("whoami", stdout = TRUE),
+      .default = NA
+    )
+  }
 
-  # Check the username is not empty and take input if not
-  if (is.na(username) || username == "") {
+  # If the username is missing try to get input from the user
+  if (is.na(username)) {
     if (rlang::is_interactive()) {
       username <- rstudioapi::showPrompt(
         title = "Username",
@@ -24,46 +34,219 @@ phs_db_connection <- function(dsn, username = Sys.getenv("USER")) {
         default = ""
       )
     } else {
-      cli::cli_abort("No username found, you should supply one with {.arg username}")
+      cli::cli_abort(
+        c(
+          "x" = "No username found, you can use the {.arg username} argument.",
+          "i" = "Alternatively, add {.code USER = \"<your username>\"} to your
+          {.file .Renviron} file."
+        )
+      )
     }
   }
 
-  # TODO improve error messages and provide instructions for setting up keyring
-  # Add the following code to R profile.
-  # Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf"),
-  # keyring_create("createslf", password = Sys.getenv("CREATESLF_KEYRING_PASS")),
-  # key_set(keyring = "createslf", service = "db_password")
+  # Check the status of keyring
+  # Does the 'createslf' keyring exist
+  keyring_exists <- "createslf" %in% keyring::keyring_list()[["keyring"]]
 
-  if (!("createslf" %in% keyring::keyring_list()[["keyring"]])) {
-    cli::cli_abort("The {.val createslf} keyring does not exist.")
+  # Does the 'db_password' key exist in the 'createslf' keyring
+  if (keyring_exists) {
+    key_exists <- "db_password" %in% keyring::key_list(keyring = "createslf")[["service"]]
+  } else {
+    key_exists <- FALSE
   }
 
-  if (!("db_password" %in% keyring::key_list(keyring = "createslf")[["service"]])) {
-    cli::cli_abort("{.val db_password} is missing from the {.val createslf} keyring.")
-  }
+  # Does the 'CREATESLF_KEYRING_PASS' environment variable exist
+  env_var_pass_exists <- Sys.getenv("CREATESLF_KEYRING_PASS") != ""
 
-  if (Sys.getenv("CREATESLF_KEYRING_PASS") == "") {
-    cli::cli_abort("You must have the password to unlock the {.val createslf} keyring in your environment as
-                   {.envvar CREATESLF_KEYRING_PASS}. Please set this up in your {.file .Renviron} or {.file .Rprofile}")
+  if (!all(keyring_exists, key_exists, env_var_pass_exists)) {
+    if (rlang::is_interactive()) {
+      setup_keyring(
+        keyring = "createslf",
+        key = "db_password",
+        keyring_exists = keyring_exists,
+        key_exists = key_exists,
+        env_var_pass_exists = env_var_pass_exists
+      )
+    } else {
+      if (any(keyring_exists, key_exists, env_var_pass_exists)) {
+        cli::cli_abort(
+          c(
+            "x" = "Your keyring needs to be set up, run:",
+            "{.code setup_keyring(keyring = \"createslf\", key = \"db_password\",
+  keyring_exists = {keyring_exists}, key_exists = {key_exists},
+  env_var_pass_exists = {env_var_pass_exists})}"
+          )
+        )
+      } else {
+        cli::cli_abort(
+          c(
+            "x" = "Your keyring needs to be set up, run:",
+            "{.code setup_keyring(keyring = \"createslf\",
+            key = \"db_password\")}"
+          )
+        )
+      }
+    }
   }
 
-  keyring::keyring_unlock(keyring = "createslf", password = Sys.getenv("CREATESLF_KEYRING_PASS"))
-
-  if (keyring::keyring_is_locked(keyring = "createslf")) {
-    cli::cli_abort("Keyring is locked. To unlock createslf keyring, please use {.fun keyring::keyring_unlock}")
+  if (env_var_pass_exists) {
+    keyring::keyring_unlock(
+      keyring = "createslf",
+      password = Sys.getenv("CREATESLF_KEYRING_PASS")
+    )
+  } else {
+    keyring::keyring_unlock(
+      keyring = "createslf",
+      password = rstudioapi::askForPassword(
+        prompt = "Enter the password for the keyring you just created."
+      )
+    )
   }
 
 
   # Create the connection
-  password_text <- stringr::str_glue("{dsn} password for user: {username}")
   db_connection <- odbc::dbConnect(
     odbc::odbc(),
     dsn = dsn,
     uid = username,
-    pwd = keyring::key_get(keyring = "createslf", service = "db_password")
+    pwd = keyring::key_get(
+      keyring = "createslf",
+      service = "db_password"
+    )
   )
 
   keyring::keyring_lock(keyring = "createslf")
 
   return(db_connection)
 }
+
+#' Interactively set up the keyring
+#'
+#' @description
+#' This is meant to be used with [phs_db_connection()], it can only be used
+#' interactively i.e. not in targets or in a workbench job.
+#'
+#' With the default options it will go through the steps to set up a keyring
+#' which can be used to supply passwords to [odbc::dbConnect()] (or others) in a
+#' secure and seamless way.
+#'
+#'  1. Create an .Renviron file in the project and add a password (for the
+#'  keyring) to it.
+#'  2. Create a keyring with the password - Since we have saved the password as
+#'  an environment variable the keyring can be unlocked and used automatically.
+#'  3. Add the database password to the keyring.
+#'
+#'
+#' @param keyring Name of the keyring
+#' @param key Name of the key
+#' @param keyring_exists Does the keyring already exist
+#' @param key_exists Does the key already exist
+#' @param env_var_pass_exists Does the password for the keyring already exist
+#' in the environment.
+#'
+#' @return NULL (invisibly)
+#' @export
+setup_keyring <- function(
+    keyring = "createslf",
+    key = "db_password",
+    keyring_exists = FALSE,
+    key_exists = FALSE,
+    env_var_pass_exists = FALSE) {
+  # First we need the password as an environment variable
+  if (!env_var_pass_exists) {
+    if (Sys.getenv("CREATESLF_KEYRING_PASS") != "") {
+      cli::cli_alert_warning(
+        "{.envvar CREATESLF_KEYRING_PASS} already exists in the environment, you
+        will need to clean this up manually if it's not correct."
+      )
+      keyring_password <- Sys.getenv("CREATESLF_KEYRING_PASS")
+    } else if (
+      fs::file_exists(".Renviron") && # read_lines() errors on a missing file
+        any(stringr::str_detect(
+          readr::read_lines(".Renviron"),
+          "^CREATESLF_KEYRING_PASS\\s*?=\\s*?['\"].+?['\"]$"
+        ))
+    ) {
+      cli::cli_abort(
+        "Your {.file .Renviron} file looks ok, try restarting your session."
+      )
+    } else {
+      keyring_password <- rstudioapi::askForPassword(
+        prompt = stringr::str_glue(
+          "Enter a password for the '{keyring}' keyring, this should
+        not be your LDAP / database password."
+        )
+      )
+      if (is.null(keyring_password)) {
+        cli::cli_abort("No keyring password entered.")
+      }
+      if (!fs::file_exists(".Renviron")) {
+        cli::cli_alert_success("Creating an {.file .Renviron} file.")
+      }
+
+      renviron_text <- stringr::str_glue(
+        "CREATESLF_KEYRING_PASS = \"{keyring_password}\""
+      )
+
+      readr::write_lines(
+        x = renviron_text,
+        file = ".Renviron",
+        append = TRUE
+      )
+
+      cli::cli_alert_success(
+        "Added {.code {renviron_text}} to the {.file .Renviron} file."
+      )
+
+      cli::cli_alert_info("You will need to restart your R session.")
+    }
+  } else {
+    keyring_password <- Sys.getenv("CREATESLF_KEYRING_PASS")
+  }
+
+  # If the keyring doesn't exist create it now.
+  if (!keyring_exists) {
+    if (keyring %in% keyring::keyring_list()[["keyring"]]) {
+      cli::cli_alert_warning(
+        "The {keyring} keyring already exists, you will be asked to
+        overwrite it."
+      )
+    }
+    keyring::keyring_create(
+      keyring = keyring,
+      password = keyring_password
+    )
+
+    cli::cli_alert_success(
+      "Created the '{keyring}' keyring with {.fun keyring::keyring_create}."
+    )
+  }
+
+  # If we just created the keyring it will already be unlocked
+  keyring::keyring_unlock(
+    keyring = keyring,
+    password = keyring_password
+  )
+
+  # Now add the password to the keyring
+  if (!key_exists) {
+    keyring::key_set(
+      keyring = keyring,
+      service = key,
+      prompt = "Enter your LDAP password for database connections."
+    )
+
+    cli::cli_alert_success(
+      "Added the '{key}' key to the '{keyring}' keyring with
+      {.fun keyring::key_set}."
+    )
+  }
+
+  keyring::keyring_lock(keyring = keyring)
+
+  cli::cli_alert_success(
+    "The keyring should now be set up correctly."
+  )
+
+  return(invisible(NULL))
+}
diff --git a/man/phs_db_connection.Rd b/man/phs_db_connection.Rd
index 93e73ee55..8ff9d0a32 100644
--- a/man/phs_db_connection.Rd
+++ b/man/phs_db_connection.Rd
@@ -4,19 +4,23 @@
 \alias{phs_db_connection}
 \title{Open a connection to a PHS database}
 \usage{
-phs_db_connection(dsn, username = Sys.getenv("USER"))
+phs_db_connection(dsn, username)
 }
 \arguments{
-\item{dsn}{The Data Source Name passed on to \code{odbc::dbconnect}
-the dsn must be setup first. e.g. SMRA or DVPROD}
+\item{dsn}{The Data Source Name (DSN) passed on to \code{\link[odbc:dbConnect-OdbcDriver-method]{odbc::dbConnect()}}
+the DSN must be set up first. e.g. \code{SMRA} or \code{DVPROD}}
 
 \item{username}{The username to use for authentication,
-if not supplied it first will check the environment variable
-and finally ask the user for input.}
+if not supplied it will try to find it automatically and if possible ask the
+user for input.}
 }
 \value{
-a connection to the specified dsn
+a connection to the specified Data Source.
 }
 \description{
-Opens a connection to PHS database to allow data to be collected
+Opens a connection to a PHS database given a Data Source Name
+(DSN). It will try to get the username, asking for input if in an interactive
+session. It will also use \link[keyring:keyring-package]{keyring} to find
+an existing keyring called 'createslf' which should contain a \code{db_password}
+key with the user's database password.
 }
diff --git a/man/setup_keyring.Rd b/man/setup_keyring.Rd
new file mode 100644
index 000000000..c40ef31c1
--- /dev/null
+++ b/man/setup_keyring.Rd
@@ -0,0 +1,44 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_connection_PHS_database.R
+\name{setup_keyring}
+\alias{setup_keyring}
+\title{Interactively set up the keyring}
+\usage{
+setup_keyring(
+  keyring = "createslf",
+  key = "db_password",
+  keyring_exists = FALSE,
+  key_exists = FALSE,
+  env_var_pass_exists = FALSE
+)
+}
+\arguments{
+\item{keyring}{Name of the keyring}
+
+\item{key}{Name of the key}
+
+\item{keyring_exists}{Does the keyring already exist}
+
+\item{key_exists}{Does the key already exist}
+
+\item{env_var_pass_exists}{Does the password for the keyring already exist
+in the environment.}
+}
+\value{
+NULL (invisibly)
+}
+\description{
+This is meant to be used with \code{\link[=phs_db_connection]{phs_db_connection()}}, it can only be used
+interactively i.e. not in targets or in a workbench job.
+
+With the default options it will go through the steps to set up a keyring
+which can be used to supply passwords to \code{\link[odbc:dbConnect-OdbcDriver-method]{odbc::dbConnect()}} (or others) in a
+secure and seamless way.
+\enumerate{
+\item Create an .Renviron file in the project and add a password (for the
+keyring) to it.
+\item Create a keyring with the password - Since we have saved the password as
+an environment variable the keyring can be unlocked and used automatically.
+\item Add the database password to the keyring.
+}
+}

From 9652c691fc45d9626b145f1156a7a3647df24e63 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 12:02:44 +0100
Subject: [PATCH 03/11] Add additional tests for `get_file_path` (#808)

* Add additional tests for `get_file_path`

* Style code

---------

Co-authored-by: Moohan <Moohan@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 tests/testthat/test-get_file_paths.R | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests/testthat/test-get_file_paths.R b/tests/testthat/test-get_file_paths.R
index 2bec746f7..a3b29a290 100644
--- a/tests/testthat/test-get_file_paths.R
+++ b/tests/testthat/test-get_file_paths.R
@@ -1,3 +1,28 @@
+test_that("Errors properly", {
+  # A directory that does not exist is reported
+  expect_error(
+    object = get_file_path(directory = "foo", file_name = "bar"),
+    regexp = "The directory .+? does not exist\\."
+  )
+  # `file_name_regexp` together with `check_mode = "write"` is refused
+  expect_error(
+    object = get_file_path(
+      directory = ".",
+      file_name_regexp = "targets", check_mode = "write"
+    ),
+    regexp = "`check_mode = \"write\"` can't be used"
+  )
+})
+
+test_that("Can do check exists", {
+  # The test expects "foo.R" to be reported as not existing in "."
+  found <- get_file_path(
+    directory = ".", file_name = "foo.R", check_mode = "exists"
+  )
+  expect_false(found)
+})
+
+
 skip_on_ci()
 
 slf_updates_dir <- fs::path(

From bc2e4b3d5ded339ce938227c94a705fa4d8bcfff Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 15:15:03 +0100
Subject: [PATCH 04/11] Rename `run_episode_file()` -> `create_episode_file()`
 (#803)

* Rename `run_episode_file()` -> `create_episode_file()`

This improves consistency! When speaking to Megan we noted that having the two 'main' functions with different names was needlessly confusing!

* Delete run_targets_tests.R

* Update documentation

---------

Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
---
 NAMESPACE                                        |  2 +-
 R/{run_episode_file.R => create_episode_file.R}  | 13 ++++++-------
 R/create_individual_file.R                       | 10 +++++-----
 _targets.R                                       |  2 +-
 man/add_acute_columns.Rd                         |  2 +-
 man/add_ae_columns.Rd                            |  2 +-
 man/add_all_columns.Rd                           |  2 +-
 man/add_at_columns.Rd                            |  2 +-
 man/add_ch_columns.Rd                            |  2 +-
 man/add_cij_columns.Rd                           |  2 +-
 man/add_cmh_columns.Rd                           |  2 +-
 man/add_dd_columns.Rd                            |  2 +-
 man/add_dn_columns.Rd                            |  2 +-
 man/add_gls_columns.Rd                           |  2 +-
 man/add_hc_columns.Rd                            |  2 +-
 man/add_hl1_columns.Rd                           |  2 +-
 man/add_ipdc_cols.Rd                             |  2 +-
 man/add_mat_columns.Rd                           |  2 +-
 man/add_mh_columns.Rd                            |  2 +-
 man/add_nrs_columns.Rd                           |  2 +-
 man/add_nsu_columns.Rd                           |  2 +-
 man/add_ooh_columns.Rd                           |  2 +-
 man/add_op_columns.Rd                            |  2 +-
 man/add_pis_columns.Rd                           |  2 +-
 man/add_sds_columns.Rd                           |  2 +-
 man/add_standard_cols.Rd                         |  2 +-
 man/aggregate_by_chi.Rd                          |  2 +-
 man/aggregate_ch_episodes.Rd                     |  2 +-
 man/clean_up_ch.Rd                               |  2 +-
 man/correct_cij_vars.Rd                          |  2 +-
 man/create_cohort_lookups.Rd                     |  2 +-
 man/create_cost_inc_dna.Rd                       |  2 +-
 ...un_episode_file.Rd => create_episode_file.Rd} | 16 ++++++++--------
 man/create_individual_file.Rd                    | 10 +++++-----
 man/fill_missing_cij_markers.Rd                  |  2 +-
 man/join_cohort_lookups.Rd                       |  2 +-
 man/load_ep_file_vars.Rd                         |  2 +-
 man/recode_gender.Rd                             |  2 +-
 man/remove_blank_chi.Rd                          |  2 +-
 man/store_ep_file_vars.Rd                        |  2 +-
 40 files changed, 60 insertions(+), 61 deletions(-)
 rename R/{run_episode_file.R => create_episode_file.R} (97%)
 rename man/{run_episode_file.Rd => create_episode_file.Rd} (63%)

diff --git a/NAMESPACE b/NAMESPACE
index 678d7a53f..d0323b8c4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,6 +13,7 @@ export(convert_hscp_to_hscpnames)
 export(convert_numeric_to_date)
 export(convert_sending_location_to_lca)
 export(convert_year_to_fyyear)
+export(create_episode_file)
 export(create_individual_file)
 export(create_service_use_cohorts)
 export(end_fy)
@@ -155,7 +156,6 @@ export(read_sc_all_alarms_telecare)
 export(read_sc_all_care_home)
 export(read_sc_all_home_care)
 export(read_sc_all_sds)
-export(run_episode_file)
 export(setup_keyring)
 export(start_fy)
 export(start_fy_quarter)
diff --git a/R/run_episode_file.R b/R/create_episode_file.R
similarity index 97%
rename from R/run_episode_file.R
rename to R/create_episode_file.R
index 19c2481f2..bad42be5e 100644
--- a/R/run_episode_file.R
+++ b/R/create_episode_file.R
@@ -1,16 +1,15 @@
-#' Produce the Source Episode file
+#' Create the Source Episode file
 #'
 #' @param processed_data_list containing data from processed extracts.
 #' @param year The year to process, in FY format.
 #' @param write_to_disk (optional) Should the data be written to disk default is
 #' `TRUE` i.e. write the data to disk.
 #' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output
-#' (instead of chi)
+#' (instead of chi).
 #'
-#' @return a [tibble][tibble::tibble-package] containing the episode file
+#' @return the Source Episode file as a [tibble][tibble::tibble-package].
 #' @export
-#'
-run_episode_file <- function(
+create_episode_file <- function(
     processed_data_list,
     year,
     write_to_disk = TRUE,
@@ -134,7 +133,7 @@ run_episode_file <- function(
 #' Store the unneeded episode file variables
 #'
 #' @param data The in-progress episode file data.
-#' @inheritParams run_episode_file
+#' @inheritParams create_episode_file
 #' @param vars_to_keep a character vector of the variables to keep, all others
 #' will be stored.
 #'
@@ -172,7 +171,7 @@ store_ep_file_vars <- function(data, year, vars_to_keep) {
 
 #' Load the unneeded episode file variables
 #'
-#' @inheritParams run_episode_file
+#' @inheritParams create_episode_file
 #' @inheritParams store_ep_file_vars
 #'
 #' @return The full SLF data.
diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index f0e6bcdfc..436f1c8d7 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -1,11 +1,11 @@
-#' Create individual file
+#' Create the Source Individual file
 #'
-#' @description Creates individual file from episode file
+#' @description Creates the individual file from the episode file.
 #'
-#' @param episode_file Tibble containing episodic data
+#' @param episode_file Tibble containing episodic data.
 #' @param anon_chi_in (Default:TRUE) Is `anon_chi` used in the input
-#' (instead of chi)
-#' @inheritParams run_episode_file
+#' (instead of chi).
+#' @inheritParams create_episode_file
 #'
 #' @return The processed individual file
 #' @export
diff --git a/_targets.R b/_targets.R
index 9698cf27e..db26477ef 100644
--- a/_targets.R
+++ b/_targets.R
@@ -545,7 +545,7 @@ list(
     ),
     tar_target(
       episode_file,
-      run_episode_file(
+      create_episode_file(
         processed_data_list,
         year,
         write_to_disk
diff --git a/man/add_acute_columns.Rd b/man/add_acute_columns.Rd
index 52ba071b6..c2659f821 100644
--- a/man/add_acute_columns.Rd
+++ b/man/add_acute_columns.Rd
@@ -7,7 +7,7 @@
 add_acute_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_ae_columns.Rd b/man/add_ae_columns.Rd
index 9b7099513..fdc31b7ff 100644
--- a/man/add_ae_columns.Rd
+++ b/man/add_ae_columns.Rd
@@ -7,7 +7,7 @@
 add_ae_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_all_columns.Rd b/man/add_all_columns.Rd
index d502e95c3..1d2e587db 100644
--- a/man/add_all_columns.Rd
+++ b/man/add_all_columns.Rd
@@ -7,7 +7,7 @@
 add_all_columns(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Add new columns based on SMRType and recid which follow a pattern
diff --git a/man/add_at_columns.Rd b/man/add_at_columns.Rd
index e05ea9101..af978530a 100644
--- a/man/add_at_columns.Rd
+++ b/man/add_at_columns.Rd
@@ -7,7 +7,7 @@
 add_at_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_ch_columns.Rd b/man/add_ch_columns.Rd
index 4938f7690..a036a257e 100644
--- a/man/add_ch_columns.Rd
+++ b/man/add_ch_columns.Rd
@@ -7,7 +7,7 @@
 add_ch_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_cij_columns.Rd b/man/add_cij_columns.Rd
index 7d00e6299..c48c1a3ef 100644
--- a/man/add_cij_columns.Rd
+++ b/man/add_cij_columns.Rd
@@ -7,7 +7,7 @@
 add_cij_columns(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Add new columns related to CIJ
diff --git a/man/add_cmh_columns.Rd b/man/add_cmh_columns.Rd
index a1d82cba6..a1cb74abb 100644
--- a/man/add_cmh_columns.Rd
+++ b/man/add_cmh_columns.Rd
@@ -7,7 +7,7 @@
 add_cmh_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_dd_columns.Rd b/man/add_dd_columns.Rd
index 08d9c0fe4..11e85fdc7 100644
--- a/man/add_dd_columns.Rd
+++ b/man/add_dd_columns.Rd
@@ -7,7 +7,7 @@
 add_dd_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_dn_columns.Rd b/man/add_dn_columns.Rd
index bf6af008f..ffdf59a82 100644
--- a/man/add_dn_columns.Rd
+++ b/man/add_dn_columns.Rd
@@ -7,7 +7,7 @@
 add_dn_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_gls_columns.Rd b/man/add_gls_columns.Rd
index e71dc755b..6ab7e9645 100644
--- a/man/add_gls_columns.Rd
+++ b/man/add_gls_columns.Rd
@@ -7,7 +7,7 @@
 add_gls_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_hc_columns.Rd b/man/add_hc_columns.Rd
index 95d8f1d3b..a58f226ec 100644
--- a/man/add_hc_columns.Rd
+++ b/man/add_hc_columns.Rd
@@ -7,7 +7,7 @@
 add_hc_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_hl1_columns.Rd b/man/add_hl1_columns.Rd
index 7600db5e9..24fc714e9 100644
--- a/man/add_hl1_columns.Rd
+++ b/man/add_hl1_columns.Rd
@@ -7,7 +7,7 @@
 add_hl1_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_ipdc_cols.Rd b/man/add_ipdc_cols.Rd
index 0f91cbd90..bd630b9d3 100644
--- a/man/add_ipdc_cols.Rd
+++ b/man/add_ipdc_cols.Rd
@@ -7,7 +7,7 @@
 add_ipdc_cols(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_mat_columns.Rd b/man/add_mat_columns.Rd
index aae729323..5faab0dc1 100644
--- a/man/add_mat_columns.Rd
+++ b/man/add_mat_columns.Rd
@@ -7,7 +7,7 @@
 add_mat_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_mh_columns.Rd b/man/add_mh_columns.Rd
index 3c50c6cb8..c587c490a 100644
--- a/man/add_mh_columns.Rd
+++ b/man/add_mh_columns.Rd
@@ -7,7 +7,7 @@
 add_mh_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_nrs_columns.Rd b/man/add_nrs_columns.Rd
index 9d7b3f8bf..b41201a57 100644
--- a/man/add_nrs_columns.Rd
+++ b/man/add_nrs_columns.Rd
@@ -7,7 +7,7 @@
 add_nrs_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_nsu_columns.Rd b/man/add_nsu_columns.Rd
index 6a54bbcbf..5aed481f0 100644
--- a/man/add_nsu_columns.Rd
+++ b/man/add_nsu_columns.Rd
@@ -7,7 +7,7 @@
 add_nsu_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_ooh_columns.Rd b/man/add_ooh_columns.Rd
index 01814ab6d..f1e6b63f5 100644
--- a/man/add_ooh_columns.Rd
+++ b/man/add_ooh_columns.Rd
@@ -7,7 +7,7 @@
 add_ooh_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_op_columns.Rd b/man/add_op_columns.Rd
index 08c4419e2..9fb8bc158 100644
--- a/man/add_op_columns.Rd
+++ b/man/add_op_columns.Rd
@@ -7,7 +7,7 @@
 add_op_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_pis_columns.Rd b/man/add_pis_columns.Rd
index b582acf2e..836218da0 100644
--- a/man/add_pis_columns.Rd
+++ b/man/add_pis_columns.Rd
@@ -7,7 +7,7 @@
 add_pis_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_sds_columns.Rd b/man/add_sds_columns.Rd
index d5a5fb2cf..c06b88527 100644
--- a/man/add_sds_columns.Rd
+++ b/man/add_sds_columns.Rd
@@ -7,7 +7,7 @@
 add_sds_columns(episode_file, prefix, condition)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/add_standard_cols.Rd b/man/add_standard_cols.Rd
index 744aa49de..4392157d2 100644
--- a/man/add_standard_cols.Rd
+++ b/man/add_standard_cols.Rd
@@ -13,7 +13,7 @@ add_standard_cols(
 )
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{prefix}{Prefix to add to related columns, e.g. "Acute"}
 
diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd
index 013123902..1585accbb 100644
--- a/man/aggregate_by_chi.Rd
+++ b/man/aggregate_by_chi.Rd
@@ -7,7 +7,7 @@
 aggregate_by_chi(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Aggregate episode file by CHI to convert into
diff --git a/man/aggregate_ch_episodes.Rd b/man/aggregate_ch_episodes.Rd
index 1c955d666..3223e6d25 100644
--- a/man/aggregate_ch_episodes.Rd
+++ b/man/aggregate_ch_episodes.Rd
@@ -7,7 +7,7 @@
 aggregate_ch_episodes(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Aggregate CH variables by CHI and CIS.
diff --git a/man/clean_up_ch.Rd b/man/clean_up_ch.Rd
index 0182c84e8..c0c61966d 100644
--- a/man/clean_up_ch.Rd
+++ b/man/clean_up_ch.Rd
@@ -7,7 +7,7 @@
 clean_up_ch(episode_file, year)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{year}{The year to process, in FY format.}
 }
diff --git a/man/correct_cij_vars.Rd b/man/correct_cij_vars.Rd
index 97a7f046f..558514dc6 100644
--- a/man/correct_cij_vars.Rd
+++ b/man/correct_cij_vars.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{correct_cij_vars}
 \alias{correct_cij_vars}
 \title{Correct the CIJ variables}
diff --git a/man/create_cohort_lookups.Rd b/man/create_cohort_lookups.Rd
index f0ad267aa..109869074 100644
--- a/man/create_cohort_lookups.Rd
+++ b/man/create_cohort_lookups.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{create_cohort_lookups}
 \alias{create_cohort_lookups}
 \title{Create the cohort lookups}
diff --git a/man/create_cost_inc_dna.Rd b/man/create_cost_inc_dna.Rd
index 69e7e37b5..47c38b176 100644
--- a/man/create_cost_inc_dna.Rd
+++ b/man/create_cost_inc_dna.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{create_cost_inc_dna}
 \alias{create_cost_inc_dna}
 \title{Create cost total net inc DNA}
diff --git a/man/run_episode_file.Rd b/man/create_episode_file.Rd
similarity index 63%
rename from man/run_episode_file.Rd
rename to man/create_episode_file.Rd
index 59d5fea1d..e1bda32b9 100644
--- a/man/run_episode_file.Rd
+++ b/man/create_episode_file.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
-\name{run_episode_file}
-\alias{run_episode_file}
-\title{Produce the Source Episode file}
+% Please edit documentation in R/create_episode_file.R
+\name{create_episode_file}
+\alias{create_episode_file}
+\title{Create the Source Episode file}
 \usage{
-run_episode_file(
+create_episode_file(
   processed_data_list,
   year,
   write_to_disk = TRUE,
@@ -20,11 +20,11 @@ run_episode_file(
 \code{TRUE} i.e. write the data to disk.}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
-(instead of chi)}
+(instead of chi).}
 }
 \value{
-a \link[tibble:tibble-package]{tibble} containing the episode file
+the Source Episode file as a \link[tibble:tibble-package]{tibble}.
 }
 \description{
-Produce the Source Episode file
+Create the Source Episode file
 }
diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd
index fa759e7b1..4fd9a4a53 100644
--- a/man/create_individual_file.Rd
+++ b/man/create_individual_file.Rd
@@ -2,7 +2,7 @@
 % Please edit documentation in R/create_individual_file.R
 \name{create_individual_file}
 \alias{create_individual_file}
-\title{Create individual file}
+\title{Create the Source Individual file}
 \usage{
 create_individual_file(
   episode_file,
@@ -13,7 +13,7 @@ create_individual_file(
 )
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 
 \item{year}{The year to process, in FY format.}
 
@@ -21,14 +21,14 @@ create_individual_file(
 \code{TRUE} i.e. write the data to disk.}
 
 \item{anon_chi_in}{(Default:TRUE) Is \code{anon_chi} used in the input
-(instead of chi)}
+(instead of chi).}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
-(instead of chi)}
+(instead of chi).}
 }
 \value{
 The processed individual file
 }
 \description{
-Creates individual file from episode file
+Creates the individual file from the episode file.
 }
diff --git a/man/fill_missing_cij_markers.Rd b/man/fill_missing_cij_markers.Rd
index 03b64217e..4795eed7a 100644
--- a/man/fill_missing_cij_markers.Rd
+++ b/man/fill_missing_cij_markers.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{fill_missing_cij_markers}
 \alias{fill_missing_cij_markers}
 \title{Fill any missing CIJ markers for records that should have them}
diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd
index 445dcd7c0..15a860a36 100644
--- a/man/join_cohort_lookups.Rd
+++ b/man/join_cohort_lookups.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{join_cohort_lookups}
 \alias{join_cohort_lookups}
 \title{Join cohort lookups}
diff --git a/man/load_ep_file_vars.Rd b/man/load_ep_file_vars.Rd
index cee9cc440..509b0e00c 100644
--- a/man/load_ep_file_vars.Rd
+++ b/man/load_ep_file_vars.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{load_ep_file_vars}
 \alias{load_ep_file_vars}
 \title{Load the unneeded episode file variables}
diff --git a/man/recode_gender.Rd b/man/recode_gender.Rd
index 526d2829d..aaa28e6eb 100644
--- a/man/recode_gender.Rd
+++ b/man/recode_gender.Rd
@@ -7,7 +7,7 @@
 recode_gender(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Recode gender to 1.5 if 0 or 9.
diff --git a/man/remove_blank_chi.Rd b/man/remove_blank_chi.Rd
index 9cba40a8f..b290dd1e7 100644
--- a/man/remove_blank_chi.Rd
+++ b/man/remove_blank_chi.Rd
@@ -7,7 +7,7 @@
 remove_blank_chi(episode_file)
 }
 \arguments{
-\item{episode_file}{Tibble containing episodic data}
+\item{episode_file}{Tibble containing episodic data.}
 }
 \description{
 Convert blank strings to NA and remove NAs from CHI column
diff --git a/man/store_ep_file_vars.Rd b/man/store_ep_file_vars.Rd
index 06316aac1..880266d58 100644
--- a/man/store_ep_file_vars.Rd
+++ b/man/store_ep_file_vars.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/run_episode_file.R
+% Please edit documentation in R/create_episode_file.R
 \name{store_ep_file_vars}
 \alias{store_ep_file_vars}
 \title{Store the unneeded episode file variables}

From 854868e6b9ae6b01d4b1af62c890f7390ce8a85f Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 15:57:22 +0100
Subject: [PATCH 05/11] Remove incorrect references to rds (#798)

* Remove incorrect references to rds

Since we (mostly) don't use rds anymore, these references are incorrect and potentially confusing.

I've updated lots of documentation to remove the reference to rds.

I've also updated many comments that mentioned rds (these were probably the most confusing).

* Update documentation

---------

Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
---
 R/process_costs_rmd.R                     | 18 ++++++++-----
 R/process_extract_acute.R                 | 15 +++++------
 R/process_extract_ae.R                    | 15 +++++------
 R/process_extract_alarms_telecare.R       |  2 +-
 R/process_extract_care_home.R             |  2 +-
 R/process_extract_cmh.R                   | 15 +++++------
 R/process_extract_consultations.R         |  2 +-
 R/process_extract_delayed_discharges.R    |  2 +-
 R/process_extract_district_nursing.R      |  2 +-
 R/process_extract_gp_ooh.R                |  2 +-
 R/process_extract_home_care.R             |  2 +-
 R/process_extract_homelessness.R          |  2 +-
 R/process_extract_maternity.R             | 18 +++++--------
 R/process_extract_mental_health.R         | 16 +++++-------
 R/process_extract_ooh_diagnosis.R         |  2 +-
 R/process_extract_ooh_outcomes.R          |  2 +-
 R/process_extract_outpatients.R           | 19 +++++---------
 R/process_extract_prescribing.R           |  9 ++++---
 R/process_extract_sds.R                   |  2 +-
 R/process_lookup_ltc.R                    | 10 +++----
 R/process_lookup_postcode.R               | 19 ++++++--------
 R/process_lookup_sc_client.R              | 16 +++++-------
 R/process_lookup_sc_demographics.R        | 32 +++++++++--------------
 R/process_sc_all_alarms_telecare.R        |  9 ++++---
 R/process_sc_all_care_home.R              |  2 +-
 R/process_sc_all_home_care.R              |  2 +-
 R/process_sc_all_sds.R                    | 11 ++++----
 R/read_extract_gp_ooh.R                   |  2 +-
 man/process_costs_ch_rmd.Rd               |  2 +-
 man/process_costs_dn_rmd.Rd               |  2 +-
 man/process_costs_gp_ooh_rmd.Rd           |  2 +-
 man/process_costs_hc_rmd.Rd               |  2 +-
 man/process_extract_acute.Rd              |  2 +-
 man/process_extract_ae.Rd                 |  2 +-
 man/process_extract_alarms_telecare.Rd    |  2 +-
 man/process_extract_care_home.Rd          |  2 +-
 man/process_extract_cmh.Rd                |  2 +-
 man/process_extract_delayed_discharges.Rd |  2 +-
 man/process_extract_district_nursing.Rd   |  2 +-
 man/process_extract_gp_ooh.Rd             |  2 +-
 man/process_extract_home_care.Rd          |  2 +-
 man/process_extract_homelessness.Rd       |  2 +-
 man/process_extract_maternity.Rd          |  2 +-
 man/process_extract_mental_health.Rd      |  2 +-
 man/process_extract_ooh_consultations.Rd  |  2 +-
 man/process_extract_ooh_diagnosis.Rd      |  2 +-
 man/process_extract_ooh_outcomes.Rd       |  2 +-
 man/process_extract_outpatients.Rd        |  2 +-
 man/process_extract_prescribing.Rd        |  2 +-
 man/process_extract_sds.Rd                |  2 +-
 man/process_lookup_postcode.Rd            |  2 +-
 man/process_lookup_sc_client.Rd           |  2 +-
 man/process_lookup_sc_demographics.Rd     |  2 +-
 man/process_sc_all_alarms_telecare.Rd     |  2 +-
 man/process_sc_all_care_home.Rd           |  2 +-
 man/process_sc_all_home_care.Rd           |  2 +-
 man/process_sc_all_sds.Rd                 |  2 +-
 man/read_extract_gp_ooh.Rd                |  2 +-
 58 files changed, 145 insertions(+), 165 deletions(-)

diff --git a/R/process_costs_rmd.R b/R/process_costs_rmd.R
index 2efc25dd8..5d97d705f 100644
--- a/R/process_costs_rmd.R
+++ b/R/process_costs_rmd.R
@@ -14,7 +14,8 @@ process_costs_rmd <- function(file_name) {
     stringr::fixed("Rmd", ignore_case = TRUE)
   )) {
     cli::cli_abort(
-      "{.arg file_name} must be an {.code .Rmd} not a {.code .{fs::path_ext(file_name)}}."
+      "{.arg file_name} must be an {.code .Rmd} not a
+      {.code .{fs::path_ext(file_name)}}."
     )
   }
 
@@ -34,7 +35,12 @@ process_costs_rmd <- function(file_name) {
 
   output_file <- get_file_path(
     directory = output_dir,
-    file_name = fs::path_ext_set(stringr::str_glue("{fs::path_ext_remove(file_name)}-{latest_update()}-{date_today}"), "html"),
+    file_name = fs::path_ext_set(
+      stringr::str_glue(
+        "{fs::path_ext_remove(file_name)}-{latest_update()}-{date_today}"
+      ),
+      "html"
+    ),
     check_mode = "write"
   )
 
@@ -55,7 +61,7 @@ process_costs_rmd <- function(file_name) {
 #'
 #' @description This will read and process the
 #' District Nursing cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @param file_path Path to the cost lookup.
 #'
@@ -73,7 +79,7 @@ process_costs_dn_rmd <- function(file_path = get_dn_costs_path()) {
 #'
 #' @description This will read and process the
 #' care homes cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @inheritParams process_costs_dn_rmd
 #'
@@ -91,7 +97,7 @@ process_costs_ch_rmd <- function(file_path = get_ch_costs_path()) {
 #'
 #' @description This will read and process the
 #' GP ooh cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @inheritParams process_costs_dn_rmd
 #'
@@ -109,7 +115,7 @@ process_costs_gp_ooh_rmd <- function(file_path = get_gp_ooh_costs_path()) {
 #'
 #' @description This will read and process the
 #' Home Care cost lookup, it will return the final data
-#' but also write this out as a rds.
+#' and write it to disk.
 #'
 #' @inheritParams process_costs_dn_rmd
 #'
diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R
index 107b346c9..70ff29370 100644
--- a/R/process_extract_acute.R
+++ b/R/process_extract_acute.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' acute extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -61,9 +61,7 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) {
       levels = 0L:8L
     ))
 
-
-  ## save outfile ---------------------------------------
-  outfile <- acute_clean %>%
+  acute_processed <- acute_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -113,10 +111,11 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) {
     dplyr::arrange(.data$chi, .data$record_keydate1)
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "Acute", check_mode = "write"))
+    write_file(
+      acute_processed,
+      get_source_extract_path(year, "Acute", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(acute_processed)
 }
diff --git a/R/process_extract_ae.R b/R/process_extract_ae.R
index 7e61db018..95dfd99be 100644
--- a/R/process_extract_ae.R
+++ b/R/process_extract_ae.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' A&E extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -237,9 +237,7 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
       .data$keytime2
     )
 
-
-  # Save outfile----------------------------------------
-  outfile <- matched_ae_data %>%
+  ae_processed <- matched_ae_data %>%
     dplyr::select(
       "year",
       "recid",
@@ -294,10 +292,11 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "AE", check_mode = "write"))
+    write_file(
+      ae_processed,
+      get_source_extract_path(year, "AE", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(ae_processed)
 }
diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R
index 15cd79809..9a0745a04 100644
--- a/R/process_extract_alarms_telecare.R
+++ b/R/process_extract_alarms_telecare.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) Alarms Telecare extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_extract_care_home
 #'
diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R
index 757e47f6c..cbf6d417c 100644
--- a/R/process_extract_care_home.R
+++ b/R/process_extract_care_home.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) Care Home extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The full processed data which will be selected from to create
 #' the year specific data.
diff --git a/R/process_extract_cmh.R b/R/process_extract_cmh.R
index 0b1ba0f19..a2adad75e 100644
--- a/R/process_extract_cmh.R
+++ b/R/process_extract_cmh.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' CMH extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -44,9 +44,7 @@ process_extract_cmh <- function(data,
     # create blank diag 6
     dplyr::mutate(diag6 = NA)
 
-  # Outfile --------------------------------------------
-
-  outfile <- cmh_clean %>%
+  cmh_processed <- cmh_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -73,10 +71,11 @@ process_extract_cmh <- function(data,
     )
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "CMH", check_mode = "write"))
+    write_file(
+      cmh_processed,
+      get_source_extract_path(year, "CMH", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(cmh_processed)
 }
diff --git a/R/process_extract_consultations.R b/R/process_extract_consultations.R
index 7262c1df6..6dc175cb8 100644
--- a/R/process_extract_consultations.R
+++ b/R/process_extract_consultations.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OOH Consultations extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_delayed_discharges.R b/R/process_extract_delayed_discharges.R
index 29f37eb29..3c56807f9 100644
--- a/R/process_extract_delayed_discharges.R
+++ b/R/process_extract_delayed_discharges.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' delayed discharges extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_district_nursing.R b/R/process_extract_district_nursing.R
index a1b3bf816..9d1df62a6 100644
--- a/R/process_extract_district_nursing.R
+++ b/R/process_extract_district_nursing.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' District Nursing extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R
index 4add41cfa..2b536878a 100644
--- a/R/process_extract_gp_ooh.R
+++ b/R/process_extract_gp_ooh.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OoH extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param year The year to process, in FY format.
 #' @param data_list A list containing the extracts.
diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R
index 3fcf009eb..874ad899c 100644
--- a/R/process_extract_home_care.R
+++ b/R/process_extract_home_care.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) Home Care extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_extract_care_home
 #'
diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index a900cff9a..f4fb7d3e5 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' homelessness extract, it will return the final data
-#' and optionally write it out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process from [read_extract_homelessness()].
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_maternity.R b/R/process_extract_maternity.R
index 04fa46ced..64fa4e205 100644
--- a/R/process_extract_maternity.R
+++ b/R/process_extract_maternity.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' maternity extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -63,10 +63,7 @@ process_extract_maternity <- function(data, year, write_to_disk = TRUE) {
       )
     )
 
-
-  # Save outfile------------------------------------------------
-
-  outfile <- maternity_clean %>%
+  maternity_processed <- maternity_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -113,12 +110,11 @@ process_extract_maternity <- function(data, year, write_to_disk = TRUE) {
     dplyr::arrange(.data$chi, .data$record_keydate1)
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(
-        get_source_extract_path(year, "Maternity", check_mode = "write")
-      )
+    write_file(
+      maternity_processed,
+      get_source_extract_path(year, "Maternity", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(maternity_processed)
 }
diff --git a/R/process_extract_mental_health.R b/R/process_extract_mental_health.R
index 76e7157e3..ffea63d28 100644
--- a/R/process_extract_mental_health.R
+++ b/R/process_extract_mental_health.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' mental health extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -67,10 +67,7 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) {
       smrtype = add_smr_type(.data$recid)
     )
 
-
-  # Outfile  ---------------------------------------
-
-  outfile <- mh_clean %>%
+  mh_processed <- mh_clean %>%
     dplyr::arrange(.data$chi, .data$record_keydate1) %>%
     dplyr::select(
       "year",
@@ -118,10 +115,11 @@ process_extract_mental_health <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    outfile %>%
-      # Save as rds file
-      write_file(get_source_extract_path(year, "MH", check_mode = "write"))
+    write_file(
+      mh_processed,
+      get_source_extract_path(year, "MH", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(mh_processed)
 }
diff --git a/R/process_extract_ooh_diagnosis.R b/R/process_extract_ooh_diagnosis.R
index 128c6c772..f2afd634e 100644
--- a/R/process_extract_ooh_diagnosis.R
+++ b/R/process_extract_ooh_diagnosis.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OOH Diagnosis extract, it will return the final data
-#' but also write this out an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_ooh_outcomes.R b/R/process_extract_ooh_outcomes.R
index 6a14bced5..f188e6de1 100644
--- a/R/process_extract_ooh_outcomes.R
+++ b/R/process_extract_ooh_outcomes.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OOH Outcomes extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
diff --git a/R/process_extract_outpatients.R b/R/process_extract_outpatients.R
index 39b421ab4..341ee0f1a 100644
--- a/R/process_extract_outpatients.R
+++ b/R/process_extract_outpatients.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' outpatients extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -49,11 +49,7 @@ process_extract_outpatients <- function(data, year, write_to_disk = TRUE) {
       )
     )
 
-
-  ## save outfile ---------------------------------------
-
-  outfile <-
-    outpatients_clean %>%
+  outpatients_processed <- outpatients_clean %>%
     dplyr::select(
       "year",
       "recid",
@@ -89,12 +85,11 @@ process_extract_outpatients <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    # Save as rds file
-    outfile %>%
-      write_file(
-        get_source_extract_path(year, "Outpatients", check_mode = "write")
-      )
+    write_file(
+      outpatients_processed,
+      get_source_extract_path(year, "Outpatients", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(outpatients_processed)
 }
diff --git a/R/process_extract_prescribing.R b/R/process_extract_prescribing.R
index 776299d47..68c388b83 100644
--- a/R/process_extract_prescribing.R
+++ b/R/process_extract_prescribing.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' prescribing extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
@@ -50,9 +50,10 @@ process_extract_prescribing <- function(data, year, write_to_disk = TRUE) {
   }
 
   if (write_to_disk) {
-    # Save as rds file
-    pis_clean %>%
-      write_file(get_source_extract_path(year, "PIS", check_mode = "write"))
+    write_file(
+      pis_clean,
+      get_source_extract_path(year, "PIS", check_mode = "write")
+    )
   }
 
   return(pis_clean)
diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R
index a58651749..bd9e93a3f 100644
--- a/R/process_extract_sds.R
+++ b/R/process_extract_sds.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' (year specific) SDS extract, it will return the final data
-#' but also write this out as rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_extract_care_home
 #'
diff --git a/R/process_lookup_ltc.R b/R/process_lookup_ltc.R
index 5a80deaff..8ea33da48 100644
--- a/R/process_lookup_ltc.R
+++ b/R/process_lookup_ltc.R
@@ -24,13 +24,11 @@ process_lookup_ltc <- function(data, year, write_to_disk = TRUE) {
       .fn = ~ stringr::str_remove(.x, "_date_flag")
     )
 
-  # Save Outfile---------------------------------------------
-
   if (write_to_disk) {
-    # Save .rds file
-    ltc_flags %>%
-      dplyr::arrange(.data$chi) %>%
-      write_file(get_ltcs_path(year, check_mode = "write"))
+    write_file(
+      ltc_flags,
+      get_ltcs_path(year, check_mode = "write")
+    )
   }
 
   return(ltc_flags)
diff --git a/R/process_lookup_postcode.R b/R/process_lookup_postcode.R
index 69cc13bd8..f9f1d47f4 100644
--- a/R/process_lookup_postcode.R
+++ b/R/process_lookup_postcode.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' postcode lookup, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param simd_path Path to SIMD lookup.
 #' @param locality_path Path to locality lookup.
@@ -58,16 +58,14 @@ process_lookup_postcode <- function(spd_path = get_spd_path(),
 
 
   # Join data together  -----------------------------------------------------
-  data <-
-    dplyr::left_join(spd_file, simd_file, by = "pc7") %>%
+  data <- dplyr::left_join(spd_file, simd_file, by = "pc7") %>%
     dplyr::rename(postcode = "pc7") %>%
     dplyr::left_join(locality_file, by = "datazone2011")
 
 
   # Finalise output -----------------------------------------------------
 
-  outfile <-
-    data %>%
+  slf_pc_lookup <- data %>%
     dplyr::select(
       "postcode",
       "lca",
@@ -89,13 +87,12 @@ process_lookup_postcode <- function(spd_path = get_spd_path(),
       tidyselect::matches("ur2_\\d{4}$")
     )
 
-
-  # Save out ----------------------------------------------------------------
   if (write_to_disk) {
-    outfile %>%
-      # Save .rds file
-      write_file(get_slf_postcode_path(check_mode = "write"))
+    write_file(
+      slf_pc_lookup,
+      get_slf_postcode_path(check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(slf_pc_lookup)
 }
diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R
index 1874c2b5a..87e6b107d 100644
--- a/R/process_lookup_sc_client.R
+++ b/R/process_lookup_sc_client.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' social care client lookup, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param year The year to process
@@ -108,10 +108,7 @@ process_lookup_sc_client <- function(data, year, write_to_disk = TRUE) {
       .fn = ~ paste0("sc_", .x)
     )
 
-
-  ## save outfile ---------------------------------------
-  outfile <-
-    client_clean %>%
+  sc_client_lookup <- client_clean %>%
     # reorder
     dplyr::select(
       "sending_location",
@@ -125,10 +122,11 @@ process_lookup_sc_client <- function(data, year, write_to_disk = TRUE) {
     )
 
   if (write_to_disk) {
-    # Save .rds file
-    outfile %>%
-      write_file(get_source_extract_path(year, "Client", check_mode = "write"))
+    write_file(
+      sc_client_lookup,
+      get_source_extract_path(year, "Client", check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(sc_client_lookup)
 }
diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R
index 4b0f7500f..8c363f547 100644
--- a/R/process_lookup_sc_demographics.R
+++ b/R/process_lookup_sc_demographics.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' social care demographic lookup, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process.
 #' @param spd_path Path to the Scottish Postcode Directory.
@@ -12,7 +12,10 @@
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @export
 #' @family process extracts
-process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), write_to_disk = TRUE) {
+process_lookup_sc_demographics <- function(
+    data,
+    spd_path = get_spd_path(),
+    write_to_disk = TRUE) {
   # Deal with postcodes ---------------------------------------
 
   # UK postcode regex - see https://ideal-postcodes.co.uk/guides/postcode-validation
@@ -51,8 +54,7 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ
     ))
 
   # count number of na postcodes
-  na_postcodes <-
-    sc_demog %>%
+  na_postcodes <- sc_demog %>%
     dplyr::count(dplyr::across(tidyselect::contains("postcode"), ~ is.na(.x)))
 
   sc_demog <- sc_demog %>%
@@ -97,17 +99,11 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ
     dplyr::count(.data$postcode_type)
 
   # count number of replaced postcode - compare with count above
-  na_replaced_postcodes <-
-    sc_demog %>%
+  na_replaced_postcodes <- sc_demog %>%
     dplyr::count(dplyr::across(tidyselect::ends_with("_postcode"), ~ is.na(.x)))
 
-  na_replaced_postcodes
-  na_postcodes
 
-
-  ## save outfile ---------------------------------------
-  outfile <-
-    sc_demog %>%
+  sc_demog_lookup <- sc_demog %>%
     # group by sending location and ID
     dplyr::group_by(.data$sending_location, .data$social_care_id) %>%
     # arrange so latest submissions are last
@@ -126,14 +122,12 @@ process_lookup_sc_demographics <- function(data, spd_path = get_spd_path(), writ
     ) %>%
     dplyr::ungroup()
 
-
-  ## save file ##
-
   if (write_to_disk) {
-    # Save .rds file
-    outfile %>%
-      write_file(get_sc_demog_lookup_path(check_mode = "write"))
+    write_file(
+      sc_demog_lookup,
+      get_sc_demog_lookup_path(check_mode = "write")
+    )
   }
 
-  return(outfile)
+  return(sc_demog_lookup)
 }
diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R
index 61bce41b6..620b14cee 100644
--- a/R/process_sc_all_alarms_telecare.R
+++ b/R/process_sc_all_alarms_telecare.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' all Alarms Telecare extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_sc_all_care_home
 #'
@@ -121,9 +121,10 @@ process_sc_all_alarms_telecare <- function(
     tibble::as_tibble()
 
   if (write_to_disk) {
-    # Save .rds file ----
-    qtr_merge %>%
-      write_file(get_sc_at_episodes_path(check_mode = "write"))
+    write_file(
+      qtr_merge,
+      get_sc_at_episodes_path(check_mode = "write")
+    )
   }
 
   return(qtr_merge)
diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R
index a11f275e8..c41e1a1d5 100644
--- a/R/process_sc_all_care_home.R
+++ b/R/process_sc_all_care_home.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' all Care Home extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @param data The extract to process
 #' @param sc_demog_lookup The Social Care Demographics lookup produced by
diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R
index b812d492b..5f2b4db49 100644
--- a/R/process_sc_all_home_care.R
+++ b/R/process_sc_all_home_care.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' all home care extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_sc_all_care_home
 #'
diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R
index 068215a28..c17f74f28 100644
--- a/R/process_sc_all_sds.R
+++ b/R/process_sc_all_sds.R
@@ -1,7 +1,7 @@
 #' Process the all SDS extract
 #' @description This will read and process the
 #' all SDS extract, it will return the final data
-#' but also write this out as a rds.
+#' and (optionally) write it to disk.
 #'
 #' @inheritParams process_sc_all_care_home
 #'
@@ -119,12 +119,11 @@ process_sc_all_sds <- function(
     # change the data format from data.table to data.frame
     tibble::as_tibble()
 
-
-  # Save outfile------------------------------------------------
   if (write_to_disk) {
-    # Save .rds file
-    final_data %>%
-      write_file(get_sc_sds_episodes_path(check_mode = "write"))
+    write_file(
+      final_data,
+      get_sc_sds_episodes_path(check_mode = "write")
+    )
   }
 
   return(final_data)
diff --git a/R/read_extract_gp_ooh.R b/R/read_extract_gp_ooh.R
index 98606eb8a..3a711c2f8 100644
--- a/R/read_extract_gp_ooh.R
+++ b/R/read_extract_gp_ooh.R
@@ -2,7 +2,7 @@
 #'
 #' @description This will read and process the
 #' GP OoH extract, it will return the final data
-#' but also write this out as an rds.
+#' and (optionally) write it to disk.
 #'
 #' @param year The year to process, in FY format.
 #' @param diagnosis_path Path to diagnosis BOXI extract location.
diff --git a/man/process_costs_ch_rmd.Rd b/man/process_costs_ch_rmd.Rd
index 520898c9e..b990564b7 100644
--- a/man/process_costs_ch_rmd.Rd
+++ b/man/process_costs_ch_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 care homes cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_costs_dn_rmd.Rd b/man/process_costs_dn_rmd.Rd
index bde475d5a..46bcd93dd 100644
--- a/man/process_costs_dn_rmd.Rd
+++ b/man/process_costs_dn_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 District Nursing cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_costs_gp_ooh_rmd.Rd b/man/process_costs_gp_ooh_rmd.Rd
index fd71066c0..f5c611f11 100644
--- a/man/process_costs_gp_ooh_rmd.Rd
+++ b/man/process_costs_gp_ooh_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 GP ooh cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_costs_hc_rmd.Rd b/man/process_costs_hc_rmd.Rd
index b15c311da..c3448bcbc 100644
--- a/man/process_costs_hc_rmd.Rd
+++ b/man/process_costs_hc_rmd.Rd
@@ -15,5 +15,5 @@ a \link[tibble:tibble-package]{tibble} containing the final cost data.
 \description{
 This will read and process the
 Home Care cost lookup, it will return the final data
-but also write this out as a rds.
+and write it to disk.
 }
diff --git a/man/process_extract_acute.Rd b/man/process_extract_acute.Rd
index af6b85bfe..88264cf3d 100644
--- a/man/process_extract_acute.Rd
+++ b/man/process_extract_acute.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 acute extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_ae.Rd b/man/process_extract_ae.Rd
index 58878e689..b2f6954b7 100644
--- a/man/process_extract_ae.Rd
+++ b/man/process_extract_ae.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 A&E extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_alarms_telecare.Rd b/man/process_extract_alarms_telecare.Rd
index a6e61365d..5e37847b3 100644
--- a/man/process_extract_alarms_telecare.Rd
+++ b/man/process_extract_alarms_telecare.Rd
@@ -29,7 +29,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) Alarms Telecare extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_care_home.Rd b/man/process_extract_care_home.Rd
index f058ca787..03c1cd705 100644
--- a/man/process_extract_care_home.Rd
+++ b/man/process_extract_care_home.Rd
@@ -32,7 +32,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) Care Home extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_cmh.Rd b/man/process_extract_cmh.Rd
index 147651f37..52086848a 100644
--- a/man/process_extract_cmh.Rd
+++ b/man/process_extract_cmh.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 CMH extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_delayed_discharges.Rd b/man/process_extract_delayed_discharges.Rd
index ddc41ec46..f9a6b7439 100644
--- a/man/process_extract_delayed_discharges.Rd
+++ b/man/process_extract_delayed_discharges.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 delayed discharges extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_district_nursing.Rd b/man/process_extract_district_nursing.Rd
index 4d9383c2e..f2f466440 100644
--- a/man/process_extract_district_nursing.Rd
+++ b/man/process_extract_district_nursing.Rd
@@ -27,7 +27,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 District Nursing extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_gp_ooh.Rd b/man/process_extract_gp_ooh.Rd
index 8217f0d6f..f96e1dcf2 100644
--- a/man/process_extract_gp_ooh.Rd
+++ b/man/process_extract_gp_ooh.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OoH extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_home_care.Rd b/man/process_extract_home_care.Rd
index 4fef5ac14..8dc1ec6a7 100644
--- a/man/process_extract_home_care.Rd
+++ b/man/process_extract_home_care.Rd
@@ -24,7 +24,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) Home Care extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_homelessness.Rd b/man/process_extract_homelessness.Rd
index 7531f8f22..9b6eb9463 100644
--- a/man/process_extract_homelessness.Rd
+++ b/man/process_extract_homelessness.Rd
@@ -31,7 +31,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 homelessness extract, it will return the final data
-and optionally write it out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_maternity.Rd b/man/process_extract_maternity.Rd
index cd01e6931..8ff8d7014 100644
--- a/man/process_extract_maternity.Rd
+++ b/man/process_extract_maternity.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 maternity extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_mental_health.Rd b/man/process_extract_mental_health.Rd
index 7159aae8b..4a7519d2d 100644
--- a/man/process_extract_mental_health.Rd
+++ b/man/process_extract_mental_health.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 mental health extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_ooh_consultations.Rd b/man/process_extract_ooh_consultations.Rd
index d682197ca..029d951cb 100644
--- a/man/process_extract_ooh_consultations.Rd
+++ b/man/process_extract_ooh_consultations.Rd
@@ -17,7 +17,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OOH Consultations extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_ooh_diagnosis.Rd b/man/process_extract_ooh_diagnosis.Rd
index 2a962989a..864d4029f 100644
--- a/man/process_extract_ooh_diagnosis.Rd
+++ b/man/process_extract_ooh_diagnosis.Rd
@@ -17,7 +17,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OOH Diagnosis extract, it will return the final data
-but also write this out an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_ooh_outcomes.Rd b/man/process_extract_ooh_outcomes.Rd
index 5b220e04a..186525ca3 100644
--- a/man/process_extract_ooh_outcomes.Rd
+++ b/man/process_extract_ooh_outcomes.Rd
@@ -17,7 +17,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OOH Outcomes extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_outpatients.Rd b/man/process_extract_outpatients.Rd
index c5e10abc8..721809c1e 100644
--- a/man/process_extract_outpatients.Rd
+++ b/man/process_extract_outpatients.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 outpatients extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_prescribing.Rd b/man/process_extract_prescribing.Rd
index cf294d95c..55b9eb242 100644
--- a/man/process_extract_prescribing.Rd
+++ b/man/process_extract_prescribing.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 prescribing extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_extract_sds.Rd b/man/process_extract_sds.Rd
index 7e8e44a38..14fe9cd0b 100644
--- a/man/process_extract_sds.Rd
+++ b/man/process_extract_sds.Rd
@@ -24,7 +24,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 (year specific) SDS extract, it will return the final data
-but also write this out as rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_lookup_postcode.Rd b/man/process_lookup_postcode.Rd
index 6ad56e5b3..19520b4e8 100644
--- a/man/process_lookup_postcode.Rd
+++ b/man/process_lookup_postcode.Rd
@@ -27,7 +27,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 postcode lookup, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_lookup_sc_client.Rd b/man/process_lookup_sc_client.Rd
index e48426419..5daa569b3 100644
--- a/man/process_lookup_sc_client.Rd
+++ b/man/process_lookup_sc_client.Rd
@@ -20,7 +20,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 social care client lookup, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_lookup_sc_demographics.Rd b/man/process_lookup_sc_demographics.Rd
index 6c00b4352..60478ab18 100644
--- a/man/process_lookup_sc_demographics.Rd
+++ b/man/process_lookup_sc_demographics.Rd
@@ -24,7 +24,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 social care demographic lookup, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_sc_all_alarms_telecare.Rd b/man/process_sc_all_alarms_telecare.Rd
index 7e21407f9..8bbc1dce0 100644
--- a/man/process_sc_all_alarms_telecare.Rd
+++ b/man/process_sc_all_alarms_telecare.Rd
@@ -21,7 +21,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all Alarms Telecare extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_sc_all_care_home.Rd b/man/process_sc_all_care_home.Rd
index 691fe51db..2c5d2c4ce 100644
--- a/man/process_sc_all_care_home.Rd
+++ b/man/process_sc_all_care_home.Rd
@@ -36,7 +36,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all Care Home extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_sc_all_home_care.Rd b/man/process_sc_all_home_care.Rd
index d498514db..9a0b999af 100644
--- a/man/process_sc_all_home_care.Rd
+++ b/man/process_sc_all_home_care.Rd
@@ -21,7 +21,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all home care extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/process_sc_all_sds.Rd b/man/process_sc_all_sds.Rd
index f2d6d8a1c..cca9d0fe5 100644
--- a/man/process_sc_all_sds.Rd
+++ b/man/process_sc_all_sds.Rd
@@ -21,7 +21,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 all SDS extract, it will return the final data
-but also write this out as a rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 
diff --git a/man/read_extract_gp_ooh.Rd b/man/read_extract_gp_ooh.Rd
index eae6c52dc..73e6672d9 100644
--- a/man/read_extract_gp_ooh.Rd
+++ b/man/read_extract_gp_ooh.Rd
@@ -26,7 +26,7 @@ the final data as a \link[tibble:tibble-package]{tibble}.
 \description{
 This will read and process the
 GP OoH extract, it will return the final data
-but also write this out as an rds.
+and (optionally) write it to disk.
 }
 \seealso{
 Other process extracts: 

From ad98c3cc4a1226072ec012333925eb8b4af20bbb Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Mon, 25 Sep 2023 16:16:25 +0100
Subject: [PATCH 06/11] Make targets and tarchetypes required packages (#799)

Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
---
 DESCRIPTION | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 4f1cec425..5123289dd 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -53,6 +53,8 @@ Imports:
     slfhelper (>= 0.10.0),
     stringdist (>= 0.9.10),
     stringr (>= 1.5.0),
+    tarchetypes (>= 0.7.6),
+    targets (>= 1.2.0),
     tibble (>= 3.2.1),
     tidyr (>= 1.3.0),
     tidyselect (>= 1.2.0),
@@ -61,8 +63,6 @@ Suggests:
     covr (>= 3.6.1),
     roxygen2 (>= 7.2.3),
     spelling (>= 2.2),
-    tarchetypes (>= 0.7.5),
-    targets (>= 0.14.3),
     testthat (>= 3.1.7)
 Remotes: 
     Public-Health-Scotland/phsmethods,

From a5bc79e3b55de2476d1d460ef58defa461019004 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Tue, 26 Sep 2023 11:39:58 +0100
Subject: [PATCH 07/11] Update episode file functions to pass data through
 (#754)

* Update `read_file` to return an empty tibble if passed the dummy path

This is needed for some other bits, notably NSUs

* Update SPARRA and HHG paths to return dummy if the year is invalid

* Extract all data as a parameter

* Style code

* Update documentation

* Style code

* Update documentation

* rename `run` to `create_episode_file`

* Update documentation

---------

Co-authored-by: Moohan <Moohan@users.noreply.github.com>
Co-authored-by: Jennifer Thom <jennifer.thom@phs.scot>
Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
---
 R/add_nsu_cohort.R            | 13 +++++---
 R/create_episode_file.R       | 61 +++++++++++++++++++++++++----------
 R/fill_geographies.R          | 51 +++++++++++++++++++++--------
 R/get_source_extract_path.R   | 26 +++++++--------
 R/get_sparra_hhg_paths.R      |  8 +++++
 R/join_deaths_data.R          |  6 ++--
 R/match_on_ltcs.R             |  8 +++--
 R/read_file.R                 |  5 +++
 _targets.R                    |  7 ++++
 man/add_nsu_cohort.Rd         |  4 ++-
 man/create_episode_file.Rd    | 27 +++++++++++++---
 man/create_individual_file.Rd |  2 +-
 man/fill_geographies.Rd       | 11 ++++++-
 man/join_cohort_lookups.Rd    | 12 ++++++-
 man/join_deaths_data.Rd       |  4 +--
 man/match_on_ltcs.Rd          |  4 ++-
 16 files changed, 183 insertions(+), 66 deletions(-)

diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R
index c5a26da12..00260bb8e 100644
--- a/R/add_nsu_cohort.R
+++ b/R/add_nsu_cohort.R
@@ -2,13 +2,17 @@
 #'
 #' @param data The input data frame
 #' @param year The year being processed
+#' @param nsu_cohort The NSU data for the year
 #'
 #' @return A data frame containing the Non-Service Users as additional rows
 #' @export
 #'
 #' @family episode file
 #' @seealso [get_nsu_path()]
-add_nsu_cohort <- function(data, year) {
+add_nsu_cohort <- function(
+    data,
+    year,
+    nsu_cohort = read_file(get_nsu_path(year))) {
   year_param <- year
 
   if (!check_year_valid(year, "NSU")) {
@@ -29,9 +33,9 @@ add_nsu_cohort <- function(data, year) {
     )
   )
 
-  matched <- dplyr::full_join(data,
-    # NSU cohort file
-    read_file(get_nsu_path(year)) %>%
+  matched <- dplyr::full_join(
+    data,
+    nsu_cohort %>%
       dplyr::mutate(
         dob = as.Date(.data[["dob"]]),
         gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]])
@@ -110,7 +114,6 @@ add_nsu_cohort <- function(data, year) {
         .data[["chi"]]
       )
     ) %>%
-    # Remove the additional columns
     dplyr::select(-dplyr::contains("_nsu"), -"has_chi")
 
   return(return_df)
diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index bad42be5e..3dc33e193 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -1,17 +1,32 @@
-#' Create the Source Episode file
+#' Produce the Source Episode file
 #'
 #' @param processed_data_list containing data from processed extracts.
 #' @param year The year to process, in FY format.
 #' @param write_to_disk (optional) Should the data be written to disk default is
 #' `TRUE` i.e. write the data to disk.
+#' @inheritParams add_nsu_cohort
+#' @inheritParams fill_geographies
+#' @inheritParams join_cohort_lookups
+#' @inheritParams join_deaths_data
+#' @inheritParams match_on_ltcs
+#' @inheritParams link_delayed_discharge_eps
 #' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output
-#' (instead of chi).
+#' (instead of chi)
 #'
-#' @return the Source Episode file as a [tibble][tibble::tibble-package].
+#' @return a [tibble][tibble::tibble-package] containing the episode file
 #' @export
 create_episode_file <- function(
     processed_data_list,
     year,
+    dd_data = read_file(get_source_extract_path(year, "DD")),
+    nsu_cohort = read_file(get_nsu_path(year)),
+    ltc_data = read_file(get_ltcs_path(year)),
+    slf_pc_lookup = read_file(get_slf_postcode_path()),
+    slf_gpprac_lookup = read_file(
+      get_slf_gpprac_path(),
+      col_select = c("gpprac", "cluster", "hbpraccode")
+    ),
+    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)),
     write_to_disk = TRUE,
     anon_chi_out = TRUE) {
   episode_file <- dplyr::bind_rows(processed_data_list) %>%
@@ -99,15 +114,21 @@ create_episode_file <- function(
     correct_cij_vars() %>%
     fill_missing_cij_markers() %>%
     add_ppa_flag() %>%
-    link_delayed_discharge_eps(year) %>%
-    add_nsu_cohort(year) %>%
-    match_on_ltcs(year) %>%
+    link_delayed_discharge_eps(year, dd_data) %>%
+    add_nsu_cohort(year, nsu_cohort) %>%
+    match_on_ltcs(year, ltc_data) %>%
     correct_demographics(year) %>%
     create_cohort_lookups(year) %>%
     join_cohort_lookups(year) %>%
     join_sparra_hhg(year) %>%
-    fill_geographies() %>%
-    join_deaths_data(year) %>%
+    fill_geographies(
+      slf_pc_lookup,
+      slf_gpprac_lookup
+    ) %>%
+    join_deaths_data(
+      year,
+      slf_deaths_lookup
+    ) %>%
     load_ep_file_vars(year)
 
   if (anon_chi_out) {
@@ -354,22 +375,28 @@ create_cohort_lookups <- function(data, year, update = latest_update()) {
 #'
 #' @inheritParams store_ep_file_vars
 #' @inheritParams get_demographic_cohorts_path
+#' @param demographic_cohort,service_use_cohort The cohort data
 #'
 #' @return The data including the Demographic and Service Use lookups.
-join_cohort_lookups <- function(data, year, update = latest_update()) {
+join_cohort_lookups <- function(
+    data,
+    year,
+    update = latest_update(),
+    demographic_cohort = read_file(
+      get_demographic_cohorts_path(year, update),
+      col_select = c("chi", "demographic_cohort")
+    ),
+    service_use_cohort = read_file(
+      get_service_use_cohorts_path(year, update),
+      col_select = c("chi", "service_use_cohort")
+    )) {
   join_cohort_lookups <- data %>%
     dplyr::left_join(
-      read_file(
-        get_demographic_cohorts_path(year, update),
-        col_select = c("chi", "demographic_cohort")
-      ),
+      demographic_cohort,
       by = "chi"
     ) %>%
     dplyr::left_join(
-      read_file(
-        get_service_use_cohorts_path(year, update),
-        col_select = c("chi", "service_use_cohort")
-      ),
+      service_use_cohort,
       by = "chi"
     )
 
diff --git a/R/fill_geographies.R b/R/fill_geographies.R
index 8f4a470e8..c9aee6355 100644
--- a/R/fill_geographies.R
+++ b/R/fill_geographies.R
@@ -4,10 +4,18 @@
 #' then use the lookups to match on additional variables.
 #'
 #' @param data the SLF
+#' @param slf_pc_lookup The SLF Postcode lookup
+#' @param slf_gpprac_lookup The SLF GP Practice lookup
 #'
 #' @return a [tibble][tibble::tibble-package] of the SLF with improved
 #' Postcode and GP Practice details.
-fill_geographies <- function(data) {
+fill_geographies <- function(
+    data,
+    slf_pc_lookup = read_file(get_slf_postcode_path()),
+    slf_gpprac_lookup = read_file(
+      get_slf_gpprac_path(),
+      col_select = c("gpprac", "cluster", "hbpraccode")
+    )) {
   check_variables_exist(data, c(
     "chi",
     "postcode",
@@ -21,8 +29,15 @@ fill_geographies <- function(data) {
   ))
 
   data %>%
-    fill_postcode_geogs() %>%
-    fill_gpprac_geographies()
+    # Use the lookups supplied by the caller; re-reading them from disk here
+    # would silently ignore the `slf_pc_lookup` / `slf_gpprac_lookup` arguments.
+    fill_postcode_geogs(
+      slf_pc_lookup = slf_pc_lookup
+    ) %>%
+    # The defaults in the signature still read the files when nothing is passed.
+    fill_gpprac_geographies(
+      slf_gpprac_lookup = slf_gpprac_lookup
+    )
 }
 
 #' Make a postcode lookup for filling to most recent postcodes based on CHI
@@ -86,9 +101,9 @@ make_gpprac_lookup <- function(data) {
   return(gpprac_lookup)
 }
 
-fill_postcode_geogs <- function(data) {
-  slf_pc_lookup <- read_file(get_slf_postcode_path())
-
+fill_postcode_geogs <- function(
+    data,
+    slf_pc_lookup) {
   filled_postcodes <- dplyr::left_join(
     data,
     make_postcode_lookup(data),
@@ -123,17 +138,20 @@ fill_postcode_geogs <- function(data) {
       lca = dplyr::coalesce(.data$lca, .data$lca_old),
       datazone2011 = dplyr::coalesce(.data$datazone2011, .data$datazone2011_old)
     ) %>%
-    dplyr::select(!c("hb2018", "hscp", "lca_old", "datazone2011_old", "most_recent_postcode"))
+    dplyr::select(!c(
+      "hb2018",
+      "hscp",
+      "lca_old",
+      "datazone2011_old",
+      "most_recent_postcode"
+    ))
 
   return(filled_postcodes)
 }
 
-fill_gpprac_geographies <- function(data) {
-  gpprac_ref <- read_file(
-    get_slf_gpprac_path(),
-    col_select = c("gpprac", "cluster", "hbpraccode")
-  )
-
+fill_gpprac_geographies <- function(
+    data,
+    slf_gpprac_lookup) {
   filled_gpprac <- dplyr::left_join(
     data,
     make_gpprac_lookup(data),
@@ -147,7 +165,12 @@ fill_gpprac_geographies <- function(data) {
         .data$gpprac
       )
     ) %>%
-    dplyr::left_join(gpprac_ref, by = "gpprac", suffix = c("_old", "")) %>%
+    dplyr::left_join(
+      slf_gpprac_lookup %>%
+        dplyr::select("gpprac", "cluster", "hbpraccode"),
+      by = "gpprac",
+      suffix = c("_old", "")
+    ) %>%
     dplyr::mutate(
       hbpraccode = dplyr::coalesce(.data$hbpraccode, .data$hbpraccode_old)
     ) %>%
diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R
index 4cb5eef44..37ed545cf 100644
--- a/R/get_source_extract_path.R
+++ b/R/get_source_extract_path.R
@@ -41,34 +41,34 @@ get_source_extract_path <- function(
   type <- match.arg(type)
 
   if (!check_year_valid(year, type)) {
-    return(NA)
+    return(get_dummy_boxi_extract_path())
   }
 
   file_name <- dplyr::case_match(
     type,
     "Acute" ~ "acute_for_source",
-    "AE" ~ "a&e_for_source",
-    "AT" ~ "Alarms-Telecare-for-source",
+    "AE" ~ "a_and_e_for_source",
+    "AT" ~ "alarms-telecare-for-source",
     "CH" ~ "care_home_for_source",
-    "CMH" ~ "CMH_for_source",
+    "CMH" ~ "cmh_for_source",
     "Client" ~ "client_for_source",
-    "DD" ~ "DD_for_source",
+    "DD" ~ "delayed_discharge_for_source",
     "Deaths" ~ "deaths_for_source",
-    "DN" ~ "DN_for_source",
-    "GPOoH" ~ "GP_OOH_for_source",
-    "HC" ~ "Home_Care_for_source",
+    "DN" ~ "district_nursing_for_source",
+    "GPOoH" ~ "gp_ooh_for_source",
+    "HC" ~ "home_care_for_source",
     "Homelessness" ~ "homelessness_for_source",
     "Maternity" ~ "maternity_for_source",
     "MH" ~ "mental_health_for_source",
-    "DD" ~ "DD_for_source",
     "Outpatients" ~ "outpatients_for_source",
-    "PIS" ~ "prescribing_file_for_source",
-    "SDS" ~ "SDS-for-source"
-  )
+    "PIS" ~ "prescribing_for_source",
+    "SDS" ~ "sds_for_source"
+  ) %>%
+    stringr::str_glue("-{year}.parquet")
 
   source_extract_path <- get_file_path(
     directory = get_year_dir(year),
-    file_name = stringr::str_glue("{file_name}-20{year}.parquet"),
+    file_name = file_name,
     ...
   )
 
diff --git a/R/get_sparra_hhg_paths.R b/R/get_sparra_hhg_paths.R
index 2fd1a69f9..157160ed4 100644
--- a/R/get_sparra_hhg_paths.R
+++ b/R/get_sparra_hhg_paths.R
@@ -10,6 +10,10 @@
 #' @family extract file paths
 #' @seealso [get_file_path()] for the generic function.
 get_hhg_path <- function(year, ...) {
+  if (!check_year_valid(year, "HHG")) {
+    return(get_dummy_boxi_extract_path())
+  }
+
   hhg_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "HHG"),
     file_name = stringr::str_glue("HHG-20{year}.parquet"),
@@ -31,6 +35,10 @@ get_hhg_path <- function(year, ...) {
 #' @family extract file paths
 #' @seealso [get_file_path()] for the generic function.
 get_sparra_path <- function(year, ...) {
+  if (!check_year_valid(year, "SPARRA")) {
+    return(get_dummy_boxi_extract_path())
+  }
+
   sparra_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "SPARRA"),
     file_name = stringr::str_glue("SPARRA-20{year}.parquet"),
diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R
index 694d2e2b9..89bcbbe13 100644
--- a/R/join_deaths_data.R
+++ b/R/join_deaths_data.R
@@ -2,16 +2,14 @@
 #'
 #' @param data Episode file data
 #' @param year financial year, e.g. '1920'
-#' @param slf_deaths_lookup_path Path to slf deaths lookup.
+#' @param slf_deaths_lookup The SLF deaths lookup.
 #'
 #' @return The data including the deaths lookup matched
 #'         on to the episode file.
 join_deaths_data <- function(
     data,
     year,
-    slf_deaths_lookup_path = get_slf_deaths_lookup_path(year)) {
-  slf_deaths_lookup <- read_file(slf_deaths_lookup_path)
-
+    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))) {
   return(
     data %>%
       dplyr::left_join(
diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R
index 42345655a..f83f31325 100644
--- a/R/match_on_ltcs.R
+++ b/R/match_on_ltcs.R
@@ -5,13 +5,17 @@
 #'
 #' @param data episode files
 #' @param year financial year, e.g. '1920'
+#' @param ltc_data The LTC data for the year
 #'
 #' @return data matched with long term conditions
-match_on_ltcs <- function(data, year) {
+match_on_ltcs <- function(
+    data,
+    year,
+    ltc_data = read_file(get_ltcs_path(year))) {
   # Match on LTC lookup
   matched <- dplyr::left_join(
     data,
-    read_file(get_ltcs_path(year)),
+    ltc_data,
     by = "chi",
     suffix = c("", "_ltc")
   ) %>%
diff --git a/R/read_file.R b/R/read_file.R
index 2941b62ed..be0a6fc65 100644
--- a/R/read_file.R
+++ b/R/read_file.R
@@ -27,6 +27,11 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) {
     "parquet"
   )
 
+  # Return an empty tibble if trying to read the dummy path
+  if (path == get_dummy_boxi_extract_path()) {
+    return(tibble::tibble())
+  }
+
   ext <- fs::path_ext(path)
 
   if (ext == "gz") {
diff --git a/_targets.R b/_targets.R
index db26477ef..a9fa80d7a 100644
--- a/_targets.R
+++ b/_targets.R
@@ -543,11 +543,18 @@ list(
         source_sc_alarms_tele
       )
     ),
+    tar_file_read(nsu_cohort, get_nsu_path(year), read_file(!!.x)),
     tar_target(
       episode_file,
       create_episode_file(
         processed_data_list,
         year,
+        dd_data = source_dd_extract,
+        nsu_cohort = nsu_cohort,
+        ltc_data = source_ltc_lookup,
+        slf_pc_lookup = source_pc_lookup,
+        slf_gpprac_lookup = source_gp_lookup,
+        slf_deaths_lookup = slf_deaths_lookup,
         write_to_disk
       )
     ),
diff --git a/man/add_nsu_cohort.Rd b/man/add_nsu_cohort.Rd
index 723c105e1..4ea9324e0 100644
--- a/man/add_nsu_cohort.Rd
+++ b/man/add_nsu_cohort.Rd
@@ -4,12 +4,14 @@
 \alias{add_nsu_cohort}
 \title{Add NSU cohort to working file}
 \usage{
-add_nsu_cohort(data, year)
+add_nsu_cohort(data, year, nsu_cohort = read_file(get_nsu_path(year)))
 }
 \arguments{
 \item{data}{The input data frame}
 
 \item{year}{The year being processed}
+
+\item{nsu_cohort}{The NSU data for the year}
 }
 \value{
 A data frame containing the Non-Service Users as additional rows
diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd
index e1bda32b9..c1ce0e063 100644
--- a/man/create_episode_file.Rd
+++ b/man/create_episode_file.Rd
@@ -2,11 +2,18 @@
 % Please edit documentation in R/create_episode_file.R
 \name{create_episode_file}
 \alias{create_episode_file}
-\title{Create the Source Episode file}
+\title{Produce the Source Episode file}
 \usage{
 create_episode_file(
   processed_data_list,
   year,
+  dd_data = read_file(get_source_extract_path(year, "DD")),
+  nsu_cohort = read_file(get_nsu_path(year)),
+  ltc_data = read_file(get_ltcs_path(year)),
+  slf_pc_lookup = read_file(get_slf_postcode_path()),
+  slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac",
+    "cluster", "hbpraccode")),
+  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)),
   write_to_disk = TRUE,
   anon_chi_out = TRUE
 )
@@ -16,15 +23,27 @@ create_episode_file(
 
 \item{year}{The year to process, in FY format.}
 
+\item{dd_data}{The processed DD extract}
+
+\item{nsu_cohort}{The NSU data for the year}
+
+\item{ltc_data}{The LTC data for the year}
+
+\item{slf_pc_lookup}{The SLF Postcode lookup}
+
+\item{slf_gpprac_lookup}{The SLF GP Practice lookup}
+
+\item{slf_deaths_lookup}{The SLF deaths lookup.}
+
 \item{write_to_disk}{(optional) Should the data be written to disk default is
 \code{TRUE} i.e. write the data to disk.}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
-(instead of chi).}
+(instead of chi)}
 }
 \value{
-the Source Episode file as a \link[tibble:tibble-package]{tibble}.
+a \link[tibble:tibble-package]{tibble} containing the episode file
 }
 \description{
-Create the Source Episode file
+Produce the Source Episode file
 }
diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd
index 4fd9a4a53..c4502e5ae 100644
--- a/man/create_individual_file.Rd
+++ b/man/create_individual_file.Rd
@@ -24,7 +24,7 @@ create_individual_file(
 (instead of chi).}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
-(instead of chi).}
+(instead of chi)}
 }
 \value{
 The processed individual file
diff --git a/man/fill_geographies.Rd b/man/fill_geographies.Rd
index 5308fd8d0..bb619405b 100644
--- a/man/fill_geographies.Rd
+++ b/man/fill_geographies.Rd
@@ -4,10 +4,19 @@
 \alias{fill_geographies}
 \title{Fill postcode and GP practice geographies}
 \usage{
-fill_geographies(data)
+fill_geographies(
+  data,
+  slf_pc_lookup = read_file(get_slf_postcode_path()),
+  slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac",
+    "cluster", "hbpraccode"))
+)
 }
 \arguments{
 \item{data}{the SLF}
+
+\item{slf_pc_lookup}{The SLF Postcode lookup}
+
+\item{slf_gpprac_lookup}{The SLF GP Practice lookup}
 }
 \value{
 a \link[tibble:tibble-package]{tibble} of the SLF with improved
diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd
index 15a860a36..3ef549cc3 100644
--- a/man/join_cohort_lookups.Rd
+++ b/man/join_cohort_lookups.Rd
@@ -4,7 +4,15 @@
 \alias{join_cohort_lookups}
 \title{Join cohort lookups}
 \usage{
-join_cohort_lookups(data, year, update = latest_update())
+join_cohort_lookups(
+  data,
+  year,
+  update = latest_update(),
+  demographic_cohort = read_file(get_demographic_cohorts_path(year, update), col_select =
+    c("chi", "demographic_cohort")),
+  service_use_cohort = read_file(get_service_use_cohorts_path(year, update), col_select =
+    c("chi", "service_use_cohort"))
+)
 }
 \arguments{
 \item{data}{The in-progress episode file data.}
@@ -12,6 +20,8 @@ join_cohort_lookups(data, year, update = latest_update())
 \item{year}{The year to process, in FY format.}
 
 \item{update}{The update to use}
+
+\item{demographic_cohort, service_use_cohort}{The cohort data}
 }
 \value{
 The data including the Demographic and Service Use lookups.
diff --git a/man/join_deaths_data.Rd b/man/join_deaths_data.Rd
index 6508d7893..f3b68fe1a 100644
--- a/man/join_deaths_data.Rd
+++ b/man/join_deaths_data.Rd
@@ -7,7 +7,7 @@
 join_deaths_data(
   data,
   year,
-  slf_deaths_lookup_path = get_slf_deaths_lookup_path(year)
+  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))
 )
 }
 \arguments{
@@ -15,7 +15,7 @@ join_deaths_data(
 
 \item{year}{financial year, e.g. '1920'}
 
-\item{slf_deaths_lookup_path}{Path to slf deaths lookup.}
+\item{slf_deaths_lookup}{The SLF deaths lookup.}
 }
 \value{
 The data including the deaths lookup matched
diff --git a/man/match_on_ltcs.Rd b/man/match_on_ltcs.Rd
index 0c7e7fb53..e0def00cc 100644
--- a/man/match_on_ltcs.Rd
+++ b/man/match_on_ltcs.Rd
@@ -4,12 +4,14 @@
 \alias{match_on_ltcs}
 \title{Match on LTC DoB and dates of LTC incidence}
 \usage{
-match_on_ltcs(data, year)
+match_on_ltcs(data, year, ltc_data = read_file(get_ltcs_path(year)))
 }
 \arguments{
 \item{data}{episode files}
 
 \item{year}{financial year, e.g. '1920'}
+
+\item{ltc_data}{The LTC data for the year}
 }
 \value{
 data matched with long term conditions

From b7ce1b158e27d1407571b1404ff4c6ea295033ad Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Tue, 26 Sep 2023 11:53:44 +0100
Subject: [PATCH 08/11] Add tests for `check_it_reference()` and make its check stricter (#807)

* Add additional tests for `check_it_reference()`

* Make the check on the IT reference stricter

* Update documentation

---------

Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 R/get_it_extract_paths.R                   |  2 +-
 tests/testthat/test-get_it_extract_paths.R | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/R/get_it_extract_paths.R b/R/get_it_extract_paths.R
index 2c587e93b..3c4dc54c3 100644
--- a/R/get_it_extract_paths.R
+++ b/R/get_it_extract_paths.R
@@ -107,7 +107,7 @@ check_it_reference <- function(it_reference) {
     it_reference <- stringr::str_sub(it_reference, start = 7L, end = 14L)
   }
 
-  if (stringr::str_detect(it_reference, "[0-9]{7}", negate = TRUE)) {
+  if (stringr::str_detect(it_reference, "^[0-9]{7}$", negate = TRUE)) {
     cli::cli_abort(
       c("x" = "{.arg it_reference} must be exactly 7 numbers."),
       call = rlang::caller_env()
diff --git a/tests/testthat/test-get_it_extract_paths.R b/tests/testthat/test-get_it_extract_paths.R
index baaad52a5..52f9e4181 100644
--- a/tests/testthat/test-get_it_extract_paths.R
+++ b/tests/testthat/test-get_it_extract_paths.R
@@ -1,3 +1,25 @@
+test_that("IT reference cleanup works", {
+  expect_equal(check_it_reference("SCTASK0439133"), "0439133")
+  expect_equal(check_it_reference("0439133"), "0439133")
+
+  expect_error(
+    check_it_reference("123456789"),
+    "`it_reference` must be exactly 7 numbers\\."
+  )
+  expect_error(
+    check_it_reference("1234567890"),
+    "`it_reference` must be exactly 7 numbers\\."
+  )
+  expect_error(
+    check_it_reference("SCTASK123456789"),
+    "`it_reference` must be exactly 7 numbers\\."
+  )
+  expect_error(
+    check_it_reference("ABCDEF123456789"),
+    "`it_reference` must be exactly 7 numbers\\."
+  )
+})
+
 skip_on_ci()
 
 test_that("IT extract file paths work", {

From 2f6f25c9a319874bd83ff5122dcbd38bc5ee1815 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Tue, 26 Sep 2023 11:56:42 +0100
Subject: [PATCH 09/11] Update workflow to run against the development branch
 (#795)

* Make test-coverage.yaml run against development

* Make lint-changed-files.yaml run against development

---------

Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 .github/workflows/lint-changed-files.yaml | 2 +-
 .github/workflows/test-coverage.yaml      | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/lint-changed-files.yaml b/.github/workflows/lint-changed-files.yaml
index a5074e3b6..96f1673a1 100644
--- a/.github/workflows/lint-changed-files.yaml
+++ b/.github/workflows/lint-changed-files.yaml
@@ -2,7 +2,7 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   pull_request:
-    branches: [main-R, master, main]
+    branches: [master, main, development]
 
 name: lint-changed-files
 
diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
index 2405bcc47..f7096264e 100644
--- a/.github/workflows/test-coverage.yaml
+++ b/.github/workflows/test-coverage.yaml
@@ -2,9 +2,9 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: [master, main, main-R]
+    branches: [master, main, development]
   pull_request:
-    branches: [master, main, main-R]
+    branches: [master, main, development]
 
 name: test-coverage
 

From 50f4ef9127fea5a8f3f7b0387ec3b064becf72fd Mon Sep 17 00:00:00 2001
From: Jennit07 <67372904+Jennit07@users.noreply.github.com>
Date: Tue, 26 Sep 2023 12:30:06 +0100
Subject: [PATCH 10/11] Remove package-wide imports of `readr` (#792)

* Update documentation

* Use `readr::` where possible

* Update documentation

---------

Co-authored-by: Jennit07 <Jennit07@users.noreply.github.com>
Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com>
---
 NAMESPACE                         |  11 ---
 R/createslf-package.R             |   2 -
 R/process_extract_ae.R            |  12 +--
 R/read_extract_acute.R            | 138 +++++++++++++++---------------
 R/read_extract_ae.R               |  72 ++++++++--------
 R/read_extract_cmh.R              |  36 ++++----
 R/read_extract_district_nursing.R |  38 ++++----
 R/read_extract_homelessness.R     |  46 +++++-----
 R/read_extract_maternity.R        | 108 +++++++++++------------
 R/read_extract_mental_health.R    | 116 ++++++++++++-------------
 R/read_extract_nrs_deaths.R       |  54 ++++++------
 R/read_extract_outpatients.R      |  72 ++++++++--------
 R/read_extract_prescribing.R      |  16 ++--
 R/read_it_chi_deaths.R            |   8 +-
 R/read_lookup_ltc.R               |  44 +++++-----
 15 files changed, 380 insertions(+), 393 deletions(-)

diff --git a/NAMESPACE b/NAMESPACE
index d0323b8c4..27447da7b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -164,17 +164,6 @@ export(write_file)
 importFrom(data.table,.N)
 importFrom(data.table,.SD)
 importFrom(magrittr,"%>%")
-importFrom(readr,col_character)
-importFrom(readr,col_date)
-importFrom(readr,col_datetime)
-importFrom(readr,col_double)
-importFrom(readr,col_factor)
-importFrom(readr,col_integer)
-importFrom(readr,col_logical)
-importFrom(readr,col_number)
-importFrom(readr,col_time)
-importFrom(readr,cols)
-importFrom(readr,cols_only)
 importFrom(rlang,":=")
 importFrom(rlang,.data)
 importFrom(tibble,tibble)
diff --git a/R/createslf-package.R b/R/createslf-package.R
index acf9154b6..cdd7d1a01 100644
--- a/R/createslf-package.R
+++ b/R/createslf-package.R
@@ -1,6 +1,4 @@
 ## usethis namespace: start
-#' @importFrom readr cols cols_only col_character col_date col_datetime
-#' col_double col_factor col_integer col_logical col_number col_time
 #' @importFrom tibble tibble
 #' @importFrom rlang := .data
 ## usethis namespace: end
diff --git a/R/process_extract_ae.R b/R/process_extract_ae.R
index 95dfd99be..d4ab9bf7c 100644
--- a/R/process_extract_ae.R
+++ b/R/process_extract_ae.R
@@ -193,12 +193,12 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
 
   ae_cup_file <- read_file(
     path = get_boxi_extract_path(year, "AE_CUP"),
-    col_type = cols(
-      "ED Arrival Date" = col_date(format = "%Y/%m/%d %T"),
-      "ED Arrival Time" = col_time(""),
-      "ED Case Reference Number [C]" = col_character(),
-      "CUP Marker" = col_double(),
-      "CUP Pathway Name" = col_character()
+    col_type = readr::cols(
+      "ED Arrival Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "ED Arrival Time" = readr::col_time(""),
+      "ED Case Reference Number [C]" = readr::col_character(),
+      "CUP Marker" = readr::col_double(),
+      "CUP Pathway Name" = readr::col_character()
     )
   ) %>%
     # rename variables
diff --git a/R/read_extract_acute.R b/R/read_extract_acute.R
index 6a0d23b11..84baa6f5b 100644
--- a/R/read_extract_acute.R
+++ b/R/read_extract_acute.R
@@ -9,75 +9,75 @@
 read_extract_acute <- function(year, file_path = get_boxi_extract_path(year = year, type = "Acute")) {
   # Read BOXI extract
   extract_acute <- read_file(file_path,
-    col_type = cols(
-      "Costs Financial Year (01)" = col_integer(),
-      "Costs Financial Month Number (01)" = col_double(),
-      "GLS Record" = col_character(),
-      "Date of Admission(01)" = col_date(format = "%Y/%m/%d %T"),
-      "Date of Discharge(01)" = col_date(format = "%Y/%m/%d %T"),
-      "Pat UPI" = col_character(),
-      "Pat Gender Code" = col_double(),
-      "Pat Date Of Birth [C]" = col_date(format = "%Y/%m/%d %T"),
-      "Practice Location Code" = col_character(),
-      "Practice NHS Board Code - current" = col_character(),
-      "Geo Postcode [C]" = col_character(),
-      "NHS Board of Residence Code - current" = col_character(),
-      "Geo Council Area Code" = col_character(),
-      "Geo HSCP of Residence Code - current" = col_character(),
-      "Geo Data Zone 2011" = col_character(),
-      "Treatment Location Code" = col_character(),
-      "Treatment NHS Board Code - current" = col_character(),
-      "Occupied Bed Days (01)" = col_double(),
-      "Inpatient Day Case Identifier Code" = col_character(),
-      "Specialty Classificat. 1/4/97 Code" = col_character(),
-      "Significant Facility Code" = col_character(),
-      "Lead Consultant/HCP Code" = col_character(),
-      "Management of Patient Code" = col_character(),
-      "Patient Category Code" = col_character(),
-      "Admission Type Code" = col_character(),
-      "Admitted Trans From Code" = col_character(),
-      "Location Admitted Trans From Code" = col_character(),
-      "Old SMR1 Type of Admission Code" = col_integer(),
-      "Discharge Type Code" = col_character(),
-      "Discharge Trans To Code" = col_character(),
-      "Location Discharged Trans To Code" = col_character(),
-      "Diagnosis 1 Code (6 char)" = col_character(),
-      "Diagnosis 2 Code (6 char)" = col_character(),
-      "Diagnosis 3 Code (6 char)" = col_character(),
-      "Diagnosis 4 Code (6 char)" = col_character(),
-      "Diagnosis 5 Code (6 char)" = col_character(),
-      "Diagnosis 6 Code (6 char)" = col_character(),
-      "Operation 1A Code (4 char)" = col_character(),
-      "Operation 1B Code (4 char)" = col_character(),
-      "Date of Operation 1 (01)" = col_date(format = "%Y/%m/%d %T"),
-      "Operation 2A Code (4 char)" = col_character(),
-      "Operation 2B Code (4 char)" = col_character(),
-      "Date of Operation 2 (01)" = col_date(format = "%Y/%m/%d %T"),
-      "Operation 3A Code (4 char)" = col_character(),
-      "Operation 3B Code (4 char)" = col_character(),
-      "Date of Operation 3 (01)" = col_date(format = "%Y/%m/%d %T"),
-      "Operation 4A Code (4 char)" = col_character(),
-      "Operation 4B Code (4 char)" = col_character(),
-      "Date of Operation 4 (01)" = col_date(format = "%Y/%m/%d %T"),
-      "Age at Midpoint of Financial Year (01)" = col_integer(),
-      "Continuous Inpatient Stay(SMR01) (inc GLS)" = col_integer(),
-      "Continuous Inpatient Journey Marker (01)" = col_integer(),
-      "CIJ Planned Admission Code (01)" = col_integer(),
-      "CIJ Inpatient Day Case Identifier Code (01)" = col_character(),
-      "CIJ Type of Admission Code (01)" = col_character(),
-      "CIJ Admission Specialty Code (01)" = col_character(),
-      "CIJ Discharge Specialty Code (01)" = col_character(),
-      "CIJ Start Date (01)" = col_date(format = "%Y/%m/%d %T"),
-      "CIJ End Date (01)" = col_date(format = "%Y/%m/%d %T"),
-      "Total Net Costs (01)" = col_double(),
-      "NHS Hospital Flag (01)" = col_character(),
-      "Community Hospital Flag (01)" = col_character(),
-      "Alcohol Related Admission (01)" = col_character(),
-      "Substance Misuse Related Admission (01)" = col_character(),
-      "Falls Related Admission (01)" = col_character(),
-      "Self Harm Related Admission (01)" = col_character(),
-      "Unique Record Identifier" = col_character(),
-      "Line Number (01)" = col_character()
+    col_type = readr::cols(
+      "Costs Financial Year (01)" = readr::col_integer(),
+      "Costs Financial Month Number (01)" = readr::col_double(),
+      "GLS Record" = readr::col_character(),
+      "Date of Admission(01)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Date of Discharge(01)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Pat UPI" = readr::col_character(),
+      "Pat Gender Code" = readr::col_double(),
+      "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Practice Location Code" = readr::col_character(),
+      "Practice NHS Board Code - current" = readr::col_character(),
+      "Geo Postcode [C]" = readr::col_character(),
+      "NHS Board of Residence Code - current" = readr::col_character(),
+      "Geo Council Area Code" = readr::col_character(),
+      "Geo HSCP of Residence Code - current" = readr::col_character(),
+      "Geo Data Zone 2011" = readr::col_character(),
+      "Treatment Location Code" = readr::col_character(),
+      "Treatment NHS Board Code - current" = readr::col_character(),
+      "Occupied Bed Days (01)" = readr::col_double(),
+      "Inpatient Day Case Identifier Code" = readr::col_character(),
+      "Specialty Classificat. 1/4/97 Code" = readr::col_character(),
+      "Significant Facility Code" = readr::col_character(),
+      "Lead Consultant/HCP Code" = readr::col_character(),
+      "Management of Patient Code" = readr::col_character(),
+      "Patient Category Code" = readr::col_character(),
+      "Admission Type Code" = readr::col_character(),
+      "Admitted Trans From Code" = readr::col_character(),
+      "Location Admitted Trans From Code" = readr::col_character(),
+      "Old SMR1 Type of Admission Code" = readr::col_integer(),
+      "Discharge Type Code" = readr::col_character(),
+      "Discharge Trans To Code" = readr::col_character(),
+      "Location Discharged Trans To Code" = readr::col_character(),
+      "Diagnosis 1 Code (6 char)" = readr::col_character(),
+      "Diagnosis 2 Code (6 char)" = readr::col_character(),
+      "Diagnosis 3 Code (6 char)" = readr::col_character(),
+      "Diagnosis 4 Code (6 char)" = readr::col_character(),
+      "Diagnosis 5 Code (6 char)" = readr::col_character(),
+      "Diagnosis 6 Code (6 char)" = readr::col_character(),
+      "Operation 1A Code (4 char)" = readr::col_character(),
+      "Operation 1B Code (4 char)" = readr::col_character(),
+      "Date of Operation 1 (01)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Operation 2A Code (4 char)" = readr::col_character(),
+      "Operation 2B Code (4 char)" = readr::col_character(),
+      "Date of Operation 2 (01)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Operation 3A Code (4 char)" = readr::col_character(),
+      "Operation 3B Code (4 char)" = readr::col_character(),
+      "Date of Operation 3 (01)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Operation 4A Code (4 char)" = readr::col_character(),
+      "Operation 4B Code (4 char)" = readr::col_character(),
+      "Date of Operation 4 (01)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Age at Midpoint of Financial Year (01)" = readr::col_integer(),
+      "Continuous Inpatient Stay(SMR01) (inc GLS)" = readr::col_integer(),
+      "Continuous Inpatient Journey Marker (01)" = readr::col_integer(),
+      "CIJ Planned Admission Code (01)" = readr::col_integer(),
+      "CIJ Inpatient Day Case Identifier Code (01)" = readr::col_character(),
+      "CIJ Type of Admission Code (01)" = readr::col_character(),
+      "CIJ Admission Specialty Code (01)" = readr::col_character(),
+      "CIJ Discharge Specialty Code (01)" = readr::col_character(),
+      "CIJ Start Date (01)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "CIJ End Date (01)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Total Net Costs (01)" = readr::col_double(),
+      "NHS Hospital Flag (01)" = readr::col_character(),
+      "Community Hospital Flag (01)" = readr::col_character(),
+      "Alcohol Related Admission (01)" = readr::col_character(),
+      "Substance Misuse Related Admission (01)" = readr::col_character(),
+      "Falls Related Admission (01)" = readr::col_character(),
+      "Self Harm Related Admission (01)" = readr::col_character(),
+      "Unique Record Identifier" = readr::col_character(),
+      "Line Number (01)" = readr::col_character()
     )
   ) %>%
     # Rename variables
diff --git a/R/read_extract_ae.R b/R/read_extract_ae.R
index 6cddd1cb6..dab886816 100644
--- a/R/read_extract_ae.R
+++ b/R/read_extract_ae.R
@@ -8,42 +8,42 @@ read_extract_ae <- function(
     year,
     file_path = get_boxi_extract_path(year = year, type = "AE")) {
   extract_ae <- read_file(file_path,
-    col_type = cols(
-      "Arrival Date" = col_date(format = "%Y/%m/%d %T"),
-      "DAT Date" = col_date(format = "%Y/%m/%d %T"),
-      "Pat UPI [C]" = col_character(),
-      "Pat Date Of Birth [C]" = col_date(format = "%Y/%m/%d %T"),
-      "Pat Gender Code" = col_double(),
-      "NHS Board of Residence Code - current" = col_character(),
-      "Treatment NHS Board Code - current" = col_character(),
-      "Treatment Location Code" = col_character(),
-      "GP Practice Code" = col_character(),
-      "Council Area Code" = col_character(),
-      "Postcode (epi) [C]" = col_character(),
-      "Postcode (CHI) [C]" = col_character(),
-      "HSCP of Residence Code - current" = col_character(),
-      "Arrival Time" = col_time(""),
-      "DAT Time" = col_time(""),
-      "Arrival Mode Code" = col_character(),
-      "Referral Source Code" = col_character(),
-      "Attendance Category Code" = col_character(),
-      "Discharge Destination Code" = col_character(),
-      "Patient Flow Code" = col_double(),
-      "Place of Incident Code" = col_character(),
-      "Reason for Wait Code" = col_character(),
-      "Disease 1 Code" = col_character(),
-      "Disease 2 Code" = col_character(),
-      "Disease 3 Code" = col_character(),
-      "Bodily Location Of Injury Code" = col_character(),
-      "Alcohol Involved Code" = col_character(),
-      "Alcohol Related Admission" = col_character(),
-      "Substance Misuse Related Admission" = col_character(),
-      "Falls Related Admission" = col_character(),
-      "Self Harm Related Admission" = col_character(),
-      "Total Net Costs" = col_double(),
-      "Age at Midpoint of Financial Year" = col_double(),
-      "Case Reference Number" = col_character(),
-      "Significant Facility Code" = col_character()
+    col_type = readr::cols(
+      "Arrival Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "DAT Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Pat UPI [C]" = readr::col_character(),
+      "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Pat Gender Code" = readr::col_double(),
+      "NHS Board of Residence Code - current" = readr::col_character(),
+      "Treatment NHS Board Code - current" = readr::col_character(),
+      "Treatment Location Code" = readr::col_character(),
+      "GP Practice Code" = readr::col_character(),
+      "Council Area Code" = readr::col_character(),
+      "Postcode (epi) [C]" = readr::col_character(),
+      "Postcode (CHI) [C]" = readr::col_character(),
+      "HSCP of Residence Code - current" = readr::col_character(),
+      "Arrival Time" = readr::col_time(""),
+      "DAT Time" = readr::col_time(""),
+      "Arrival Mode Code" = readr::col_character(),
+      "Referral Source Code" = readr::col_character(),
+      "Attendance Category Code" = readr::col_character(),
+      "Discharge Destination Code" = readr::col_character(),
+      "Patient Flow Code" = readr::col_double(),
+      "Place of Incident Code" = readr::col_character(),
+      "Reason for Wait Code" = readr::col_character(),
+      "Disease 1 Code" = readr::col_character(),
+      "Disease 2 Code" = readr::col_character(),
+      "Disease 3 Code" = readr::col_character(),
+      "Bodily Location Of Injury Code" = readr::col_character(),
+      "Alcohol Involved Code" = readr::col_character(),
+      "Alcohol Related Admission" = readr::col_character(),
+      "Substance Misuse Related Admission" = readr::col_character(),
+      "Falls Related Admission" = readr::col_character(),
+      "Self Harm Related Admission" = readr::col_character(),
+      "Total Net Costs" = readr::col_double(),
+      "Age at Midpoint of Financial Year" = readr::col_double(),
+      "Case Reference Number" = readr::col_character(),
+      "Significant Facility Code" = readr::col_character()
     )
   ) %>%
     # rename variables
diff --git a/R/read_extract_cmh.R b/R/read_extract_cmh.R
index 16151bd43..da627a67a 100644
--- a/R/read_extract_cmh.R
+++ b/R/read_extract_cmh.R
@@ -13,24 +13,24 @@ read_extract_cmh <- function(
 
   # Read BOXI extract
   extract_cmh <- read_file(file_path,
-    col_types = cols_only(
-      "UPI Number [C]" = col_character(),
-      "Patient DoB Date [C]" = col_date(format = "%Y/%m/%d %T"),
-      "Gender" = col_double(),
-      "Patient Postcode [C]" = col_character(),
-      "NHS Board of Residence Code 9" = col_character(),
-      "Patient HSCP Code - current" = col_character(),
-      "Practice Code" = col_integer(),
-      "Treatment NHS Board Code 9" = col_character(),
-      "Contact Date" = col_date(format = "%Y/%m/%d %T"),
-      "Contact Start Time" = col_time(format = "%T"),
-      "Duration of Contact" = col_integer(),
-      "Location of Contact" = col_character(),
-      "Main Aim of Contact" = col_character(),
-      "Other Aim of Contact (1)" = col_character(),
-      "Other Aim of Contact (2)" = col_character(),
-      "Other Aim of Contact (3)" = col_character(),
-      "Other Aim of Contact (4)" = col_character()
+    col_types = readr::cols_only(
+      "UPI Number [C]" = readr::col_character(),
+      "Patient DoB Date [C]" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Gender" = readr::col_double(),
+      "Patient Postcode [C]" = readr::col_character(),
+      "NHS Board of Residence Code 9" = readr::col_character(),
+      "Patient HSCP Code - current" = readr::col_character(),
+      "Practice Code" = readr::col_integer(),
+      "Treatment NHS Board Code 9" = readr::col_character(),
+      "Contact Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Contact Start Time" = readr::col_time(format = "%T"),
+      "Duration of Contact" = readr::col_integer(),
+      "Location of Contact" = readr::col_character(),
+      "Main Aim of Contact" = readr::col_character(),
+      "Other Aim of Contact (1)" = readr::col_character(),
+      "Other Aim of Contact (2)" = readr::col_character(),
+      "Other Aim of Contact (3)" = readr::col_character(),
+      "Other Aim of Contact (4)" = readr::col_character()
     )
   ) %>%
     # rename
diff --git a/R/read_extract_district_nursing.R b/R/read_extract_district_nursing.R
index 607f9b47e..e84856586 100644
--- a/R/read_extract_district_nursing.R
+++ b/R/read_extract_district_nursing.R
@@ -12,25 +12,25 @@ read_extract_district_nursing <- function(
 
   # Read BOXI extract
   extract_district_nursing <- read_file(file_path,
-    col_types = cols_only(
-      `Treatment NHS Board Code 9` = col_character(),
-      `Age at Contact Date` = col_integer(),
-      `Contact Date` = col_date(format = "%Y/%m/%d %T"),
-      `Primary Intervention Category` = col_character(),
-      `Other Intervention Category (1)` = col_character(),
-      `Other Intervention Category (2)` = col_character(),
-      `UPI Number [C]` = col_character(),
-      `Patient DoB Date [C]` = col_date(format = "%Y/%m/%d %T"),
-      `Patient Postcode [C] (Contact)` = col_character(),
-      `Duration of Contact (measure)` = col_double(),
-      Gender = col_double(),
-      `Location of Contact` = col_character(),
-      `Practice NHS Board Code 9 (Contact)` = col_character(),
-      `Patient Council Area Code (Contact)` = col_character(),
-      `Practice Code (Contact)` = col_character(),
-      `NHS Board of Residence Code 9 (Contact)` = col_character(),
-      `HSCP of Residence Code (Contact)` = col_character(),
-      `Patient Data Zone 2011 (Contact)` = col_character()
+    col_types = readr::cols_only(
+      `Treatment NHS Board Code 9` = readr::col_character(),
+      `Age at Contact Date` = readr::col_integer(),
+      `Contact Date` = readr::col_date(format = "%Y/%m/%d %T"),
+      `Primary Intervention Category` = readr::col_character(),
+      `Other Intervention Category (1)` = readr::col_character(),
+      `Other Intervention Category (2)` = readr::col_character(),
+      `UPI Number [C]` = readr::col_character(),
+      `Patient DoB Date [C]` = readr::col_date(format = "%Y/%m/%d %T"),
+      `Patient Postcode [C] (Contact)` = readr::col_character(),
+      `Duration of Contact (measure)` = readr::col_double(),
+      Gender = readr::col_double(),
+      `Location of Contact` = readr::col_character(),
+      `Practice NHS Board Code 9 (Contact)` = readr::col_character(),
+      `Patient Council Area Code (Contact)` = readr::col_character(),
+      `Practice Code (Contact)` = readr::col_character(),
+      `NHS Board of Residence Code 9 (Contact)` = readr::col_character(),
+      `HSCP of Residence Code (Contact)` = readr::col_character(),
+      `Patient Data Zone 2011 (Contact)` = readr::col_character()
     )
   ) %>%
     # rename
diff --git a/R/read_extract_homelessness.R b/R/read_extract_homelessness.R
index 32b7d6e86..64ebb639e 100644
--- a/R/read_extract_homelessness.R
+++ b/R/read_extract_homelessness.R
@@ -13,29 +13,29 @@ read_extract_homelessness <- function(
 
   extract_homelessness <- read_file(file_path,
-    col_types = cols(
-      "Assessment Decision Date" = col_date(format = "%Y/%m/%d %T"),
-      "Case Closed Date" = col_date(format = "%Y/%m/%d %T"),
-      "Sending Local Authority Code 9" = col_character(),
-      "Client Unique Identifier" = col_character(),
-      "UPI Number [C]" = col_character(),
-      "Client DoB Date [C]" = col_date(format = "%Y/%m/%d %T"),
-      "Age at Assessment Decision Date" = col_integer(),
-      "Gender Code" = col_integer(),
-      "Client Postcode [C]" = col_character(),
-      "Main Applicant Flag" = col_character(),
-      "Application Reference Number" = col_character(),
-      "Property Type Code" = col_integer(),
-      "Financial Difficulties / Debt / Unemployment" = col_integer(),
-      "Physical Health Reasons" = col_integer(),
-      "Mental Health Reasons" = col_integer(),
-      "Unmet Need for Support from Housing / Social Work / Health Services" = col_integer(),
-      "Lack of Support from Friends / Family" = col_integer(),
-      "Difficulties Managing on Own" = col_integer(),
-      "Drug / Alcohol Dependency" = col_integer(),
-      "Criminal / Anti-Social Behaviour" = col_integer(),
-      "Not to do with Applicant Household" = col_integer(),
-      "Refused" = col_integer(),
-      "Person in Receipt of Universal Credit" = col_integer()
+    col_types = readr::cols(
+      "Assessment Decision Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Case Closed Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Sending Local Authority Code 9" = readr::col_character(),
+      "Client Unique Identifier" = readr::col_character(),
+      "UPI Number [C]" = readr::col_character(),
+      "Client DoB Date [C]" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Age at Assessment Decision Date" = readr::col_integer(),
+      "Gender Code" = readr::col_integer(),
+      "Client Postcode [C]" = readr::col_character(),
+      "Main Applicant Flag" = readr::col_character(),
+      "Application Reference Number" = readr::col_character(),
+      "Property Type Code" = readr::col_integer(),
+      "Financial Difficulties / Debt / Unemployment" = readr::col_integer(),
+      "Physical Health Reasons" = readr::col_integer(),
+      "Mental Health Reasons" = readr::col_integer(),
+      "Unmet Need for Support from Housing / Social Work / Health Services" = readr::col_integer(),
+      "Lack of Support from Friends / Family" = readr::col_integer(),
+      "Difficulties Managing on Own" = readr::col_integer(),
+      "Drug / Alcohol Dependency" = readr::col_integer(),
+      "Criminal / Anti-Social Behaviour" = readr::col_integer(),
+      "Not to do with Applicant Household" = readr::col_integer(),
+      "Refused" = readr::col_integer(),
+      "Person in Receipt of Universal Credit" = readr::col_integer()
     )
   ) %>%
     dplyr::rename(
diff --git a/R/read_extract_maternity.R b/R/read_extract_maternity.R
index 49bda2fb5..d92295690 100644
--- a/R/read_extract_maternity.R
+++ b/R/read_extract_maternity.R
@@ -8,60 +8,60 @@ read_extract_maternity <- function(
     file_path = get_boxi_extract_path(year = year, type = "Maternity")) {
   # Read BOXI extract
   extract_maternity <- read_file(file_path,
-    col_type = cols(
-      "Costs Financial Year" = col_double(),
-      "Date of Admission Full Date" = col_date(format = "%Y/%m/%d %T"),
-      "Date of Discharge Full Date" = col_date(format = "%Y/%m/%d %T"),
-      "Pat UPI [C]" = col_character(),
-      "Pat Date Of Birth [C]" = col_date(format = "%Y/%m/%d %T"),
-      "Practice Location Code" = col_character(),
-      "Practice NHS Board Code - current" = col_character(),
-      "Geo Postcode [C]" = col_character(),
-      "NHS Board of Residence Code - current" = col_character(),
-      "HSCP of Residence Code - current" = col_character(),
-      "Geo Council Area Code" = col_character(),
-      "Treatment Location Code" = col_character(),
-      "Treatment NHS Board Code - current" = col_character(),
-      "Occupied Bed Days" = col_double(),
-      "Specialty Classification 1/4/97 Code" = col_character(),
-      "Significant Facility Code" = col_character(),
-      "Consultant/HCP Code" = col_character(),
-      "Management of Patient Code" = col_character(),
-      "Admission Reason Code" = col_character(),
-      "Admitted/Transfer from Code (new)" = col_character(),
-      "Admitted/transfer from - Location Code" = col_character(),
-      "Discharge Type Code" = col_character(),
-      "Discharge/Transfer to Code (new)" = col_character(),
-      "Discharged to - Location Code" = col_character(),
-      "Condition On Discharge Code" = col_double(),
-      "Continuous Inpatient Journey Marker" = col_double(),
-      "CIJ Planned Admission Code" = col_double(),
-      "CIJ Inpatient Day Case Identifier Code" = col_character(),
-      "CIJ Type of Admission Code" = col_character(),
-      "CIJ Admission Specialty Code" = col_character(),
-      "CIJ Discharge Specialty Code" = col_character(),
-      "CIJ Start Date" = col_date(format = "%Y/%m/%d %T"),
-      "CIJ End Date" = col_date(format = "%Y/%m/%d %T"),
-      "Total Net Costs" = col_double(),
-      "Diagnosis 1 Discharge Code" = col_character(),
-      "Diagnosis 2 Discharge Code" = col_character(),
-      "Diagnosis 3 Discharge Code" = col_character(),
-      "Diagnosis 4 Discharge Code" = col_character(),
-      "Diagnosis 5 Discharge Code" = col_character(),
-      "Diagnosis 6 Discharge Code" = col_character(),
-      "Operation 1A Code" = col_character(),
-      "Operation 2A Code" = col_character(),
-      "Operation 3A Code" = col_character(),
-      "Operation 4A Code" = col_character(),
-      "Date of Main Operation Full Date" = col_date(format = "%Y/%m/%d %T"),
-      "Age at Midpoint of Financial Year" = col_double(),
-      "NHS Hospital Flag" = col_character(),
-      "Community Hospital Flag" = col_character(),
-      "Alcohol Related AdmissioN" = col_character(),
-      "Substance Misuse Related Admission" = col_character(),
-      "Falls Related Admission" = col_character(),
-      "Self Harm Related Admission" = col_character(),
-      "Maternity Unique Record Identifier [C]" = col_character()
+    col_types = readr::cols(
+      "Costs Financial Year" = readr::col_double(),
+      "Date of Admission Full Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Date of Discharge Full Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Pat UPI [C]" = readr::col_character(),
+      "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Practice Location Code" = readr::col_character(),
+      "Practice NHS Board Code - current" = readr::col_character(),
+      "Geo Postcode [C]" = readr::col_character(),
+      "NHS Board of Residence Code - current" = readr::col_character(),
+      "HSCP of Residence Code - current" = readr::col_character(),
+      "Geo Council Area Code" = readr::col_character(),
+      "Treatment Location Code" = readr::col_character(),
+      "Treatment NHS Board Code - current" = readr::col_character(),
+      "Occupied Bed Days" = readr::col_double(),
+      "Specialty Classification 1/4/97 Code" = readr::col_character(),
+      "Significant Facility Code" = readr::col_character(),
+      "Consultant/HCP Code" = readr::col_character(),
+      "Management of Patient Code" = readr::col_character(),
+      "Admission Reason Code" = readr::col_character(),
+      "Admitted/Transfer from Code (new)" = readr::col_character(),
+      "Admitted/transfer from - Location Code" = readr::col_character(),
+      "Discharge Type Code" = readr::col_character(),
+      "Discharge/Transfer to Code (new)" = readr::col_character(),
+      "Discharged to - Location Code" = readr::col_character(),
+      "Condition On Discharge Code" = readr::col_double(),
+      "Continuous Inpatient Journey Marker" = readr::col_double(),
+      "CIJ Planned Admission Code" = readr::col_double(),
+      "CIJ Inpatient Day Case Identifier Code" = readr::col_character(),
+      "CIJ Type of Admission Code" = readr::col_character(),
+      "CIJ Admission Specialty Code" = readr::col_character(),
+      "CIJ Discharge Specialty Code" = readr::col_character(),
+      "CIJ Start Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "CIJ End Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Total Net Costs" = readr::col_double(),
+      "Diagnosis 1 Discharge Code" = readr::col_character(),
+      "Diagnosis 2 Discharge Code" = readr::col_character(),
+      "Diagnosis 3 Discharge Code" = readr::col_character(),
+      "Diagnosis 4 Discharge Code" = readr::col_character(),
+      "Diagnosis 5 Discharge Code" = readr::col_character(),
+      "Diagnosis 6 Discharge Code" = readr::col_character(),
+      "Operation 1A Code" = readr::col_character(),
+      "Operation 2A Code" = readr::col_character(),
+      "Operation 3A Code" = readr::col_character(),
+      "Operation 4A Code" = readr::col_character(),
+      "Date of Main Operation Full Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Age at Midpoint of Financial Year" = readr::col_double(),
+      "NHS Hospital Flag" = readr::col_character(),
+      "Community Hospital Flag" = readr::col_character(),
+      "Alcohol Related AdmissioN" = readr::col_character(),
+      "Substance Misuse Related Admission" = readr::col_character(),
+      "Falls Related Admission" = readr::col_character(),
+      "Self Harm Related Admission" = readr::col_character(),
+      "Maternity Unique Record Identifier [C]" = readr::col_character()
     )
   ) %>%
     # Rename variables in line with SLF variable names
diff --git a/R/read_extract_mental_health.R b/R/read_extract_mental_health.R
index 248316975..fa236ecb1 100644
--- a/R/read_extract_mental_health.R
+++ b/R/read_extract_mental_health.R
@@ -8,64 +8,64 @@ read_extract_mental_health <- function(
     file_path = get_boxi_extract_path(year = year, type = "MH")) {
   # Read BOXI extract
   extract_mental_health <- read_file(file_path,
-    col_types = cols_only(
-      "Costs Financial Year (04)" = col_double(),
-      "Costs Financial Month Number (04)" = col_double(),
-      "Date of Admission(04)" = col_date(format = "%Y/%m/%d %T"),
-      "Date of Discharge(04)" = col_date(format = "%Y/%m/%d %T"),
-      "Pat UPI" = col_character(),
-      "Pat Gender Code" = col_integer(),
-      "Pat Date Of Birth [C]" = col_date(format = "%Y/%m/%d %T"),
-      "Practice Location Code" = col_character(),
-      "Practice NHS Board Code - current" = col_character(),
-      "Geo Postcode [C]" = col_character(),
-      "NHS Board of Residence Code - current" = col_character(),
-      "Geo Council Area Code" = col_character(),
-      "Geo HSCP of Residence Code - current" = col_character(),
-      "Geo Data Zone 2011" = col_character(),
-      "Treatment Location Code" = col_character(),
-      "Treatment NHS Board Code - current" = col_character(),
-      "Occupied Bed Days (04)" = col_double(),
-      "Specialty Classificat. 1/4/97 Code" = col_character(),
-      "Significant Facility Code" = col_character(),
-      "Lead Consultant/HCP Code" = col_character(),
-      "Management of Patient Code" = col_character(),
-      "Patient Category Code" = col_character(),
-      "Admission Type Code" = col_character(),
-      "Admitted Trans From Code" = col_character(),
-      "Location Admitted Trans From Code" = col_character(),
-      "Discharge Type Code" = col_character(),
-      "Discharge Trans To Code" = col_character(),
-      "Location Discharged Trans To Code" = col_character(),
-      "Diagnosis 1 Code (6 char)" = col_character(),
-      "Diagnosis 2 Code (6 char)" = col_character(),
-      "Diagnosis 3 Code (6 char)" = col_character(),
-      "Diagnosis 4 Code (6 char)" = col_character(),
-      "Diagnosis 5 Code (6 char)" = col_character(),
-      "Diagnosis 6 Code (6 char)" = col_character(),
-      "Status on Admission Code" = col_integer(),
-      "Admission Diagnosis 1 Code (6 char)" = col_character(),
-      "Admission Diagnosis 2 Code (6 char)" = col_character(),
-      "Admission Diagnosis 3 Code (6 char)" = col_character(),
-      "Admission Diagnosis 4 Code (6 char)" = col_character(),
-      "Age at Midpoint of Financial Year (04)" = col_integer(),
-      "Continuous Inpatient Journey Marker (04)" = col_integer(),
-      "CIJ Planned Admission Code (04)" = col_integer(),
-      "CIJ Inpatient Day Case Identifier Code (04)" = col_character(),
-      "CIJ Type of Admission Code (04)" = col_character(),
-      "CIJ Admission Specialty Code (04)" = col_character(),
-      "CIJ Discharge Specialty Code (04)" = col_character(),
-      "CIJ Start Date (04)" = col_date(format = "%Y/%m/%d %T"),
-      "CIJ End Date (04)" = col_date(format = "%Y/%m/%d %T"),
-      "Total Net Costs (04)" = col_double(),
-      "Alcohol Related Admission (04)" = col_factor(levels = c("Y", "N")),
-      "Substance Misuse Related Admission (04)" = col_factor(levels = c("Y", "N")),
-      "Falls Related Admission (04)" = col_factor(levels = c("Y", "N")),
-      "Self Harm Related Admission (04)" = col_factor(levels = c("Y", "N")),
-      "Duplicate Record Flag (04)" = col_factor(levels = c("Y", "N")),
-      "NHS Hospital Flag (04)" = col_factor(levels = c("Y", "N")),
-      "Community Hospital Flag (04)" = col_factor(levels = c("Y", "N")),
-      "Unique Record Identifier" = col_character()
+    col_types = readr::cols_only(
+      "Costs Financial Year (04)" = readr::col_double(),
+      "Costs Financial Month Number (04)" = readr::col_double(),
+      "Date of Admission(04)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Date of Discharge(04)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Pat UPI" = readr::col_character(),
+      "Pat Gender Code" = readr::col_integer(),
+      "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Practice Location Code" = readr::col_character(),
+      "Practice NHS Board Code - current" = readr::col_character(),
+      "Geo Postcode [C]" = readr::col_character(),
+      "NHS Board of Residence Code - current" = readr::col_character(),
+      "Geo Council Area Code" = readr::col_character(),
+      "Geo HSCP of Residence Code - current" = readr::col_character(),
+      "Geo Data Zone 2011" = readr::col_character(),
+      "Treatment Location Code" = readr::col_character(),
+      "Treatment NHS Board Code - current" = readr::col_character(),
+      "Occupied Bed Days (04)" = readr::col_double(),
+      "Specialty Classificat. 1/4/97 Code" = readr::col_character(),
+      "Significant Facility Code" = readr::col_character(),
+      "Lead Consultant/HCP Code" = readr::col_character(),
+      "Management of Patient Code" = readr::col_character(),
+      "Patient Category Code" = readr::col_character(),
+      "Admission Type Code" = readr::col_character(),
+      "Admitted Trans From Code" = readr::col_character(),
+      "Location Admitted Trans From Code" = readr::col_character(),
+      "Discharge Type Code" = readr::col_character(),
+      "Discharge Trans To Code" = readr::col_character(),
+      "Location Discharged Trans To Code" = readr::col_character(),
+      "Diagnosis 1 Code (6 char)" = readr::col_character(),
+      "Diagnosis 2 Code (6 char)" = readr::col_character(),
+      "Diagnosis 3 Code (6 char)" = readr::col_character(),
+      "Diagnosis 4 Code (6 char)" = readr::col_character(),
+      "Diagnosis 5 Code (6 char)" = readr::col_character(),
+      "Diagnosis 6 Code (6 char)" = readr::col_character(),
+      "Status on Admission Code" = readr::col_integer(),
+      "Admission Diagnosis 1 Code (6 char)" = readr::col_character(),
+      "Admission Diagnosis 2 Code (6 char)" = readr::col_character(),
+      "Admission Diagnosis 3 Code (6 char)" = readr::col_character(),
+      "Admission Diagnosis 4 Code (6 char)" = readr::col_character(),
+      "Age at Midpoint of Financial Year (04)" = readr::col_integer(),
+      "Continuous Inpatient Journey Marker (04)" = readr::col_integer(),
+      "CIJ Planned Admission Code (04)" = readr::col_integer(),
+      "CIJ Inpatient Day Case Identifier Code (04)" = readr::col_character(),
+      "CIJ Type of Admission Code (04)" = readr::col_character(),
+      "CIJ Admission Specialty Code (04)" = readr::col_character(),
+      "CIJ Discharge Specialty Code (04)" = readr::col_character(),
+      "CIJ Start Date (04)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "CIJ End Date (04)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Total Net Costs (04)" = readr::col_double(),
+      "Alcohol Related Admission (04)" = readr::col_factor(levels = c("Y", "N")),
+      "Substance Misuse Related Admission (04)" = readr::col_factor(levels = c("Y", "N")),
+      "Falls Related Admission (04)" = readr::col_factor(levels = c("Y", "N")),
+      "Self Harm Related Admission (04)" = readr::col_factor(levels = c("Y", "N")),
+      "Duplicate Record Flag (04)" = readr::col_factor(levels = c("Y", "N")),
+      "NHS Hospital Flag (04)" = readr::col_factor(levels = c("Y", "N")),
+      "Community Hospital Flag (04)" = readr::col_factor(levels = c("Y", "N")),
+      "Unique Record Identifier" = readr::col_character()
     )
   ) %>%
     # rename variables
diff --git a/R/read_extract_nrs_deaths.R b/R/read_extract_nrs_deaths.R
index 1734b23aa..efcc0f148 100644
--- a/R/read_extract_nrs_deaths.R
+++ b/R/read_extract_nrs_deaths.R
@@ -7,33 +7,33 @@ read_extract_nrs_deaths <- function(
     year,
     file_path = get_boxi_extract_path(year = year, type = "Deaths")) {
   extract_nrs_deaths <- read_file(file_path,
-    col_types = cols_only(
-      "Death Location Code" = col_character(),
-      "Geo Council Area Code" = col_character(),
-      "Geo Data Zone 2011" = col_character(),
-      "Geo Postcode [C]" = col_character(),
-      "Geo HSCP of Residence Code - current" = col_character(),
-      "NHS Board of Occurrence Code - current" = col_character(),
-      "NHS Board of Residence Code - current" = col_character(),
-      "Pat Date Of Birth [C]" = col_date(format = "%Y/%m/%d %T"),
-      "Date of Death(99)" = col_date(format = "%Y/%m/%d %T"),
-      "Pat Gender Code" = col_double(),
-      "Pat UPI" = col_character(),
-      "Place Death Occurred Code" = col_character(),
-      "Post Mortem Code" = col_character(),
-      "Prim Cause of Death Code (6 char)" = col_character(),
-      "Sec Cause of Death 0 Code (6 char)" = col_character(),
-      "Sec Cause of Death 1 Code (6 char)" = col_character(),
-      "Sec Cause of Death 2 Code (6 char)" = col_character(),
-      "Sec Cause of Death 3 Code (6 char)" = col_character(),
-      "Sec Cause of Death 4 Code (6 char)" = col_character(),
-      "Sec Cause of Death 5 Code (6 char)" = col_character(),
-      "Sec Cause of Death 6 Code (6 char)" = col_character(),
-      "Sec Cause of Death 7 Code (6 char)" = col_character(),
-      "Sec Cause of Death 8 Code (6 char)" = col_character(),
-      "Sec Cause of Death 9 Code (6 char)" = col_character(),
-      "Unique Record Identifier" = col_character(),
-      "GP practice code(99)" = col_character()
+    col_types = readr::cols_only(
+      "Death Location Code" = readr::col_character(),
+      "Geo Council Area Code" = readr::col_character(),
+      "Geo Data Zone 2011" = readr::col_character(),
+      "Geo Postcode [C]" = readr::col_character(),
+      "Geo HSCP of Residence Code - current" = readr::col_character(),
+      "NHS Board of Occurrence Code - current" = readr::col_character(),
+      "NHS Board of Residence Code - current" = readr::col_character(),
+      "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Date of Death(99)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Pat Gender Code" = readr::col_double(),
+      "Pat UPI" = readr::col_character(),
+      "Place Death Occurred Code" = readr::col_character(),
+      "Post Mortem Code" = readr::col_character(),
+      "Prim Cause of Death Code (6 char)" = readr::col_character(),
+      "Sec Cause of Death 0 Code (6 char)" = readr::col_character(),
+      "Sec Cause of Death 1 Code (6 char)" = readr::col_character(),
+      "Sec Cause of Death 2 Code (6 char)" = readr::col_character(),
+      "Sec Cause of Death 3 Code (6 char)" = readr::col_character(),
+      "Sec Cause of Death 4 Code (6 char)" = readr::col_character(),
+      "Sec Cause of Death 5 Code (6 char)" = readr::col_character(),
+      "Sec Cause of Death 6 Code (6 char)" = readr::col_character(),
+      "Sec Cause of Death 7 Code (6 char)" = readr::col_character(),
+      "Sec Cause of Death 8 Code (6 char)" = readr::col_character(),
+      "Sec Cause of Death 9 Code (6 char)" = readr::col_character(),
+      "Unique Record Identifier" = readr::col_character(),
+      "GP practice code(99)" = readr::col_character()
     )
   ) %>%
     dplyr::rename(
diff --git a/R/read_extract_outpatients.R b/R/read_extract_outpatients.R
index 44e02ca97..20b4880bf 100644
--- a/R/read_extract_outpatients.R
+++ b/R/read_extract_outpatients.R
@@ -8,42 +8,42 @@ read_extract_outpatients <- function(
     file_path = get_boxi_extract_path(year = year, type = "Outpatient")) {
   # Read BOXI extract
   extract_outpatients <- read_file(file_path,
-    col_type = cols(
-      "Clinic Date Fin Year" = col_double(),
-      "Clinic Date (00)" = col_date(format = "%Y/%m/%d %T"),
-      "Episode Record Key (SMR00) [C]" = col_character(),
-      "Pat UPI" = col_character(),
-      "Pat Gender Code" = col_double(),
-      "Pat Date Of Birth [C]" = col_date(format = "%Y/%m/%d %T"),
-      "Practice Location Code" = col_character(),
-      "Practice NHS Board Code - current" = col_character(),
-      "Geo Postcode [C]" = col_character(),
-      "NHS Board of Residence Code - current" = col_character(),
-      "Geo Council Area Code" = col_character(),
-      "Treatment Location Code" = col_character(),
-      "Treatment NHS Board Code - current" = col_character(),
-      "Operation 1A Code (4 char)" = col_character(),
-      "Operation 1B Code (4 char)" = col_character(),
-      "Date of Main Operation(00)" = col_date(format = "%Y/%m/%d %T"),
-      "Operation 2A Code (4 char)" = col_character(),
-      "Operation 2B Code (4 char)" = col_character(),
-      "Date of Operation 2 (00)" = col_date(format = "%Y/%m/%d %T"),
-      "Specialty Classificat. 1/4/97 Code" = col_character(),
-      "Significant Facility Code" = col_character(),
-      "Consultant/HCP Code" = col_character(),
-      "Patient Category Code" = col_character(),
-      "Referral Source Code" = col_character(),
-      "Referral Type Code" = col_double(),
-      "Clinic Type Code" = col_double(),
-      "Clinic Attendance (Status) Code" = col_double(),
-      "Age at Midpoint of Financial Year" = col_double(),
-      "Alcohol Related Admission" = col_character(),
-      "Substance Misuse Related Admission" = col_character(),
-      "Falls Related Admission" = col_character(),
-      "Self Harm Related Admission" = col_character(),
-      "NHS Hospital Flag" = col_character(),
-      "Community Hospital Flag" = col_character(),
-      "Total Net Costs" = col_double()
+    col_types = readr::cols(
+      "Clinic Date Fin Year" = readr::col_double(),
+      "Clinic Date (00)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Episode Record Key (SMR00) [C]" = readr::col_character(),
+      "Pat UPI" = readr::col_character(),
+      "Pat Gender Code" = readr::col_double(),
+      "Pat Date Of Birth [C]" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Practice Location Code" = readr::col_character(),
+      "Practice NHS Board Code - current" = readr::col_character(),
+      "Geo Postcode [C]" = readr::col_character(),
+      "NHS Board of Residence Code - current" = readr::col_character(),
+      "Geo Council Area Code" = readr::col_character(),
+      "Treatment Location Code" = readr::col_character(),
+      "Treatment NHS Board Code - current" = readr::col_character(),
+      "Operation 1A Code (4 char)" = readr::col_character(),
+      "Operation 1B Code (4 char)" = readr::col_character(),
+      "Date of Main Operation(00)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Operation 2A Code (4 char)" = readr::col_character(),
+      "Operation 2B Code (4 char)" = readr::col_character(),
+      "Date of Operation 2 (00)" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Specialty Classificat. 1/4/97 Code" = readr::col_character(),
+      "Significant Facility Code" = readr::col_character(),
+      "Consultant/HCP Code" = readr::col_character(),
+      "Patient Category Code" = readr::col_character(),
+      "Referral Source Code" = readr::col_character(),
+      "Referral Type Code" = readr::col_double(),
+      "Clinic Type Code" = readr::col_double(),
+      "Clinic Attendance (Status) Code" = readr::col_double(),
+      "Age at Midpoint of Financial Year" = readr::col_double(),
+      "Alcohol Related Admission" = readr::col_character(),
+      "Substance Misuse Related Admission" = readr::col_character(),
+      "Falls Related Admission" = readr::col_character(),
+      "Self Harm Related Admission" = readr::col_character(),
+      "NHS Hospital Flag" = readr::col_character(),
+      "Community Hospital Flag" = readr::col_character(),
+      "Total Net Costs" = readr::col_double()
     )
   ) %>%
     # Rename variables
diff --git a/R/read_extract_prescribing.R b/R/read_extract_prescribing.R
index 4f834a44e..683484473 100644
--- a/R/read_extract_prescribing.R
+++ b/R/read_extract_prescribing.R
@@ -5,14 +5,14 @@
 #' @export
 read_extract_prescribing <- function(year, file_path = get_it_prescribing_path(year)) {
   pis_file <- read_file(file_path,
-    col_type = cols_only(
-      "Pat UPI [C]" = col_character(),
-      "Pat DoB [C]" = col_date(format = "%d-%m-%Y"),
-      "Pat Gender" = col_double(),
-      "Pat Postcode [C]" = col_character(),
-      "Practice Code" = col_character(),
-      "Number of Paid Items" = col_double(),
-      "PD Paid GIC excl. BB" = col_double()
+    col_types = readr::cols_only(
+      "Pat UPI [C]" = readr::col_character(),
+      "Pat DoB [C]" = readr::col_date(format = "%d-%m-%Y"),
+      "Pat Gender" = readr::col_double(),
+      "Pat Postcode [C]" = readr::col_character(),
+      "Practice Code" = readr::col_character(),
+      "Number of Paid Items" = readr::col_double(),
+      "PD Paid GIC excl. BB" = readr::col_double()
     )
   ) %>%
     # Rename variables
diff --git a/R/read_it_chi_deaths.R b/R/read_it_chi_deaths.R
index 35f502c60..aab56c86d 100644
--- a/R/read_it_chi_deaths.R
+++ b/R/read_it_chi_deaths.R
@@ -8,10 +8,10 @@
 #' @family process extracts
 read_it_chi_deaths <- function(file_path = get_it_deaths_path()) {
   it_chi_deaths <- read_file(file_path,
-    col_type = cols(
-      "PATIENT_UPI [C]" = col_character(),
-      "PATIENT DoD DATE (NRS)" = col_date(format = "%d-%m-%Y"),
-      "PATIENT DoD DATE (CHI)" = col_date(format = "%d-%m-%Y")
+    col_types = readr::cols(
+      "PATIENT_UPI [C]" = readr::col_character(),
+      "PATIENT DoD DATE (NRS)" = readr::col_date(format = "%d-%m-%Y"),
+      "PATIENT DoD DATE (CHI)" = readr::col_date(format = "%d-%m-%Y")
     )
   ) %>%
     dplyr::rename(
diff --git a/R/read_lookup_ltc.R b/R/read_lookup_ltc.R
index 0a1ce5957..7eb83a434 100644
--- a/R/read_lookup_ltc.R
+++ b/R/read_lookup_ltc.R
@@ -9,28 +9,28 @@ read_lookup_ltc <- function(file_path = get_it_ltc_path()) {
   # Read data------------------------------------------------
   ltc_file <- read_file(
     file_path,
-    col_type = cols(
-      "PATIENT_UPI [C]" = col_character(),
-      "PATIENT_POSTCODE [C]" = col_character(),
-      "ARTHRITIS_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "ASTHMA_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "ATRIAL_FIB_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "CANCER_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "CEREBROVASC_DIS_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "CHRON_LIVER_DIS_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "COPD_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "DEMENTIA_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "DIABETES_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "EPILEPSY_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "HEART_DISEASE_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "HEART_FAILURE_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "MULT_SCLEROSIS_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "PARKINSONS_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "RENAL_FAILURE_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "CONGENITAL_PROB_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "BLOOD_AND_BFO_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "OTH_DIS_END_MET_DIAG_DATE" = col_date(format = "%d-%m-%Y"),
-      "OTH_DIS_DIG_SYS_DIAG_DATE" = col_date(format = "%d-%m-%Y")
+    col_type = readr::cols(
+      "PATIENT_UPI [C]" = readr::col_character(),
+      "PATIENT_POSTCODE [C]" = readr::col_character(),
+      "ARTHRITIS_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "ASTHMA_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "ATRIAL_FIB_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "CANCER_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "CEREBROVASC_DIS_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "CHRON_LIVER_DIS_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "COPD_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "DEMENTIA_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "DIABETES_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "EPILEPSY_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "HEART_DISEASE_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "HEART_FAILURE_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "MULT_SCLEROSIS_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "PARKINSONS_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "RENAL_FAILURE_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "CONGENITAL_PROB_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "BLOOD_AND_BFO_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "OTH_DIS_END_MET_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y"),
+      "OTH_DIS_DIG_SYS_DIAG_DATE" = readr::col_date(format = "%d-%m-%Y")
     )
   ) %>%
     # Rename variables

From e76176e995c6b7198d32e4922de3fba9784e6898 Mon Sep 17 00:00:00 2001
From: James McMahon <james.mcmahon@phs.scot>
Date: Tue, 26 Sep 2023 12:32:10 +0100
Subject: [PATCH 11/11] Handle OpenData extracts better (#794)

* Refactor the LA Code OpenData

This should now run as its own target and then be passed to the homelessness data.

I also added some tests.

* Also add some tests for the GP prac clusters OpenData

* Update documentation

---------

Co-authored-by: Moohan <Moohan@users.noreply.github.com>
Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com>
---
 NAMESPACE                                     |  2 +-
 ...lookup.R => get_la_code_opendata_lookup.R} |  5 ++---
 R/process_extract_homelessness.R              |  3 ++-
 _targets.R                                    | 15 +++++++++-----
 man/get_la_code_opendata_lookup.Rd            | 16 +++++++++++++++
 man/la_code_lookup.Rd                         | 20 -------------------
 man/process_extract_homelessness.Rd           |  1 +
 .../_snaps/get_la_code_opendata_lookup.md     | 20 +++++++++++++++++++
 tests/testthat/test-get_gpprac_opendata.R     | 18 +++++++++++++++++
 .../test-get_la_code_opendata_lookup.R        | 13 ++++++++++++
 10 files changed, 83 insertions(+), 30 deletions(-)
 rename R/{la_code_lookup.R => get_la_code_opendata_lookup.R} (84%)
 create mode 100644 man/get_la_code_opendata_lookup.Rd
 delete mode 100644 man/la_code_lookup.Rd
 create mode 100644 tests/testthat/_snaps/get_la_code_opendata_lookup.md
 create mode 100644 tests/testthat/test-get_gpprac_opendata.R
 create mode 100644 tests/testthat/test-get_la_code_opendata_lookup.R

diff --git a/NAMESPACE b/NAMESPACE
index 27447da7b..b5436d21e 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -42,6 +42,7 @@ export(get_homelessness_completeness_path)
 export(get_it_deaths_path)
 export(get_it_ltc_path)
 export(get_it_prescribing_path)
+export(get_la_code_opendata_lookup)
 export(get_locality_path)
 export(get_lookups_dir)
 export(get_ltcs_path)
@@ -69,7 +70,6 @@ export(get_year_dir)
 export(gzip_files)
 export(is_date_in_fyyear)
 export(is_missing)
-export(la_code_lookup)
 export(last_date_month)
 export(latest_cost_year)
 export(latest_update)
diff --git a/R/la_code_lookup.R b/R/get_la_code_opendata_lookup.R
similarity index 84%
rename from R/la_code_lookup.R
rename to R/get_la_code_opendata_lookup.R
index 09f0a9f1a..1b1e38e90 100644
--- a/R/la_code_lookup.R
+++ b/R/get_la_code_opendata_lookup.R
@@ -1,14 +1,13 @@
 #' Download the LA code lookup
 #'
-#' @inheritParams phsopendata::get_resource
-#'
 #' @description Download and process the Local Authority lookup from the Open
 #' Data platform
 #'
 #' @return a [tibble][tibble::tibble-package] with the Local Authority names
 #' and codes.
 #' @export
-la_code_lookup <- function(res_id = "967937c4-8d67-4f39-974f-fd58c4acfda5") {
+get_la_code_opendata_lookup <- function() {
+  res_id <- "967937c4-8d67-4f39-974f-fd58c4acfda5"
   la_code_lookup <- phsopendata::get_resource(
     res_id = res_id,
     col_select = c("CA", "CAName")
diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index f4fb7d3e5..c1afff837 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -20,6 +20,7 @@ process_extract_homelessness <- function(
     year,
     write_to_disk = TRUE,
     update = latest_update(),
+    la_code_lookup = get_la_code_opendata_lookup(),
     sg_pub_path = get_sg_homelessness_pub_path()) {
   # Only run for a single year
   stopifnot(length(year) == 1L)
@@ -100,7 +101,7 @@ process_extract_homelessness <- function(
       )
     ) %>%
     dplyr::left_join(
-      la_code_lookup(),
+      la_code_lookup,
       by = dplyr::join_by("sending_local_authority_code_9" == "CA")
     ) %>%
     # Filter out duplicates
diff --git a/_targets.R b/_targets.R
index a9fa80d7a..e358d9baa 100644
--- a/_targets.R
+++ b/_targets.R
@@ -34,6 +34,7 @@ list(
   ),
   ## Lookup data ##
   tar_target(gpprac_opendata, get_gpprac_opendata()),
+  tar_target(la_code_opendata, get_la_code_opendata_lookup()),
   tar_target(gpprac_ref_path, get_gpprac_ref_path(), format = "file"),
   tar_target(locality_path, get_locality_path(), format = "file"),
   tar_target(simd_path, get_simd_path(), format = "file"),
@@ -339,11 +340,15 @@ list(
         year
       )
     ),
-    tar_target(source_homelessness_extract, process_extract_homelessness(
-      homelessness_data,
-      year,
-      write_to_disk = write_to_disk
-    )),
+    tar_target(
+      source_homelessness_extract,
+      process_extract_homelessness(
+        data = homelessness_data,
+        year = year,
+        write_to_disk = write_to_disk,
+        la_code_lookup = la_code_opendata
+      )
+    ),
     tar_target(
       tests_source_homelessness_extract,
       process_tests_homelessness(
diff --git a/man/get_la_code_opendata_lookup.Rd b/man/get_la_code_opendata_lookup.Rd
new file mode 100644
index 000000000..dbf2fbb73
--- /dev/null
+++ b/man/get_la_code_opendata_lookup.Rd
@@ -0,0 +1,16 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_la_code_opendata_lookup.R
+\name{get_la_code_opendata_lookup}
+\alias{get_la_code_opendata_lookup}
+\title{Download the LA code lookup}
+\usage{
+get_la_code_opendata_lookup()
+}
+\value{
+a \link[tibble:tibble-package]{tibble} with the Local Authority names
+and codes.
+}
+\description{
+Download and process the Local Authority lookup from the Open
+Data platform
+}
diff --git a/man/la_code_lookup.Rd b/man/la_code_lookup.Rd
deleted file mode 100644
index 9dde038e0..000000000
--- a/man/la_code_lookup.Rd
+++ /dev/null
@@ -1,20 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/la_code_lookup.R
-\name{la_code_lookup}
-\alias{la_code_lookup}
-\title{Download the LA code lookup}
-\usage{
-la_code_lookup(res_id = "967937c4-8d67-4f39-974f-fd58c4acfda5")
-}
-\arguments{
-\item{res_id}{The resource ID as found on
-\href{https://www.opendata.nhs.scot/}{NHS Open Data platform}}
-}
-\value{
-a \link[tibble:tibble-package]{tibble} with the Local Authority names
-and codes.
-}
-\description{
-Download and process the Local Authority lookup from the Open
-Data platform
-}
diff --git a/man/process_extract_homelessness.Rd b/man/process_extract_homelessness.Rd
index 9b6eb9463..1f94d675e 100644
--- a/man/process_extract_homelessness.Rd
+++ b/man/process_extract_homelessness.Rd
@@ -9,6 +9,7 @@ process_extract_homelessness(
   year,
   write_to_disk = TRUE,
   update = latest_update(),
+  la_code_lookup = get_la_code_opendata_lookup(),
   sg_pub_path = get_sg_homelessness_pub_path()
 )
 }
diff --git a/tests/testthat/_snaps/get_la_code_opendata_lookup.md b/tests/testthat/_snaps/get_la_code_opendata_lookup.md
new file mode 100644
index 000000000..40365d570
--- /dev/null
+++ b/tests/testthat/_snaps/get_la_code_opendata_lookup.md
@@ -0,0 +1,20 @@
+# LA Code lookup is correct
+
+    Code
+      get_la_code_opendata_lookup()
+    Output
+      # A tibble: 36 x 3
+         CA        CAName                sending_local_authority_name
+         <chr>     <chr>                 <chr>                       
+       1 S12000005 Clackmannanshire      Clackmannanshire            
+       2 S12000006 Dumfries and Galloway Dumfries & Galloway         
+       3 S12000008 East Ayrshire         East Ayrshire               
+       4 S12000010 East Lothian          East Lothian                
+       5 S12000011 East Renfrewshire     East Renfrewshire           
+       6 S12000013 Na h-Eileanan Siar    Eilean Siar                 
+       7 S12000014 Falkirk               Falkirk                     
+       8 S12000015 Fife                  Fife                        
+       9 S12000017 Highland              Highland                    
+      10 S12000018 Inverclyde            Inverclyde                  
+      # i 26 more rows
+
diff --git a/tests/testthat/test-get_gpprac_opendata.R b/tests/testthat/test-get_gpprac_opendata.R
new file mode 100644
index 000000000..c70d753b4
--- /dev/null
+++ b/tests/testthat/test-get_gpprac_opendata.R
@@ -0,0 +1,18 @@
+skip_if_offline()
+
+test_that("GP prac cluster lookup is correct", {
+  # `expect_warning()` returns the captured condition under testthat 3e, so
+  # assign inside the call to keep the lookup itself for the checks below.
+  expect_warning(gp_cluster_lookup <- get_gpprac_opendata())
+
+  # The lookup should be a tibble with exactly these columns, in this order.
+  expect_s3_class(gp_cluster_lookup, "tbl_df")
+  expect_named(
+    gp_cluster_lookup,
+    c(
+      "gpprac", "practice_name",
+      "postcode", "cluster",
+      "partnership", "health_board"
+    )
+  )
+})
diff --git a/tests/testthat/test-get_la_code_opendata_lookup.R b/tests/testthat/test-get_la_code_opendata_lookup.R
new file mode 100644
index 000000000..f46c17c04
--- /dev/null
+++ b/tests/testthat/test-get_la_code_opendata_lookup.R
@@ -0,0 +1,13 @@
+skip_if_offline()
+
+test_that("LA Code lookup is correct", {
+  expected_names <- c("CA", "CAName", "sending_local_authority_name")
+  la_code_lookup <- get_la_code_opendata_lookup()
+
+  # The lookup must come back as a tibble with exactly these columns
+  expect_s3_class(la_code_lookup, "tbl_df")
+  expect_named(la_code_lookup, expected_names)
+
+  # Snapshot the call itself so the recorded code matches the stored snapshot
+  expect_snapshot(get_la_code_opendata_lookup())
+})