diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
index 7b21f93d1..1abb216ea 100644
--- a/.github/actions/spelling/expect.txt
+++ b/.github/actions/spelling/expect.txt
@@ -6,6 +6,7 @@ admtype
 ADPE
 adtf
 alstr
+anomymous
 arrivalmode
 arth
 atlassian
@@ -40,6 +41,7 @@ CNWs
 codecov
 Comhairle
 commhosp
+communicty
 congen
 copd
 costinc
@@ -53,6 +55,7 @@ customise
 cvd
 dataframe
 datamart
+datas
 datazone
 datediff
 dateformat
@@ -169,8 +172,8 @@ lubridate
 magrittr
 markdownguide
 Matern
-Mcbride
-mcmahon
+mcnicol
+megan
 microsoft
 MIU
 MMMYY
@@ -203,6 +206,7 @@ parkinsons
 patflow
 pattype
 PCEC
+pcs
 PERTH
 PHIBCS
 phs
@@ -215,6 +219,7 @@ PLICS
 popluation
 Posix
 postcodes
+Postcodesio
 PPAs
 prac
 praccode
@@ -222,6 +227,7 @@ ptypes
 purrr
 quickstart
 rankdir
+rbindlist
 rcmdcheck
 rdd
 rdname
@@ -232,13 +238,13 @@ readr
 readxl
 reasonwait
 recid
+recordlinkage
 refailure
 reflectoring
 refsource
 reftype
 relaint
 renviron
-returnsthe
 rlang
 rmarkdown
 Rnw
diff --git a/DESCRIPTION b/DESCRIPTION
index 3d731a0af..28a8303ff 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -3,12 +3,8 @@ Title: Create the Source Linkage Files
 Version: 0.0.0.9000
 Authors@R: c(
     person("Public Health Scotland", , , "phs.source@phs.scot", role = c("cre", "cph")),
+    person("Megan", "McNicol", , "megan.mcnicol2@phs.scot", role = "aut"),
     person("Jennifer", "Thom", , "jennifer.thom@phs.scot", role = "aut"),
-    person("James", "McMahon", , "james.mcmahon@phs.scot", role = "aut",
-           comment = c(ORCID = "0000-0002-5380-2029")),
-    person("Catherine", "Holland", , "catherine.holland@phs.scot", role = "aut",
-           comment = c(ORCID = "0000-0002-3259-5745")),
-    person("Bateman", "McBride", , "Bateman.Mcbride@phs.scot", role = "aut"),
     person("Zihao", "Li", , "zihao.li@phs.scot", role = "aut",
            comment = c(ORCID = "0000-0002-5178-2124"))
   )
@@ -25,25 +21,19 @@ Imports:
     data.table (>= 1.14.6),
     dbplyr (>= 2.3.1),
     dplyr (>= 1.1.1),
-    dtplyr (>= 1.3.0),
     fs (>= 1.6.1),
-    fst (>= 0.9.8),
     future (>= 1.33.0),
     future.callr (>= 0.8.1),
-    glue (>= 1.6.2),
-    haven (>= 2.5.2),
     hms (>= 1.1.0),
     janitor (>= 2.2.0),
     keyring (>= 1.3.0),
     lubridate (>= 1.9.2),
     magrittr (>= 2.0.3),
     odbc (>= 1.3.1),
-    openssl (>= 2.0.5),
     openxlsx (>= 4.2.5),
     phsmethods (>= 0.2.2),
     phsopendata (>= 0.0.1.0),
     purrr (>= 1.0.1),
-    qs (>= 0.25.5),
     R.utils (>= 2.12.2),
     readr (>= 2.1.0),
     rlang (>= 1.1.0),
@@ -73,5 +63,4 @@ Encoding: UTF-8
 Language: en-GB
 LazyData: true
 Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.3.1
-
+RoxygenNote: 7.3.2
diff --git a/NAMESPACE b/NAMESPACE
index 0475603bf..6f1c88841 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,6 +1,7 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
+export(add_deceased_flag)
 export(add_homelessness_date_flags)
 export(add_homelessness_flag)
 export(add_hri_variables)
@@ -25,9 +26,9 @@ export(end_fy_quarter)
 export(end_next_fy_quarter)
 export(find_latest_file)
 export(fy_interval)
-export(get_all_slf_deaths_lookup_path)
 export(get_boxi_extract_path)
 export(get_ch_costs_path)
+export(get_combined_slf_deaths_lookup_path)
 export(get_dd_path)
 export(get_dd_period)
 export(get_demographic_cohorts_path)
@@ -88,11 +89,11 @@ export(midpoint_fy)
 export(next_fy)
 export(phs_db_connection)
 export(previous_update)
+export(process_combined_deaths_lookup)
 export(process_costs_ch_rmd)
 export(process_costs_dn_rmd)
 export(process_costs_gp_ooh_rmd)
 export(process_costs_hc_rmd)
-export(process_deaths_lookup)
 export(process_extract_acute)
 export(process_extract_ae)
 export(process_extract_alarms_telecare)
@@ -115,6 +116,7 @@ export(process_lookup_ltc)
 export(process_lookup_postcode)
 export(process_lookup_sc_client)
 export(process_lookup_sc_demographics)
+export(process_refined_death)
 export(process_sc_all_alarms_telecare)
 export(process_sc_all_care_home)
 export(process_sc_all_home_care)
@@ -182,6 +184,7 @@ export(start_fy)
 export(start_fy_quarter)
 export(start_next_fy_quarter)
 export(write_file)
+export(years_to_run)
 importFrom(data.table,.N)
 importFrom(data.table,.SD)
 importFrom(magrittr,"%>%")
diff --git a/NEWS.md b/NEWS.md
index d5aea7364..5a07b8266 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,18 @@
-# June 2024 Update - Unreleased
+# September 2024 Update - Unreleased 
+* New 24/25 files created
+* New NSU cohort for 23/24 available
+* New SPARRA scores calculated from April 24/25
+* Death dates attached to activity after death flag 
+* Care home methodology updated
+* New cup marker for Acute and GP OOH
+* Bug fix:
+    * person id for SDS and client
+    * DD data: 
+      * The variable `cij_delay` is only attached to the `recid = DD` rows, not all the rows in the CIJ as we'd expect.
+      * The `cij_delay` variable is showing as 1 against the Delay records (NA otherwise) - We expect this to be `TRUE/FALSE` to match the other flags.
+      * The `cij_ppa` variable isn't attaching to the `recid = DD` rows.
+
+# June 2024 Update - released 06-Jun-24
 * Update of 2017/18 onwards to include bug fixes within the files.
 * Removal of extra variable caused by the LTCs not matching properly.
 * New NRS mid-2022 population estimates.
diff --git a/R/00-update_refs.R b/R/00-update_refs.R
index 6106f17cf..33022edf6 100644
--- a/R/00-update_refs.R
+++ b/R/00-update_refs.R
@@ -7,7 +7,7 @@
 #'
 #' @family initialisation
 latest_update <- function() {
-  "Jun_2024"
+  "Sep_2024"
 }
 
 #' Previous update
@@ -61,18 +61,24 @@ previous_update <- function(months_ago = 3L, override = NULL) {
 #'
 #' @family initialisation
 get_dd_period <- function() {
-  "Jul16_Mar24"
+  "Jul16_Jun24"
 }
 
-#' The latest financial year for Cost uplift setting
+#' The year list for slf to update
 #'
-#' @description Get the latest year for cost uplift
+#' @description Get the vector of years to update slf
 #'
-#' @return The financial year format
+#' @return The vector of financial years
 #'
 #' @export
 #'
 #' @family initialisation
-latest_cost_year <- function() {
-  "2223"
+years_to_run <- function() {
+  fy_start_2digit <- 17
+  fy_end_2digit <- 24
+  years_to_run <- paste0(
+    fy_start_2digit:fy_end_2digit,
+    (fy_start_2digit + 1):(fy_end_2digit + 1)
+  )
+  return(years_to_run)
 }
diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
index 0842b47ec..5e800c80b 100644
--- a/R/add_activity_after_death_flag.R
+++ b/R/add_activity_after_death_flag.R
@@ -11,8 +11,10 @@
 add_activity_after_death_flag <- function(
     data,
     year,
-    deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>%
+    deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>%
       slfhelper::get_chi()) {
+  cli::cli_alert_info("Add activity after death flag function started at {Sys.time()}")
+
   # to skip warnings no visible binding for global variable ‘.’
   . <- NULL
 
@@ -24,7 +26,7 @@ add_activity_after_death_flag <- function(
       by = "chi",
       suffix = c("", "_boxi")
     ) %>%
-    dplyr::filter(.data$deceased == TRUE | .data$deceased_boxi == TRUE) %>%
+    dplyr::filter(.data$deceased == TRUE) %>%
     dplyr::distinct()
 
 
@@ -70,31 +72,28 @@ add_activity_after_death_flag <- function(
     ))
 
 
-  # Check and print error message for records which already are TRUE for the deceased variable in the episode file, but this doesn't match the
-  # BOXI deceased variable
-  check_deceased_match <- flag_data %>%
-    dplyr::filter(.data$deceased != .data$deceased_boxi)
-
-  if (nrow(check_deceased_match) != 0) {
-    warning("There were records in the episode file which have a deceased variable which does not match the BOXI NRS deceased variable")
-  }
-
-
   # Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried
   # over from previous financial years
   flag_data <- flag_data %>%
     dplyr::filter(.data$activity_after_death == 1) %>%
     # Remove temporary flag variables used to create activity after death flag and fill in missing death_date
-    dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$activity_after_death) %>%
+    dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$activity_after_death, .data$death_date_boxi) %>%
     dplyr::distinct()
 
   # Match activity after death flag back to episode file
   final_data <- data %>%
     dplyr::left_join(
       flag_data,
+      # TODO: this join_by is not 100% accurate. Consider use ep_file_row_id to join
       by = c("year", "chi", "record_keydate1", "record_keydate2"),
       na_matches = "never"
-    )
+    ) %>%
+    dplyr::mutate(death_date = lubridate::as_date(ifelse(is.na(death_date) & !(is.na(death_date_boxi)),
+      death_date_boxi, death_date
+    ))) %>%
+    dplyr::select(-death_date_boxi) %>%
+    dplyr::distinct()
+
 
 
   return(final_data)
@@ -118,19 +117,19 @@ add_activity_after_death_flag <- function(
 #'
 #'
 # Read data------------------------------------------------
-process_deaths_lookup <- function(update = latest_update(),
-                                  write_to_disk = TRUE, ...) {
-  all_boxi_deaths <- read_file(get_slf_deaths_lookup_path("1415")) %>%
-    rbind(read_file(get_slf_deaths_lookup_path("1516"))) %>%
-    rbind(read_file(get_slf_deaths_lookup_path("1617"))) %>%
-    rbind(read_file(get_slf_deaths_lookup_path("1718"))) %>%
-    rbind(read_file(get_slf_deaths_lookup_path("1819"))) %>%
-    rbind(read_file(get_slf_deaths_lookup_path("1920"))) %>%
-    rbind(read_file(get_slf_deaths_lookup_path("2021"))) %>%
-    rbind(read_file(get_slf_deaths_lookup_path("2122"))) %>%
-    rbind(read_file(get_slf_deaths_lookup_path("2223"))) %>%
-    rbind(read_file(get_slf_deaths_lookup_path("2324"))) %>%
-    # TODO: make this automated to pick up files starting with name "get_slf_deaths_lookup_path"
+
+process_combined_deaths_lookup <- function(update = latest_update(),
+                                           write_to_disk = TRUE, ...) {
+  dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths"
+  file_names <- list.files(dir_folder,
+    pattern = "^anon-slf_deaths_lookup_.*parquet",
+    full.names = TRUE
+  )
+
+  # read all year specific deaths lookups and bind them together
+  all_boxi_deaths <- lapply(file_names, arrow::read_parquet) %>%
+    data.table::rbindlist() %>%
+    # convert to chi for processing
     slfhelper::get_chi() %>%
     # Remove rows with missing or blank CHI number - could also use na.omit?
     # na.omit(all_boxi_deaths)
@@ -177,9 +176,7 @@ process_deaths_lookup <- function(update = latest_update(),
   if (write_to_disk) {
     write_file(
       all_boxi_deaths,
-      fs::path(get_slf_dir(), "Deaths",
-        file_name = stringr::str_glue("anon-all_slf_deaths_lookup_{update}.parquet")
-      )
+      get_combined_slf_deaths_lookup_path()
     )
   }
 
diff --git a/R/add_deceased_flag.R b/R/add_deceased_flag.R
new file mode 100644
index 000000000..f3be216cf
--- /dev/null
+++ b/R/add_deceased_flag.R
@@ -0,0 +1,40 @@
+#' Create the SLF Deaths lookup
+#'
+#' @description Currently this just uses the NRS death dates 'as is', with no
+#' corrections or modifications, it is expected that this will be expanded to
+#' use the CHI deaths extract from IT as well as taking into account data in
+#' the episode file to assess the validity of a death date.
+#'
+#' @param year The year to process, in FY format.
+#' @param nrs_deaths_data NRS deaths data.
+#' @param chi_deaths_data IT CHI deaths data.
+#' @param write_to_disk (optional) Should the data be written to disk default is
+#' `TRUE` i.e. write the data to disk.
+#'
+#' @return a [tibble][tibble::tibble-package] containing the episode file
+#' @export
+add_deceased_flag <- function(
+    year,
+    refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(),
+    write_to_disk = TRUE) {
+  # create slf deaths lookup
+
+  dplyr::mutate(
+    death_date = dplyr::if_else(is.na(.data$record_keydate1),
+      .data$death_date_chi, .data$record_keydate1
+    ),
+    deceased = TRUE,
+    .keep = "unused"
+  ) %>%
+    # save anon chi on disk
+    slfhelper::get_anon_chi()
+
+  if (write_to_disk) {
+    write_file(
+      slf_deaths_lookup,
+      get_slf_deaths_lookup_path(year, check_mode = "write")
+    )
+  }
+
+  return(slf_deaths_lookup)
+}
diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R
index 15d5d4e01..bf6216e57 100644
--- a/R/add_nsu_cohort.R
+++ b/R/add_nsu_cohort.R
@@ -13,6 +13,8 @@ add_nsu_cohort <- function(
     data,
     year,
     nsu_cohort = read_file(get_nsu_path(year)) %>% slfhelper::get_chi()) {
+  cli::cli_alert_info("Add NSU cohort function started at {Sys.time()}")
+
   year_param <- year
 
   if (!check_year_valid(year, "nsu")) {
diff --git a/R/add_ppa_flag.R b/R/add_ppa_flag.R
index bb99f0543..1d5f9739d 100644
--- a/R/add_ppa_flag.R
+++ b/R/add_ppa_flag.R
@@ -8,6 +8,8 @@
 #' @return A data frame to use as a lookup of PPAs
 #' @family episode_file
 add_ppa_flag <- function(data) {
+  cli::cli_alert_info("Add PPA flag function started at {Sys.time()}")
+
   check_variables_exist(
     data,
     variables = c(
diff --git a/R/check_year_valid.R b/R/check_year_valid.R
index 2197d8c0e..da257ff4c 100644
--- a/R/check_year_valid.R
+++ b/R/check_year_valid.R
@@ -17,6 +17,7 @@ check_year_valid <- function(
       "ch",
       "client",
       "cmh",
+      "cost_dna",
       "dd",
       "deaths",
       "dn",
@@ -34,9 +35,9 @@ check_year_valid <- function(
     )) {
   if (year <= "1415" && type %in% c("dn", "sparra")) {
     return(FALSE)
-  } else if (year <= "1516" && type %in% c("cmh", "homelessness")) {
+  } else if (year <= "1516" && type %in% c("cmh", "homelessness", "dd")) {
     return(FALSE)
-  } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at")) {
+  } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at", "client", "cost_dna")) {
     return(FALSE)
   } else if (year <= "1718" && type %in% "hhg") {
     return(FALSE)
@@ -46,7 +47,7 @@ check_year_valid <- function(
     return(FALSE)
   } else if (year >= "2425" && type %in% "sparra") {
     return(FALSE)
-  } else if (year >= "2425" && type %in% c("ch", "hc", "sds", "at")) {
+  } else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at")) {
     return(FALSE)
   }
 
diff --git a/R/correct_demographics.R b/R/correct_demographics.R
index d7ef6f469..d221c25ab 100644
--- a/R/correct_demographics.R
+++ b/R/correct_demographics.R
@@ -7,6 +7,8 @@
 #'
 #' @return episode files with updated date of birth and ages
 correct_demographics <- function(data, year) {
+  cli::cli_alert_info("Correct demographics function started at {Sys.time()}")
+
   # keep episodes with missing chi
   data_no_chi <- data %>%
     dplyr::filter(is_missing(.data$chi))
diff --git a/R/cost_uplift.R b/R/cost_uplift.R
index e554c2505..abbbd9b5a 100644
--- a/R/cost_uplift.R
+++ b/R/cost_uplift.R
@@ -5,6 +5,8 @@
 #' @return episode data with uplifted costs
 #' @family episode_file
 apply_cost_uplift <- function(data) {
+  cli::cli_alert_info("Apply cost uplift function started at {Sys.time()}")
+
   data <- data %>%
     # attach a uplift scale as the last column
     lookup_uplift() %>%
@@ -84,3 +86,21 @@ lookup_uplift <- function(data) {
 
   return(data)
 }
+
+#' The latest financial year for Cost uplift setting
+#'
+#' @description Get the latest year for cost uplift
+#' latest_cost_year() is hard coded in cost_uplift().
+#' 2223 is not changed automatically with time passes.
+#' It is changed only when we get a new instruction from somewhere about cost uplift.
+#' Do not change unless specific instructions.
+#' Changing this means that we need to change cost_uplift().
+#'
+#' @return The financial year format
+#'
+#' @export
+#'
+#' @family initialisation
+latest_cost_year <- function() {
+  "2223"
+}
diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index 0bb804c7d..ecb6fc126 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -141,7 +141,7 @@ create_episode_file <- function(
       slf_deaths_lookup
     ) %>%
     add_activity_after_death_flag(year,
-      deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>%
+      deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>%
         slfhelper::get_chi()
     ) %>%
     load_ep_file_vars(year)
@@ -175,7 +175,73 @@ create_episode_file <- function(
         sc_social_worker = NA,
         sc_type_of_housing = NA,
         sc_meals = NA,
-        sc_day_care = NA
+        sc_day_care = NA,
+        social_care_id = NA,
+        sc_dementia = NA,
+        sc_learning_disability = NA,
+        sc_mental_health_disorders = NA,
+        sc_physical_and_sensory_disability = NA,
+        sc_drugs = NA,
+        sc_alcohol = NA,
+        sc_palliative_care = NA,
+        sc_carer = NA,
+        sc_elderly_frail = NA,
+        sc_neurological_condition = NA,
+        sc_autism = NA,
+        sc_other_vulnerable_groups = NA,
+        ch_provider_description = NA
+      )
+  }
+
+  if (!check_year_valid(year, type = "homelessness")) {
+    episode_file <- episode_file %>%
+      dplyr::mutate(
+        hl1_12_months_post_app = NA,
+        hl1_12_months_pre_app = NA,
+        hl1_6after_ep = NA,
+        hl1_6before_ep = NA,
+        hl1_application_ref = NA,
+        hl1_completeness = NA,
+        hl1_during_ep = NA,
+        hl1_in_fy = NA,
+        hl1_property_type = NA,
+        hl1_reason_ftm = NA,
+        hl1_sending_lca = NA
+      )
+  }
+
+  if (!check_year_valid(year, type = "dd")) {
+    episode_file <- episode_file %>%
+      dplyr::mutate(
+        cij_delay = NA,
+        dd_quality = NA,
+        dd_responsible_lca = NA,
+        delay_end_reason = NA,
+        primary_delay_reason = NA,
+        secondary_delay_reason = NA,
+      )
+  }
+
+  if (!check_year_valid(year, type = "dn")) {
+    episode_file <- episode_file %>%
+      dplyr::mutate(
+        ccm = NA,
+        total_no_dn_contacts = NA
+      )
+  }
+
+  if (!check_year_valid(year, type = "cost_dna")) {
+    episode_file <- episode_file %>%
+      dplyr::mutate(
+        cost_total_net_inc_dnas = NA
+      )
+  }
+
+  if (!check_year_valid(year, type = "dn")) {
+    episode_file <- episode_file %>%
+      dplyr::mutate(
+        ccm = NA,
+        total_no_dn_contacts = NA
       )
   }
 
@@ -199,6 +265,8 @@ create_episode_file <- function(
 #'
 #' @return `data` with only the `vars_to_keep` kept
 store_ep_file_vars <- function(data, year, vars_to_keep) {
+  cli::cli_alert_info("Store episode file variables function started at {Sys.time()}")
+
   tempfile_path <- get_file_path(
     directory = get_year_dir(year),
     file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"),
@@ -236,6 +304,8 @@ store_ep_file_vars <- function(data, year, vars_to_keep) {
 #'
 #' @return The full SLF data.
 load_ep_file_vars <- function(data, year) {
+  cli::cli_alert_info("Load episode file variable function started at {Sys.time()}")
+
   tempfile_path <- get_file_path(
     directory = get_year_dir(year),
     file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"),
@@ -263,6 +333,8 @@ load_ep_file_vars <- function(data, year) {
 #'
 #' @return A data frame with CIJ markers filled in for those missing.
 fill_missing_cij_markers <- function(data) {
+  cli::cli_alert_info("Fill missing cij markers function started at {Sys.time()}")
+
   fixable_data <- data %>%
     dplyr::filter(
       .data[["recid"]] %in% c("01B", "04B", "GLS", "02B", "DD") & !is.na(.data[["chi"]])
@@ -317,6 +389,8 @@ fill_missing_cij_markers <- function(data) {
 #'
 #' @return The data with CIJ variables corrected.
 correct_cij_vars <- function(data) {
+  cli::cli_alert_info("Correct cij variables function started at {Sys.time()}")
+
   check_variables_exist(
     data,
     c("chi", "recid", "cij_admtype", "cij_pattype_code")
@@ -358,6 +432,8 @@ correct_cij_vars <- function(data) {
 #'
 #' @return The data with cost including dna.
 create_cost_inc_dna <- function(data) {
+  cli::cli_alert_info("Create cost inc dna function started at {Sys.time()}")
+
   check_variables_exist(data, c("cost_total_net", "attendance_status"))
 
   # Create cost including DNAs and modify costs
@@ -382,6 +458,8 @@ create_cost_inc_dna <- function(data) {
 #'
 #' @return The data unchanged (the cohorts are written to disk)
 create_cohort_lookups <- function(data, year, update = latest_update()) {
+  cli::cli_alert_info("Create cohort lookups function started at {Sys.time()}")
+
   create_demographic_cohorts(
     data,
     year,
@@ -421,6 +499,8 @@ join_cohort_lookups <- function(
       col_select = c("anon_chi", "service_use_cohort")
     ) %>%
       slfhelper::get_chi()) {
+  cli::cli_alert_info("Join cohort lookups function started at {Sys.time()}")
+
   join_cohort_lookups <- data %>%
     dplyr::left_join(
       demographic_cohort,
@@ -447,6 +527,13 @@ join_sc_client <- function(data,
                            year,
                            sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(),
                            file_type = c("episode", "individual")) {
+  cli::cli_alert_info("Join social care client function started at {Sys.time()}")
+
+  if (!check_year_valid(year, type = "client")) {
+    data_file <- data
+    return(data_file)
+  }
+
   if (file_type == "episode") {
     # Match on client variables by chi
     data_file <- data %>%
diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index c98531310..273761efc 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -78,7 +78,7 @@ create_individual_file <- function(
 
   if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
     individual_file <- individual_file %>%
-      aggregate_by_chi(exclude_sc_var = TRUE)
+      aggregate_by_chi(year = year, exclude_sc_var = TRUE)
   } else {
     individual_file <- individual_file %>%
       aggregate_ch_episodes() %>%
@@ -115,6 +115,9 @@ create_individual_file <- function(
         hc_personal_hours = NA,
         hc_non_personal_hours = NA,
         hc_reablement_hours = NA,
+        hc_non_personal_hours_cost = NA,
+        hc_personal_hours_cost = NA,
+        hc_reablement_hours_cost = NA,
         at_alarms = NA,
         at_telecare = NA,
         sds_option_1 = NA,
@@ -125,10 +128,33 @@ create_individual_file <- function(
         sc_support_from_unpaid_carer = NA,
         sc_social_worker = NA,
         sc_meals = NA,
-        sc_day_care = NA
+        sc_day_care = NA,
+        sc_type_of_housing = NA,
+        count_not_known = NA,
+        sc_latest_submission = NA,
+        social_care_id = NA,
+        person_id = NA,
+        sc_alcohol = NA,
+        sc_autism = NA,
+        sc_carer = NA,
+        sc_dementia = NA,
+        sc_drugs = NA,
+        sc_elderly_frail = NA,
+        sc_learning_disability = NA,
+        sc_mental_health_disorders = NA,
+        sc_neurological_condition = NA,
+        sc_other_vulnerable_groups = NA,
+        sc_palliative_care = NA,
+        sc_physical_and_sensory_disability = NA
       )
   }
 
+  if (!check_year_valid(year, type = "homelessness")) {
+    individual_file <- individual_file %>%
+      dplyr::mutate(hl1_in_fy = NA)
+  }
+
+
   if (anon_chi_out) {
     individual_file <- individual_file %>%
       tidyr::replace_na(list(chi = "")) %>%
@@ -260,6 +286,8 @@ add_all_columns <- function(episode_file, year) {
 #' @param condition Condition to create new columns based on
 #' @family individual_file
 add_acute_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add acute columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>%
@@ -271,6 +299,8 @@ add_acute_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_mat_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add maternity columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>%
@@ -282,6 +312,8 @@ add_mat_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_mh_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add mental health columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>%
@@ -293,6 +325,8 @@ add_mh_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_gls_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add geriatric long stay columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>%
@@ -304,6 +338,8 @@ add_gls_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_op_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add outpatient columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file <- episode_file %>%
     add_standard_cols(prefix, condition)
@@ -327,6 +363,8 @@ add_op_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_ae_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add A&E columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition, cost = TRUE) %>%
@@ -338,6 +376,8 @@ add_ae_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_pis_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add prescribing columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition, cost = TRUE) %>%
@@ -349,6 +389,8 @@ add_pis_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_ooh_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add out of hours columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file <- episode_file %>%
     add_standard_cols(prefix, condition, cost = TRUE) %>%
@@ -384,6 +426,8 @@ add_ooh_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_dn_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add district nursing columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   if ("total_no_dn_contacts" %in% names(episode_file)) {
     episode_file %>%
@@ -407,6 +451,8 @@ add_dn_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_cmh_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add communicty mental health columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition) %>%
@@ -418,6 +464,8 @@ add_cmh_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_dd_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add delayed discharges columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   condition_delay <- substitute(condition & primary_delay_reason != "9")
   episode_file <- episode_file %>%
@@ -439,6 +487,8 @@ add_dd_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_nsu_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add non service users columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition) %>%
@@ -450,6 +500,8 @@ add_nsu_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_nrs_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add nrs columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition) %>%
@@ -461,6 +513,8 @@ add_nrs_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_hl1_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add homelessness columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition)
@@ -471,6 +525,8 @@ add_hl1_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_ch_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add care home columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition) %>%
@@ -501,6 +557,8 @@ add_ch_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_hc_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add home care columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
 
   episode_file <- episode_file %>%
@@ -545,6 +603,8 @@ add_hc_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_at_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add alarms telecare columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition) %>%
@@ -559,6 +619,8 @@ add_at_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_sds_columns <- function(episode_file, prefix, condition) {
+  cli::cli_alert_info("Add SDS columns function started at {Sys.time()}")
+
   condition <- substitute(condition)
   episode_file %>%
     add_standard_cols(prefix, condition) %>%
@@ -581,6 +643,8 @@ add_sds_columns <- function(episode_file, prefix, condition) {
 #' cij_pattype (lgl)
 #' @family individual_file
 add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) {
+  cli::cli_alert_info("Add ipdc columns function started at {Sys.time()}")
+
   condition_i <- substitute(eval(condition) & ipdc == "I")
   episode_file <- episode_file %>%
     dplyr::mutate(
@@ -625,6 +689,8 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi
 #' @param cost Whether to create prefix_cost col, e.g. "Acute_cost"
 #' @family individual_file
 add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE, cost = FALSE) {
+  cli::cli_alert_info("Add standard columns function started at {Sys.time()}")
+
   if (episode) {
     episode_file <- dplyr::mutate(episode_file, "{prefix}_episodes" := dplyr::if_else(eval(condition), 1L, NA_integer_))
   }
@@ -703,6 +769,8 @@ recode_gender <- function(episode_file) {
 #' "dementia" and "dementia_date"
 #' @family individual_file
 condition_cols <- function() {
+  cli::cli_alert_info("Return condition columns function started at {Sys.time()}")
+
   conditions <- slfhelper::ltc_vars
   date_cols <- paste0(conditions, "_date")
   all_cols <- c(conditions, date_cols)
@@ -759,6 +827,8 @@ clean_individual_file <- function(individual_file, year) {
 #'
 #' @inheritParams clean_individual_file
 clean_up_gender <- function(individual_file) {
+  cli::cli_alert_info("Clean up gender column function started at {Sys.time()}")
+
   individual_file %>%
     dplyr::mutate(
       gender = dplyr::case_when(
@@ -785,6 +855,8 @@ join_slf_lookup_vars <- function(individual_file,
                                    col_select = c("gpprac", "cluster", "hbpraccode")
                                  ),
                                  hbrescode_var = "hb2018") {
+  cli::cli_alert_info("Join slf lookup variables function started at {Sys.time()}")
+
   individual_file <- individual_file %>%
     dplyr::left_join(
       slf_postcode_lookup,
diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R
index cd8d18677..b6aa85bb7 100644
--- a/R/fill_ch_names.R
+++ b/R/fill_ch_names.R
@@ -213,7 +213,7 @@ fill_ch_names <- function(ch_data,
       "match_mean2",
       # "open_interval",
       "ch_admission_date",
-      "qtr_start",
+      "period_start_date",
       "ch_date_registered",
       "latest_close_date",
       "ch_active",
@@ -305,7 +305,6 @@ fill_ch_names <- function(ch_data,
     "unique_identifier",
     "matching_quality_indicator",
     "sending_location",
-    "latest_sc_id",
     "chi",
     "ch_name",
     "ch_postcode",
@@ -320,9 +319,6 @@ fill_ch_names <- function(ch_data,
     "ch_admission_date",
     "ch_discharge_date",
     "age",
-    "record_date",
-    "qtr_start",
-    "latest_flag",
     "gender",
     "dob",
     "postcode",
@@ -763,7 +759,6 @@ fill_ch_names <- function(ch_data,
   ## produce output ----
   col_output <- c(
     "sending_location",
-    "latest_sc_id",
     "chi",
     "ch_name",
     "ch_postcode",
@@ -778,9 +773,6 @@ fill_ch_names <- function(ch_data,
     "ch_admission_date",
     "ch_discharge_date",
     "age",
-    "record_date",
-    "qtr_start",
-    "latest_flag",
     "gender",
     "dob",
     "postcode",
diff --git a/R/fill_geographies.R b/R/fill_geographies.R
index c9aee6355..5638c8758 100644
--- a/R/fill_geographies.R
+++ b/R/fill_geographies.R
@@ -16,6 +16,8 @@ fill_geographies <- function(
       get_slf_gpprac_path(),
       col_select = c("gpprac", "cluster", "hbpraccode")
     )) {
+  cli::cli_alert_info("Fill geographies function started at {Sys.time()}")
+
   check_variables_exist(data, c(
     "chi",
     "postcode",
diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R
index 2752b634a..9c21cabe9 100644
--- a/R/get_boxi_extract_path.R
+++ b/R/get_boxi_extract_path.R
@@ -16,12 +16,14 @@ get_boxi_extract_path <- function(
       "ae",
       "ae_cup",
       "acute",
+      "acute_cup",
       "cmh",
       "deaths",
       "dn",
       "gp_ooh-c",
       "gp_ooh-d",
       "gp_ooh-o",
+      "gp_ooh_cup",
       "homelessness",
       "maternity",
       "mh",
@@ -44,11 +46,13 @@ get_boxi_extract_path <- function(
     "ae" ~ "anon-A&E-episode-level-extract",
     "ae_cup" ~ "anon-A&E-UCD-CUP-extract",
     "acute" ~ "anon-Acute-episode-level-extract",
+    "acute_cup" ~ "anon-Acute-CUP-extract",
     "cmh" ~ "anon-Community-MH-contact-level-extract",
     "dn" ~ "anon-District-Nursing-contact-level-extract",
     "gp_ooh-c" ~ "anon-GP-OoH-consultations-extract",
     "gp_ooh-d" ~ "anon-GP-OoH-diagnosis-extract",
     "gp_ooh-o" ~ "anon-GP-OoH-outcomes-extract",
+    "gp_ooh_cup" ~ "anon-GP-OoH-CUP-extract",
     "homelessness" ~ "anon-Homelessness-extract",
     "maternity" ~ "anon-Maternity-episode-level-extract",
     "mh" ~ "anon-Mental-Health-episode-level-extract",
@@ -82,9 +86,11 @@ get_boxi_extract_path <- function(
 #'
 #' @return an [fs::path()] to a dummy file which can be used with targets.
 get_dummy_boxi_extract_path <- function() {
-  get_file_path(
+  dummy_path <- get_file_path(
     directory = get_dev_dir(),
     file_name = ".dummy",
     create = TRUE
   )
+
+  return(dummy_path)
 }
diff --git a/R/get_sc_lookup_paths.R b/R/get_sc_lookup_paths.R
index d201f416f..90a08e7e1 100644
--- a/R/get_sc_lookup_paths.R
+++ b/R/get_sc_lookup_paths.R
@@ -38,11 +38,18 @@ get_sc_demog_lookup_path <- function(update = latest_update(), ...) {
 #' @family social care lookup file paths
 #' @seealso [get_file_path()] for the generic function.
 get_sc_client_lookup_path <- function(year, update = latest_update(), ...) {
-  sc_client_lookup_path <- get_file_path(
-    directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_client_lookup"),
-    file_name = stringr::str_glue("anon-sc_client_lookup_{year}_{update}.parquet"),
-    ...
-  )
-
-  return(sc_client_lookup_path)
+  if (!check_year_valid(year, type = "client")) {
+    return(get_dummy_boxi_extract_path())
+  } else {
+    sc_client_lookup_path <- get_file_path(
+      directory = fs::path(
+        get_slf_dir(),
+        "Social_care",
+        "processed_sc_client_lookup"
+      ),
+      file_name = stringr::str_glue("anon-sc_client_lookup_{year}_{update}.parquet"),
+      ...
+    )
+    return(sc_client_lookup_path)
+  }
 }
diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R
index 390a27a5a..e06627e54 100644
--- a/R/get_slf_lookup_paths.R
+++ b/R/get_slf_lookup_paths.R
@@ -73,6 +73,10 @@ get_slf_deaths_lookup_path <- function(year, ...) {
 #' SLF death dates File Path
 #'
 #' @description Get the full path to the BOXI NRS Deaths lookup file for all financial years
+#' Note this name is very similar to the existing slf_deaths_lookup_path
+#' which returns the path for the refined_death with deceased flag for each financial year.
+#' This function will return the combined financial years lookup
+#' i.e. all years put together.
 #'
 #' @param ... additional arguments passed to [get_file_path()]
 #' @param update the update month (defaults to use [latest_update()])
@@ -80,17 +84,17 @@ get_slf_deaths_lookup_path <- function(year, ...) {
 #' @export
 #' @family slf lookup file path
 #' @seealso [get_file_path()] for the generic function.
-
-get_all_slf_deaths_lookup_path <- function(update = latest_update(), ...) {
-  # Note this name is very similar to the existing slf_deaths_lookup_path which returnsthe path for
-  # the processed BOXI extract for each financial year. This function will return the combined financial
+get_combined_slf_deaths_lookup_path <- function(update = latest_update(), ...) {
+  # Note this name is very similar to the existing slf_deaths_lookup_path which returns the path for
+  # the refined_death with deceased flag for each financial year.
+  # This function will return the combined financial
   # years lookup i.e. all years put together.
-  all_slf_deaths_lookup_path <- get_file_path(
+  combined_slf_deaths_lookup_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Deaths"),
-    file_name = stringr::str_glue("anon-all_slf_deaths_lookup_{update}.parquet"),
+    file_name = stringr::str_glue("anon-combined_slf_deaths_lookup_{update}.parquet"),
     ...
   )
-  return(all_slf_deaths_lookup_path)
+  return(combined_slf_deaths_lookup_path)
 }
 
 
diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R
index 7fbf203d0..5e61a2082 100644
--- a/R/join_deaths_data.R
+++ b/R/join_deaths_data.R
@@ -10,10 +10,13 @@ join_deaths_data <- function(
     data,
     year,
     slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi()) {
+  cli::cli_alert_info("Join deaths data function started at {Sys.time()}")
+
   return(
     data %>%
       dplyr::left_join(
-        slf_deaths_lookup,
+        slf_deaths_lookup %>%
+          dplyr::distinct(chi, .keep_all = TRUE),
         by = "chi",
         na_matches = "never",
         relationship = "many-to-one"
diff --git a/R/join_sparra_hhg.R b/R/join_sparra_hhg.R
index dafaca867..c22e1a9c3 100644
--- a/R/join_sparra_hhg.R
+++ b/R/join_sparra_hhg.R
@@ -5,6 +5,8 @@
 #' @return The data including the SPARRA and HHG variables matched
 #' on to the episode file.
 join_sparra_hhg <- function(data, year) {
+  cli::cli_alert_info("Join SPARRA and HHG function started at {Sys.time()}")
+
   if (check_year_valid(year, "sparra")) {
     data <- dplyr::left_join(
       data,
diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R
index ef4aa4754..d4162b619 100644
--- a/R/link_delayed_discharge_eps.R
+++ b/R/link_delayed_discharge_eps.R
@@ -12,6 +12,14 @@ link_delayed_discharge_eps <- function(
     episode_file,
     year,
     dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi()) {
+  cli::cli_alert_info("Link delayed discharge to episode file function started at {Sys.time()}")
+
+  if (!check_year_valid(year, type = "dd")) {
+    episode_file <- episode_file
+    return(episode_file)
+  }
+
+  names_ep <- names(episode_file)
   episode_file <- episode_file %>%
     dplyr::mutate(
       # remember to revoke the cij_end_date with dummy_cij_end
@@ -284,6 +292,11 @@ link_delayed_discharge_eps <- function(
     )) %>%
     dplyr::group_by(.data$chi, .data$cij_marker) %>%
     dplyr::mutate(cij_delay = max(.data$has_delay)) %>%
+    dplyr::mutate(cij_delay = dplyr::if_else(cij_delay == "0",
+      FALSE,
+      TRUE,
+      missing = NA
+    )) %>%
     dplyr::ungroup() %>%
     # add yearstay and monthly beddays
     # count_last = TRUE because DD counts last day and not the first
@@ -297,37 +310,43 @@ link_delayed_discharge_eps <- function(
       yearstay = rowSums(dplyr::pick(dplyr::ends_with("_beddays")))
     ) %>%
     # tidy up and rename columns to match the format of episode files
+    # keep variables from ep files
     dplyr::select(
+      -c(
+        "ep_file_row_id",
+        "year",
+        "recid",
+        "record_keydate1",
+        "record_keydate2",
+        "postcode",
+        "hbtreatcode",
+        "location",
+        "spec",
+        ## following are dummy variables
+        "cij_start_date_lower",
+        "cij_end_date_upper",
+        "cij_end_month",
+        "is_dummy_cij_start",
+        "dummy_cij_start",
+        "is_dummy_cij_end",
+        "dummy_cij_end",
+        "datediff_start",
+        "datediff_end",
+        "has_delay",
+        "is_dummy_keydate2",
+        "dummy_keydate2",
+        "dummy_id"
+      )
+    ) %>%
+    dplyr::rename(
       "year" = "year_dd",
       "recid" = "recid_dd",
       "record_keydate1" = "record_keydate1_dd",
       "record_keydate2" = "record_keydate2_dd",
-      "smrtype",
-      "chi",
-      "gender",
-      "dob",
-      "age",
-      "gpprac",
       "postcode" = "postcode_dd",
-      "dd_responsible_lca",
       "hbtreatcode" = "hbtreatcode_dd",
-      "delay_end_reason",
-      "primary_delay_reason",
-      "secondary_delay_reason",
-      "cij_marker",
-      "cij_start_date",
-      "cij_end_date",
-      "cij_pattype_code",
-      "cij_ipdc",
-      "cij_admtype",
-      "cij_adm_spec",
-      "cij_dis_spec",
-      "cij_delay",
-      "location",
       "spec" = "spec_dd",
-      "dd_quality",
-      dplyr::ends_with("_beddays"),
-      "yearstay"
+      "location" = "location_dd"
     ) %>%
     # Combine DD with episode data
     dplyr::bind_rows(
@@ -343,7 +362,19 @@ link_delayed_discharge_eps <- function(
             "dummy_cij_end"
           )
         )
-    )
+    ) %>%
+    # populate cij_delay dd details back to ep
+    dplyr::group_by(chi, cij_marker) %>%
+    dplyr::mutate(
+      has_dd = any(recid == "DD"),
+      delay_dd = any(cij_delay)
+    ) %>%
+    dplyr::ungroup() %>%
+    dplyr::mutate(cij_delay = dplyr::if_else(has_dd,
+      delay_dd,
+      cij_delay
+    )) %>%
+    dplyr::select(-c("has_dd", "delay_dd", "original_admission_date", "amended_dates"))
 
   return(linked_data)
 }
diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R
index 3ed052be8..f0522c00d 100644
--- a/R/match_on_ltcs.R
+++ b/R/match_on_ltcs.R
@@ -12,6 +12,8 @@ match_on_ltcs <- function(
     data,
     year,
     ltc_data = read_file(get_ltcs_path(year)) %>% slfhelper::get_chi()) {
+  cli::cli_alert_info("Match on LTCs function started at {Sys.time()}")
+
   # Match on LTC lookup
   matched <- dplyr::left_join(
     data,
diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R
index d91a77ab3..c46f175c7 100644
--- a/R/process_extract_acute.R
+++ b/R/process_extract_acute.R
@@ -12,7 +12,10 @@
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @export
 #' @family process extracts
-process_extract_acute <- function(data, year, write_to_disk = TRUE) {
+process_extract_acute <- function(data,
+                                  year,
+                                  acute_cup_path = get_boxi_extract_path(year, "acute_cup"),
+                                  write_to_disk = TRUE) {
   # Only run for a single year
   stopifnot(length(year) == 1L)
 
@@ -59,7 +62,48 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) {
     # Add oldtadm as a factor with labels
     dplyr::mutate(oldtadm = factor(.data$oldtadm,
       levels = 0L:8L
-    ))
+    )) %>%
+    dplyr::mutate(
+      unique_row_num = dplyr::row_number()
+    )
+
+  acute_cup <- read_file(
+    path = acute_cup_path,
+    col_type = readr::cols(
+      "anon_chi" = readr::col_character(),
+      "Acute Admission Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Acute Discharge Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "Acute Admission Type Code" = readr::col_character(),
+      "Acute Discharge Type Code" = readr::col_character(),
+      "Case Reference Number [C]" = readr::col_character(),
+      "CUP Marker" = readr::col_integer(),
+      "CUP Pathway Name" = readr::col_character()
+    )
+  ) %>%
+    dplyr::select(
+      anon_chi = "anon_chi",
+      case_reference_number = "Case Reference Number [C]",
+      record_keydate1 = "Acute Admission Date",
+      record_keydate2 = "Acute Discharge Date",
+      tadm = "Acute Admission Type Code",
+      disch = "Acute Discharge Type Code",
+      cup_marker = "CUP Marker",
+      cup_pathway = "CUP Pathway Name"
+    ) %>%
+    dplyr::distinct() %>%
+    slfhelper::get_chi()
+
+  acute_clean <- acute_clean %>%
+    dplyr::left_join(acute_cup,
+      by = c(
+        "record_keydate1",
+        "record_keydate2",
+        "case_reference_number",
+        "chi",
+        "tadm",
+        "disch"
+      )
+    )
 
   acute_processed <- acute_clean %>%
     dplyr::select(
@@ -106,7 +150,9 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) {
       "cost_total_net",
       tidyselect::ends_with("_beddays"),
       tidyselect::ends_with("_cost"),
-      "uri"
+      "uri",
+      "cup_marker",
+      "cup_pathway"
     ) %>%
     dplyr::arrange(.data$chi, .data$record_keydate1) %>%
     slfhelper::get_anon_chi()
diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R
index d7b063ace..9d47dd5f0 100644
--- a/R/process_extract_alarms_telecare.R
+++ b/R/process_extract_alarms_telecare.R
@@ -41,12 +41,12 @@ process_extract_alarms_telecare <- function(
       "smrtype",
       "chi",
       "dob",
+      # "person_id",
       "gender",
       "postcode",
       "sc_send_lca",
       "record_keydate1",
       "record_keydate2",
-      "person_id",
       "sc_latest_submission"
     ) %>%
     slfhelper::get_anon_chi()
diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R
index 54789eac9..dbf817af4 100644
--- a/R/process_extract_care_home.R
+++ b/R/process_extract_care_home.R
@@ -39,11 +39,11 @@ process_extract_care_home <- function(
       is_date_in_fyyear(year, .data$record_keydate1, .data$record_keydate2)
     ) %>%
     # remove any episodes where the latest submission was before the current year
+    # this is what stops cases being in future files
     dplyr::filter(
       substr(.data$sc_latest_submission, 1L, 4L) >= convert_fyyear_to_year(year)
     )
 
-
   # Data Cleaning ---------------------------------------
   source_ch_clean <- ch_data %>%
     # create variables
@@ -115,7 +115,7 @@ process_extract_care_home <- function(
       "recid",
       "smrtype",
       "chi",
-      "person_id",
+      # "person_id",
       "dob",
       "gender",
       "postcode",
diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R
index c889c3db7..e8d07b9e8 100644
--- a/R/process_extract_gp_ooh.R
+++ b/R/process_extract_gp_ooh.R
@@ -12,7 +12,10 @@
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @export
 #' @family process extracts
-process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) {
+process_extract_gp_ooh <- function(year,
+                                   data_list,
+                                   gp_ooh_cup_path = get_boxi_extract_path(year, "gp_ooh_cup"),
+                                   write_to_disk = TRUE) {
   diagnosis_extract <- process_extract_ooh_diagnosis(data_list[["diagnosis"]], year)
   outcomes_extract <- process_extract_ooh_outcomes(data_list[["outcomes"]], year)
   consultations_extract <- process_extract_ooh_consultations(data_list[["consultations"]], year)
@@ -93,6 +96,39 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) {
     ) %>%
     dplyr::ungroup()
 
+  ## Link CUP Marker -----
+  gp_ooh_cup_file <- read_file(
+    path = gp_ooh_cup_path,
+    col_type = readr::cols(
+      "GP OOH Consultation Start Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "GP OOH Consultation Start Time" = readr::col_time(""),
+      "GUID" = readr::col_character(),
+      "CUP Marker" = readr::col_integer(),
+      "CUP Pathway Name" = readr::col_character()
+    )
+  ) %>%
+    dplyr::select(
+      record_keydate1 = "GP OOH Consultation Start Date",
+      keytime1 = "GP OOH Consultation Start Time",
+      ooh_case_id = "GUID",
+      cup_marker = "CUP Marker",
+      cup_pathway = "CUP Pathway Name"
+    ) %>%
+    dplyr::distinct(
+      .data$record_keydate1,
+      .data$keytime1,
+      .data$ooh_case_id,
+      .keep_all = TRUE
+    )
+
+  ooh_clean <- ooh_clean %>%
+    dplyr::left_join(gp_ooh_cup_file,
+      by = dplyr::join_by(
+        "ooh_case_id",
+        "record_keydate1",
+        "keytime1"
+      )
+    )
 
   ## Save Outfile -------------------------------------
 
@@ -122,7 +158,9 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) {
       tidyselect::starts_with("ooh_outcome"),
       "cost_total_net",
       tidyselect::ends_with("_cost"),
-      "ooh_case_id"
+      "ooh_case_id",
+      cup_marker,
+      cup_pathway
     ) %>%
     slfhelper::get_anon_chi()
 
diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R
index 831496bd2..651be172d 100644
--- a/R/process_extract_home_care.R
+++ b/R/process_extract_home_care.R
@@ -96,7 +96,7 @@ process_extract_home_care <- function(
       "cost_total_net",
       "hc_provider",
       "hc_reablement",
-      "person_id"
+      # "person_id"
     ) %>%
     slfhelper::get_anon_chi()
 
diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R
index be53f35ff..f8e5f8579 100644
--- a/R/process_extract_sds.R
+++ b/R/process_extract_sds.R
@@ -41,11 +41,13 @@ process_extract_sds <- function(
       "smrtype",
       "chi",
       "dob",
+      # "person_id",
       "gender",
       "postcode",
+      "sc_send_lca",
       "record_keydate1",
       "record_keydate2",
-      "sc_send_lca"
+      "sc_latest_submission"
     ) %>%
     slfhelper::get_anon_chi()
 
diff --git a/R/process_it_chi_deaths.R b/R/process_it_chi_deaths.R
index 85354880b..cfca32e0f 100644
--- a/R/process_it_chi_deaths.R
+++ b/R/process_it_chi_deaths.R
@@ -17,10 +17,9 @@ process_it_chi_deaths <- function(data, write_to_disk = TRUE) {
       dplyr::desc(.data$death_date_chi)
     ) %>%
     dplyr::distinct(.data$chi, .keep_all = TRUE) %>%
-    # Use the NRS death_date unless it isn't there
-    dplyr::mutate(
-      death_date = dplyr::coalesce(.data$death_date_nrs, .data$death_date_chi)
-    ) %>%
+    # remove death_date_nrs as this is the nrs weekly unvalidated data and we should not use this.
+    # the boxi nrs death date is more reliable as this is provided monthly and is validated.
+    dplyr::select(.data$chi, .data$death_date_chi) %>%
     slfhelper::get_anon_chi()
 
   if (write_to_disk) {
diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R
index 9a5c21974..edc083cd2 100644
--- a/R/process_lookup_deaths.R
+++ b/R/process_lookup_deaths.R
@@ -1,36 +1,29 @@
 #' Create the SLF Deaths lookup
 #'
-#' @description Currently this just uses the NRS death dates 'as is', with no
-#' corrections or modifications, it is expected that this will be expanded to
-#' use the CHI deaths extract from IT as well as taking into account data in
-#' the episode file to assess the validity of a death date.
+#' @description Use all-year refined death data to produce year-specific
+#' slf_deaths_lookup with deceased flag added.
 #'
 #' @param year The year to process, in FY format.
-#' @param nrs_deaths_data NRS deaths data.
-#' @param chi_deaths_data IT CHI deaths data.
+#' @param refined_death refined death date combining nrs and it_chi.
 #' @param write_to_disk (optional) Should the data be written to disk default is
 #' `TRUE` i.e. write the data to disk.
 #'
-#' @return a [tibble][tibble::tibble-package] containing the episode file
+#' @return a [tibble][tibble::tibble-package] add deceased flag to deaths
 #' @export
 process_slf_deaths_lookup <- function(
     year,
-    nrs_deaths_data = read_file(
-      get_source_extract_path(year, "deaths"),
-      col_select = c("chi", "record_keydate1")
-    ),
-    chi_deaths_data = read_file(get_slf_chi_deaths_path()),
+    refined_death = read_file(get_combined_slf_deaths_lookup_path()),
     write_to_disk = TRUE) {
-  slf_deaths_lookup <- nrs_deaths_data %>%
-    # Only modification over 'raw' NRS is to keep the earliest death date
-    dplyr::select("chi", "record_keydate1") %>%
-    dplyr::arrange(.data$record_keydate1) %>%
-    dplyr::distinct(.data$chi, .keep_all = TRUE) %>%
+  # create slf deaths lookup
+  slf_deaths_lookup <- refined_death %>%
+    slfhelper::get_chi() %>%
+    # Filter the chi death dates to the FY as the lookup is by FY
+    dplyr::filter(fy == year) %>%
+    # use the BOXI NRS death date by default, but if it's missing, use the chi death date.
     dplyr::mutate(
-      death_date = .data$record_keydate1,
-      deceased = TRUE,
-      .keep = "unused"
+      deceased = TRUE
     ) %>%
+    # save anon chi on disk
     slfhelper::get_anon_chi()
 
   if (write_to_disk) {
diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R
index b0dc30d51..30772383e 100644
--- a/R/process_lookup_homelessness.R
+++ b/R/process_lookup_homelessness.R
@@ -13,6 +13,12 @@
 create_homelessness_lookup <- function(
     year,
     homelessness_data = read_file(get_source_extract_path(year, "homelessness")) %>% slfhelper::get_chi()) {
+  cli::cli_alert_info("Create homelessness lookup function started at {Sys.time()}")
+
+  # Specify years available for running
+  if (year < "1617") {
+    return(NULL)
+  }
   homelessness_lookup <- homelessness_data %>%
     dplyr::distinct(.data$chi, .data$record_keydate1, .data$record_keydate2) %>%
     tidyr::drop_na(.data$chi) %>%
@@ -35,6 +41,13 @@ create_homelessness_lookup <- function(
 #' @export
 add_homelessness_flag <- function(data, year,
                                   lookup = create_homelessness_lookup(year)) {
+  cli::cli_alert_info("Add homelessness flag function started at {Sys.time()}")
+
+  if (!check_year_valid(year, type = "homelessness")) {
+    data <- data
+    return(data)
+  }
+
   data <- data %>%
     dplyr::left_join(
       lookup %>%
@@ -59,6 +72,13 @@ add_homelessness_flag <- function(data, year,
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @export
 add_homelessness_date_flags <- function(data, year, lookup = create_homelessness_lookup(year)) {
+  cli::cli_alert_info("Add homelessness date flags function started at {Sys.time()}")
+
+  if (!check_year_valid(year, type = "homelessness")) {
+    data <- data
+    return(data)
+  }
+
   lookup <- lookup %>%
     dplyr::filter(!(is.na(.data$record_keydate2))) %>%
     dplyr::rename(
diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R
index 69818def3..91c08632d 100644
--- a/R/process_lookup_sc_client.R
+++ b/R/process_lookup_sc_client.R
@@ -18,30 +18,36 @@ process_lookup_sc_client <-
            year,
            sc_demographics = read_file(get_sc_demog_lookup_path()) %>%
              slfhelper::get_chi() %>%
-             dplyr::select(c("sending_location", "social_care_id", "chi")),
+             dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")),
            write_to_disk = TRUE) {
-    client_clean <- data %>%
-      # Replace 'unknown' responses with NA
-      dplyr::mutate(
-        dplyr::across(
-          c(
-            "support_from_unpaid_carer",
-            "social_worker",
-            "meals",
-            "living_alone",
-            "day_care"
-          ),
-          dplyr::na_if,
-          9L
-        ),
-        type_of_housing = dplyr::na_if(.data$type_of_housing, 6L)
+    # Specify years available for running
+    if (year < "1718") {
+      return(NULL)
+    }
+
+    # Match to demographics lookup to get CHI
+    sc_client_demographics <- data %>%
+      dplyr::right_join(
+        sc_demographics,
+        by = c("sending_location", "social_care_id")
       ) %>%
-      dplyr::group_by(.data$sending_location, .data$social_care_id) %>%
+      # need period for the replace sc id with latest function
+      dplyr::mutate(period = ifelse(!(is.na(.data$financial_quarter)), paste0(.data$financial_year, "Q", financial_quarter),
+        financial_year
+      )) %>%
+      replace_sc_id_with_latest() %>%
+      # remove cases with no data in client
+      dplyr::filter(!(is.na(.data$financial_year))) %>%
+      dplyr::select(-.data$latest_sc_id, -.data$latest_flag, -.data$period)
+
+
+    client_clean <- sc_client_demographics %>%
+      dplyr::group_by(.data$sending_location, .data$social_care_id, .data$chi) %>%
       # summarise to take last submission
       dplyr::summarise(dplyr::across(
         c(
           "dementia",
-          "mental_health_problems",
+          "mental_health_disorders",
           "learning_disability",
           "physical_and_sensory_disability",
           "drugs",
@@ -70,19 +76,9 @@ process_lookup_sc_client <-
             "social_worker",
             "meals",
             "living_alone",
-            "day_care"
-          ),
-          tidyr::replace_na,
-          9L
-        ),
-        type_of_housing = tidyr::replace_na(.data$type_of_housing, 6L)
-      ) %>%
-      # factor labels
-      dplyr::mutate(
-        dplyr::across(
-          c(
+            "day_care",
             "dementia",
-            "mental_health_problems",
+            "mental_health_disorders",
             "learning_disability",
             "physical_and_sensory_disability",
             "drugs",
@@ -92,37 +88,64 @@ process_lookup_sc_client <-
             "elderly_frail",
             "neurological_condition",
             "autism",
-            "other_vulnerable_groups"
+            "other_vulnerable_groups",
+            "type_of_housing"
           ),
-          factor,
-          levels = c(0L, 1L),
-          labels = c("No", "Yes")
-        ),
+          tidyr::replace_na, 9L
+        )
+      ) %>%
+      # factor labels
+      dplyr::mutate(
         dplyr::across(
           c(
             "living_alone",
             "support_from_unpaid_carer",
             "social_worker",
             "meals",
-            "day_care"
+            "day_care",
+            "dementia",
+            "mental_health_disorders",
+            "learning_disability",
+            "physical_and_sensory_disability",
+            "drugs",
+            "alcohol",
+            "palliative_care",
+            "carer",
+            "elderly_frail",
+            "neurological_condition",
+            "autism",
+            "other_vulnerable_groups"
           ),
           factor,
           levels = c(0L, 1L, 9L),
           labels = c("No", "Yes", "Not Known")
         ),
         type_of_housing = factor(.data$type_of_housing,
-          levels = 1L:6L
+          levels = 1L:9L,
+          labels = c(
+            "Mainstream", # 1
+            "Supported", # 2
+            "Long Stay Care Home", # 3
+            "Hospital or other medical establishment", # 4
+            "Homeless", # 5
+            "Penal Institutions", # 6
+            "Not Known", # 7
+            "Other", # 8
+            "Not Known" # 9
+          )
         )
       ) %>%
       # rename variables
       dplyr::rename_with(
-        .cols = -c("sending_location", "social_care_id"),
+        .cols = -c("sending_location", "social_care_id", "chi"),
         .fn = ~ paste0("sc_", .x)
       )
 
+
     sc_client_lookup <- client_clean %>%
       # reorder
       dplyr::select(
+        "chi",
         "sending_location",
         "social_care_id",
         "sc_living_alone",
@@ -130,15 +153,23 @@ process_lookup_sc_client <-
         "sc_social_worker",
         "sc_type_of_housing",
         "sc_meals",
-        "sc_day_care"
-      )
+        "sc_day_care",
+        "sc_dementia",
+        "sc_learning_disability",
+        "sc_mental_health_disorders",
+        "sc_physical_and_sensory_disability",
+        "sc_drugs",
+        "sc_alcohol",
+        "sc_palliative_care",
+        "sc_carer",
+        "sc_elderly_frail",
+        "sc_neurological_condition",
+        "sc_autism",
+        "sc_other_vulnerable_groups"
+      ) %>%
+      create_person_id()
+
 
-    # Match to demographics lookup to get CHI
-    sc_client_lookup <- sc_client_lookup %>%
-      dplyr::left_join(
-        sc_demographics,
-        by = c("sending_location", "social_care_id")
-      )
     sc_client_lookup <-
       dplyr::mutate(sc_client_lookup,
         count_not_known = rowSums(
@@ -147,8 +178,21 @@ process_lookup_sc_client <-
               "sc_living_alone",
               "sc_support_from_unpaid_carer",
               "sc_social_worker",
+              "sc_type_of_housing",
               "sc_meals",
-              "sc_day_care"
+              "sc_day_care",
+              "sc_dementia",
+              "sc_learning_disability",
+              "sc_mental_health_disorders",
+              "sc_physical_and_sensory_disability",
+              "sc_drugs",
+              "sc_alcohol",
+              "sc_palliative_care",
+              "sc_carer",
+              "sc_elderly_frail",
+              "sc_neurological_condition",
+              "sc_autism",
+              "sc_other_vulnerable_groups"
             )
           )) == "Not Known",
           na.rm = TRUE
@@ -156,7 +200,7 @@ process_lookup_sc_client <-
       ) %>%
       dplyr::arrange(.data$chi, .data$count_not_known) %>%
       dplyr::distinct(.data$chi, .keep_all = TRUE) %>%
-      dplyr::select(-.data$sending_location) %>%
+      dplyr::select(-.data$sending_location, -.data$count_not_known) %>%
       slfhelper::get_anon_chi()
 
     if (write_to_disk) {
diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R
index d6e24c87f..1b29c414c 100644
--- a/R/process_lookup_sc_demographics.R
+++ b/R/process_lookup_sc_demographics.R
@@ -143,8 +143,8 @@ process_lookup_sc_demographics <- function(
     dplyr::ungroup()
 
   # check to make sure all cases of chi are still there
-  dplyr::n_distinct(sc_demog_lookup$chi) # 524810
-  dplyr::n_distinct(sc_demog_lookup$social_care_id) # 636404
+  dplyr::n_distinct(sc_demog_lookup$chi) # 525,834
+  dplyr::n_distinct(sc_demog_lookup$social_care_id) # 637,422
 
   sc_demog_lookup <- sc_demog_lookup %>%
     slfhelper::get_anon_chi()
diff --git a/R/process_refined_death.R b/R/process_refined_death.R
new file mode 100644
index 000000000..dc7663221
--- /dev/null
+++ b/R/process_refined_death.R
@@ -0,0 +1,63 @@
+#' Process the refined death data
+#'
+#' @description This will process
+#' year-specific BOXI NRS death file (written to disk), and
+#' combine them together to get all years NRS file (Not written to disk).
+#' Then join all NRS deaths with IT CHI death data
+#' to get an all-year refined death file (written to disk).
+#'
+#' @param it_chi_deaths it chi death data
+#' @param write_to_disk write the result to disk or not.
+#'
+#' @return refined_death The processed lookup of deaths combining NRS and IT_CHI.
+#' @export
+#' @family process extracts
+process_refined_death <- function(
+    it_chi_deaths = read_file(get_slf_chi_deaths_path()),
+    write_to_disk = TRUE) {
+  years_list <- years_to_run()
+
+  nrs_all_years <- lapply(years_list, (\(year) {
+    read_extract_nrs_deaths(
+      year,
+      get_boxi_extract_path(year, type = "deaths")
+    ) %>%
+      process_extract_nrs_deaths(year,
+        write_to_disk = write_to_disk
+      )
+  })) %>%
+    data.table::rbindlist()
+
+  it_chi_deaths <- it_chi_deaths %>%
+    dplyr::select(c(
+      "anon_chi",
+      "death_date_chi"
+    )) %>%
+    dplyr::arrange(.data$anon_chi, .keep_all = TRUE)
+
+  refined_death <- nrs_all_years %>%
+    dplyr::arrange(.data$anon_chi, .keep_all = TRUE) %>%
+    dplyr::full_join(it_chi_deaths, by = "anon_chi") %>%
+    # use the BOXI NRS death date by default, but if it's missing, use the chi death date.
+    dplyr::mutate(death_date = dplyr::if_else(
+      is.na(.data$record_keydate1),
+      .data$death_date_chi,
+      .data$record_keydate1
+    )) %>%
+    dplyr::select(anon_chi, death_date) %>%
+    # add fy when death happened
+    dplyr::mutate(
+      fy = phsmethods::extract_fin_year(death_date),
+      fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7)))
+    )
+  # TODO: check distinct death data by chi while keeping chi==NA records
+
+  if (write_to_disk) {
+    write_file(
+      refined_death,
+      get_combined_slf_deaths_lookup_path(create = TRUE)
+    )
+  }
+
+  return(refined_death)
+}
diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R
index c583fa8a7..aafc3d727 100644
--- a/R/process_sc_all_alarms_telecare.R
+++ b/R/process_sc_all_alarms_telecare.R
@@ -87,11 +87,11 @@ process_sc_all_alarms_telecare <- function(
   # Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function)
   data <- replace_sc_id_with_latest(data)
 
-  data$person_id <- paste0(
-    data$sending_location,
-    "-",
-    data$social_care_id
-  )
+  # data$person_id <- paste0(
+  #   data$sending_location,
+  #   "-",
+  #   data$social_care_id
+  # )
 
   # Deal with episodes that have a package across quarters
   data[, pkg_count := seq_len(.N), by = list(
@@ -125,7 +125,7 @@ process_sc_all_alarms_telecare <- function(
     dob = data.table::last(dob),
     postcode = data.table::last(postcode),
     recid = data.table::last(recid),
-    person_id = data.table::last(person_id),
+    # person_id = data.table::last(person_id),
     sc_send_lca = data.table::last(sc_send_lca)
   ), by = list(
     sending_location,
diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R
index b7c29fbc7..5478d50cc 100644
--- a/R/process_sc_all_care_home.R
+++ b/R/process_sc_all_care_home.R
@@ -7,8 +7,8 @@
 #' @param data The extract to process
 #' @param sc_demog_lookup The Social Care Demographics lookup produced by
 #' [process_lookup_sc_demographics()].
-#' @param it_chi_deaths_data The processed lookup of deaths from IT produced
-#' with [process_it_chi_deaths()].
+#' @param refined_death The processed lookup of deaths from IT produced
+#' with [process_refined_death()].
 #' @param ch_name_lookup_path Path to the Care Home name Lookup Excel workbook.
 #' @param spd_path (Optional) Path the Scottish Postcode Directory, default is
 #' to use [get_spd_path()].
@@ -19,38 +19,38 @@
 #' @family process extracts
 #'
 #' @export
-#'
 process_sc_all_care_home <- function(
     data,
     sc_demog_lookup = read_file(get_sc_demog_lookup_path()) %>% slfhelper::get_chi(),
-    it_chi_deaths_data = read_file(get_slf_chi_deaths_path()),
-    ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()),
-    spd_path = read_file(get_spd_path()),
+    refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(),
+    ch_name_lookup_path = get_slf_ch_name_lookup_path(),
+    spd_path = get_spd_path(),
     write_to_disk = TRUE) {
   ## Data Cleaning-----------------------------------------------------
+
   ch_clean <- data %>%
     dplyr::mutate(
-      record_date = end_fy_quarter(.data[["period"]]),
-      qtr_start = start_fy_quarter(.data[["period"]]),
-      # Set missing admission date to start of the submitted quarter
+      # Set missing admission date to start of the submitted quarter (n = 2)
       ch_admission_date = dplyr::if_else(
         is.na(.data[["ch_admission_date"]]),
-        .data[["qtr_start"]],
+        .data[["period_start_date"]],
         .data[["ch_admission_date"]]
       ),
-      # TODO check if we should set the dis date to the end of the period?
-      # If the dis date is before admission, remove the dis date
+      # If the dis date is before admission, remove the dis date (n = 5)
       ch_discharge_date = dplyr::if_else(
         .data[["ch_admission_date"]] > .data[["ch_discharge_date"]],
         lubridate::NA_Date_,
         .data[["ch_discharge_date"]]
       )
     ) %>%
-    dplyr::left_join(sc_demog_lookup,
+    dplyr::right_join(sc_demog_lookup, # this is the correct join.
       by = c("sending_location", "social_care_id")
     ) %>%
-    replace_sc_id_with_latest()
+    replace_sc_id_with_latest() %>%
+    dplyr::select(-latest_flag, -latest_sc_id)
+
 
+  # cleaning and matching care home names
   name_postcode_clean <- fill_ch_names(
     ch_data = ch_clean,
     ch_name_lookup_path = ch_name_lookup_path,
@@ -58,23 +58,27 @@ process_sc_all_care_home <- function(
   )
 
   fixed_ch_provider <- name_postcode_clean %>%
+    dplyr::select(-ch_name_validated, -open_interval, -latest_close_date, -ch_name_old, -ch_postcode_old) %>%
     dplyr::mutate(
-      ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]])
+      ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) # (n = 2)
     ) %>%
     # sort data
     dplyr::arrange(
-      "sending_location",
-      "social_care_id",
-      "ch_admission_date",
-      "period"
+      .data[["sending_location"]],
+      .data[["social_care_id"]],
+      .data[["period"]],
+      .data[["ch_admission_date"]]
     ) %>%
     dplyr::group_by(
       .data[["sending_location"]],
       .data[["social_care_id"]]
     ) %>%
+    # work out the min and max ch provider in an episode
     dplyr::mutate(
       min_ch_provider = min(.data[["ch_provider"]]),
       max_ch_provider = max(.data[["ch_provider"]]),
+      # if care home provider is different across cases, set to "6".
+      # tidy up ch_provider using 6 when disagreeing values
       ch_provider = dplyr::if_else(
         .data[["min_ch_provider"]] != .data[["max_ch_provider"]],
         6L,
@@ -85,12 +89,9 @@ process_sc_all_care_home <- function(
       -"min_ch_provider",
       -"max_ch_provider"
     ) %>%
-    # tidy up ch_provider using 6 when disagreeing values
-    tidyr::fill(.data[["ch_provider"]], .direction = "downup") %>%
     dplyr::ungroup()
 
 
-
   fixed_nursing_provision <- fixed_ch_provider %>%
     dplyr::group_by(
       .data[["sending_location"]],
@@ -98,7 +99,7 @@ process_sc_all_care_home <- function(
       .data[["ch_admission_date"]]
     ) %>%
     # fill in nursing care provision when missing
-    # but present in the following entry
+    # but present in the following entry (n = 0)
     dplyr::mutate(
       nursing_care_provision = dplyr::na_if(.data[["nursing_care_provision"]], 9L)
     ) %>%
@@ -106,26 +107,44 @@ process_sc_all_care_home <- function(
 
 
   ready_to_merge <- fixed_nursing_provision %>%
-    # remove any duplicate records before merging for speed and simplicity
-    dplyr::distinct() %>%
+    # remove any duplicate records before merging
+    dplyr::distinct() %>% # (n = 3)
+    # sort data
+    dplyr::arrange(
+      .data[["sending_location"]],
+      .data[["social_care_id"]],
+      .data[["ch_admission_date"]],
+      .data[["period"]]
+    ) %>%
+    dplyr::group_by(
+      .data[["sending_location"]],
+      .data[["social_care_id"]],
+      .data[["ch_admission_date"]]
+    ) %>%
     # counter for split episodes
-    dplyr::mutate(
-      split_episode = tidyr::replace_na(
-        .data[["nursing_care_provision"]] != dplyr::lag(
-          .data[["nursing_care_provision"]]
-        ),
-        TRUE
-      ),
-      split_episode_counter = cumsum(.data[["split_episode"]])
+    # a split episode is an episode where the admission date is the same but the nursing provider has changed.
+    # We want to keep the nursing provision changes when we merge cases that have the same admission date
+    dplyr::mutate(previous_nursing_care_provision = dplyr::lag(.data[["nursing_care_provision"]])) %>%
+    # create a T/F flag for if nursing provision was the same as previous record with same admission date
+    dplyr::mutate(split_episode = tidyr::replace_na(.data[["previous_nursing_care_provision"]] != nursing_care_provision, TRUE)) %>%
+    dplyr::group_by(
+      .data[["social_care_id"]],
+      .data[["sending_location"]],
+      .data[["split_episode"]]
     ) %>%
-    dplyr::ungroup()
+    # create a count of each time the nursing provision changes between records with the same admission date
+    dplyr::mutate(split_episode_counter = ifelse(split_episode == TRUE, dplyr::row_number(), NA)) %>%
+    dplyr::group_by(
+      .data[["social_care_id"]],
+      .data[["sending_location"]]
+    ) %>%
+    # fill split episode counter. This will create a new id number for each different nursing provision within an episode
+    tidyr::fill(split_episode_counter, .direction = c("down")) %>%
+    dplyr::select(-previous_nursing_care_provision, -split_episode)
 
-  # Merge records to a single row per episode
-  # where admission is the same
+
+  # Merge records to a single row per episode where admission is the same
   ch_episode <- ready_to_merge %>%
-    # when nursing_care_provision is different on
-    # records within the episode, split the episode
-    # at this point.
     dplyr::group_by(
       .data[["chi"]],
       .data[["sending_location"]],
@@ -138,8 +157,8 @@ process_sc_all_care_home <- function(
       dplyr::desc(.data[["period"]]),
       dplyr::desc(.data[["ch_discharge_date"]]),
       dplyr::desc(.data[["ch_provider"]]),
-      dplyr::desc(.data[["record_date"]]),
-      dplyr::desc(.data[["qtr_start"]]),
+      dplyr::desc(.data[["period_end_date"]]),
+      dplyr::desc(.data[["period_start_date"]]),
       dplyr::desc(.data[["ch_name"]]),
       dplyr::desc(.data[["ch_postcode"]]),
       dplyr::desc(.data[["reason_for_admission"]]),
@@ -150,65 +169,42 @@ process_sc_all_care_home <- function(
     ) %>%
     dplyr::summarise(
       sc_latest_submission = dplyr::first(.data[["period"]]),
-      dplyr::across(
-        c(
-          "ch_discharge_date",
-          "ch_provider",
-          "record_date",
-          "qtr_start",
-          "ch_name",
-          "ch_postcode",
-          "reason_for_admission",
-          "type_of_admission"
-        ),
-        dplyr::first
-      ),
+      dplyr::across(c(
+        "ch_discharge_date",
+        "ch_provider",
+        "period_end_date",
+        "period_start_date",
+        "ch_name",
+        "ch_postcode",
+        "reason_for_admission",
+        "type_of_admission"
+      ), dplyr::first),
       dplyr::across(c("gender", "dob", "postcode"), dplyr::first)
     ) %>%
-    dplyr::ungroup() %>%
-    # Amend dates for split episodes
-    # Change the start and end date as appropriate when an episode is split,
-    # using the start / end date of the submission quarter
-    dplyr::group_by(
-      .data[["chi"]],
-      .data[["sending_location"]],
-      .data[["social_care_id"]],
-      .data[["ch_admission_date"]]
-    ) %>%
-    # counter for latest submission
-    # TODO check if this is the same as split_episode_counter?
-    dplyr::mutate(
-      latest_submission_counter = tidyr::replace_na(
-        .data[["sc_latest_submission"]] != dplyr::lag(
-          .data[["sc_latest_submission"]]
-        ),
-        TRUE
-      ),
-      sum_latest_submission = cumsum(.data[["latest_submission_counter"]])
-    ) %>%
+    # If the admission date is missing use the period start date
+    # otherwise use the start of the quarter
     dplyr::mutate(
-      # If it's the first episode(s) then keep the admission date(s),
-      # otherwise use the start of the quarter
-      ch_admission_date = dplyr::if_else(
-        .data[["sum_latest_submission"]] == min(.data[["sum_latest_submission"]]),
-        .data[["ch_admission_date"]],
-        .data[["qtr_start"]]
+      ch_admission_date = dplyr::if_else(is.na(.data[["ch_admission_date"]]),
+        .data[["period_start_date"]],
+        .data[["ch_admission_date"]]
       ),
       # If it's the last episode(s) then keep the discharge date(s), otherwise
       # use the end of the quarter
-      ch_discharge_date = dplyr::if_else(
-        .data[["sum_latest_submission"]] == max(.data[["sum_latest_submission"]]),
-        .data[["ch_discharge_date"]],
-        .data[["record_date"]]
+      ch_discharge_date = dplyr::if_else(is.na(.data[["ch_discharge_date"]]),
+        .data[["period_end_date"]],
+        .data[["ch_discharge_date"]]
       )
     ) %>%
-    dplyr::ungroup()
+    dplyr::ungroup() %>%
+    dplyr::select(-period_start_date, -split_episode_counter)
+
 
   # Compare to Deaths Data
   # match ch_episode data with deaths data
   matched_deaths_data <- ch_episode %>%
-    dplyr::left_join(it_chi_deaths_data,
-      by = "chi"
+    dplyr::left_join(refined_death,
+      by = "chi",
+      na_matches = "never"
     ) %>%
     # compare discharge date with NRS and CHI death date
     # if either of the dates are 5 or fewer days before discharge
@@ -228,7 +224,7 @@ process_sc_all_care_home <- function(
     dplyr::ungroup() %>%
     # remove any episodes where discharge is now before admission,
     # i.e. death was before admission
-    dplyr::filter(
+    dplyr::filter( # (n = 67)
       !tidyr::replace_na(
         .data[["ch_discharge_date"]] < .data[["ch_admission_date"]],
         FALSE
@@ -237,39 +233,106 @@ process_sc_all_care_home <- function(
 
   # Continuous Care Home Stays
   # Stay will be continuous as long as the admission date is the next day or
-  # earlier than the previous discharge date
-
-  ch_markers <- matched_deaths_data %>%
-    # ch_chi_cis
+  # earlier than the previous discharge date.
+  # creates a CIS  flag for CHI across all of scotland
+  # and a CIS for social care ID and sending location for just that LA
+  ch_chi_markers <- matched_deaths_data %>%
+    # Group the data by chi
     dplyr::group_by(.data[["chi"]]) %>%
+    # Set up previous_discharge_date
+    # The lag function will set the first row to NA.
     dplyr::mutate(
-      continuous_stay_chi = tidyr::replace_na(
-        .data[["ch_admission_date"]] <= dplyr::lag(
-          .data[["ch_discharge_date"]]
-        ) + lubridate::days(1L),
-        TRUE
+      # We want to flag the first episode per chi with row_number
+      row_number = dplyr::row_number(),
+      # create variable for previous discharge date + 1 day
+      previous_discharge_date_chi = dplyr::lag(.data[["ch_discharge_date"]]) +
+        lubridate::days(1L),
+      # if the first row is NA, set this to the ch_discharge_date
+      previous_discharge_date_chi = dplyr::if_else(row_number == 1, .data[["ch_discharge_date"]],
+        .data[["previous_discharge_date_chi"]]
+      )
+    ) %>%
+    # flag continuous stays and create marker
+    # calculate number of days between start_date and end_date on the previous episode
+    dplyr::mutate(
+      days_to_next_rec = floor(
+        lubridate::time_length(lubridate::interval(
+          .data[["previous_discharge_date_chi"]],
+          .data[["ch_admission_date"]]
+        ), "days")
       ),
-      ch_chi_cis = cumsum(.data[["continuous_stay_chi"]])
+      # if there is more than 1 day between (or the last ep for the individual) flag as new ep (Y)
+      # if there is < 1 day (i.e. a pause of up to 1 day or stays overlap flag as same ep (N))
+      new_episode = dplyr::if_else(is.na(days_to_next_rec) | days_to_next_rec > 1, "Y", "N")
     ) %>%
-    dplyr::ungroup() %>%
-    # ch_sc_id_cis
-    # uses the social care id and sending location so can be used for
-    # episodes that are not attached to a CHI number
-    # This will restrict continuous stays to each Local Authority
+    # create continuous marker using flag for new stay
+    dplyr::mutate(
+      ch_chi_cis = purrr::accumulate(new_episode[-1],
+        .init = 1,
+        ~ if (.y == "Y") {
+          .x + 1
+        } else {
+          .x
+        }
+      )
+    ) %>%
+    dplyr::ungroup()
+
+
+  # This is the same but uses the social care id and sending location so can be used for
+  # episodes that are not attached to a CHI number
+  # This will restrict continuous stays to each Local Authority
+  sc_ch_id_markers <- ch_chi_markers %>%
+    # uses social_care_id and sending_location to flag continuous stays.
+    # Will flag cases even if in another LA
     dplyr::group_by(.data[["social_care_id"]], .data[["sending_location"]]) %>%
+    # Set up previous_discharge_date
+    # The lag function will set the first row to NA.
+    dplyr::mutate(
+      # We want to flag the first episode per sc id and sending_location with row_number
+      row_number = dplyr::row_number(),
+      # create variable for previous discharge date + 1 day
+      previous_discharge_date_sc = dplyr::lag(.data[["ch_discharge_date"]]) +
+        lubridate::days(1L),
+      # if the first row is NA, set this to the ch_discharge_date
+      previous_discharge_date_sc = dplyr::if_else(row_number == 1, .data[["ch_discharge_date"]],
+        .data[["previous_discharge_date_sc"]]
+      )
+    ) %>%
+    # flag continuous stays and create marker
+    # calculate number of days between start_date and end_date on the previous episode
     dplyr::mutate(
-      continuous_stay_sc = tidyr::replace_na(
-        .data[["ch_admission_date"]] <= dplyr::lag(
-          .data[["ch_discharge_date"]]
-        ) + lubridate::days(1L),
-        TRUE
+      days_to_next_rec = floor(
+        lubridate::time_length(lubridate::interval(
+          .data[["previous_discharge_date_sc"]],
+          .data[["ch_admission_date"]]
+        ), "days")
       ),
-      ch_sc_id_cis = cumsum(.data[["continuous_stay_sc"]])
+      # if there is more than 1 day between (or the last ep for the individual) flag as new ep (Y)
+      # if there is < 1 day (i.e. a pause of up to 1 day or stays overlap flag as same ep (N))
+      new_episode = dplyr::if_else(is.na(days_to_next_rec) | days_to_next_rec > 1, "Y", "N")
     ) %>%
-    dplyr::ungroup()
+    # create continuous marker using flag for new stay
+    dplyr::mutate(
+      ch_sc_id_cis = purrr::accumulate(new_episode[-1],
+        .init = 1,
+        ~ if (.y == "Y") {
+          .x + 1
+        } else {
+          .x
+        }
+      )
+    ) %>%
+    dplyr::ungroup() %>%
+    # remove variables no longer needed
+    dplyr::select(
+      -previous_discharge_date_chi, -previous_discharge_date_sc, -row_number,
+      -days_to_next_rec, -new_episode
+    )
+
 
-  # Do a recode on the old reason for admission
-  adm_reason_recoded <- ch_markers %>%
+  # Do a recode on the old reason for admission for respite stays.
+  adm_reason_recoded <- sc_ch_id_markers %>%
     dplyr::group_by(
       .data[["social_care_id"]],
       .data[["sending_location"]],
@@ -277,44 +340,52 @@ process_sc_all_care_home <- function(
     ) %>%
     dplyr::mutate(
       ch_ep_start = min(.data[["ch_admission_date"]]),
+      # Creates a vector for the earliest date out of the end of period and discharge date.
+      # And will then select what ever is the latest date out of those
       ch_ep_end = max(
         pmin(
-          .data[["record_date"]],
+          .data[["period_end_date"]],
           .data[["ch_discharge_date"]],
           na.rm = TRUE
         )
       )
     ) %>%
     dplyr::ungroup() %>%
+    # Flag respite stays.
     dplyr::mutate(
-      stay_los = lubridate::time_length(
-        lubridate::interval(.data[["ch_ep_start"]], .data[["ch_ep_end"]]),
-        "weeks"
-      ),
+      stay_los = lubridate::time_length(lubridate::interval(.data[["ch_ep_start"]], .data[["ch_ep_end"]]), "weeks"),
       stay_respite = .data[["stay_los"]] < 6.0,
-      type_of_admission = dplyr::if_else(
-        is.na(.data[["type_of_admission"]]),
-        dplyr::case_when(
-          .data[["reason_for_admission"]] == 1L ~ 1L,
+      type_of_admission = dplyr::if_else(is.na(.data[["type_of_admission"]]),
+        dplyr::case_when(.data[["reason_for_admission"]] == 1L ~ 1L,
           .data[["reason_for_admission"]] == 2L ~ 2L,
-          stay_respite ~ 1L,
+          stay_respite ~ 1L, # (n = 40573)
           .default = 3L
         ),
         .data[["type_of_admission"]]
       )
-    )
+    ) %>%
+    dplyr::select(-ch_ep_start, -ch_ep_end, -stay_los, -stay_respite)
+
 
   ch_data_final <- adm_reason_recoded %>%
-    create_person_id() %>%
     dplyr::rename(
       record_keydate1 = "ch_admission_date",
       record_keydate2 = "ch_discharge_date",
       ch_adm_reason = "type_of_admission",
       ch_nursing = "nursing_care_provision"
     ) %>%
+    # recode the care home provider description
+    dplyr::mutate(ch_provider_description = dplyr::case_when( # from social care syntax
+      ch_provider == 1 ~ "LOCAL AUTHORITY/HSCP/NHS BOARD",
+      ch_provider == 2 ~ "PRIVATE",
+      ch_provider == 3 ~ "OTHER LOCAL AUTHORITY",
+      ch_provider == 4 ~ "THIRD SECTOR",
+      ch_provider == 5 ~ "NHS BOARD",
+      ch_provider == 6 ~ "OTHER"
+    )) %>%
     dplyr::select(
       "chi",
-      "person_id",
+      # "person_id",
       "gender",
       "dob",
       "postcode",
@@ -327,6 +398,7 @@ process_sc_all_care_home <- function(
       "ch_chi_cis",
       "ch_sc_id_cis",
       "ch_provider",
+      "ch_provider_description",
       "ch_nursing",
       "ch_adm_reason",
       "sc_latest_submission"
diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R
index 275001c64..352c4fff3 100644
--- a/R/process_sc_all_home_care.R
+++ b/R/process_sc_all_home_care.R
@@ -37,7 +37,7 @@ process_sc_all_home_care <- function(
   # Match on demographic data ---------------------------------------
 
   matched_hc_data <- replaced_dates %>%
-    dplyr::left_join(
+    dplyr::right_join(
       sc_demog_lookup,
       by = c("sending_location", "social_care_id")
     ) %>%
@@ -194,8 +194,6 @@ process_sc_all_home_care <- function(
         TRUE ~ "HC-Unknown"
       )
     ) %>%
-    # person_id
-    create_person_id(type = "SC") %>%
     # compute lca variable from sending_location
     dplyr::mutate(
       sc_send_lca = convert_sc_sending_location_to_lca(.data$sending_location)
diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R
index 5306c0956..c5b7d43eb 100644
--- a/R/process_sc_all_sds.R
+++ b/R/process_sc_all_sds.R
@@ -128,11 +128,11 @@ process_sc_all_sds <- function(
       "SDS",
       convert_sc_sending_location_to_lca(sending_location)
     )]
-  sds_full_clean_long$person_id <- paste0(
-    sds_full_clean_long$sending_location,
-    "-",
-    sds_full_clean_long$social_care_id
-  )
+  # sds_full_clean_long$person_id <- paste0(
+  #   sds_full_clean_long$sending_location,
+  #   "-",
+  #   sds_full_clean_long$social_care_id
+  # )
 
   # Group, arrange and create flags for episodes
   sds_full_clean_long[,
@@ -176,7 +176,7 @@ process_sc_all_sds <- function(
     dob = data.table::last(dob),
     postcode = data.table::last(postcode),
     recid = data.table::last(recid),
-    person_id = data.table::last(person_id),
+    # person_id = data.table::last(person_id),
     sc_send_lca = data.table::last(sc_send_lca)
   ), by = list(sending_location, social_care_id, smrtype, episode_counter)]
   rm(sds_full_clean_long)
diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R
index ccf8e495c..c45992938 100644
--- a/R/process_tests_episode_file.R
+++ b/R/process_tests_episode_file.R
@@ -31,7 +31,9 @@ process_tests_episode_file <- function(data, year) {
     recid = TRUE
   ) %>%
     dplyr::arrange(.data[["recid"]]) %>%
-    write_tests_xlsx(sheet_name = "ep_file", year, workbook_name = "ep_file")
+    write_tests_xlsx(sheet_name = stringr::str_glue({
+      "ep_file_{year}"
+    }), workbook_name = "ep_file")
 
   return(comparison)
 }
diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R
index 9643a4f3f..900ce7f03 100644
--- a/R/process_tests_individual_file.R
+++ b/R/process_tests_individual_file.R
@@ -34,7 +34,9 @@ process_tests_individual_file <- function(data, year) {
     old_data = produce_individual_file_tests(old_data),
     new_data = produce_individual_file_tests(data)
   ) %>%
-    write_tests_xlsx(sheet_name = "indiv_file", year, workbook_name = "indiv_file")
+    write_tests_xlsx(sheet_name = stringr::str_glue({
+      "indiv_file_{year}"
+    }), workbook_name = "indiv_file")
 
   return(comparison)
 }
diff --git a/R/process_tests_sc_all_at_episodes.R b/R/process_tests_sc_all_at_episodes.R
index c23a4f6ed..8b5580334 100644
--- a/R/process_tests_sc_all_at_episodes.R
+++ b/R/process_tests_sc_all_at_episodes.R
@@ -10,9 +10,6 @@
 #'
 #' @export
 process_tests_sc_all_at_episodes <- function(data) {
-  data <- data %>%
-    slfhelper::get_chi()
-
   comparison <- produce_test_comparison(
     old_data = produce_sc_all_episodes_tests(
       read_file(get_sc_at_episodes_path(update = previous_update()))
diff --git a/R/process_tests_sc_all_ch_episodes.R b/R/process_tests_sc_all_ch_episodes.R
index d42eca2c7..7e9655c06 100644
--- a/R/process_tests_sc_all_ch_episodes.R
+++ b/R/process_tests_sc_all_ch_episodes.R
@@ -10,9 +10,6 @@
 #'
 #' @export
 process_tests_sc_all_ch_episodes <- function(data) {
-  data <- data %>%
-    slfhelper::get_chi()
-
   comparison <- produce_test_comparison(
     old_data = produce_sc_all_episodes_tests(
       read_file(get_sc_ch_episodes_path(update = previous_update()))
diff --git a/R/process_tests_sc_all_hc_episodes.R b/R/process_tests_sc_all_hc_episodes.R
index d037e7908..7194790c0 100644
--- a/R/process_tests_sc_all_hc_episodes.R
+++ b/R/process_tests_sc_all_hc_episodes.R
@@ -10,9 +10,6 @@
 #'
 #' @export
 process_tests_sc_all_hc_episodes <- function(data) {
-  data <- data %>%
-    slfhelper::get_chi()
-
   comparison <- produce_test_comparison(
     old_data = produce_sc_all_episodes_tests(
       read_file(get_sc_hc_episodes_path(update = previous_update()))
diff --git a/R/process_tests_sc_all_sds_episodes.R b/R/process_tests_sc_all_sds_episodes.R
index 91c32d450..cf87a671c 100644
--- a/R/process_tests_sc_all_sds_episodes.R
+++ b/R/process_tests_sc_all_sds_episodes.R
@@ -10,9 +10,6 @@
 #'
 #' @export
 process_tests_sc_all_sds_episodes <- function(data) {
-  data <- data %>%
-    slfhelper::get_chi()
-
   comparison <- produce_test_comparison(
     old_data = produce_sc_all_episodes_tests(
       read_file(get_sc_sds_episodes_path(update = previous_update()))
diff --git a/R/read_extract_acute.R b/R/read_extract_acute.R
index 9f649a560..084363033 100644
--- a/R/read_extract_acute.R
+++ b/R/read_extract_acute.R
@@ -149,6 +149,7 @@ read_extract_acute <- function(year, file_path = get_boxi_extract_path(year = ye
       ipdc = "Inpatient Day Case Identifier Code",
       cij_ipdc = "CIJ Inpatient Day Case Identifier Code (01)",
       lineno = "Line Number (01)",
+      case_reference_number = "Case Reference Number [C]",
       GLS_record = "GLS Record"
     ) %>%
     # replace NA in cost_total_net by 0
diff --git a/R/read_file.R b/R/read_file.R
index be0a6fc65..3d174606c 100644
--- a/R/read_file.R
+++ b/R/read_file.R
@@ -3,8 +3,6 @@
 #' @description Read a file, the function chosen to read the file is dependant
 #' on the file path.
 #'  * `.rds` uses [readr::read_rds()].
-#'  * `.fst` uses [fst::read_fst()].
-#'  * `.sav` and `.zsav` use [haven::read_spss()].
 #'  * `.csv` and `.gz` use [readr::read_csv()]. Note that this assumes any file
 #'  ending with `.gz` is a zipped CSV which isn't necessarily true!
 #'  * `.parquet` uses [arrow::read_parquet()].
@@ -19,9 +17,6 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) {
   valid_extensions <- c(
     "rds",
     "rds.gz",
-    "fst",
-    "sav",
-    "zsav",
     "csv",
     "csv.gz",
     "parquet"
@@ -29,7 +24,7 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) {
 
   # Return an empty tibble if trying to read the dummy path
   if (path == get_dummy_boxi_extract_path()) {
-    return(tibble::tibble())
+    return(tibble::tibble(anon_chi = NA_character_))
   }
 
   ext <- fs::path_ext(path)
@@ -60,9 +55,6 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) {
   data <- switch(ext,
     "rds" = readr::read_rds(file = path),
     "rds.gz" = readr::read_rds(file = path),
-    "fst" = tibble::as_tibble(fst::read_fst(path = path)),
-    "sav" = haven::read_spss(file = path, ...),
-    "zsav" = haven::read_spss(file = path, ...),
     "csv" = readr::read_csv(file = path, ..., show_col_types = FALSE),
     "csv.gz" = readr::read_csv(file = path, ..., show_col_types = FALSE),
     "parquet" = arrow::read_parquet(
diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R
index d0c72c859..6128a1be5 100644
--- a/R/read_lookup_sc_client.R
+++ b/R/read_lookup_sc_client.R
@@ -44,6 +44,7 @@ read_lookup_sc_client <- function(fyyear,
       "day_care"
     ) %>%
     dplyr::filter(.data$financial_year == year) %>%
+    dplyr::collect() %>%
     dplyr::mutate(
       dplyr::across(
         c(
@@ -77,7 +78,9 @@ read_lookup_sc_client <- function(fyyear,
       .data$social_care_id,
       .data$financial_year,
       .data$financial_quarter
-    )
+    ) %>%
+    dplyr::rename("mental_health_disorders" = "mental_health_problems")
+
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "client", year = fyyear))) {
     client_data %>%
diff --git a/R/read_sc_all_care_home.R b/R/read_sc_all_care_home.R
index 89ef7951b..b11879487 100644
--- a/R/read_sc_all_care_home.R
+++ b/R/read_sc_all_care_home.R
@@ -42,7 +42,7 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn =
   }
 
   ch_data <- ch_data %>%
-    # Correct FY 2017
+    # Correct FY 2017 as data collection only started in 2017 Q4
     dplyr::mutate(period = dplyr::if_else(
       .data$period == "2017",
       "2017Q4",
diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R
index ec3cc5705..a1b53f971 100644
--- a/R/write_tests_xlsx.R
+++ b/R/write_tests_xlsx.R
@@ -24,17 +24,42 @@ write_tests_xlsx <- function(comparison_data,
                                "cross_year"
                              )) {
   # Set up the workbook ----
-  tests_workbook_name <- dplyr::case_when(
-    is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"),
-    !is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_{year}_ep_file_tests"),
-    is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"),
-    !is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_{year}_indiv_file_tests"),
-    is.null(year) & workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"),
-    is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"),
-    is.null(year) & workbook_name == "cross_year" ~ stringr::str_glue(latest_update(), "_cross_year_tests"),
-    !is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"),
-    !is.null(year) & workbook_name == "extract" ~ stringr::str_glue(latest_update(), "_{year}_extract_tests")
-  )
+  if (workbook_name == "ep_file") {
+    if (is.null(year)) {
+      tests_workbook_name <-
+        stringr::str_glue(latest_update(), "_ep_file_tests")
+    }
+  }
+  if (workbook_name == "indiv_file") {
+    if (is.null(year)) {
+      tests_workbook_name <-
+        stringr::str_glue(latest_update(), "_indiv_file_tests")
+    }
+  }
+  if (workbook_name == "lookup") {
+    if (is.null(year)) {
+      tests_workbook_name <-
+        stringr::str_glue(latest_update(), "_lookups_tests")
+    }
+  }
+  if (workbook_name == "sandpit") {
+    tests_workbook_name <-
+      stringr::str_glue(latest_update(), "_sandpit_extract_tests")
+  }
+  if (workbook_name == "cross_year") {
+    if (is.null(year)) {
+      tests_workbook_name <-
+        stringr::str_glue(latest_update(), "_cross_year_tests")
+    }
+  }
+  if (workbook_name == "extract") {
+    if (is.null(year)) {
+    } else {
+      tests_workbook_name <-
+        stringr::str_glue(latest_update(), "_{year}_extract_tests")
+    }
+  }
+
 
   tests_workbook_path <- fs::path(
     get_slf_dir(),
@@ -96,6 +121,14 @@ write_tests_xlsx <- function(comparison_data,
 
   date_today <- stringr::str_to_lower(date_today)
 
+  sheet_name_dated <- ifelse(
+    is.null(year),
+    stringr::str_glue("{sheet_name}_{date_today}"),
+    stringr::str_glue("{year}_{sheet_name}_{date_today}")
+  )
+
+  date_today <- stringr::str_to_lower(date_today)
+
   if (is.null(year)) {
     sheet_name_dated <- stringr::str_glue("{sheet_name}_{date_today}")
   } else {
diff --git a/Run_SLF_Files_manually/run_episode_file_1415.R b/Run_SLF_Files_manually/run_episode_file_1415.R
new file mode 100644
index 000000000..b5a2eab38
--- /dev/null
+++ b/Run_SLF_Files_manually/run_episode_file_1415.R
@@ -0,0 +1,79 @@
+library(targets)
+library(createslf)
+
+year <- "1415"
+
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+
+processed_data_list <- list(
+  acute = targets::tar_read(
+    "source_acute_extract_1415",
+    store = targets_store
+  ),
+  ae = targets::tar_read(
+    "source_ae_extract_1415",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_1415",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_1415",
+    store = targets_store
+  ),
+  dn = targets::tar_read(
+    "source_dn_extract_1415",
+    store = targets_store
+  ),
+  deaths = targets::tar_read(
+    "source_nrs_deaths_extract_1415",
+    store = targets_store
+  ),
+  homelessness = targets::tar_read(
+    "source_homelessness_extract_1415",
+    store = targets_store
+  ),
+  maternity = targets::tar_read(
+    "source_maternity_extract_1415",
+    store = targets_store
+  ),
+  mental_health = targets::tar_read(
+    "source_mental_health_extract_1415",
+    store = targets_store
+  ),
+  outpatients = targets::tar_read(
+    "source_outpatients_extract_1415",
+    store = targets_store
+  ),
+  gp_ooh = targets::tar_read(
+    "source_ooh_extract_1415",
+    store = targets_store
+  ),
+  prescribing = targets::tar_read(
+    "source_prescribing_extract_1415",
+    store = targets_store
+  ),
+  care_home = targets::tar_read(
+    "source_sc_care_home_1415",
+    store = targets_store
+  ),
+  home_care = targets::tar_read(
+    "source_sc_home_care_1415",
+    store = targets_store
+  ),
+  at = targets::tar_read(
+    "source_sc_alarms_tele_1415",
+    store = targets_store
+  ),
+  sds = targets::tar_read(
+    "source_sc_sds_1415",
+    store = targets_store
+  )
+)
+
+# Run episode file
+create_episode_file(processed_data_list, year = year) %>%
+  process_tests_episode_file(year = year)
+
+## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_1516.R b/Run_SLF_Files_manually/run_episode_file_1516.R
new file mode 100644
index 000000000..59c7ddc63
--- /dev/null
+++ b/Run_SLF_Files_manually/run_episode_file_1516.R
@@ -0,0 +1,79 @@
+library(targets)
+library(createslf)
+
+year <- "1516"
+
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+
+processed_data_list <- list(
+  acute = targets::tar_read(
+    "source_acute_extract_1516",
+    store = targets_store
+  ),
+  ae = targets::tar_read(
+    "source_ae_extract_1516",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_1516",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_1516",
+    store = targets_store
+  ),
+  dn = targets::tar_read(
+    "source_dn_extract_1516",
+    store = targets_store
+  ),
+  deaths = targets::tar_read(
+    "source_nrs_deaths_extract_1516",
+    store = targets_store
+  ),
+  homelessness = targets::tar_read(
+    "source_homelessness_extract_1516",
+    store = targets_store
+  ),
+  maternity = targets::tar_read(
+    "source_maternity_extract_1516",
+    store = targets_store
+  ),
+  mental_health = targets::tar_read(
+    "source_mental_health_extract_1516",
+    store = targets_store
+  ),
+  outpatients = targets::tar_read(
+    "source_outpatients_extract_1516",
+    store = targets_store
+  ),
+  gp_ooh = targets::tar_read(
+    "source_ooh_extract_1516",
+    store = targets_store
+  ),
+  prescribing = targets::tar_read(
+    "source_prescribing_extract_1516",
+    store = targets_store
+  ),
+  care_home = targets::tar_read(
+    "source_sc_care_home_1516",
+    store = targets_store
+  ),
+  home_care = targets::tar_read(
+    "source_sc_home_care_1516",
+    store = targets_store
+  ),
+  at = targets::tar_read(
+    "source_sc_alarms_tele_1516",
+    store = targets_store
+  ),
+  sds = targets::tar_read(
+    "source_sc_sds_1516",
+    store = targets_store
+  )
+)
+
+# Run episode file
+create_episode_file(processed_data_list, year = year) ## %>%
+# process_tests_episode_file(year = year)
+
+## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_1617.R b/Run_SLF_Files_manually/run_episode_file_1617.R
new file mode 100644
index 000000000..b10372be6
--- /dev/null
+++ b/Run_SLF_Files_manually/run_episode_file_1617.R
@@ -0,0 +1,79 @@
+library(targets)
+library(createslf)
+
+year <- "1617"
+
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+
+processed_data_list <- list(
+  acute = targets::tar_read(
+    "source_acute_extract_1617",
+    store = targets_store
+  ),
+  ae = targets::tar_read(
+    "source_ae_extract_1617",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_1617",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_1617",
+    store = targets_store
+  ),
+  dn = targets::tar_read(
+    "source_dn_extract_1617",
+    store = targets_store
+  ),
+  deaths = targets::tar_read(
+    "source_nrs_deaths_extract_1617",
+    store = targets_store
+  ),
+  homelessness = targets::tar_read(
+    "source_homelessness_extract_1617",
+    store = targets_store
+  ),
+  maternity = targets::tar_read(
+    "source_maternity_extract_1617",
+    store = targets_store
+  ),
+  mental_health = targets::tar_read(
+    "source_mental_health_extract_1617",
+    store = targets_store
+  ),
+  outpatients = targets::tar_read(
+    "source_outpatients_extract_1617",
+    store = targets_store
+  ),
+  gp_ooh = targets::tar_read(
+    "source_ooh_extract_1617",
+    store = targets_store
+  ),
+  prescribing = targets::tar_read(
+    "source_prescribing_extract_1617",
+    store = targets_store
+  ),
+  care_home = targets::tar_read(
+    "source_sc_care_home_1617",
+    store = targets_store
+  ),
+  home_care = targets::tar_read(
+    "source_sc_home_care_1617",
+    store = targets_store
+  ),
+  at = targets::tar_read(
+    "source_sc_alarms_tele_1617",
+    store = targets_store
+  ),
+  sds = targets::tar_read(
+    "source_sc_sds_1617",
+    store = targets_store
+  )
+)
+
+# Run episode file
+create_episode_file(processed_data_list, year = year) ## %>%
+# process_tests_episode_file(year = year)
+
+## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R
index ab75b94d7..b405b5b6e 100644
--- a/Run_SLF_Files_manually/run_episode_file_1718.R
+++ b/Run_SLF_Files_manually/run_episode_file_1718.R
@@ -3,10 +3,73 @@ library(createslf)
 
 year <- "1718"
 
-processed_data_list <- targets::tar_read("processed_data_list_1718",
-  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+
+processed_data_list <- list(
+  acute = targets::tar_read(
+    "source_acute_extract_1718",
+    store = targets_store
+  ),
+  ae = targets::tar_read(
+    "source_ae_extract_1718",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_1718",
+    store = targets_store
+  ),
+  dn = targets::tar_read(
+    "source_dn_extract_1718",
+    store = targets_store
+  ),
+  deaths = targets::tar_read(
+    "source_nrs_deaths_extract_1718",
+    store = targets_store
+  ),
+  homelessness = targets::tar_read(
+    "source_homelessness_extract_1718",
+    store = targets_store
+  ),
+  maternity = targets::tar_read(
+    "source_maternity_extract_1718",
+    store = targets_store
+  ),
+  mental_health = targets::tar_read(
+    "source_mental_health_extract_1718",
+    store = targets_store
+  ),
+  outpatients = targets::tar_read(
+    "source_outpatients_extract_1718",
+    store = targets_store
+  ),
+  gp_ooh = targets::tar_read(
+    "source_ooh_extract_1718",
+    store = targets_store
+  ),
+  prescribing = targets::tar_read(
+    "source_prescribing_extract_1718",
+    store = targets_store
+  ),
+  care_home = targets::tar_read(
+    "source_sc_care_home_1718",
+    store = targets_store
+  ),
+  home_care = targets::tar_read(
+    "source_sc_home_care_1718",
+    store = targets_store
+  ),
+  at = targets::tar_read(
+    "source_sc_alarms_tele_1718",
+    store = targets_store
+  ),
+  sds = targets::tar_read(
+    "source_sc_sds_1718",
+    store = targets_store
+  )
 )
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
   process_tests_episode_file(year = year)
+
+## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R
index cd5a7435f..fb3227512 100644
--- a/Run_SLF_Files_manually/run_episode_file_1819.R
+++ b/Run_SLF_Files_manually/run_episode_file_1819.R
@@ -3,10 +3,73 @@ library(createslf)
 
 year <- "1819"
 
-processed_data_list <- targets::tar_read("processed_data_list_1819",
-  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+
+processed_data_list <- list(
+  acute = targets::tar_read(
+    "source_acute_extract_1819",
+    store = targets_store
+  ),
+  ae = targets::tar_read(
+    "source_ae_extract_1819",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_1819",
+    store = targets_store
+  ),
+  dn = targets::tar_read(
+    "source_dn_extract_1819",
+    store = targets_store
+  ),
+  deaths = targets::tar_read(
+    "source_nrs_deaths_extract_1819",
+    store = targets_store
+  ),
+  homelessness = targets::tar_read(
+    "source_homelessness_extract_1819",
+    store = targets_store
+  ),
+  maternity = targets::tar_read(
+    "source_maternity_extract_1819",
+    store = targets_store
+  ),
+  mental_health = targets::tar_read(
+    "source_mental_health_extract_1819",
+    store = targets_store
+  ),
+  outpatients = targets::tar_read(
+    "source_outpatients_extract_1819",
+    store = targets_store
+  ),
+  gp_ooh = targets::tar_read(
+    "source_ooh_extract_1819",
+    store = targets_store
+  ),
+  prescribing = targets::tar_read(
+    "source_prescribing_extract_1819",
+    store = targets_store
+  ),
+  care_home = targets::tar_read(
+    "source_sc_care_home_1819",
+    store = targets_store
+  ),
+  home_care = targets::tar_read(
+    "source_sc_home_care_1819",
+    store = targets_store
+  ),
+  at = targets::tar_read(
+    "source_sc_alarms_tele_1819",
+    store = targets_store
+  ),
+  sds = targets::tar_read(
+    "source_sc_sds_1819",
+    store = targets_store
+  )
 )
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
   process_tests_episode_file(year = year)
+
+## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R
index a9dc591b1..e2e21bdac 100644
--- a/Run_SLF_Files_manually/run_episode_file_1920.R
+++ b/Run_SLF_Files_manually/run_episode_file_1920.R
@@ -3,10 +3,73 @@ library(createslf)
 
 year <- "1920"
 
-processed_data_list <- targets::tar_read("processed_data_list_1920",
-  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+
+processed_data_list <- list(
+  acute = targets::tar_read(
+    "source_acute_extract_1920",
+    store = targets_store
+  ),
+  ae = targets::tar_read(
+    "source_ae_extract_1920",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_1920",
+    store = targets_store
+  ),
+  dn = targets::tar_read(
+    "source_dn_extract_1920",
+    store = targets_store
+  ),
+  deaths = targets::tar_read(
+    "source_nrs_deaths_extract_1920",
+    store = targets_store
+  ),
+  homelessness = targets::tar_read(
+    "source_homelessness_extract_1920",
+    store = targets_store
+  ),
+  maternity = targets::tar_read(
+    "source_maternity_extract_1920",
+    store = targets_store
+  ),
+  mental_health = targets::tar_read(
+    "source_mental_health_extract_1920",
+    store = targets_store
+  ),
+  outpatients = targets::tar_read(
+    "source_outpatients_extract_1920",
+    store = targets_store
+  ),
+  gp_ooh = targets::tar_read(
+    "source_ooh_extract_1920",
+    store = targets_store
+  ),
+  prescribing = targets::tar_read(
+    "source_prescribing_extract_1920",
+    store = targets_store
+  ),
+  care_home = targets::tar_read(
+    "source_sc_care_home_1920",
+    store = targets_store
+  ),
+  home_care = targets::tar_read(
+    "source_sc_home_care_1920",
+    store = targets_store
+  ),
+  at = targets::tar_read(
+    "source_sc_alarms_tele_1920",
+    store = targets_store
+  ),
+  sds = targets::tar_read(
+    "source_sc_sds_1920",
+    store = targets_store
+  )
 )
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
   process_tests_episode_file(year = year)
+
+## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R
index 37708ee8b..cf98e80de 100644
--- a/Run_SLF_Files_manually/run_episode_file_2021.R
+++ b/Run_SLF_Files_manually/run_episode_file_2021.R
@@ -3,10 +3,73 @@ library(createslf)
 
 year <- "2021"
 
-processed_data_list <- targets::tar_read("processed_data_list_2021",
-  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+
+processed_data_list <- list(
+  acute = targets::tar_read(
+    "source_acute_extract_2021",
+    store = targets_store
+  ),
+  ae = targets::tar_read(
+    "source_ae_extract_2021",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_2021",
+    store = targets_store
+  ),
+  dn = targets::tar_read(
+    "source_dn_extract_2021",
+    store = targets_store
+  ),
+  deaths = targets::tar_read(
+    "source_nrs_deaths_extract_2021",
+    store = targets_store
+  ),
+  homelessness = targets::tar_read(
+    "source_homelessness_extract_2021",
+    store = targets_store
+  ),
+  maternity = targets::tar_read(
+    "source_maternity_extract_2021",
+    store = targets_store
+  ),
+  mental_health = targets::tar_read(
+    "source_mental_health_extract_2021",
+    store = targets_store
+  ),
+  outpatients = targets::tar_read(
+    "source_outpatients_extract_2021",
+    store = targets_store
+  ),
+  gp_ooh = targets::tar_read(
+    "source_ooh_extract_2021",
+    store = targets_store
+  ),
+  prescribing = targets::tar_read(
+    "source_prescribing_extract_2021",
+    store = targets_store
+  ),
+  care_home = targets::tar_read(
+    "source_sc_care_home_2021",
+    store = targets_store
+  ),
+  home_care = targets::tar_read(
+    "source_sc_home_care_2021",
+    store = targets_store
+  ),
+  at = targets::tar_read(
+    "source_sc_alarms_tele_2021",
+    store = targets_store
+  ),
+  sds = targets::tar_read(
+    "source_sc_sds_2021",
+    store = targets_store
+  )
 )
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
   process_tests_episode_file(year = year)
+
+## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R
index 47400e2d1..3bcbf2466 100644
--- a/Run_SLF_Files_manually/run_episode_file_2122.R
+++ b/Run_SLF_Files_manually/run_episode_file_2122.R
@@ -3,10 +3,73 @@ library(createslf)
 
 year <- "2122"
 
-processed_data_list <- targets::tar_read("processed_data_list_2122",
-  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+
+processed_data_list <- list(
+  acute = targets::tar_read(
+    "source_acute_extract_2122",
+    store = targets_store
+  ),
+  ae = targets::tar_read(
+    "source_ae_extract_2122",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_2122",
+    store = targets_store
+  ),
+  dn = targets::tar_read(
+    "source_dn_extract_2122",
+    store = targets_store
+  ),
+  deaths = targets::tar_read(
+    "source_nrs_deaths_extract_2122",
+    store = targets_store
+  ),
+  homelessness = targets::tar_read(
+    "source_homelessness_extract_2122",
+    store = targets_store
+  ),
+  maternity = targets::tar_read(
+    "source_maternity_extract_2122",
+    store = targets_store
+  ),
+  mental_health = targets::tar_read(
+    "source_mental_health_extract_2122",
+    store = targets_store
+  ),
+  outpatients = targets::tar_read(
+    "source_outpatients_extract_2122",
+    store = targets_store
+  ),
+  gp_ooh = targets::tar_read(
+    "source_ooh_extract_2122",
+    store = targets_store
+  ),
+  prescribing = targets::tar_read(
+    "source_prescribing_extract_2122",
+    store = targets_store
+  ),
+  care_home = targets::tar_read(
+    "source_sc_care_home_2122",
+    store = targets_store
+  ),
+  home_care = targets::tar_read(
+    "source_sc_home_care_2122",
+    store = targets_store
+  ),
+  at = targets::tar_read(
+    "source_sc_alarms_tele_2122",
+    store = targets_store
+  ),
+  sds = targets::tar_read(
+    "source_sc_sds_2122",
+    store = targets_store
+  )
 )
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
   process_tests_episode_file(year = year)
+
+## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R
index e64a57f32..af0447eed 100644
--- a/Run_SLF_Files_manually/run_episode_file_2223.R
+++ b/Run_SLF_Files_manually/run_episode_file_2223.R
@@ -3,10 +3,73 @@ library(createslf)
 
 year <- "2223"
 
-processed_data_list <- targets::tar_read("processed_data_list_2223",
-  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+
+processed_data_list <- list(
+  acute = targets::tar_read(
+    "source_acute_extract_2223",
+    store = targets_store
+  ),
+  ae = targets::tar_read(
+    "source_ae_extract_2223",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_2223",
+    store = targets_store
+  ),
+  dn = targets::tar_read(
+    "source_dn_extract_2223",
+    store = targets_store
+  ),
+  deaths = targets::tar_read(
+    "source_nrs_deaths_extract_2223",
+    store = targets_store
+  ),
+  homelessness = targets::tar_read(
+    "source_homelessness_extract_2223",
+    store = targets_store
+  ),
+  maternity = targets::tar_read(
+    "source_maternity_extract_2223",
+    store = targets_store
+  ),
+  mental_health = targets::tar_read(
+    "source_mental_health_extract_2223",
+    store = targets_store
+  ),
+  outpatients = targets::tar_read(
+    "source_outpatients_extract_2223",
+    store = targets_store
+  ),
+  gp_ooh = targets::tar_read(
+    "source_ooh_extract_2223",
+    store = targets_store
+  ),
+  prescribing = targets::tar_read(
+    "source_prescribing_extract_2223",
+    store = targets_store
+  ),
+  care_home = targets::tar_read(
+    "source_sc_care_home_2223",
+    store = targets_store
+  ),
+  home_care = targets::tar_read(
+    "source_sc_home_care_2223",
+    store = targets_store
+  ),
+  at = targets::tar_read(
+    "source_sc_alarms_tele_2223",
+    store = targets_store
+  ),
+  sds = targets::tar_read(
+    "source_sc_sds_2223",
+    store = targets_store
+  )
 )
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
   process_tests_episode_file(year = year)
+
+## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R
index 4a7f0ad29..bdf16e0f8 100644
--- a/Run_SLF_Files_manually/run_episode_file_2324.R
+++ b/Run_SLF_Files_manually/run_episode_file_2324.R
@@ -3,10 +3,73 @@ library(createslf)
 
 year <- "2324"
 
-processed_data_list <- targets::tar_read("processed_data_list_2324",
-  store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+
+processed_data_list <- list(
+  acute = targets::tar_read(
+    "source_acute_extract_2324",
+    store = targets_store
+  ),
+  ae = targets::tar_read(
+    "source_ae_extract_2324",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_2324",
+    store = targets_store
+  ),
+  dn = targets::tar_read(
+    "source_dn_extract_2324",
+    store = targets_store
+  ),
+  deaths = targets::tar_read(
+    "source_nrs_deaths_extract_2324",
+    store = targets_store
+  ),
+  homelessness = targets::tar_read(
+    "source_homelessness_extract_2324",
+    store = targets_store
+  ),
+  maternity = targets::tar_read(
+    "source_maternity_extract_2324",
+    store = targets_store
+  ),
+  mental_health = targets::tar_read(
+    "source_mental_health_extract_2324",
+    store = targets_store
+  ),
+  outpatients = targets::tar_read(
+    "source_outpatients_extract_2324",
+    store = targets_store
+  ),
+  gp_ooh = targets::tar_read(
+    "source_ooh_extract_2324",
+    store = targets_store
+  ),
+  prescribing = targets::tar_read(
+    "source_prescribing_extract_2324",
+    store = targets_store
+  ),
+  care_home = targets::tar_read(
+    "source_sc_care_home_2324",
+    store = targets_store
+  ),
+  home_care = targets::tar_read(
+    "source_sc_home_care_2324",
+    store = targets_store
+  ),
+  at = targets::tar_read(
+    "source_sc_alarms_tele_2324",
+    store = targets_store
+  ),
+  sds = targets::tar_read(
+    "source_sc_sds_2324",
+    store = targets_store
+  )
 )
 
 # Run episode file
 create_episode_file(processed_data_list, year = year) %>%
   process_tests_episode_file(year = year)
+
+## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_2425.R b/Run_SLF_Files_manually/run_episode_file_2425.R
new file mode 100644
index 000000000..699c197b3
--- /dev/null
+++ b/Run_SLF_Files_manually/run_episode_file_2425.R
@@ -0,0 +1,75 @@
+library(targets)
+library(createslf)
+
+year <- "2425"
+
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")
+
+processed_data_list <- list(
+  acute = targets::tar_read(
+    "source_acute_extract_2425",
+    store = targets_store
+  ),
+  ae = targets::tar_read(
+    "source_ae_extract_2425",
+    store = targets_store
+  ),
+  cmh = targets::tar_read(
+    "source_cmh_extract_2425",
+    store = targets_store
+  ),
+  dn = targets::tar_read(
+    "source_dn_extract_2425",
+    store = targets_store
+  ),
+  deaths = targets::tar_read(
+    "source_nrs_deaths_extract_2425",
+    store = targets_store
+  ),
+  homelessness = targets::tar_read(
+    "source_homelessness_extract_2425",
+    store = targets_store
+  ),
+  maternity = targets::tar_read(
+    "source_maternity_extract_2425",
+    store = targets_store
+  ),
+  mental_health = targets::tar_read(
+    "source_mental_health_extract_2425",
+    store = targets_store
+  ),
+  outpatients = targets::tar_read(
+    "source_outpatients_extract_2425",
+    store = targets_store
+  ),
+  gp_ooh = targets::tar_read(
+    "source_ooh_extract_2425",
+    store = targets_store
+  ),
+  prescribing = targets::tar_read(
+    "source_prescribing_extract_2425",
+    store = targets_store
+  ),
+  care_home = targets::tar_read(
+    "source_sc_care_home_2425",
+    store = targets_store
+  ),
+  home_care = targets::tar_read(
+    "source_sc_home_care_2425",
+    store = targets_store
+  ),
+  at = targets::tar_read(
+    "source_sc_alarms_tele_2425",
+    store = targets_store
+  ),
+  sds = targets::tar_read(
+    "source_sc_sds_2425",
+    store = targets_store
+  )
+)
+
+# Run episode file
+create_episode_file(processed_data_list, year = year) %>%
+  process_tests_episode_file(year = year)
+
+## End of Script ##
diff --git a/Run_SLF_Files_manually/run_individual_file_1415.R b/Run_SLF_Files_manually/run_individual_file_1415.R
new file mode 100644
index 000000000..70aa2bfca
--- /dev/null
+++ b/Run_SLF_Files_manually/run_individual_file_1415.R
@@ -0,0 +1,9 @@
+library(createslf)
+
+year <- "1415"
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_1516.R b/Run_SLF_Files_manually/run_individual_file_1516.R
new file mode 100644
index 000000000..8e8dae906
--- /dev/null
+++ b/Run_SLF_Files_manually/run_individual_file_1516.R
@@ -0,0 +1,9 @@
+library(createslf)
+
+year <- "1516"
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_1617.R b/Run_SLF_Files_manually/run_individual_file_1617.R
new file mode 100644
index 000000000..255e4e674
--- /dev/null
+++ b/Run_SLF_Files_manually/run_individual_file_1617.R
@@ -0,0 +1,9 @@
+library(createslf)
+
+year <- "1617"
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_2425.R b/Run_SLF_Files_manually/run_individual_file_2425.R
new file mode 100644
index 000000000..843eb505c
--- /dev/null
+++ b/Run_SLF_Files_manually/run_individual_file_2425.R
@@ -0,0 +1,9 @@
+library(createslf)
+
+year <- "2425"
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_all_targets.R b/Run_SLF_Files_targets/run_all_targets.R
new file mode 100644
index 000000000..fb5b94fab
--- /dev/null
+++ b/Run_SLF_Files_targets/run_all_targets.R
@@ -0,0 +1,5 @@
+library(targets)
+
+# use tar_make_future() to run targets for all years
+# This will run everything needed for creating the episode file.
+tar_make_future()
diff --git a/Run_SLF_Files_targets/run_targets_1415.R b/Run_SLF_Files_targets/run_targets_1415.R
new file mode 100644
index 000000000..a37068c0d
--- /dev/null
+++ b/Run_SLF_Files_targets/run_targets_1415.R
@@ -0,0 +1,9 @@
+library(targets)
+
+year <- "1415"
+
+# use targets for the process until testing episode files
+tar_make_future(
+  # it does not recognise `contains(year)`
+  names = (targets::contains("1415"))
+)
diff --git a/Run_SLF_Files_targets/run_targets_1516.R b/Run_SLF_Files_targets/run_targets_1516.R
new file mode 100644
index 000000000..7930d5bb5
--- /dev/null
+++ b/Run_SLF_Files_targets/run_targets_1516.R
@@ -0,0 +1,9 @@
+library(targets)
+
+year <- "1516"
+
+# use targets for the process until testing episode files
+tar_make_future(
+  # it does not recognise `contains(year)`
+  names = (targets::contains("1516"))
+)
diff --git a/Run_SLF_Files_targets/run_targets_1617.R b/Run_SLF_Files_targets/run_targets_1617.R
new file mode 100644
index 000000000..16361f71e
--- /dev/null
+++ b/Run_SLF_Files_targets/run_targets_1617.R
@@ -0,0 +1,9 @@
+library(targets)
+
+year <- "1617"
+
+# use targets for the process until testing episode files
+tar_make_future(
+  # it does not recognise `contains(year)`
+  names = (targets::contains("1617"))
+)
diff --git a/Run_SLF_Files_targets/run_targets_1718.R b/Run_SLF_Files_targets/run_targets_1718.R
index ac03edd3f..e85a89ff8 100644
--- a/Run_SLF_Files_targets/run_targets_1718.R
+++ b/Run_SLF_Files_targets/run_targets_1718.R
@@ -7,12 +7,3 @@ tar_make_future(
   # it does not recognise `contains(year)`
   names = (targets::contains("1718"))
 )
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_1819.R b/Run_SLF_Files_targets/run_targets_1819.R
index b60728359..6957054c8 100644
--- a/Run_SLF_Files_targets/run_targets_1819.R
+++ b/Run_SLF_Files_targets/run_targets_1819.R
@@ -7,12 +7,3 @@ tar_make_future(
   # it does not recognise `contains(year)`
   names = (targets::contains("1819"))
 )
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_1920.R b/Run_SLF_Files_targets/run_targets_1920.R
index 897ee0b7a..ef6272fca 100644
--- a/Run_SLF_Files_targets/run_targets_1920.R
+++ b/Run_SLF_Files_targets/run_targets_1920.R
@@ -7,12 +7,3 @@ tar_make_future(
   # it does not recognise `contains(year)`
   names = (targets::contains("1920"))
 )
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_2021.R b/Run_SLF_Files_targets/run_targets_2021.R
index 53333c014..237078e05 100644
--- a/Run_SLF_Files_targets/run_targets_2021.R
+++ b/Run_SLF_Files_targets/run_targets_2021.R
@@ -7,12 +7,3 @@ tar_make_future(
   # it does not recognise `contains(year)`
   names = (targets::contains("2021"))
 )
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_2122.R b/Run_SLF_Files_targets/run_targets_2122.R
index 457fe33e7..dc64b625f 100644
--- a/Run_SLF_Files_targets/run_targets_2122.R
+++ b/Run_SLF_Files_targets/run_targets_2122.R
@@ -7,12 +7,3 @@ tar_make_future(
   # it does not recognise `contains(year)`
   names = (targets::contains("2122"))
 )
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_2223.R b/Run_SLF_Files_targets/run_targets_2223.R
index fc851f3f7..7d8677a80 100644
--- a/Run_SLF_Files_targets/run_targets_2223.R
+++ b/Run_SLF_Files_targets/run_targets_2223.R
@@ -7,12 +7,3 @@ tar_make_future(
   # it does not recognise `contains(year)`
   names = (targets::contains("2223"))
 )
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_2324.R b/Run_SLF_Files_targets/run_targets_2324.R
index 3b4c9b240..bcebe5fb8 100644
--- a/Run_SLF_Files_targets/run_targets_2324.R
+++ b/Run_SLF_Files_targets/run_targets_2324.R
@@ -7,12 +7,3 @@ tar_make_future(
   # it does not recognise `contains(year)`
   names = (targets::contains("2324"))
 )
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_targets_2425.R b/Run_SLF_Files_targets/run_targets_2425.R
new file mode 100644
index 000000000..fe849ede8
--- /dev/null
+++ b/Run_SLF_Files_targets/run_targets_2425.R
@@ -0,0 +1,9 @@
+library(targets)
+
+year <- "2425"
+
+# use targets for the process until testing episode files
+tar_make_future(
+  # it does not recognise `contains(year)`
+  names = (targets::contains("2425"))
+)
diff --git a/_targets.R b/_targets.R
index 4ea32c179..0377e487b 100644
--- a/_targets.R
+++ b/_targets.R
@@ -19,7 +19,7 @@ tar_option_set(
   memory = "persistent" # default option
 )
 
-years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324")
+years_to_run <- createslf::years_to_run()
 
 list(
   tar_rds(write_to_disk, TRUE),
@@ -167,12 +167,19 @@ list(
       age = as.difftime(28.0, units = "days")
     )
   ),
+  tar_target(
+    refined_death_data,
+    process_refined_death(
+      it_chi_deaths = it_chi_deaths_data,
+      write_to_disk = write_to_disk
+    )
+  ),
   tar_target(
     all_care_home,
     process_sc_all_care_home(
       all_care_home_extract,
       sc_demog_lookup = sc_demog_lookup %>% slfhelper::get_chi(),
-      it_chi_deaths_data = it_chi_deaths_data %>% slfhelper::get_chi(),
+      refined_death = refined_death_data %>% slfhelper::get_chi(),
       ch_name_lookup_path = slf_ch_name_lookup_path,
       spd_path = spd_path,
       write_to_disk = write_to_disk
@@ -204,6 +211,8 @@ list(
     tests_sc_all_sds,
     process_tests_sc_all_sds_episodes(all_sds)
   ),
+
+  # Phase II
   tar_map(
     list(year = years_to_run),
     tar_rds(
@@ -251,11 +260,11 @@ list(
       get_boxi_extract_path(year, type = "mh"),
       read_extract_mental_health(year, !!.x)
     ),
-    tar_file_read(
-      nrs_deaths_data,
-      get_boxi_extract_path(year, type = "deaths"),
-      read_extract_nrs_deaths(year, !!.x)
-    ),
+    # tar_file_read(
+    #   nrs_deaths_data,
+    #   get_boxi_extract_path(year, type = "deaths"),
+    #   read_extract_nrs_deaths(year, !!.x)
+    # ),
     tar_file_read(
       outpatients_data,
       get_boxi_extract_path(year, type = "outpatient"),
@@ -281,6 +290,16 @@ list(
       get_boxi_extract_path(year = year, type = "gp_ooh-c"),
       format = "file"
     ),
+    tar_target(
+      acute_cup_path,
+      get_boxi_extract_path(year, type = "acute_cup"),
+      format = "file"
+    ),
+    tar_target(
+      gp_ooh_cup_path,
+      get_boxi_extract_path(year, type = "gp_ooh_cup"),
+      format = "file"
+    ),
     tar_qs(
       ooh_data,
       read_extract_gp_ooh(
@@ -294,6 +313,7 @@ list(
     tar_target(source_acute_extract, process_extract_acute(
       acute_data,
       year,
+      acute_cup_path,
       write_to_disk = write_to_disk
     )),
     tar_target(
@@ -392,11 +412,14 @@ list(
         year
       )
     ),
-    tar_target(source_mental_health_extract, process_extract_mental_health(
-      mental_health_data,
-      year,
-      write_to_disk = write_to_disk
-    )),
+    tar_target(
+      source_mental_health_extract,
+      process_extract_mental_health(
+        mental_health_data,
+        year,
+        write_to_disk = write_to_disk
+      )
+    ),
     tar_target(
       tests_source_mental_health_extract,
       process_tests_mental_health(
@@ -404,11 +427,20 @@ list(
         year
       )
     ),
-    tar_target(source_nrs_deaths_extract, process_extract_nrs_deaths(
-      nrs_deaths_data,
-      year,
-      write_to_disk = write_to_disk
-    )),
+    # tar_target(source_nrs_deaths_extract, process_extract_nrs_deaths(
+    #   nrs_deaths_data,
+    #   year,
+    #   write_to_disk = write_to_disk
+    # )),
+    tar_target(
+      source_nrs_deaths_extract,
+      # use this anomymous function with redundant but necessary refined_death
+      # to make sure reading year-specific nrs deaths extracts after it is produced
+      (\(year, refined_death_datas) {
+        read_file(get_source_extract_path(year, "deaths")) %>%
+          as.data.frame()
+      })(year, refined_death_data)
+    ),
     tar_target(
       tests_source_nrs_deaths_extract,
       process_tests_nrs_deaths(
@@ -419,6 +451,7 @@ list(
     tar_target(source_ooh_extract, process_extract_gp_ooh(
       year,
       ooh_data,
+      gp_ooh_cup_path,
       write_to_disk = write_to_disk
     )),
     tar_target(
@@ -464,7 +497,7 @@ list(
         year = year,
         sc_demographics = sc_demog_lookup %>%
           slfhelper::get_chi() %>%
-          dplyr::select(c("sending_location", "social_care_id", "chi")),
+          dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")),
         write_to_disk = write_to_disk
       )
     ),
@@ -537,31 +570,10 @@ list(
       slf_deaths_lookup,
       process_slf_deaths_lookup(
         year = year,
-        nrs_deaths_data = source_nrs_deaths_extract %>% slfhelper::get_chi(),
-        chi_deaths_data = it_chi_deaths_data %>% slfhelper::get_chi(),
+        refined_death = refined_death_data,
         write_to_disk = write_to_disk
       )
     ),
-    tar_qs(
-      processed_data_list,
-      list(
-        source_acute_extract,
-        source_ae_extract,
-        source_cmh_extract,
-        source_dn_extract,
-        source_homelessness_extract,
-        source_maternity_extract,
-        source_mental_health_extract,
-        source_nrs_deaths_extract,
-        source_ooh_extract,
-        source_outpatients_extract,
-        source_prescribing_extract,
-        source_sc_care_home,
-        source_sc_home_care,
-        source_sc_sds,
-        source_sc_alarms_tele
-      )
-    ),
     tar_file_read(nsu_cohort, get_nsu_path(year), read_file(!!.x)),
     tar_target(
       homelessness_lookup,
@@ -569,79 +581,107 @@ list(
         year,
         homelessness_data = source_homelessness_extract %>% slfhelper::get_chi()
       )
-    ),
-    tar_target(
-      episode_file,
-      create_episode_file(
-        processed_data_list,
-        year,
-        homelessness_lookup = homelessness_lookup,
-        dd_data = source_dd_extract %>% slfhelper::get_chi(),
-        nsu_cohort = nsu_cohort %>% slfhelper::get_chi(),
-        ltc_data = source_ltc_lookup %>% slfhelper::get_chi(),
-        slf_pc_lookup = source_pc_lookup,
-        slf_gpprac_lookup = source_gp_lookup,
-        slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(),
-        sc_client = sc_client_lookup %>% slfhelper::get_chi(),
-        write_to_disk
-      )
-    ),
-    tar_target(
-      episode_file_tests,
-      process_tests_episode_file(
-        data = episode_file,
-        year = year
-      )
-    ) # ,
-    # tar_target(
-    #   cross_year_tests,
-    #   process_tests_cross_year(year = year)
-    # ), # ,
-    # tar_target(
-    #   individual_file,
-    #   create_individual_file(
-    #     episode_file = episode_file,
-    #     year = year,
-    #     homelessness_lookup = homelessness_lookup,
-    #     write_to_disk = write_to_disk
-    #   )
-    # ),
-    # tar_target(
-    #   individual_file_tests,
-    #   process_tests_individual_file(
-    #     data = individual_file,
-    #     year = year
-    #   )
-    # ) # ,
-    # tar_target(
-    #   episode_file_dataset,
-    #   arrow::write_dataset(
-    #     dataset = episode_file,
-    #     path = fs::path(
-    #       get_year_dir(year),
-    #       stringr::str_glue("source-episode-file-{year}")
-    #     ),
-    #     format = "parquet",
-    #     # Should correspond to the available slfhelper filters
-    #     partitioning = c("recid", "hscp2018"),
-    #     compression = "zstd",
-    #     version = "latest"
-    #   )
-    # ),
-    # tar_target(
-    #   individual_file_dataset,
-    #   arrow::write_dataset(
-    #     dataset = individual_file,
-    #     path = fs::path(
-    #       get_year_dir(year),
-    #       stringr::str_glue("source-individual-file-{year}")
-    #     ),
-    #     format = "parquet",
-    #     # Should correspond to the available slfhelper filters
-    #     partitioning = c("hscp2018"),
-    #     compression = "zstd",
-    #     version = "latest"
-    #   )
-    # )
-  )
+    )
+  ) # ,
+  # tar_target(
+  #   combined_deaths_lookup,
+  #   process_combined_deaths_lookup()
+  # )
 )
+## End of Targets pipeline ##
+
+################################################################################
+## Redundant code which may still be useful for including ep/indiv files.
+# tar_qs(
+#   processed_data_list,
+#   list(
+#     source_acute_extract,
+#     source_ae_extract,
+#     source_cmh_extract,
+#     source_dn_extract,
+#     source_homelessness_extract,
+#     source_maternity_extract,
+#     source_mental_health_extract,
+#     source_nrs_deaths_extract,
+#     source_ooh_extract,
+#     source_outpatients_extract,
+#     source_prescribing_extract,
+#     source_sc_care_home,
+#     source_sc_home_care,
+#     source_sc_sds,
+#     source_sc_alarms_tele
+#   )
+# ),
+# tar_target(
+#   episode_file,
+#   create_episode_file(
+#     processed_data_list,
+#     year,
+#     homelessness_lookup = homelessness_lookup,
+#     dd_data = source_dd_extract %>% slfhelper::get_chi(),
+#     nsu_cohort = nsu_cohort %>% slfhelper::get_chi(),
+#     ltc_data = source_ltc_lookup %>% slfhelper::get_chi(),
+#     slf_pc_lookup = source_pc_lookup,
+#     slf_gpprac_lookup = source_gp_lookup,
+#     slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(),
+#     sc_client = sc_client_lookup %>% slfhelper::get_chi(),
+#     write_to_disk
+#   )
+# ),
+# tar_target(
+#   episode_file_tests,
+#   process_tests_episode_file(
+#     data = episode_file,
+#     year = year
+#   )
+# ) # ,
+# tar_target(
+#   cross_year_tests,
+#   process_tests_cross_year(year = year)
+# ), # ,
+# tar_target(
+#   individual_file,
+#   create_individual_file(
+#     episode_file = episode_file,
+#     year = year,
+#     homelessness_lookup = homelessness_lookup,
+#     write_to_disk = write_to_disk
+#   )
+# ),
+# tar_target(
+#   individual_file_tests,
+#   process_tests_individual_file(
+#     data = individual_file,
+#     year = year
+#   )
+# ) # ,
+# tar_target(
+#   episode_file_dataset,
+#   arrow::write_dataset(
+#     dataset = episode_file,
+#     path = fs::path(
+#       get_year_dir(year),
+#       stringr::str_glue("source-episode-file-{year}")
+#     ),
+#     format = "parquet",
+#     # Should correspond to the available slfhelper filters
+#     partitioning = c("recid", "hscp2018"),
+#     compression = "zstd",
+#     version = "latest"
+#   )
+# ),
+# tar_target(
+#   individual_file_dataset,
+#   arrow::write_dataset(
+#     dataset = individual_file,
+#     path = fs::path(
+#       get_year_dir(year),
+#       stringr::str_glue("source-individual-file-{year}")
+#     ),
+#     format = "parquet",
+#     # Should correspond to the available slfhelper filters
+#     partitioning = c("hscp2018"),
+#     compression = "zstd",
+#     version = "latest"
+#   )
+# )
diff --git a/copy_to_hscdiip.R b/copy_to_hscdiip.R
index 7fb969e8d..8a2dcdc58 100644
--- a/copy_to_hscdiip.R
+++ b/copy_to_hscdiip.R
@@ -1,9 +1,12 @@
+devtools::load_all()
+
 dir_folder <- "/conf/sourcedev/Source_Linkage_File_Updates"
 target_folder <- "/conf/hscdiip/01-Source-linkage-files"
 if (!dir.exists(target_folder)) {
   dir.create(target_folder, mode = "770")
 }
-folders <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324")
+
+folders <- years_to_run()
 year_n <- length(folders)
 resource_consumption <- data.frame(
   year = rep("0", year_n),
@@ -11,22 +14,27 @@ resource_consumption <- data.frame(
   file_size_MB = rep(0, year_n)
 )
 
-for (i in 1:length(folders)) {
+for (i in 1:year_n) {
   timer <- Sys.time()
   print(stringr::str_glue("{folders[i]} starts at {Sys.time()}"))
   folder_path <- file.path(dir_folder, folders[i])
-  old_path <- list.files(folder_path,
-    pattern = "^source-.*parquet",
-    full.names = TRUE
-  )
-  files_name <- basename(old_path)
-  new_path <- file.path(target_folder, files_name)
-  print(files_name)
+
+  file_names <- paste0("source-", c("episode", "individual"), "-file-", folders[i], ".parquet")
+  file_names_im <- paste0("source-", c("episode", "individual"), "-file-", folders[i], "-new.parquet")
+
+  old_path <- file.path(folder_path, file_names)
+  new_path_im <- file.path(target_folder, file_names_im)
+  new_path <- file.path(target_folder, file_names)
+
+  print(file_names)
 
   fs::file_copy(old_path,
-    new_path,
+    new_path_im,
     overwrite = TRUE
   )
+  fs::file_move(new_path_im, new_path)
+  fs::file_chmod(new_path, mode = "640")
+
   resource_consumption$time_consumption[i] <- (Sys.time() - timer)
   file_size <- sum(file.size(old_path)) / 2^20
   resource_consumption$file_size_MB[i] <- file_size
diff --git a/extract_new_nsu_cohort/filter_nsu_duplicates.R b/extract_new_nsu_cohort/filter_nsu_duplicates.R
new file mode 100644
index 000000000..bbe8265e8
--- /dev/null
+++ b/extract_new_nsu_cohort/filter_nsu_duplicates.R
@@ -0,0 +1,192 @@
+################################################################################
+# Name of file - filter_nsu_duplicates.R
+# Original Authors - James McMahon, Jennifer Thom
+# Original Date - August 2021
+# Update - June 2024
+#
+# Written/run on - RStudio Server
+# Version of R - 3.6.1
+#
+# Description - Use this script to filter NSU duplicates when taking a new
+#               extract from the CHILI team.
+#
+# Steps for requesting a new NSU extract for SLFs:
+#       1. Send an email to [phs.chi-recordlinkage@phs.scot] to request a new NSU
+#          extract after the JUNE update.
+#       2. Prepare a service use extract. Run script `get_service_use_cohort.R` to
+#          extract a list of CHI's from the most recent 'full' file.
+#       3. Once the chili team come back to us, send the service use extract to
+#          the analyst directly. Do not send the list of CHIs to the mailbox for
+#          Information Governance purposes.
+#       4. CHILI team will then process the new NSU extract based on who is not in
+#          the service use extract.
+#       5. Run the script `filter_nsu_duplicates.R` to collect the new NSU
+#          extract from the analysts SMRA space - see lines 46-47 and change
+#          username accordingly. Save the extract in:
+#          "/conf/hscdiip/SLF_Extracts/NSU"
+################################################################################
+
+library(dplyr)
+library(purrr)
+library(stringr)
+library(PostcodesioR)
+library(janitor)
+library(fs)
+library(glue)
+
+
+## Setup------------------------------------------------------------------------
+
+## Update line 41##
+# The year of new NSU extract
+year <- "2324"
+
+# Update lines 45-46 ##
+# Analysts username and schema to collect the data.
+analyst <- "ROBERM18"
+schema <- "FINAL_2"
+
+#  setup directory
+nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU")
+
+# latest geography file
+spd_path <- get_spd_path()
+
+# Set up connection to SMRA-----------------------------------------------------
+db_connection <- odbc::dbConnect(
+  odbc::odbc(),
+  dsn = "SMRA",
+  uid = Sys.getenv("USER"),
+  pwd = rstudioapi::askForPassword("password")
+)
+
+
+# Read data---------------------------------------------------------------------
+
+# Read NSU data with duplicates from analyst's SMRA space.
+nsu_data <-
+  tbl(db_connection, dbplyr::in_schema(analyst, schema)) %>%
+  collect() %>%
+  clean_names()
+
+
+# Data cleaning-----------------------------------------------------------------
+
+# Find the records with duplicates
+nsu_pc_duplicates <- nsu_data %>%
+  group_by(upi_number) %>%
+  mutate(postcode_count = n_distinct(postcode)) %>%
+  ungroup() %>%
+  filter(postcode_count > 1)
+
+# Get the latest SPD
+spd <- read_file(spd_path) %>%
+  select(pc7, date_of_introduction, date_of_deletion)
+
+# Load some regex to check if a postcode is valid
+pc_regex <-
+  "([Gg][Ii][Rr] 0[Aa]{2})|((([A-Za-z][0-9]{1,2})|(([A-Za-z][A-Ha-hJ-Yj-y][0-9]{1,2})|(([A-Za-z][0-9][A-Za-z])|([A-Za-z][A-Ha-hJ-Yj-y][0-9][A-Za-z]?))))\\s?[0-9][A-Za-z]{2})"
+
+# Main code to check postcodes in various ways
+nsu_pc_duplicates_checked <- nsu_pc_duplicates %>%
+  select(
+    upi_number,
+    start_date,
+    postcode,
+    date_address_changed,
+    gp_prac_no,
+    date_gp_acceptance
+  ) %>%
+  # First check against the regex
+  mutate(invalid_pc = str_detect(postcode, pc_regex, negate = TRUE)) %>%
+  # Now check against the SPD
+  left_join(spd, by = c("postcode" = "pc7")) %>%
+  # Now check against postcodes.io
+  left_join(
+    # Filter to only postcodes that need checking
+    group_by(., upi_number) %>%
+      # UPI has no postcode which matched the SPD
+      filter(
+        all(is.na(
+          date_of_introduction
+        ))
+      ) %>%
+      ungroup() %>%
+      # No need to check invalid postcodes
+      filter(!invalid_pc) %>%
+      # Pass the unique list of postcodes to
+      # postcodes.io
+      pull(postcode) %>%
+      unique() %>%
+      list(postcodes = .) %>%
+      # This function will fail if more than 100 pcs
+      PostcodesioR::bulk_postcode_lookup() %>%
+      # Parse the result, we only want the country
+      map_dfr(~ tibble(
+        postcode = .x$query,
+        # Create an order to make sorting nice later
+        country = ordered(.x$result$country, c("Scotland", "Wales", "England"))
+      ))
+  ) %>%
+  # Sort so that the 'best' postcode is top of the list
+  mutate(priority = case_when(
+    # If they matched SPD,
+    !is.na(date_of_introduction) & is.na(date_of_deletion) ~ 0,
+    # If the matched SPD (and had a d_o_d)
+    !is.na(date_of_introduction) ~ 1,
+    # If they matched the postcodes.io API request
+    !is.na(country) ~ 2,
+    # Invalid postcodes come last
+    invalid_pc ~ Inf,
+    TRUE ~ 99
+  )) %>%
+  arrange(
+    upi_number,
+    priority,
+    # newest introduced come first
+    desc(date_of_introduction),
+    # latest deleted will be first
+    desc(date_of_deletion),
+    # Scotland will be preferred etc.
+    country
+  ) %>%
+  # Flag each row with the assigned priority
+  group_by(upi_number) %>%
+  mutate(keep_priority = row_number()) %>%
+  ungroup()
+
+# Check
+nsu_pc_duplicates_checked %>%
+  count(priority, keep_priority)
+
+final_data <- nsu_data %>%
+  # Filter the main dataset to remove
+  # the duplicate postcodes we decided not to keep
+  anti_join(nsu_pc_duplicates_checked %>%
+    filter(keep_priority > 1)) %>%
+  # Filter any remaining duplicates (none on this test)
+  distinct(upi_number, .keep_all = TRUE) %>%
+  select(
+    chi = upi_number,
+    dob = date_of_birth,
+    postcode,
+    gpprac = gp_prac_no,
+    gender = sex
+  ) %>%
+  mutate(
+    year = year, .before = everything(),
+    dob = as.Date(dob),
+    across(c(gender, gpprac), as.integer)
+  ) %>%
+  arrange(chi) %>%
+  # Save as anon chi on disk
+  slfhelper::get_anon_chi()
+
+# Save data out to be used
+final_data %>%
+  arrow::write_parquet(path(nsu_dir, glue::glue("anon-All_CHIs_20{year}.parquet")),
+    compression = "zstd"
+  )
+
+
+## End of Script ##
diff --git a/extract_new_nsu_cohort/get_service_use_cohort.R b/extract_new_nsu_cohort/get_service_use_cohort.R
new file mode 100644
index 000000000..c29063a32
--- /dev/null
+++ b/extract_new_nsu_cohort/get_service_use_cohort.R
@@ -0,0 +1,56 @@
+################################################################################
+# Name of file - get_service_use_cohort.R
+# Original Authors - Jennifer Thom
+# Original Date - August 2021
+# Update - June 2024
+#
+# Written/run on - RStudio Server
+# Version of R - 3.6.1
+#
+# Description - Use this script to return a list of CHIs from the most recent
+#               SLF episode file (service users) in preparation for requesting
+#               a new NSU cohort for the latest 'full year'
+#
+# Steps for requesting a new NSU extract for SLFs:
+#       1. Send an email to [phs.chi-recordlinkage@phs.scot] to request a new NSU
+#          extract after the JUNE update.
+#       2. Prepare a service use extract. Run script `get_service_use_cohort.R` to
+#          extract a list of CHI's from the most recent 'full' file.
+#       3. Once the chili team come back to us, send the service use extract to
+#          the analyst directly. Do not send the list of CHIs to the mailbox for
+#          Information Governance purposes.
+#       4. CHILI team will then process the new NSU extract based on who is not in
+#          the service use extract.
+#       5. Run the script `filter_nsu_duplicates.R` to collect the new NSU
+#          extract from the analysts SMRA space - see lines 46-47 and change
+#          username accordingly. Save the extract in:
+#          "/conf/hscdiip/SLF_Extracts/NSU"
+#
+################################################################################
+
+# Setup-------------------------------------------------------------------------
+library(fs)
+library(tidyverse)
+
+## Update ##
+# The year of the new NSU extract we want
+year <- "2324"
+
+nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU/")
+
+# Read data---------------------------------------------------------------------
+episode_file <- slfhelper::read_slf_episode(year, col_select = "anon_chi") %>%
+  # Remove blank CHI
+  dplyr::filter(!is.na(anon_chi)) %>%
+  # Get CHI version for sending to the CHILI team.
+  # For saving this on disk we want the anon-chi version, save this after sending
+  # to the CHILI team.
+  slfhelper::get_chi()
+
+# Save a parquet file
+episode_file %>%
+  arrow::write_parquet(path(nsu_dir, glue::glue("service_user_extract_{year}.parquet")),
+    compression = "zstd"
+  )
+
+## End of Script ##
diff --git a/man/add_activity_after_death_flag.Rd b/man/add_activity_after_death_flag.Rd
index 36eafe7bd..76a4ca64a 100644
--- a/man/add_activity_after_death_flag.Rd
+++ b/man/add_activity_after_death_flag.Rd
@@ -7,7 +7,8 @@
 add_activity_after_death_flag(
   data,
   year,
-  deaths_data = read_file(get_all_slf_deaths_lookup_path()) \%>\% slfhelper::get_chi()
+  deaths_data = read_file(get_combined_slf_deaths_lookup_path()) \%>\%
+    slfhelper::get_chi()
 )
 }
 \arguments{
diff --git a/man/add_deceased_flag.Rd b/man/add_deceased_flag.Rd
new file mode 100644
index 000000000..c84568522
--- /dev/null
+++ b/man/add_deceased_flag.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/add_deceased_flag.R
+\name{add_deceased_flag}
+\alias{add_deceased_flag}
+\title{Create the SLF Deaths lookup}
+\usage{
+add_deceased_flag(
+  year,
+  refined_death = read_file(get_combined_slf_deaths_lookup_path()) \%>\%
+    slfhelper::get_chi(),
+  write_to_disk = TRUE
+)
+}
+\arguments{
+\item{year}{The year to process, in FY format.}
+
+\item{write_to_disk}{(optional) Should the data be written to disk default is
+\code{TRUE} i.e. write the data to disk.}
+
+\item{nrs_deaths_data}{NRS deaths data.}
+
+\item{chi_deaths_data}{IT CHI deaths data.}
+}
+\value{
+a \link[tibble:tibble-package]{tibble} containing the episode file
+}
+\description{
+Currently this just uses the NRS death dates 'as is', with no
+corrections or modifications, it is expected that this will be expanded to
+use the CHI deaths extract from IT as well as taking into account data in
+the episode file to assess the validity of a death date.
+}
diff --git a/man/check_year_valid.Rd b/man/check_year_valid.Rd
index 91c29861e..59960da30 100644
--- a/man/check_year_valid.Rd
+++ b/man/check_year_valid.Rd
@@ -6,9 +6,9 @@
 \usage{
 check_year_valid(
   year,
-  type = c("acute", "ae", "at", "ch", "client", "cmh", "dd", "deaths", "dn", "gpooh",
-    "hc", "homelessness", "hhg", "maternity", "mh", "nsu", "outpatients", "pis", "sds",
-    "sparra")
+  type = c("acute", "ae", "at", "ch", "client", "cmh", "cost_dna", "dd", "deaths", "dn",
+    "gpooh", "hc", "homelessness", "hhg", "maternity", "mh", "nsu", "outpatients", "pis",
+    "sds", "sparra")
 )
 }
 \arguments{
diff --git a/man/create_homelessness_lookup.Rd b/man/create_homelessness_lookup.Rd
index d6a2f2bc8..9826f4ced 100644
--- a/man/create_homelessness_lookup.Rd
+++ b/man/create_homelessness_lookup.Rd
@@ -50,6 +50,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/get_boxi_extract_path.Rd b/man/get_boxi_extract_path.Rd
index c012ac3ef..5a318834a 100644
--- a/man/get_boxi_extract_path.Rd
+++ b/man/get_boxi_extract_path.Rd
@@ -6,8 +6,9 @@
 \usage{
 get_boxi_extract_path(
   year,
-  type = c("ae", "ae_cup", "acute", "cmh", "deaths", "dn", "gp_ooh-c", "gp_ooh-d",
-    "gp_ooh-o", "homelessness", "maternity", "mh", "outpatients")
+  type = c("ae", "ae_cup", "acute", "acute_cup", "cmh", "deaths", "dn", "gp_ooh-c",
+    "gp_ooh-d", "gp_ooh-o", "gp_ooh_cup", "homelessness", "maternity", "mh",
+    "outpatients")
 )
 }
 \arguments{
diff --git a/man/get_all_slf_deaths_lookup_path.Rd b/man/get_combined_slf_deaths_lookup_path.Rd
similarity index 66%
rename from man/get_all_slf_deaths_lookup_path.Rd
rename to man/get_combined_slf_deaths_lookup_path.Rd
index 2f06b64d3..709773d01 100644
--- a/man/get_all_slf_deaths_lookup_path.Rd
+++ b/man/get_combined_slf_deaths_lookup_path.Rd
@@ -1,10 +1,10 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/get_slf_lookup_paths.R
-\name{get_all_slf_deaths_lookup_path}
-\alias{get_all_slf_deaths_lookup_path}
+\name{get_combined_slf_deaths_lookup_path}
+\alias{get_combined_slf_deaths_lookup_path}
 \title{SLF death dates File Path}
 \usage{
-get_all_slf_deaths_lookup_path(update = latest_update(), ...)
+get_combined_slf_deaths_lookup_path(update = latest_update(), ...)
 }
 \arguments{
 \item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})}
@@ -13,6 +13,10 @@ get_all_slf_deaths_lookup_path(update = latest_update(), ...)
 }
 \description{
 Get the full path to the BOXI NRS Deaths lookup file for all financial years
+Note this name is very similar to the existing slf_deaths_lookup_path
+which returns the path for the refined_death with deceased flag for each financial year.
+This function will return the combined financial years lookup
+i.e. all years put together.
 }
 \seealso{
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
diff --git a/man/get_dd_period.Rd b/man/get_dd_period.Rd
index 29bd8baea..c478f401f 100644
--- a/man/get_dd_period.Rd
+++ b/man/get_dd_period.Rd
@@ -17,6 +17,7 @@ Get the period for Delayed Discharge
 Other initialisation: 
 \code{\link{latest_cost_year}()},
 \code{\link{latest_update}()},
-\code{\link{previous_update}()}
+\code{\link{previous_update}()},
+\code{\link{years_to_run}()}
 }
 \concept{initialisation}
diff --git a/man/get_slf_ch_name_lookup_path.Rd b/man/get_slf_ch_name_lookup_path.Rd
index 2660bbeab..1b0c3f3bb 100644
--- a/man/get_slf_ch_name_lookup_path.Rd
+++ b/man/get_slf_ch_name_lookup_path.Rd
@@ -22,7 +22,7 @@ has official Care Home names and addresses provided by the Care Inspectorate.
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
 
 Other slf lookup file path: 
-\code{\link{get_all_slf_deaths_lookup_path}()},
+\code{\link{get_combined_slf_deaths_lookup_path}()},
 \code{\link{get_slf_chi_deaths_path}()},
 \code{\link{get_slf_deaths_lookup_path}()},
 \code{\link{get_slf_gpprac_path}()},
diff --git a/man/get_slf_chi_deaths_path.Rd b/man/get_slf_chi_deaths_path.Rd
index 0db72d9d3..8ba115dfe 100644
--- a/man/get_slf_chi_deaths_path.Rd
+++ b/man/get_slf_chi_deaths_path.Rd
@@ -22,7 +22,7 @@ Get the full path to the CHI deaths file
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
 
 Other slf lookup file path: 
-\code{\link{get_all_slf_deaths_lookup_path}()},
+\code{\link{get_combined_slf_deaths_lookup_path}()},
 \code{\link{get_slf_ch_name_lookup_path}()},
 \code{\link{get_slf_deaths_lookup_path}()},
 \code{\link{get_slf_gpprac_path}()},
diff --git a/man/get_slf_deaths_lookup_path.Rd b/man/get_slf_deaths_lookup_path.Rd
index 307c38ad3..ae64e2371 100644
--- a/man/get_slf_deaths_lookup_path.Rd
+++ b/man/get_slf_deaths_lookup_path.Rd
@@ -21,7 +21,7 @@ Get the full path to the SLF deaths lookup file
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
 
 Other slf lookup file path: 
-\code{\link{get_all_slf_deaths_lookup_path}()},
+\code{\link{get_combined_slf_deaths_lookup_path}()},
 \code{\link{get_slf_ch_name_lookup_path}()},
 \code{\link{get_slf_chi_deaths_path}()},
 \code{\link{get_slf_gpprac_path}()},
diff --git a/man/get_slf_gpprac_path.Rd b/man/get_slf_gpprac_path.Rd
index 1fb23116f..1371f758e 100644
--- a/man/get_slf_gpprac_path.Rd
+++ b/man/get_slf_gpprac_path.Rd
@@ -21,7 +21,7 @@ Get the full path to the SLF GP practice lookup
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
 
 Other slf lookup file path: 
-\code{\link{get_all_slf_deaths_lookup_path}()},
+\code{\link{get_combined_slf_deaths_lookup_path}()},
 \code{\link{get_slf_ch_name_lookup_path}()},
 \code{\link{get_slf_chi_deaths_path}()},
 \code{\link{get_slf_deaths_lookup_path}()},
diff --git a/man/get_slf_postcode_path.Rd b/man/get_slf_postcode_path.Rd
index f37678695..ee9bc65a9 100644
--- a/man/get_slf_postcode_path.Rd
+++ b/man/get_slf_postcode_path.Rd
@@ -21,7 +21,7 @@ Get the full path to the SLF Postcode lookup
 \code{\link[=get_file_path]{get_file_path()}} for the generic function.
 
 Other slf lookup file path: 
-\code{\link{get_all_slf_deaths_lookup_path}()},
+\code{\link{get_combined_slf_deaths_lookup_path}()},
 \code{\link{get_slf_ch_name_lookup_path}()},
 \code{\link{get_slf_chi_deaths_path}()},
 \code{\link{get_slf_deaths_lookup_path}()},
diff --git a/man/latest_cost_year.Rd b/man/latest_cost_year.Rd
index 0240c6ad0..0045b4efb 100644
--- a/man/latest_cost_year.Rd
+++ b/man/latest_cost_year.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/00-update_refs.R
+% Please edit documentation in R/cost_uplift.R
 \name{latest_cost_year}
 \alias{latest_cost_year}
 \title{The latest financial year for Cost uplift setting}
@@ -11,11 +11,17 @@ The financial year format
 }
 \description{
 Get the latest year for cost uplift
+latest_cost_year() is hard coded in cost_uplift().
+2223 is not changed automatically with time passes.
+It is changed only when we get a new instruction from somewhere about cost uplift.
+Do not change unless specific instructions.
+Changing this means that we need to change cost_uplift().
 }
 \seealso{
 Other initialisation: 
 \code{\link{get_dd_period}()},
 \code{\link{latest_update}()},
-\code{\link{previous_update}()}
+\code{\link{previous_update}()},
+\code{\link{years_to_run}()}
 }
 \concept{initialisation}
diff --git a/man/latest_update.Rd b/man/latest_update.Rd
index b3fbe765c..926e472e4 100644
--- a/man/latest_update.Rd
+++ b/man/latest_update.Rd
@@ -16,6 +16,7 @@ Get the date of the latest update, e.g 'Jun_2022'
 Other initialisation: 
 \code{\link{get_dd_period}()},
 \code{\link{latest_cost_year}()},
-\code{\link{previous_update}()}
+\code{\link{previous_update}()},
+\code{\link{years_to_run}()}
 }
 \concept{initialisation}
diff --git a/man/previous_update.Rd b/man/previous_update.Rd
index f87b4656f..547138700 100644
--- a/man/previous_update.Rd
+++ b/man/previous_update.Rd
@@ -28,6 +28,7 @@ previous_update(override = "May_2023") # Specific Month
 Other initialisation: 
 \code{\link{get_dd_period}()},
 \code{\link{latest_cost_year}()},
-\code{\link{latest_update}()}
+\code{\link{latest_update}()},
+\code{\link{years_to_run}()}
 }
 \concept{initialisation}
diff --git a/man/process_deaths_lookup.Rd b/man/process_combined_deaths_lookup.Rd
similarity index 81%
rename from man/process_deaths_lookup.Rd
rename to man/process_combined_deaths_lookup.Rd
index e897e49a2..7d0a75fc7 100644
--- a/man/process_deaths_lookup.Rd
+++ b/man/process_combined_deaths_lookup.Rd
@@ -1,10 +1,14 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/add_activity_after_death_flag.R
-\name{process_deaths_lookup}
-\alias{process_deaths_lookup}
+\name{process_combined_deaths_lookup}
+\alias{process_combined_deaths_lookup}
 \title{Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts}
 \usage{
-process_deaths_lookup(update = latest_update(), write_to_disk = TRUE, ...)
+process_combined_deaths_lookup(
+  update = latest_update(),
+  write_to_disk = TRUE,
+  ...
+)
 }
 \arguments{
 \item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})}
diff --git a/man/process_extract_acute.Rd b/man/process_extract_acute.Rd
index 77a99cef3..fae9c7bab 100644
--- a/man/process_extract_acute.Rd
+++ b/man/process_extract_acute.Rd
@@ -4,7 +4,12 @@
 \alias{process_extract_acute}
 \title{Process the Acute extract}
 \usage{
-process_extract_acute(data, year, write_to_disk = TRUE)
+process_extract_acute(
+  data,
+  year,
+  acute_cup_path = get_boxi_extract_path(year, "acute_cup"),
+  write_to_disk = TRUE
+)
 }
 \arguments{
 \item{data}{The extract to process}
@@ -48,6 +53,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_ae.Rd b/man/process_extract_ae.Rd
index 9eec39ba5..36d2bb4d3 100644
--- a/man/process_extract_ae.Rd
+++ b/man/process_extract_ae.Rd
@@ -48,6 +48,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_alarms_telecare.Rd b/man/process_extract_alarms_telecare.Rd
index 76093be7d..016f5e2b6 100644
--- a/man/process_extract_alarms_telecare.Rd
+++ b/man/process_extract_alarms_telecare.Rd
@@ -49,6 +49,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_care_home.Rd b/man/process_extract_care_home.Rd
index 269ae1e7d..a002d30ab 100644
--- a/man/process_extract_care_home.Rd
+++ b/man/process_extract_care_home.Rd
@@ -51,6 +51,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_cmh.Rd b/man/process_extract_cmh.Rd
index 64e085dcf..799b6d717 100644
--- a/man/process_extract_cmh.Rd
+++ b/man/process_extract_cmh.Rd
@@ -48,6 +48,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_delayed_discharges.Rd b/man/process_extract_delayed_discharges.Rd
index c6fd560a7..385bdff2a 100644
--- a/man/process_extract_delayed_discharges.Rd
+++ b/man/process_extract_delayed_discharges.Rd
@@ -48,6 +48,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_district_nursing.Rd b/man/process_extract_district_nursing.Rd
index eb2814fbc..49284b70f 100644
--- a/man/process_extract_district_nursing.Rd
+++ b/man/process_extract_district_nursing.Rd
@@ -55,6 +55,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_gp_ooh.Rd b/man/process_extract_gp_ooh.Rd
index ddec006fe..5c68c35dd 100644
--- a/man/process_extract_gp_ooh.Rd
+++ b/man/process_extract_gp_ooh.Rd
@@ -4,7 +4,12 @@
 \alias{process_extract_gp_ooh}
 \title{Process the GP OoH extract}
 \usage{
-process_extract_gp_ooh(year, data_list, write_to_disk = TRUE)
+process_extract_gp_ooh(
+  year,
+  data_list,
+  gp_ooh_cup_path = get_boxi_extract_path(year, "gp_ooh_cup"),
+  write_to_disk = TRUE
+)
 }
 \arguments{
 \item{year}{The year to process, in FY format.}
@@ -48,6 +53,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_home_care.Rd b/man/process_extract_home_care.Rd
index 4dd609770..98c45a8e2 100644
--- a/man/process_extract_home_care.Rd
+++ b/man/process_extract_home_care.Rd
@@ -49,6 +49,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_homelessness.Rd b/man/process_extract_homelessness.Rd
index 405da34bb..59fe6f283 100644
--- a/man/process_extract_homelessness.Rd
+++ b/man/process_extract_homelessness.Rd
@@ -62,6 +62,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_maternity.Rd b/man/process_extract_maternity.Rd
index 17dd1a64c..19142c4a8 100644
--- a/man/process_extract_maternity.Rd
+++ b/man/process_extract_maternity.Rd
@@ -48,6 +48,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_mental_health.Rd b/man/process_extract_mental_health.Rd
index 5f1fc7330..bd91dc4ec 100644
--- a/man/process_extract_mental_health.Rd
+++ b/man/process_extract_mental_health.Rd
@@ -48,6 +48,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_nrs_deaths.Rd b/man/process_extract_nrs_deaths.Rd
index 1938e15ec..71fab68e2 100644
--- a/man/process_extract_nrs_deaths.Rd
+++ b/man/process_extract_nrs_deaths.Rd
@@ -47,6 +47,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_ooh_consultations.Rd b/man/process_extract_ooh_consultations.Rd
index e00155191..ae4265823 100644
--- a/man/process_extract_ooh_consultations.Rd
+++ b/man/process_extract_ooh_consultations.Rd
@@ -45,6 +45,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_ooh_diagnosis.Rd b/man/process_extract_ooh_diagnosis.Rd
index 2dcbee647..78db15f0f 100644
--- a/man/process_extract_ooh_diagnosis.Rd
+++ b/man/process_extract_ooh_diagnosis.Rd
@@ -45,6 +45,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_ooh_outcomes.Rd b/man/process_extract_ooh_outcomes.Rd
index 31ec64439..d59617e7b 100644
--- a/man/process_extract_ooh_outcomes.Rd
+++ b/man/process_extract_ooh_outcomes.Rd
@@ -45,6 +45,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_outpatients.Rd b/man/process_extract_outpatients.Rd
index 3a46ad119..8af2c6ddf 100644
--- a/man/process_extract_outpatients.Rd
+++ b/man/process_extract_outpatients.Rd
@@ -48,6 +48,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_prescribing.Rd b/man/process_extract_prescribing.Rd
index 195a60bfe..c959ce1e7 100644
--- a/man/process_extract_prescribing.Rd
+++ b/man/process_extract_prescribing.Rd
@@ -48,6 +48,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_extract_sds.Rd b/man/process_extract_sds.Rd
index 03ee60362..b0cc8788a 100644
--- a/man/process_extract_sds.Rd
+++ b/man/process_extract_sds.Rd
@@ -49,6 +49,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_it_chi_deaths.Rd b/man/process_it_chi_deaths.Rd
index 1d8e085ab..757f06aa7 100644
--- a/man/process_it_chi_deaths.Rd
+++ b/man/process_it_chi_deaths.Rd
@@ -45,6 +45,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_lookup_gpprac.Rd b/man/process_lookup_gpprac.Rd
index 107af24c0..bfda08282 100644
--- a/man/process_lookup_gpprac.Rd
+++ b/man/process_lookup_gpprac.Rd
@@ -54,6 +54,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_lookup_postcode.Rd b/man/process_lookup_postcode.Rd
index e556efd51..b8b1ebd4f 100644
--- a/man/process_lookup_postcode.Rd
+++ b/man/process_lookup_postcode.Rd
@@ -55,6 +55,7 @@ Other process extracts:
 \code{\link{process_lookup_gpprac}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_lookup_sc_client.Rd b/man/process_lookup_sc_client.Rd
index 74e402846..4b85a06b5 100644
--- a/man/process_lookup_sc_client.Rd
+++ b/man/process_lookup_sc_client.Rd
@@ -8,7 +8,7 @@ process_lookup_sc_client(
   data,
   year,
   sc_demographics = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi()
-    \%>\% dplyr::select(c("sending_location", "social_care_id", "chi")),
+    \%>\% dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")),
   write_to_disk = TRUE
 )
 }
@@ -56,6 +56,7 @@ Other process extracts:
 \code{\link{process_lookup_gpprac}()},
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_lookup_sc_demographics.Rd b/man/process_lookup_sc_demographics.Rd
index a89933425..29215f657 100644
--- a/man/process_lookup_sc_demographics.Rd
+++ b/man/process_lookup_sc_demographics.Rd
@@ -52,6 +52,7 @@ Other process extracts:
 \code{\link{process_lookup_gpprac}()},
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_refined_death.Rd b/man/process_refined_death.Rd
new file mode 100644
index 000000000..fd5392eb2
--- /dev/null
+++ b/man/process_refined_death.Rd
@@ -0,0 +1,62 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/process_refined_death.R
+\name{process_refined_death}
+\alias{process_refined_death}
+\title{Process the refined death data}
+\usage{
+process_refined_death(
+  it_chi_deaths = read_file(get_slf_chi_deaths_path()),
+  write_to_disk = TRUE
+)
+}
+\arguments{
+\item{it_chi_deaths}{it chi death data}
+
+\item{write_to_disk}{write the result to disk or not.}
+}
+\value{
+refined_death The processed lookup of deaths combining NRS and IT_CHI.
+}
+\description{
+This will process
+year-specific BOXI NRS death file (written to disk), and
+combine them together to get all years NRS file (Not written to disk).
+Then join all NRS deaths with IT CHI death data
+to get an all-year refined death file (written to disk).
+}
+\seealso{
+Other process extracts: 
+\code{\link{create_homelessness_lookup}()},
+\code{\link{process_extract_acute}()},
+\code{\link{process_extract_ae}()},
+\code{\link{process_extract_alarms_telecare}()},
+\code{\link{process_extract_care_home}()},
+\code{\link{process_extract_cmh}()},
+\code{\link{process_extract_delayed_discharges}()},
+\code{\link{process_extract_district_nursing}()},
+\code{\link{process_extract_gp_ooh}()},
+\code{\link{process_extract_home_care}()},
+\code{\link{process_extract_homelessness}()},
+\code{\link{process_extract_maternity}()},
+\code{\link{process_extract_mental_health}()},
+\code{\link{process_extract_nrs_deaths}()},
+\code{\link{process_extract_ooh_consultations}()},
+\code{\link{process_extract_ooh_diagnosis}()},
+\code{\link{process_extract_ooh_outcomes}()},
+\code{\link{process_extract_outpatients}()},
+\code{\link{process_extract_prescribing}()},
+\code{\link{process_extract_sds}()},
+\code{\link{process_it_chi_deaths}()},
+\code{\link{process_lookup_gpprac}()},
+\code{\link{process_lookup_postcode}()},
+\code{\link{process_lookup_sc_client}()},
+\code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_sc_all_alarms_telecare}()},
+\code{\link{process_sc_all_care_home}()},
+\code{\link{process_sc_all_home_care}()},
+\code{\link{process_sc_all_sds}()},
+\code{\link{read_extract_gp_ooh}()},
+\code{\link{read_it_chi_deaths}()},
+\code{\link{read_lookup_sc_client}()}
+}
+\concept{process extracts}
diff --git a/man/process_sc_all_alarms_telecare.Rd b/man/process_sc_all_alarms_telecare.Rd
index a2e319cbf..1f3eb30e0 100644
--- a/man/process_sc_all_alarms_telecare.Rd
+++ b/man/process_sc_all_alarms_telecare.Rd
@@ -54,6 +54,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
 \code{\link{process_sc_all_sds}()},
diff --git a/man/process_sc_all_care_home.Rd b/man/process_sc_all_care_home.Rd
index 792d2200d..f689c19f4 100644
--- a/man/process_sc_all_care_home.Rd
+++ b/man/process_sc_all_care_home.Rd
@@ -7,9 +7,10 @@
 process_sc_all_care_home(
   data,
   sc_demog_lookup = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi(),
-  it_chi_deaths_data = read_file(get_slf_chi_deaths_path()),
-  ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()),
-  spd_path = read_file(get_spd_path()),
+  refined_death = read_file(get_combined_slf_deaths_lookup_path()) \%>\%
+    slfhelper::get_chi(),
+  ch_name_lookup_path = get_slf_ch_name_lookup_path(),
+  spd_path = get_spd_path(),
   write_to_disk = TRUE
 )
 }
@@ -19,8 +20,8 @@ process_sc_all_care_home(
 \item{sc_demog_lookup}{The Social Care Demographics lookup produced by
 \code{\link[=process_lookup_sc_demographics]{process_lookup_sc_demographics()}}.}
 
-\item{it_chi_deaths_data}{The processed lookup of deaths from IT produced
-with \code{\link[=process_it_chi_deaths]{process_it_chi_deaths()}}.}
+\item{refined_death}{The processed lookup of deaths from IT produced
+with \code{\link[=process_refined_death]{process_refined_death()}}.}
 
 \item{ch_name_lookup_path}{Path to the Care Home name Lookup Excel workbook.}
 
@@ -65,6 +66,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_home_care}()},
 \code{\link{process_sc_all_sds}()},
diff --git a/man/process_sc_all_home_care.Rd b/man/process_sc_all_home_care.Rd
index c6777889f..1f64cad95 100644
--- a/man/process_sc_all_home_care.Rd
+++ b/man/process_sc_all_home_care.Rd
@@ -54,6 +54,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_sds}()},
diff --git a/man/process_sc_all_sds.Rd b/man/process_sc_all_sds.Rd
index f91c9dfb9..43d18b29d 100644
--- a/man/process_sc_all_sds.Rd
+++ b/man/process_sc_all_sds.Rd
@@ -54,6 +54,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/process_slf_deaths_lookup.Rd b/man/process_slf_deaths_lookup.Rd
index 8ad103a2a..80e7559e0 100644
--- a/man/process_slf_deaths_lookup.Rd
+++ b/man/process_slf_deaths_lookup.Rd
@@ -6,28 +6,22 @@
 \usage{
 process_slf_deaths_lookup(
   year,
-  nrs_deaths_data = read_file(get_source_extract_path(year, "deaths"), col_select =
-    c("chi", "record_keydate1")),
-  chi_deaths_data = read_file(get_slf_chi_deaths_path()),
+  refined_death = read_file(get_combined_slf_deaths_lookup_path()),
   write_to_disk = TRUE
 )
 }
 \arguments{
 \item{year}{The year to process, in FY format.}
 
-\item{nrs_deaths_data}{NRS deaths data.}
-
-\item{chi_deaths_data}{IT CHI deaths data.}
+\item{refined_death}{refined death date combining nrs and it_chi.}
 
 \item{write_to_disk}{(optional) Should the data be written to disk default is
 \code{TRUE} i.e. write the data to disk.}
 }
 \value{
-a \link[tibble:tibble-package]{tibble} containing the episode file
+a \link[tibble:tibble-package]{tibble} add deceased flag to deaths
 }
 \description{
-Currently this just uses the NRS death dates 'as is', with no
-corrections or modifications, it is expected that this will be expanded to
-use the CHI deaths extract from IT as well as taking into account data in
-the episode file to assess the validity of a death date.
+Use all-year refined death data to produce year-specific
+slf_deaths_lookup with deceased flag added.
 }
diff --git a/man/read_extract_gp_ooh.Rd b/man/read_extract_gp_ooh.Rd
index ba908127b..61eaf7d32 100644
--- a/man/read_extract_gp_ooh.Rd
+++ b/man/read_extract_gp_ooh.Rd
@@ -55,6 +55,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/read_file.Rd b/man/read_file.Rd
index 1ef351342..d4d94e0df 100644
--- a/man/read_file.Rd
+++ b/man/read_file.Rd
@@ -27,8 +27,6 @@ Read a file, the function chosen to read the file is dependant
 on the file path.
 \itemize{
 \item \code{.rds} uses \code{\link[readr:read_rds]{readr::read_rds()}}.
-\item \code{.fst} uses \code{\link[fst:write_fst]{fst::read_fst()}}.
-\item \code{.sav} and \code{.zsav} use \code{\link[haven:read_spss]{haven::read_spss()}}.
 \item \code{.csv} and \code{.gz} use \code{\link[readr:read_delim]{readr::read_csv()}}. Note that this assumes any file
 ending with \code{.gz} is a zipped CSV which isn't necessarily true!
 \item \code{.parquet} uses \code{\link[arrow:read_parquet]{arrow::read_parquet()}}.
diff --git a/man/read_it_chi_deaths.Rd b/man/read_it_chi_deaths.Rd
index d1bfe5cf7..fe548d84b 100644
--- a/man/read_it_chi_deaths.Rd
+++ b/man/read_it_chi_deaths.Rd
@@ -42,6 +42,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/read_lookup_sc_client.Rd b/man/read_lookup_sc_client.Rd
index 283bc6a9a..4cef9df29 100644
--- a/man/read_lookup_sc_client.Rd
+++ b/man/read_lookup_sc_client.Rd
@@ -48,6 +48,7 @@ Other process extracts:
 \code{\link{process_lookup_postcode}()},
 \code{\link{process_lookup_sc_client}()},
 \code{\link{process_lookup_sc_demographics}()},
+\code{\link{process_refined_death}()},
 \code{\link{process_sc_all_alarms_telecare}()},
 \code{\link{process_sc_all_care_home}()},
 \code{\link{process_sc_all_home_care}()},
diff --git a/man/years_to_run.Rd b/man/years_to_run.Rd
new file mode 100644
index 000000000..188ea7f5f
--- /dev/null
+++ b/man/years_to_run.Rd
@@ -0,0 +1,22 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/00-update_refs.R
+\name{years_to_run}
+\alias{years_to_run}
+\title{The year list for slf to update}
+\usage{
+years_to_run()
+}
+\value{
+The vector of financial years
+}
+\description{
+Get the vector of years to update slf
+}
+\seealso{
+Other initialisation: 
+\code{\link{get_dd_period}()},
+\code{\link{latest_cost_year}()},
+\code{\link{latest_update}()},
+\code{\link{previous_update}()}
+}
+\concept{initialisation}
diff --git a/run_targets_1718.R b/run_targets_1718.R
new file mode 100644
index 000000000..488918e1d
--- /dev/null
+++ b/run_targets_1718.R
@@ -0,0 +1,20 @@
+library(targets)
+
+Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
+
+year <- "1718"
+
+# use targets for the process until testing episode files
+tar_make_future(
+  # it does not recognise `contains(year)`
+  names = (targets::contains("1718"))
+)
+
+# use targets to create individual files due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/run_targets_1819.R b/run_targets_1819.R
new file mode 100644
index 000000000..7c63807e8
--- /dev/null
+++ b/run_targets_1819.R
@@ -0,0 +1,20 @@
+library(targets)
+
+Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
+
+year <- "1819"
+
+# use targets for the process until testing episode files
+tar_make_future(
+  # it does not recognise `contains(year)`
+  names = (targets::contains("1819"))
+)
+
+# use targets to create individual files due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/run_targets_1920.R b/run_targets_1920.R
new file mode 100644
index 000000000..d3361a34c
--- /dev/null
+++ b/run_targets_1920.R
@@ -0,0 +1,20 @@
+library(targets)
+
+Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
+
+year <- "1920"
+
+# use targets for the process until testing episode files
+tar_make_future(
+  # it does not recognise `contains(year)`
+  names = (targets::contains("1920"))
+)
+
+# use targets to create individual files due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/run_targets_2021.R b/run_targets_2021.R
new file mode 100644
index 000000000..efcfaed7a
--- /dev/null
+++ b/run_targets_2021.R
@@ -0,0 +1,20 @@
+library(targets)
+
+Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
+
+year <- "2021"
+
+# use targets for the process until testing episode files
+tar_make_future(
+  # it does not recognise `contains(year)`
+  names = (targets::contains("2021"))
+)
+
+# use targets to create individual files due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/run_targets_2122.R b/run_targets_2122.R
new file mode 100644
index 000000000..e92d75c7d
--- /dev/null
+++ b/run_targets_2122.R
@@ -0,0 +1,20 @@
+library(targets)
+
+Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
+
+year <- "2122"
+
+# use targets for the process until testing episode files
+tar_make_future(
+  # it does not recognise `contains(year)`
+  names = (targets::contains("2122"))
+)
+
+# use targets to create individual files due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/run_targets_2223.R b/run_targets_2223.R
new file mode 100644
index 000000000..f5c93ee2f
--- /dev/null
+++ b/run_targets_2223.R
@@ -0,0 +1,20 @@
+library(targets)
+
+Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
+
+year <- "2223"
+
+# use targets for the process until testing episode files
+tar_make_future(
+  # it does not recognise `contains(year)`
+  names = (targets::contains("2223"))
+)
+
+# use targets to create individual files due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)
diff --git a/run_targets_2324.R b/run_targets_2324.R
new file mode 100644
index 000000000..5e3885bc2
--- /dev/null
+++ b/run_targets_2324.R
@@ -0,0 +1,20 @@
+library(targets)
+
+Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
+
+year <- "2324"
+
+# use targets for the process until testing episode files
+tar_make_future(
+  # it does not recognise `contains(year)`
+  names = (targets::contains("2324"))
+)
+
+# use targets to create individual files due to RAM limit
+library(createslf)
+
+episode_file <- arrow::read_parquet(get_slf_episode_path(year))
+
+# Run individual file
+create_individual_file(episode_file, year = year) %>%
+  process_tests_individual_file(year = year)