diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
index 298d28e3c..48ab3c8e3 100644
--- a/.github/workflows/R-CMD-check.yaml
+++ b/.github/workflows/R-CMD-check.yaml
@@ -17,7 +17,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        r_version: ['4.0.2', '4.1.2', 'release']
+        r_version: ['4.1.2', 'release']
 
     env:
       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
diff --git a/NAMESPACE b/NAMESPACE
index 6f1c88841..05ee51fb0 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -1,12 +1,12 @@
 # Generated by roxygen2: do not edit by hand
 
 export("%>%")
-export(add_deceased_flag)
 export(add_homelessness_date_flags)
 export(add_homelessness_flag)
 export(add_hri_variables)
 export(add_nsu_cohort)
 export(check_year_format)
+export(clean_temp_data)
 export(clean_up_free_text)
 export(compute_mid_year_age)
 export(convert_ca_to_lca)
@@ -21,10 +21,12 @@ export(create_episode_file)
 export(create_homelessness_lookup)
 export(create_individual_file)
 export(create_service_use_cohorts)
+export(end_date)
 export(end_fy)
 export(end_fy_quarter)
 export(end_next_fy_quarter)
 export(find_latest_file)
+export(fy)
 export(fy_interval)
 export(get_boxi_extract_path)
 export(get_ch_costs_path)
@@ -89,7 +91,6 @@ export(midpoint_fy)
 export(next_fy)
 export(phs_db_connection)
 export(previous_update)
-export(process_combined_deaths_lookup)
 export(process_costs_ch_rmd)
 export(process_costs_dn_rmd)
 export(process_costs_gp_ooh_rmd)
@@ -156,6 +157,7 @@ export(produce_episode_file_tests)
 export(produce_sc_sandpit_tests)
 export(produce_source_extract_tests)
 export(produce_test_comparison)
+export(qtr)
 export(read_dev_slf_file)
 export(read_extract_acute)
 export(read_extract_ae)
@@ -178,12 +180,14 @@ export(read_sc_all_alarms_telecare)
 export(read_sc_all_care_home)
 export(read_sc_all_home_care)
 export(read_sc_all_sds)
+export(read_temp_data)
 export(rename_hscp)
 export(setup_keyring)
 export(start_fy)
 export(start_fy_quarter)
 export(start_next_fy_quarter)
 export(write_file)
+export(write_temp_data)
 export(years_to_run)
 importFrom(data.table,.N)
 importFrom(data.table,.SD)
diff --git a/NEWS.md b/NEWS.md
index 5a07b8266..c4c9b9e5d 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,12 @@
-# September 2024 Update - Unreleased 
+# December 2024 Update - released 10-Dec-24
+* 24/25 files have been updated, containing data up to September 2024.
+* 17/18 - 23/24 files have been updated.
+* Homelessness completeness flag is now available in 23/24 files.
+* Substance misuse flag updated.
+* Mid-2023 & Mid-2022 population estimates for Scotland have been updated.
+* Mid-2022 Small Area Population Estimates for 2011 Data Zones have been updated.
+
+# September 2024 Update - released 13-Sep-24
 * New 24/25 files created
 * New NSU cohort for 23/24 available
 * New SPARRA scores calculated from April 24/25
diff --git a/00_Sort_BI_Extracts.R b/Pre_processing_scripts/00_Sort_BI_Extracts.R
similarity index 100%
rename from 00_Sort_BI_Extracts.R
rename to Pre_processing_scripts/00_Sort_BI_Extracts.R
diff --git a/copy_to_hscdiip.R b/Pre_processing_scripts/copy_to_hscdiip.R
similarity index 100%
rename from copy_to_hscdiip.R
rename to Pre_processing_scripts/copy_to_hscdiip.R
diff --git a/extract_new_nsu_cohort/filter_nsu_duplicates.R b/Pre_processing_scripts/extract_new_nsu_cohort/filter_nsu_duplicates.R
similarity index 100%
rename from extract_new_nsu_cohort/filter_nsu_duplicates.R
rename to Pre_processing_scripts/extract_new_nsu_cohort/filter_nsu_duplicates.R
diff --git a/extract_new_nsu_cohort/get_service_use_cohort.R b/Pre_processing_scripts/extract_new_nsu_cohort/get_service_use_cohort.R
similarity index 100%
rename from extract_new_nsu_cohort/get_service_use_cohort.R
rename to Pre_processing_scripts/extract_new_nsu_cohort/get_service_use_cohort.R
diff --git a/Pre_processing_scripts/write_anon_chi_files.R b/Pre_processing_scripts/write_anon_chi_files.R
new file mode 100644
index 000000000..1ff39d273
--- /dev/null
+++ b/Pre_processing_scripts/write_anon_chi_files.R
@@ -0,0 +1,70 @@
+################################################################################
+# Name of file -  Write_anon_chi_files.R
+#
+# Original Authors - Jennifer Thom, Zihao Li
+# Original Date - July 2024
+# Written/run on - R Posit
+# Version of R - 4.1.2
+#
+# Description: Run this script in stages to convert chi to anon chi and save files.
+#              By default this is set up to take the delayed discharges file
+#              convert the chi to anon_chi and save to disk. Important for
+#              ensuring we do not save chi anywhere on disk.
+#
+################################################################################
+
+## Stage 1 - Setup environment
+#-------------------------------------------------------------------------------
+
+# Set up directory
+source_dir <- "/conf/hscdiip/SLF_Extracts/Delayed_Discharges"
+
+# Specify type of files e.g parquet, rds, csv
+pattern <- ".parquet"
+cat(stringr::str_glue("Looking in '{source_dir}' for parquet files."))
+
+# List all files in the directory
+parquet_files <- list.files(source_dir, pattern = ".parquet", full.names = TRUE)
+print(stringr::str_glue("Found {length(parquet_files)} parquet files to process."))
+
+# Create a function to read variable names and check if CHI is in the file
+is_chi_in_file <- function(filename) {
+  data <- arrow::read_parquet(filename, nrow = 5)
+  return(grepl("chi", names(data)) %>% any())
+}
+
+
+# Stage 2 - In each file, convert chi to anon_chi and save to disk
+#-------------------------------------------------------------------------------
+
+# create a loop for converting to anon chi in all listed files
+for (data_file in parquet_files) {
+  # specify new name and new file path
+  save_file_path <- file.path(source_dir, paste0("anon-", basename(data_file)))
+  chi_in_file <- is_chi_in_file(data_file)
+
+  # If chi is in the file, convert to anon_chi
+  if (chi_in_file) {
+    read_file(data_file) %>%
+      slfhelper::get_anon_chi() %>%
+      write_file(save_file_path)
+
+    cat("Replaced chi with anon chi:", data_file, "to", save_file_path, "\n")
+  } else {
+    read_file(data_file) %>%
+      write_file(save_file_path)
+    cat("renamed file with anon chi:", data_file, "to", save_file_path, "\n")
+  }
+}
+
+
+# Stage 3 - Remove files with CHI
+#-------------------------------------------------------------------------------
+
+# Create a loop for removing the old files with CHI
+for (data_file in parquet_files) {
+  file.remove(data_file)
+  cat("Removed chi files:", data_file, "in", source_dir, "\n")
+}
+
+# End of Script #
diff --git a/R/00-update_refs.R b/R/00-update_refs.R
index 33022edf6..7ff216c92 100644
--- a/R/00-update_refs.R
+++ b/R/00-update_refs.R
@@ -1,3 +1,109 @@
+################################################################################
+# # Name of file -  00-update_refs.R
+# Original Authors - Jennifer Thom, Zihao Li
+# Original Date - August 2021
+# Update - Oct 2024
+#
+# Written/run on - RStudio Server
+# Version of R - 4.1.2
+#
+# Description - Use this script to update references needed for the SLF update.
+#
+# Manual changes needed to the following Essential Functions:
+#       # End_date
+#       # Check_year_valid
+#       # Delayed_discharges_period
+#       # Latest_update
+#
+################################################################################
+
+#' End date
+#'
+#' @return Get the end date of the latest update period
+#' @export
+#'
+end_date <- function() {
+  ## UPDATE ##
+  # Specify update by indicating end of quarter date
+  # Q1 June = 30062024
+  # Q2 September = 30092024
+  # Q3 December = 31122024
+  # Q4 March = 31032024
+  lubridate::dmy(31122024)
+}
+
+
+#' Check data exists for a year
+#'
+#' @description  Check there is data available for a given year
+#' as some extracts are year dependent. E.g Homelessness
+#' is only available from 2016/17 onwards.
+#'
+#' @param year Financial year
+#' @param type name of extract
+#'
+#' @return A logical TRUE/FALSE
+check_year_valid <- function(
+    year,
+    type = c(
+      "acute",
+      "ae",
+      "at",
+      "ch",
+      "client",
+      "cmh",
+      "cost_dna",
+      "dd",
+      "deaths",
+      "dn",
+      "gpooh",
+      "hc",
+      "homelessness",
+      "hhg",
+      "maternity",
+      "mh",
+      "nsu",
+      "outpatients",
+      "pis",
+      "sds",
+      "sparra"
+    )) {
+  if (year <= "1415" && type %in% c("dn", "sparra")) {
+    return(FALSE)
+  } else if (year <= "1516" && type %in% c("cmh", "homelessness", "dd")) {
+    return(FALSE)
+  } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at", "client", "cost_dna")) {
+    return(FALSE)
+  } else if (year <= "1718" && type %in% "hhg") {
+    return(FALSE)
+  } else if (year >= "2122" && type %in% c("cmh", "dn")) {
+    return(FALSE)
+  } else if (year >= "2324" && type %in% "hhg") {
+    return(FALSE)
+  } else if (year >= "2425" && type %in% c("nsu", "sds")) {
+    return(FALSE)
+  } else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at", "sparra")) {
+    return(FALSE)
+  }
+
+  return(TRUE)
+}
+
+
+#' Delayed Discharge period
+#'
+#' @description Get the period for Delayed Discharge
+#'
+#' @return The period for the Delayed Discharge file
+#' as MMMYY_MMMYY
+#' @export
+#'
+#' @family initialisation
+get_dd_period <- function() {
+  "Jul16_Sep24"
+}
+
+
 #' Latest update
 #'
 #' @description Get the date of the latest update, e.g 'Jun_2022'
@@ -7,9 +113,10 @@
 #'
 #' @family initialisation
 latest_update <- function() {
-  "Sep_2024"
+  "Dec_2024"
 }
 
+
 #' Previous update
 #'
 #' @param months_ago Number of months since the previous update
@@ -51,19 +158,33 @@ previous_update <- function(months_ago = 3L, override = NULL) {
   return(previous_update)
 }
 
-#' Delayed Discharge period
+
+#' Extract latest FY from end_date
 #'
-#' @description Get the period for Delayed Discharge
+#' @return fy in format "2024"
+#' @export
 #'
-#' @return The period for the Delayed Discharge file
-#' as MMMYY_MMMYY
+fy <- function() {
+  # Latest FY
+  fy <- phsmethods::extract_fin_year(end_date()) %>% substr(1, 4)
+}
+
+
+#' Extract latest quarter from end_date
+#'
+#' @return qtr in format "Q1"
 #' @export
 #'
-#' @family initialisation
-get_dd_period <- function() {
-  "Jul16_Jun24"
+qtr <- function() {
+  # Latest Quarter
+  qtr <- lubridate::quarter(end_date(), fiscal_start = 4)
+
+  qtr <- stringr::str_glue("Q{qtr}")
+
+  return(qtr)
 }
 
+
 #' The year list for slf to update
 #'
 #' @description Get the vector of years to update slf
diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R
index 5e800c80b..abf53fdd7 100644
--- a/R/add_activity_after_death_flag.R
+++ b/R/add_activity_after_death_flag.R
@@ -13,52 +13,48 @@ add_activity_after_death_flag <- function(
     year,
     deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>%
       slfhelper::get_chi()) {
-  cli::cli_alert_info("Add activity after death flag function started at {Sys.time()}")
-
   # to skip warnings no visible binding for global variable ‘.’
   . <- NULL
 
+  data <- data %>%
+    dplyr::mutate(ep_row_id_death = dplyr::row_number())
+
   death_joined <- data %>%
-    dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$death_date, .data$deceased) %>%
-    dplyr::filter(!is.na(.data$chi) | .data$chi != "") %>%
-    dplyr::left_join(
-      deaths_data,
+    dplyr::select(
+      "year",
+      "chi",
+      "recid",
+      "record_keydate1",
+      "record_keydate2",
+      "death_date",
+      "deceased",
+      "ep_row_id_death"
+    ) %>%
+    dplyr::filter(!is.na(.data$chi) & .data$chi != "") %>%
+    dplyr::left_join(deaths_data,
       by = "chi",
-      suffix = c("", "_boxi")
+      suffix = c("", "_refined")
     ) %>%
     dplyr::filter(.data$deceased == TRUE) %>%
     dplyr::distinct()
 
-
-  # Check and print error message for records which already have a death_date in the episode file, but this doesn't match the BOXI death date
-  check_death_date_match <- death_joined %>%
-    dplyr::filter(.data$death_date != .data$death_date_boxi)
-
-  if (nrow(check_death_date_match) != 0) {
-    warning("There were records in the episode file which already have a death_date, but does not match the BOXI NRS death date.")
-  }
-
-
-  # Check and print error message for records which have a record_keydate1 after their BOXI death date
-  check_keydate1_death_date <- death_joined %>%
-    dplyr::filter(.data$record_keydate1 > .data$death_date_boxi)
-
-  if (nrow(check_death_date_match) != 0) {
-    warning("There were records in the episode file which have a record_keydate1 after the BOXI NRS death date.")
-  }
-
-
   flag_data <- death_joined %>%
     dplyr::mutate(
-      flag_keydate1 = dplyr::if_else(.data$record_keydate1 > .data$death_date_boxi, 1, 0),
-      flag_keydate2 = dplyr::if_else(.data$record_keydate2 > .data$death_date_boxi, 1, 0),
+      flag_keydate1 = dplyr::if_else(.data$record_keydate1 > .data$death_date_refined, 1, 0),
+      flag_keydate2 = dplyr::if_else(.data$record_keydate2 > .data$death_date_refined, 1, 0),
 
       # Next flag records with 'ongoing' activity after date of death (available from BOXI) if keydate2 is missing and the death date occurs in
       # in the current or a previous financial year.
-      flag_keydate2_missing = dplyr::if_else(((is.na(.data$record_keydate2) | .data$record_keydate2 == "") & (.data$death_date_boxi <= paste0("20", substr(.data$year, 3, 4), "-03-31"))), 1, 0),
+      flag_keydate2_missing = dplyr::if_else(((is.na(.data$record_keydate2) |
+        .data$record_keydate2 == "") &
+        (.data$death_date_refined <= paste0("20", substr(.data$year, 3, 4), "-03-31"))
+      ), 1, 0),
 
       # Also flag records without a death_date in the episode file, but the BOXI death date occurs in the current or a previous financial year.
-      flag_deathdate_missing = dplyr::if_else(((is.na(.data$death_date) | .data$death_date == "") & (.data$death_date_boxi <= paste0("20", substr(.data$year, 3, 4), "-03-31"))), 1, 0)
+      flag_deathdate_missing = dplyr::if_else(((is.na(.data$death_date) |
+        .data$death_date == "") &
+        (.data$death_date_refined <= paste0("20", substr(.data$year, 3, 4), "-03-31"))
+      ), 1, 0)
     ) %>%
     # These should be flagged by one of the two lines of code above, but in these cases, we will also fill in the blank death date if appropriate
 
@@ -69,116 +65,50 @@ add_activity_after_death_flag <- function(
       ~ any(grepl("^1$", c(...)),
         na.rm = TRUE
       ) * 1
-    ))
-
-
-  # Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried
-  # over from previous financial years
-  flag_data <- flag_data %>%
+    )) %>%
+    # Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried
+    # over from previous financial years
     dplyr::filter(.data$activity_after_death == 1) %>%
     # Remove temporary flag variables used to create activity after death flag and fill in missing death_date
-    dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$activity_after_death, .data$death_date_boxi) %>%
+    dplyr::select(
+      "year",
+      "chi",
+      "recid",
+      "record_keydate1",
+      "record_keydate2",
+      "activity_after_death",
+      "death_date_refined",
+      "ep_row_id_death"
+    ) %>%
     dplyr::distinct()
 
   # Match activity after death flag back to episode file
   final_data <- data %>%
     dplyr::left_join(
       flag_data,
-      # TODO: this join_by is not 100% accurate. Consider use ep_file_row_id to join
-      by = c("year", "chi", "record_keydate1", "record_keydate2"),
+      # this join_by is now 100% accurate.
+      by = c(
+        "year",
+        "chi",
+        "recid",
+        "record_keydate1",
+        "record_keydate2",
+        "ep_row_id_death"
+      ),
       na_matches = "never"
     ) %>%
-    dplyr::mutate(death_date = lubridate::as_date(ifelse(is.na(death_date) & !(is.na(death_date_boxi)),
-      death_date_boxi, death_date
+    dplyr::mutate(death_date = lubridate::as_date(ifelse(
+      is.na(.data$death_date) & !(is.na(.data$death_date_refined)),
+      .data$death_date_refined, .data$death_date
     ))) %>%
-    dplyr::select(-death_date_boxi) %>%
-    dplyr::distinct()
-
+    dplyr::select(-"death_date_refined", -"ep_row_id_death") %>%
+    dplyr::distinct() %>%
+    dplyr::mutate(dplyr::if_else(is.na(.data$activity_after_death),
+      0,
+      .data$activity_after_death
+    ))
 
+  cli::cli_alert_info("Add activity after death flag function finished at {Sys.time()}")
 
   return(final_data)
 }
-
-
-#' Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts
-#'
-#' @description The BOXI NRS deaths extract lookup should be created after the extract files for all years have been processed,
-# but before an episode file has been produced. Therefore, all BOXI NRS years should be run before running episode files.
-#'
-#' @param ... additional arguments passed to [get_slf_deaths_lookup_path()]
-#' @param update the update month (defaults to use [latest_update()])
-#'
-#' @param write_to_disk (optional) Should the data be written to disk default is
-#' `TRUE` i.e. write the data to disk.
-#'
-#' @return the final data as a [tibble][tibble::tibble-package].
-#' @export
-#'
-#'
-#'
-# Read data------------------------------------------------
-
-process_combined_deaths_lookup <- function(update = latest_update(),
-                                           write_to_disk = TRUE, ...) {
-  dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths"
-  file_names <- list.files(dir_folder,
-    pattern = "^anon-slf_deaths_lookup_.*parquet",
-    full.names = TRUE
-  )
-
-  # read all year specific deaths lookups and bind them together
-  all_boxi_deaths <- lapply(file_names, arrow::read_parquet) %>%
-    data.table::rbindlist() %>%
-    # convert to chi for processing
-    slfhelper::get_chi() %>%
-    # Remove rows with missing or blank CHI number - could also use na.omit?
-    # na.omit(all_boxi_deaths)
-    dplyr::filter(!is.na(.data$chi) | .data$chi != "")
-
-  # Check all CHI numbers are valid
-  chi_check <- all_boxi_deaths %>%
-    dplyr::pull(.data$chi) %>%
-    phsmethods::chi_check()
-
-  if (!all(chi_check %in% c("Valid CHI", "Missing (Blank)", "Missing (NA)"))) {
-    # There are some Missing (NA) values in the extracts, but I have excluded them above as they cannot be matched to episode file
-    stop("There were bad CHI numbers in the BOXI NRS file")
-  }
-
-  # Check and print error message for chi numbers with more than one death date
-  duplicates <- all_boxi_deaths %>%
-    janitor::get_dupes(.data$chi)
-
-  if (nrow(duplicates) != 0) {
-    # There are some Missing (NA) values in the extracts, but I have excluded them above as they cannot be matched to episode file
-    warning("There were duplicate death dates in the BOXI NRS file.")
-  }
-
-
-  # We decided to include duplicates as unable to determine which is correct date (unless IT can tell us, however, they don't seem to know
-  # the process well enough), and overall impact will be negligible
-  # Get anon_chi and use this to match onto episode file later
-  all_boxi_deaths <- all_boxi_deaths %>%
-    slfhelper::get_anon_chi()
-
-  # Save out duplicates for further investigation if needed (as anon_chi)
-  if (!missing(duplicates)) {
-    write_file(
-      duplicates,
-      fs::path(get_slf_dir(), "Deaths",
-        file_name = stringr::str_glue("slf_deaths_duplicates_{update}.parquet")
-      )
-    )
-  }
-
-  # Maybe save as its own function
-  # Write the all BOXI NRS deaths lookup file to disk, so this can be used to populate activity after death flag in each episode file
-  if (write_to_disk) {
-    write_file(
-      all_boxi_deaths,
-      get_combined_slf_deaths_lookup_path()
-    )
-  }
-
-  return(all_boxi_deaths)
-}
diff --git a/R/add_deceased_flag.R b/R/add_deceased_flag.R
deleted file mode 100644
index f3be216cf..000000000
--- a/R/add_deceased_flag.R
+++ /dev/null
@@ -1,40 +0,0 @@
-#' Create the SLF Deaths lookup
-#'
-#' @description Currently this just uses the NRS death dates 'as is', with no
-#' corrections or modifications, it is expected that this will be expanded to
-#' use the CHI deaths extract from IT as well as taking into account data in
-#' the episode file to assess the validity of a death date.
-#'
-#' @param year The year to process, in FY format.
-#' @param nrs_deaths_data NRS deaths data.
-#' @param chi_deaths_data IT CHI deaths data.
-#' @param write_to_disk (optional) Should the data be written to disk default is
-#' `TRUE` i.e. write the data to disk.
-#'
-#' @return a [tibble][tibble::tibble-package] containing the episode file
-#' @export
-add_deceased_flag <- function(
-    year,
-    refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(),
-    write_to_disk = TRUE) {
-  # create slf deaths lookup
-
-  dplyr::mutate(
-    death_date = dplyr::if_else(is.na(.data$record_keydate1),
-      .data$death_date_chi, .data$record_keydate1
-    ),
-    deceased = TRUE,
-    .keep = "unused"
-  ) %>%
-    # save anon chi on disk
-    slfhelper::get_anon_chi()
-
-  if (write_to_disk) {
-    write_file(
-      slf_deaths_lookup,
-      get_slf_deaths_lookup_path(year, check_mode = "write")
-    )
-  }
-
-  return(slf_deaths_lookup)
-}
diff --git a/R/add_hri_variables.R b/R/add_hri_variables.R
index 3765e1d4d..fd37fba19 100644
--- a/R/add_hri_variables.R
+++ b/R/add_hri_variables.R
@@ -46,6 +46,8 @@ flag_non_scottish_residents <- function(
     ) %>%
     dplyr::select(-"dummy_postcode", -"eng_prac")
 
+  cli::cli_alert_info("Add HRI variables function finished at {Sys.time()}")
+
   return(return_data)
 }
 
diff --git a/R/add_keep_population_flag.R b/R/add_keep_population_flag.R
index 6f2470f53..2f665c954 100644
--- a/R/add_keep_population_flag.R
+++ b/R/add_keep_population_flag.R
@@ -138,6 +138,10 @@ add_keep_population_flag <- function(individual_file, year) {
         )
       )
   }
+
+  cli::cli_alert_info("Add keep population function finished at {Sys.time()}")
+
+  return(individual_file)
 }
 
 
diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R
index bf6216e57..b573c14ac 100644
--- a/R/add_nsu_cohort.R
+++ b/R/add_nsu_cohort.R
@@ -13,8 +13,6 @@ add_nsu_cohort <- function(
     data,
     year,
     nsu_cohort = read_file(get_nsu_path(year)) %>% slfhelper::get_chi()) {
-  cli::cli_alert_info("Add NSU cohort function started at {Sys.time()}")
-
   year_param <- year
 
   if (!check_year_valid(year, "nsu")) {
@@ -118,5 +116,7 @@ add_nsu_cohort <- function(
     ) %>%
     dplyr::select(-dplyr::contains("_nsu"), -"has_chi")
 
+  cli::cli_alert_info("Add NSU cohort function finished at {Sys.time()}")
+
   return(return_df)
 }
diff --git a/R/add_ppa_flag.R b/R/add_ppa_flag.R
index 1d5f9739d..3d3c7eeaf 100644
--- a/R/add_ppa_flag.R
+++ b/R/add_ppa_flag.R
@@ -8,8 +8,6 @@
 #' @return A data frame to use as a lookup of PPAs
 #' @family episode_file
 add_ppa_flag <- function(data) {
-  cli::cli_alert_info("Add PPA flag function started at {Sys.time()}")
-
   check_variables_exist(
     data,
     variables = c(
@@ -227,5 +225,7 @@ add_ppa_flag <- function(data) {
       .data$cij_ppa
     ))
 
+  cli::cli_alert_info("Add PPA flag function finished at {Sys.time()}")
+
   return(ppa_cij_data)
 }
diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
index ff5cbaad1..010ff5bad 100644
--- a/R/aggregate_by_chi.R
+++ b/R/aggregate_by_chi.R
@@ -10,8 +10,6 @@
 #'
 #' @inheritParams create_individual_file
 aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) {
-  cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")
-
   # recommended by `data.table` team to tackle the issue
   # "no visible binding for global variable"
   gender <-
@@ -199,6 +197,8 @@ aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) {
   )
   individual_file <- individual_file[, year := year]
 
+  cli::cli_alert_info("Aggregate by CHI function finished at {Sys.time()}")
+
   # convert back to tibble
   return(dplyr::as_tibble(individual_file))
 }
@@ -246,8 +246,6 @@ vars_contain <- function(data, vars, ignore_case = FALSE) {
 #'
 #' @inheritParams create_individual_file
 aggregate_ch_episodes <- function(episode_file) {
-  cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}")
-
   # recommended by `data.table` team to tackle the issue
   # "no visible binding for global variable"
   ch_no_cost <-
@@ -274,5 +272,7 @@ aggregate_ch_episodes <- function(episode_file) {
   # Convert back to tibble if needed
   episode_file <- tibble::as_tibble(episode_file)
 
+  cli::cli_alert_info("Aggregate ch episodes function finished at {Sys.time()}")
+
   return(episode_file)
 }
diff --git a/R/check_year_valid.R b/R/check_year_valid.R
deleted file mode 100644
index da257ff4c..000000000
--- a/R/check_year_valid.R
+++ /dev/null
@@ -1,55 +0,0 @@
-#' Check data exists for a year
-#'
-#' @description  Check there is data available for a given year
-#' as some extracts are year dependent. E.g Homelessness
-#' is only available from 2016/17 onwards.
-#'
-#' @param year Financial year
-#' @param type name of extract
-#'
-#' @return A logical TRUE/FALSE
-check_year_valid <- function(
-    year,
-    type = c(
-      "acute",
-      "ae",
-      "at",
-      "ch",
-      "client",
-      "cmh",
-      "cost_dna",
-      "dd",
-      "deaths",
-      "dn",
-      "gpooh",
-      "hc",
-      "homelessness",
-      "hhg",
-      "maternity",
-      "mh",
-      "nsu",
-      "outpatients",
-      "pis",
-      "sds",
-      "sparra"
-    )) {
-  if (year <= "1415" && type %in% c("dn", "sparra")) {
-    return(FALSE)
-  } else if (year <= "1516" && type %in% c("cmh", "homelessness", "dd")) {
-    return(FALSE)
-  } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at", "client", "cost_dna")) {
-    return(FALSE)
-  } else if (year <= "1718" && type %in% "hhg") {
-    return(FALSE)
-  } else if (year >= "2122" && type %in% c("cmh", "dn")) {
-    return(FALSE)
-  } else if (year >= "2324" && type %in% c("nsu", "hhg")) {
-    return(FALSE)
-  } else if (year >= "2425" && type %in% "sparra") {
-    return(FALSE)
-  } else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at")) {
-    return(FALSE)
-  }
-
-  return(TRUE)
-}
diff --git a/R/correct_demographics.R b/R/correct_demographics.R
index d221c25ab..d7322b164 100644
--- a/R/correct_demographics.R
+++ b/R/correct_demographics.R
@@ -7,8 +7,6 @@
 #'
 #' @return episode files with updated date of birth and ages
 correct_demographics <- function(data, year) {
-  cli::cli_alert_info("Correct demographics function started at {Sys.time()}")
-
   # keep episodes with missing chi
   data_no_chi <- data %>%
     dplyr::filter(is_missing(.data$chi))
@@ -102,5 +100,7 @@ correct_demographics <- function(data, year) {
     data_chi
   )
 
+  cli::cli_alert_info("Correct demographics function finished at {Sys.time()}")
+
   return(data)
 }
diff --git a/R/cost_uplift.R b/R/cost_uplift.R
index abbbd9b5a..ea3df4abe 100644
--- a/R/cost_uplift.R
+++ b/R/cost_uplift.R
@@ -5,8 +5,6 @@
 #' @return episode data with uplifted costs
 #' @family episode_file
 apply_cost_uplift <- function(data) {
-  cli::cli_alert_info("Apply cost uplift function started at {Sys.time()}")
-
   data <- data %>%
     # attach a uplift scale as the last column
     lookup_uplift() %>%
@@ -29,6 +27,8 @@ apply_cost_uplift <- function(data) {
     # remove the last uplift column
     dplyr::select(-"uplift")
 
+  cli::cli_alert_info("Apply cost uplift function finished at {Sys.time()}")
+
   return(data)
 }
 
diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index ecb6fc126..6c95ea5c2 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -3,11 +3,12 @@
 #' @param processed_data_list containing data from processed extracts.
 #' @param year The year to process, in FY format.
 #' @param homelessness_lookup the lookup file for homelessness
-#' @param sc_client scoial care lookup file
+#' @param sc_client social care lookup file
 #' @param write_to_disk (optional) Should the data be written to disk default is
 #' `TRUE` i.e. write the data to disk.
 #' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output
 #' (instead of chi)
+#' @param write_temp_to_disk write intermediate data for investigation or debug
 #' @inheritParams add_nsu_cohort
 #' @inheritParams fill_geographies
 #' @inheritParams join_cohort_lookups
@@ -32,11 +33,19 @@ create_episode_file <- function(
     slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi(),
     sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(),
     write_to_disk = TRUE,
-    anon_chi_out = TRUE) {
+    anon_chi_out = TRUE,
+    write_temp_to_disk = FALSE) {
+  cli::cli_alert_info("Create episode file function started at {Sys.time()}")
+
   processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble()))
 
   episode_file <- dplyr::bind_rows(processed_data_list) %>%
     slfhelper::get_chi() %>%
+    write_temp_data(year, file_name = "ep_temp1", write_temp_to_disk) %>%
+    add_homelessness_flag(year, lookup = homelessness_lookup) %>%
+    add_homelessness_date_flags(year, lookup = homelessness_lookup) %>%
+    link_delayed_discharge_eps(year, dd_data) %>%
+    write_temp_data(year, file_name = "ep_temp1-2", write_temp_to_disk) %>%
     create_cost_inc_dna() %>%
     apply_cost_uplift() %>%
     store_ep_file_vars(
@@ -120,15 +129,15 @@ create_episode_file <- function(
       # PC8 format may still be used. Ensure here that all datasets are in PC7 format.
       postcode = phsmethods::format_postcode(.data$postcode, "pc7")
     ) %>%
+    write_temp_data(year, file_name = "ep_temp2", write_temp_to_disk) %>%
     correct_cij_vars() %>%
     fill_missing_cij_markers() %>%
-    add_homelessness_flag(year, lookup = homelessness_lookup) %>%
-    add_homelessness_date_flags(year, lookup = homelessness_lookup) %>%
     add_ppa_flag() %>%
-    link_delayed_discharge_eps(year, dd_data) %>%
+    write_temp_data(year, file_name = "ep_temp3", write_temp_to_disk) %>%
     add_nsu_cohort(year, nsu_cohort) %>%
     match_on_ltcs(year, ltc_data) %>%
     correct_demographics(year) %>%
+    write_temp_data(year, file_name = "ep_temp4", write_temp_to_disk) %>%
     create_cohort_lookups(year) %>%
     join_cohort_lookups(year) %>%
     join_sparra_hhg(year) %>%
@@ -140,11 +149,13 @@ create_episode_file <- function(
       year,
       slf_deaths_lookup
     ) %>%
+    write_temp_data(year, file_name = "ep_temp5", write_temp_to_disk) %>%
     add_activity_after_death_flag(year,
       deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>%
         slfhelper::get_chi()
     ) %>%
-    load_ep_file_vars(year)
+    load_ep_file_vars(year) %>%
+    write_temp_data(year, file_name = "ep_temp6", write_temp_to_disk)
 
   if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
     episode_file <- episode_file %>%
@@ -265,8 +276,6 @@ create_episode_file <- function(
 #'
 #' @return `data` with only the `vars_to_keep` kept
 store_ep_file_vars <- function(data, year, vars_to_keep) {
-  cli::cli_alert_info("Store episode file variables function started at {Sys.time()}")
-
   tempfile_path <- get_file_path(
     directory = get_year_dir(year),
     file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"),
@@ -289,6 +298,8 @@ store_ep_file_vars <- function(data, year, vars_to_keep) {
       path = tempfile_path
     )
 
+  cli::cli_alert_info("Store episode file variables function finished at {Sys.time()}")
+
   return(
     dplyr::select(
       data,
@@ -304,8 +315,6 @@ store_ep_file_vars <- function(data, year, vars_to_keep) {
 #'
 #' @return The full SLF data.
 load_ep_file_vars <- function(data, year) {
-  cli::cli_alert_info("Load episode file variable function started at {Sys.time()}")
-
   tempfile_path <- get_file_path(
     directory = get_year_dir(year),
     file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"),
@@ -324,6 +333,8 @@ load_ep_file_vars <- function(data, year) {
 
   fs::file_delete(tempfile_path)
 
+  cli::cli_alert_info("Load episode file variable function finished at {Sys.time()}")
+
   return(full_data)
 }
 
@@ -333,8 +344,6 @@ load_ep_file_vars <- function(data, year) {
 #'
 #' @return A data frame with CIJ markers filled in for those missing.
 fill_missing_cij_markers <- function(data) {
-  cli::cli_alert_info("Fill missing cij markers function started at {Sys.time()}")
-
   fixable_data <- data %>%
     dplyr::filter(
       .data[["recid"]] %in% c("01B", "04B", "GLS", "02B", "DD") & !is.na(.data[["chi"]])
@@ -380,6 +389,8 @@ fill_missing_cij_markers <- function(data) {
 
   return_data <- dplyr::bind_rows(non_fixable_data, fixed_data)
 
+  cli::cli_alert_info("Fill missing cij markers function finished at {Sys.time()}")
+
   return(return_data)
 }
 
@@ -389,14 +400,12 @@ fill_missing_cij_markers <- function(data) {
 #'
 #' @return The data with CIJ variables corrected.
 correct_cij_vars <- function(data) {
-  cli::cli_alert_info("Correct cij variables function started at {Sys.time()}")
-
   check_variables_exist(
     data,
     c("chi", "recid", "cij_admtype", "cij_pattype_code")
   )
 
-  data %>%
+  data <- data %>%
     # Change some values of cij_pattype_code based on cij_admtype
     dplyr::mutate(
       cij_admtype = dplyr::if_else(
@@ -424,6 +433,10 @@ correct_cij_vars <- function(data) {
         9L ~ "Other"
       )
     )
+
+  cli::cli_alert_info("Correct cij variables function finished at {Sys.time()}")
+
+  return(data)
 }
 
 #' Create cost total net inc DNA
@@ -432,13 +445,11 @@ correct_cij_vars <- function(data) {
 #'
 #' @return The data with cost including dna.
 create_cost_inc_dna <- function(data) {
-  cli::cli_alert_info("Create cost inc dna function started at {Sys.time()}")
-
   check_variables_exist(data, c("cost_total_net", "attendance_status"))
 
   # Create cost including DNAs and modify costs
   # not including DNAs using cattend
-  data %>%
+  data <- data %>%
     dplyr::mutate(
       cost_total_net_inc_dnas = .data$cost_total_net,
       # In the Cost_Total_Net column set the cost for
@@ -449,6 +460,10 @@ create_cost_inc_dna <- function(data) {
         .data$cost_total_net
       )
     )
+
+  cli::cli_alert_info("Create cost inc dna function finished at {Sys.time()}")
+
+  return(data)
 }
 
 #' Create the cohort lookups
@@ -458,8 +473,6 @@ create_cost_inc_dna <- function(data) {
 #'
 #' @return The data unchanged (the cohorts are written to disk)
 create_cohort_lookups <- function(data, year, update = latest_update()) {
-  cli::cli_alert_info("Create cohort lookups function started at {Sys.time()}")
-
   create_demographic_cohorts(
     data,
     year,
@@ -474,6 +487,7 @@ create_cohort_lookups <- function(data, year, update = latest_update()) {
     write_to_disk = TRUE
   )
 
+  cli::cli_alert_info("Create cohort lookups function finished at {Sys.time()}")
 
   return(data)
 }
@@ -499,8 +513,6 @@ join_cohort_lookups <- function(
       col_select = c("anon_chi", "service_use_cohort")
     ) %>%
       slfhelper::get_chi()) {
-  cli::cli_alert_info("Join cohort lookups function started at {Sys.time()}")
-
   join_cohort_lookups <- data %>%
     dplyr::left_join(
       demographic_cohort,
@@ -511,6 +523,8 @@ join_cohort_lookups <- function(
       by = "chi"
     )
 
+  cli::cli_alert_info("Join cohort lookups function finished at {Sys.time()}")
+
   return(join_cohort_lookups)
 }
 
@@ -527,8 +541,6 @@ join_sc_client <- function(data,
                            year,
                            sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(),
                            file_type = c("episode", "individual")) {
-  cli::cli_alert_info("Join social care client function started at {Sys.time()}")
-
   if (!check_year_valid(year, type = "client")) {
     data_file <- data
     return(data_file)
@@ -551,5 +563,7 @@ join_sc_client <- function(data,
       )
   }
 
+  cli::cli_alert_info("Join social care client function finished at {Sys.time()}")
+
   return(data_file)
 }
diff --git a/R/create_individual_file.R b/R/create_individual_file.R
index 273761efc..8fd930fb1 100644
--- a/R/create_individual_file.R
+++ b/R/create_individual_file.R
@@ -6,6 +6,7 @@
 #' @param homelessness_lookup the lookup file for homelessness
 #' @param anon_chi_in (Default:TRUE) Is `anon_chi` used in the input
 #' (instead of chi).
+#' @param write_temp_to_disk write intermediate data for investigation or debug
 #' @inheritParams create_episode_file
 #'
 #' @return The processed individual file
@@ -17,7 +18,10 @@ create_individual_file <- function(
     homelessness_lookup = create_homelessness_lookup(year),
     write_to_disk = TRUE,
     anon_chi_in = TRUE,
-    anon_chi_out = TRUE) {
+    anon_chi_out = TRUE,
+    write_temp_to_disk) {
+  cli::cli_alert_info("Create individual file function started at {Sys.time()}")
+
   if (anon_chi_in) {
     episode_file <- slfhelper::get_chi(
       episode_file,
@@ -74,30 +78,36 @@ create_individual_file <- function(
     ))) %>%
     remove_blank_chi() %>%
     add_cij_columns() %>%
-    add_all_columns(year = year)
+    add_all_columns(year = year) %>%
+    write_temp_data(year, file_name = "indiv_temp1", write_temp_to_disk)
 
   if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
     individual_file <- individual_file %>%
-      aggregate_by_chi(year = year, exclude_sc_var = TRUE)
+      aggregate_by_chi(year = year, exclude_sc_var = TRUE) %>%
+      write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk)
   } else {
     individual_file <- individual_file %>%
       aggregate_ch_episodes() %>%
       clean_up_ch(year) %>%
-      aggregate_by_chi(year = year, exclude_sc_var = FALSE)
+      aggregate_by_chi(year = year, exclude_sc_var = FALSE) %>%
+      write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk)
   }
 
   individual_file <- individual_file %>%
     recode_gender() %>%
     clean_individual_file(year) %>%
     join_cohort_lookups(year) %>%
+    write_temp_data(year, file_name = "indiv_temp3", write_temp_to_disk) %>%
     add_homelessness_flag(year, lookup = homelessness_lookup) %>%
     match_on_ltcs(year) %>%
     join_deaths_data(year) %>%
     join_sparra_hhg(year) %>%
+    write_temp_data(year, file_name = "indiv_temp4", write_temp_to_disk) %>%
     join_slf_lookup_vars() %>%
     dplyr::mutate(year = year) %>%
     add_hri_variables(chi_variable = "chi") %>%
     add_keep_population_flag(year) %>%
+    write_temp_data(year, file_name = "indiv_temp5", write_temp_to_disk) %>%
     join_sc_client(year, file_type = "individual")
 
   if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
@@ -177,11 +187,13 @@ create_individual_file <- function(
 #' @family individual_file
 #' @inheritParams create_individual_file
 remove_blank_chi <- function(episode_file) {
-  cli::cli_alert_info("Remove blank CHI function started at {Sys.time()}")
-
-  episode_file %>%
+  episode_file <- episode_file %>%
     dplyr::mutate(chi = dplyr::na_if(.data$chi, "")) %>%
     dplyr::filter(!is.na(.data$chi))
+
+  cli::cli_alert_info("Remove blank CHI function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 
@@ -191,9 +203,7 @@ remove_blank_chi <- function(episode_file) {
 #' @family individual_file
 #' @inheritParams create_individual_file
 add_cij_columns <- function(episode_file) {
-  cli::cli_alert_info("Add cij columns function started at {Sys.time()}")
-
-  episode_file %>%
+  episode_file <- episode_file %>%
     dplyr::mutate(
       cij_non_el = dplyr::if_else(
         .data$cij_pattype_code == 0L,
@@ -221,6 +231,10 @@ add_cij_columns <- function(episode_file) {
         NA_integer_
       )
     )
+
+  cli::cli_alert_info("Add cij columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add all columns
@@ -230,8 +244,6 @@ add_cij_columns <- function(episode_file) {
 #' @family individual_file
 #' @inheritParams create_individual_file
 add_all_columns <- function(episode_file, year) {
-  cli::cli_alert_info("Add all columns function started at {Sys.time()}")
-
   episode_file <- episode_file %>%
     add_acute_columns("Acute", (.data$smrtype == "Acute-DC" | .data$smrtype == "Acute-IP") & .data$cij_pattype != "Maternity") %>%
     add_mat_columns("Mat", .data$recid == "02B" | .data$cij_pattype == "Maternity") %>%
@@ -277,6 +289,10 @@ add_all_columns <- function(episode_file, year) {
         .data$OP_cost_dnas
       )
     )
+
+  cli::cli_alert_info("Add all columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add Acute columns
@@ -286,12 +302,14 @@ add_all_columns <- function(episode_file, year) {
 #' @param condition Condition to create new columns based on
 #' @family individual_file
 add_acute_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add acute columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>%
     add_ipdc_cols(prefix, condition)
+
+  cli::cli_alert_info("Add acute columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add Mat columns
@@ -299,12 +317,14 @@ add_acute_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_mat_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add maternity columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>%
     add_ipdc_cols(prefix, condition, elective = FALSE)
+
+  cli::cli_alert_info("Add maternity columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add MH columns
@@ -312,12 +332,14 @@ add_mat_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_mh_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add mental health columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>%
     add_ipdc_cols(prefix, condition, ipdc_d = FALSE)
+
+  cli::cli_alert_info("Add mental health columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add GLS columns
@@ -325,12 +347,14 @@ add_mh_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_gls_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add geriatric long stay columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>%
     add_ipdc_cols(prefix, condition, ipdc_d = FALSE)
+
+  cli::cli_alert_info("Add geriatric long stay columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add OP columns
@@ -338,8 +362,6 @@ add_gls_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_op_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add outpatient columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
   episode_file <- episode_file %>%
     add_standard_cols(prefix, condition)
@@ -355,6 +377,9 @@ add_op_columns <- function(episode_file, prefix, condition) {
       "{prefix}_newcons_dnas" := dplyr::if_else(eval(condition_5_8), 1L, NA_integer_),
       "{prefix}_cost_dnas" := dplyr::if_else(eval(condition_5_8), .data$cost_total_net_inc_dnas, NA_real_)
     )
+
+  cli::cli_alert_info("Add outpatient columns function finished at {Sys.time()}")
+
   return(episode_file)
 }
 
@@ -363,12 +388,14 @@ add_op_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_ae_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add A&E columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition, cost = TRUE) %>%
     dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), 1L, NA_integer_))
+
+  cli::cli_alert_info("Add A&E columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add PIS columns
@@ -376,12 +403,13 @@ add_ae_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_pis_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add prescribing columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition, cost = TRUE) %>%
     dplyr::mutate("{prefix}_paid_items" := dplyr::if_else(eval(condition), .data$no_paid_items, NA_integer_))
+  cli::cli_alert_info("Add prescribing columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add OoH columns
@@ -389,8 +417,6 @@ add_pis_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_ooh_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add out of hours columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
   episode_file <- episode_file %>%
     add_standard_cols(prefix, condition, cost = TRUE) %>%
@@ -418,6 +444,8 @@ add_ooh_columns <- function(episode_file, prefix, condition) {
       )
     )
 
+  cli::cli_alert_info("Add out of hours columns function finished at {Sys.time()}")
+
   return(episode_file)
 }
 
@@ -426,11 +454,9 @@ add_ooh_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_dn_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add district nursing columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
   if ("total_no_dn_contacts" %in% names(episode_file)) {
-    episode_file %>%
+    episode_file <- episode_file %>%
       add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>%
       dplyr::mutate(
         "{prefix}_contacts" := dplyr::if_else(
@@ -440,10 +466,14 @@ add_dn_columns <- function(episode_file, prefix, condition) {
         )
       )
   } else {
-    episode_file %>%
+    episode_file <- episode_file %>%
       add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>%
       dplyr::mutate("{prefix}_contacts" := NA_integer_)
   }
+
+  cli::cli_alert_info("Add district nursing columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add CMH columns
@@ -451,12 +481,14 @@ add_dn_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_cmh_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add communicty mental health columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition) %>%
     dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), 1L, NA_integer_))
+
+  cli::cli_alert_info("Add communicty mental health columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add DD columns
@@ -464,8 +496,6 @@ add_cmh_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_dd_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add delayed discharges columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
   condition_delay <- substitute(condition & primary_delay_reason != "9")
   episode_file <- episode_file %>%
@@ -479,6 +509,9 @@ add_dd_columns <- function(episode_file, prefix, condition) {
       "{prefix}_Code9_episodes" := dplyr::if_else(eval(condition_delay_9), 1L, NA_integer_),
       "{prefix}_Code9_beddays" := dplyr::if_else(eval(condition_delay_9), .data$yearstay, NA_real_)
     )
+
+  cli::cli_alert_info("Add delayed discharges columns function finished at {Sys.time()}")
+
   return(episode_file)
 }
 
@@ -487,12 +520,14 @@ add_dd_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_nsu_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add non service users columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition) %>%
     dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1L, NA_integer_))
+
+  cli::cli_alert_info("Add non service users columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add NRS columns
@@ -500,12 +535,14 @@ add_nsu_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_nrs_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add nrs columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition) %>%
     dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1L, NA_integer_))
+
+  cli::cli_alert_info("Add nrs columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add HL1 columns
@@ -513,11 +550,13 @@ add_nrs_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_hl1_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add homelessness columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition)
+
+  cli::cli_alert_info("Add homelessness columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add CH columns
@@ -525,10 +564,8 @@ add_hl1_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_ch_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add care home columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition) %>%
     dplyr::mutate(
       ch_cost_per_day = dplyr::if_else(
@@ -550,6 +587,10 @@ add_ch_columns <- function(episode_file, prefix, condition) {
         .data$ch_ep_end
       )
     )
+
+  cli::cli_alert_info("Add care home columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add HC columns
@@ -557,8 +598,6 @@ add_ch_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_hc_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add home care columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
 
   episode_file <- episode_file %>%
@@ -596,6 +635,10 @@ add_hc_columns <- function(episode_file, prefix, condition) {
       "{prefix}_reablement_hours" := dplyr::if_else(eval(condition_reabl), .data$hc_hours_annual, NA_real_),
       "{prefix}_reablement_hours_cost" := dplyr::if_else(eval(condition_reabl), .data$cost_total_net, NA_real_)
     )
+
+  cli::cli_alert_info("Add home care columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add AT columns
@@ -603,15 +646,17 @@ add_hc_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_at_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add alarms telecare columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition) %>%
     dplyr::mutate(
       "{prefix}_alarms" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Alarm", 1L, NA_integer_),
       "{prefix}_telecare" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Tele", 1L, NA_integer_)
     )
+
+  cli::cli_alert_info("Add alarms telecare columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add SDS columns
@@ -619,10 +664,8 @@ add_at_columns <- function(episode_file, prefix, condition) {
 #' @inheritParams add_acute_columns
 #' @family individual_file
 add_sds_columns <- function(episode_file, prefix, condition) {
-  cli::cli_alert_info("Add SDS columns function started at {Sys.time()}")
-
   condition <- substitute(condition)
-  episode_file %>%
+  episode_file <- episode_file %>%
     add_standard_cols(prefix, condition) %>%
     dplyr::mutate(
       "{prefix}_option_1" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-1", 1L, NA_integer_),
@@ -630,6 +673,10 @@ add_sds_columns <- function(episode_file, prefix, condition) {
       "{prefix}_option_3" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-3", 1L, NA_integer_),
       "{prefix}_option_4" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-4", 1L, NA_integer_)
     )
+
+  cli::cli_alert_info("Add SDS columns function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Add columns based on IPDC
@@ -643,8 +690,6 @@ add_sds_columns <- function(episode_file, prefix, condition) {
 #' cij_pattype (lgl)
 #' @family individual_file
 add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) {
-  cli::cli_alert_info("Add ipdc columns function started at {Sys.time()}")
-
   condition_i <- substitute(eval(condition) & ipdc == "I")
   episode_file <- episode_file %>%
     dplyr::mutate(
@@ -676,6 +721,9 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi
         "{prefix}_daycase_cost" := dplyr::if_else(eval(condition_d), .data$cost_total_net, NA_real_)
       )
   }
+
+  cli::cli_alert_info("Add ipdc columns function finished at {Sys.time()}")
+
   return(episode_file)
 }
 
@@ -689,14 +737,15 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi
 #' @param cost Whether to create prefix_cost col, e.g. "Acute_cost"
 #' @family individual_file
 add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE, cost = FALSE) {
-  cli::cli_alert_info("Add standard columns function started at {Sys.time()}")
-
   if (episode) {
     episode_file <- dplyr::mutate(episode_file, "{prefix}_episodes" := dplyr::if_else(eval(condition), 1L, NA_integer_))
   }
   if (cost) {
     episode_file <- dplyr::mutate(episode_file, "{prefix}_cost" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_))
   }
+
+  cli::cli_alert_info("Add standard columns function finished at {Sys.time()}")
+
   return(episode_file)
 }
 
@@ -707,9 +756,7 @@ add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE,
 #' @inheritParams create_individual_file
 #' @family individual_file
 clean_up_ch <- function(episode_file, year) {
-  cli::cli_alert_info("Clean up CH function started at {Sys.time()}")
-
-  episode_file %>%
+  episode_file <- episode_file %>%
     dplyr::mutate(
       fy_end = end_fy(year),
       fy_start = start_fy(year)
@@ -741,6 +788,10 @@ clean_up_ch <- function(episode_file, year) {
       )
     ) %>%
     dplyr::select(-c("fy_end", "fy_start", "term_1", "term_2"))
+
+  cli::cli_alert_info("Clean up CH function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Recode gender
@@ -750,9 +801,7 @@ clean_up_ch <- function(episode_file, year) {
 #' @inheritParams create_individual_file
 #' @family individual_file
 recode_gender <- function(episode_file) {
-  cli::cli_alert_info("Recode Gender function started at {Sys.time()}")
-
-  episode_file %>%
+  episode_file <- episode_file %>%
     dplyr::mutate(
       gender = dplyr::if_else(
         .data$gender %in% c(0L, 9L),
@@ -760,6 +809,10 @@ recode_gender <- function(episode_file) {
         .data$gender
       )
     )
+
+  cli::cli_alert_info("Recode Gender function finished at {Sys.time()}")
+
+  return(episode_file)
 }
 
 #' Condition columns
@@ -769,11 +822,12 @@ recode_gender <- function(episode_file) {
 #' "dementia" and "dementia_date"
 #' @family individual_file
 condition_cols <- function() {
-  cli::cli_alert_info("Return condition columns function started at {Sys.time()}")
-
   conditions <- slfhelper::ltc_vars
   date_cols <- paste0(conditions, "_date")
   all_cols <- c(conditions, date_cols)
+
+  cli::cli_alert_info("Return condition columns function finished at {Sys.time()}")
+
   return(all_cols)
 }
 
@@ -808,9 +862,7 @@ min_no_inf <- function(x) {
 #' @param individual_file Individual file where each row represents a unique CHI
 #' @param year Financial year e.g 1718
 clean_individual_file <- function(individual_file, year) {
-  cli::cli_alert_info("Clean individual file function started at {Sys.time()}")
-
-  individual_file %>%
+  individual_file <- individual_file %>%
     dplyr::select(!dplyr::any_of(c(
       "ch_no_cost",
       "no_paid_items",
@@ -819,6 +871,10 @@ clean_individual_file <- function(individual_file, year) {
     ))) %>%
     clean_up_gender() %>%
     dplyr::mutate(age = compute_mid_year_age(year, .data$dob))
+
+  cli::cli_alert_info("Clean individual file function finished at {Sys.time()}")
+
+  return(individual_file)
 }
 
 #' Clean up gender column
@@ -827,15 +883,16 @@ clean_individual_file <- function(individual_file, year) {
 #'
 #' @inheritParams clean_individual_file
 clean_up_gender <- function(individual_file) {
-  cli::cli_alert_info("Clean up gender column function started at {Sys.time()}")
-
-  individual_file %>%
+  individual_file <- individual_file %>%
     dplyr::mutate(
       gender = dplyr::case_when(
         .data$gender != 1.5 ~ round(.data$gender),
         .default = phsmethods::sex_from_chi(.data$chi, chi_check = FALSE)
       )
     )
+
+  cli::cli_alert_info("Clean up gender column function finished at {Sys.time()}")
+  return(individual_file)
 }
 
 #' Join slf lookup variables
@@ -855,8 +912,6 @@ join_slf_lookup_vars <- function(individual_file,
                                    col_select = c("gpprac", "cluster", "hbpraccode")
                                  ),
                                  hbrescode_var = "hb2018") {
-  cli::cli_alert_info("Join slf lookup variables function started at {Sys.time()}")
-
   individual_file <- individual_file %>%
     dplyr::left_join(
       slf_postcode_lookup,
@@ -868,5 +923,7 @@ join_slf_lookup_vars <- function(individual_file,
     ) %>%
     dplyr::rename(hbrescode = hbrescode_var)
 
+  cli::cli_alert_info("Join slf lookup variables function finished at {Sys.time()}")
+
   return(individual_file)
 }
diff --git a/R/fill_geographies.R b/R/fill_geographies.R
index 5638c8758..9b7721391 100644
--- a/R/fill_geographies.R
+++ b/R/fill_geographies.R
@@ -16,8 +16,6 @@ fill_geographies <- function(
       get_slf_gpprac_path(),
       col_select = c("gpprac", "cluster", "hbpraccode")
     )) {
-  cli::cli_alert_info("Fill geographies function started at {Sys.time()}")
-
   check_variables_exist(data, c(
     "chi",
     "postcode",
@@ -30,7 +28,7 @@ fill_geographies <- function(
     "gpprac"
   ))
 
-  data %>%
+  data <- data %>%
     fill_postcode_geogs(
       slf_pc_lookup = read_file(get_slf_postcode_path())
     ) %>%
@@ -40,6 +38,10 @@ fill_geographies <- function(
         col_select = c("gpprac", "cluster", "hbpraccode")
       )
     )
+
+  cli::cli_alert_info("Fill geographies function finished at {Sys.time()}")
+
+  return(data)
 }
 
 #' Make a postcode lookup for filling to most recent postcodes based on CHI
@@ -189,7 +191,6 @@ fill_gpprac_geographies <- function(
 #'
 #' @return data with matched HSCP and LCA codes
 cascade_geographies <- function(data) {
-  # TODO rework this function into a series of smaller functions which operate on vectors
   # e.g. cascade_hscp_lca <- function(hscp, lca) {...}
   # Would take HSCP and populate any missing LCA using it
   data <- data %>%
diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R
index 5e61a2082..f503af703 100644
--- a/R/join_deaths_data.R
+++ b/R/join_deaths_data.R
@@ -10,16 +10,16 @@ join_deaths_data <- function(
     data,
     year,
     slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi()) {
-  cli::cli_alert_info("Join deaths data function started at {Sys.time()}")
+  data <- data %>%
+    dplyr::left_join(
+      slf_deaths_lookup %>%
+        dplyr::distinct(.data$chi, .keep_all = TRUE),
+      by = "chi",
+      na_matches = "never",
+      relationship = "many-to-one"
+    )
 
-  return(
-    data %>%
-      dplyr::left_join(
-        slf_deaths_lookup %>%
-          dplyr::distinct(chi, .keep_all = TRUE),
-        by = "chi",
-        na_matches = "never",
-        relationship = "many-to-one"
-      )
-  )
+  cli::cli_alert_info("Join deaths data function finished at {Sys.time()}")
+
+  return(data)
 }
diff --git a/R/join_sparra_hhg.R b/R/join_sparra_hhg.R
index c22e1a9c3..3218d6be7 100644
--- a/R/join_sparra_hhg.R
+++ b/R/join_sparra_hhg.R
@@ -5,8 +5,6 @@
 #' @return The data including the SPARRA and HHG variables matched
 #' on to the episode file.
 join_sparra_hhg <- function(data, year) {
-  cli::cli_alert_info("Join SPARRA and HHG function started at {Sys.time()}")
-
   if (check_year_valid(year, "sparra")) {
     data <- dplyr::left_join(
       data,
@@ -63,5 +61,7 @@ join_sparra_hhg <- function(data, year) {
     data <- dplyr::mutate(data, hhg_end_fy = NA_integer_)
   }
 
+  cli::cli_alert_info("Join SPARRA and HHG function finished at {Sys.time()}")
+
   return(data)
 }
diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R
index d4162b619..bdd656791 100644
--- a/R/link_delayed_discharge_eps.R
+++ b/R/link_delayed_discharge_eps.R
@@ -12,8 +12,6 @@ link_delayed_discharge_eps <- function(
     episode_file,
     year,
     dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi()) {
-  cli::cli_alert_info("Link delayed discharge to episode file function started at {Sys.time()}")
-
   if (!check_year_valid(year, type = "dd")) {
     episode_file <- episode_file
     return(episode_file)
@@ -292,7 +290,7 @@ link_delayed_discharge_eps <- function(
     )) %>%
     dplyr::group_by(.data$chi, .data$cij_marker) %>%
     dplyr::mutate(cij_delay = max(.data$has_delay)) %>%
-    dplyr::mutate(cij_delay = dplyr::if_else(cij_delay == "0",
+    dplyr::mutate(cij_delay = dplyr::if_else(.data$cij_delay == "0",
       FALSE,
       TRUE,
       missing = NA
@@ -313,7 +311,6 @@ link_delayed_discharge_eps <- function(
     # keep variables from ep files
     dplyr::select(
       -c(
-        "ep_file_row_id",
         "year",
         "recid",
         "record_keydate1",
@@ -364,17 +361,19 @@ link_delayed_discharge_eps <- function(
         )
     ) %>%
     # populate cij_delay dd details back to ep
-    dplyr::group_by(chi, cij_marker) %>%
+    dplyr::group_by(.data$chi, .data$cij_marker) %>%
     dplyr::mutate(
-      has_dd = any(recid == "DD"),
-      delay_dd = any(cij_delay)
+      has_dd = any(.data$recid == "DD"),
+      delay_dd = any(.data$cij_delay)
     ) %>%
     dplyr::ungroup() %>%
-    dplyr::mutate(cij_delay = dplyr::if_else(has_dd,
-      delay_dd,
-      cij_delay
+    dplyr::mutate(cij_delay = dplyr::if_else(.data$has_dd,
+      .data$delay_dd,
+      .data$cij_delay
     )) %>%
     dplyr::select(-c("has_dd", "delay_dd", "original_admission_date", "amended_dates"))
 
+  cli::cli_alert_info("Link delayed discharge to episode file function finished at {Sys.time()}")
+
   return(linked_data)
 }
diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R
index f0522c00d..51c280966 100644
--- a/R/match_on_ltcs.R
+++ b/R/match_on_ltcs.R
@@ -12,8 +12,6 @@ match_on_ltcs <- function(
     data,
     year,
     ltc_data = read_file(get_ltcs_path(year)) %>% slfhelper::get_chi()) {
-  cli::cli_alert_info("Match on LTCs function started at {Sys.time()}")
-
   # Match on LTC lookup
   matched <- dplyr::left_join(
     data,
@@ -32,5 +30,7 @@ match_on_ltcs <- function(
     ) %>%
     dplyr::select(-tidyselect::ends_with("_ltc"))
 
+  cli::cli_alert_info("Match on LTCs function finished at {Sys.time()}")
+
   return(matched)
 }
diff --git a/R/process_costs_rmd.R b/R/process_costs_rmd.R
index bca00871d..bd986fb35 100644
--- a/R/process_costs_rmd.R
+++ b/R/process_costs_rmd.R
@@ -23,7 +23,8 @@ process_costs_rmd <- function(file_name) {
 
   output_dir <- fs::path(
     get_slf_dir(),
-    "Tests"
+    "Tests",
+    "cost_tests"
   )
 
   input_file <- get_file_path(
@@ -44,13 +45,19 @@ process_costs_rmd <- function(file_name) {
     check_mode = "write"
   )
 
-  rmarkdown::render(
-    input = input_file,
-    output_file = output_file,
-    output_format = "html_document",
-    envir = new.env(),
-    quiet = TRUE
-  )
+  if (fs::file_exists(output_file)) {
+    # Do not write file if it already exists
+    output <- NULL
+  } else {
+    # If file does not exist, create it
+    rmarkdown::render(
+      input = input_file,
+      output_file = output_file,
+      output_format = "html_document",
+      envir = new.env(),
+      quiet = TRUE
+    )
+  }
 
   if (fs::file_info(output_file)$user == Sys.getenv("USER")) {
     # Set the correct permissions
diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R
index c46f175c7..ca5206965 100644
--- a/R/process_extract_acute.R
+++ b/R/process_extract_acute.R
@@ -6,6 +6,7 @@
 #'
 #' @param data The extract to process
 #' @param year The year to process, in FY format.
+#' @param acute_cup_path path to acute_cup data
 #' @param write_to_disk (optional) Should the data be written to disk default is
 #' `TRUE` i.e. write the data to disk.
 #'
diff --git a/R/process_extract_ae.R b/R/process_extract_ae.R
index 9a5829cc5..196f6567c 100644
--- a/R/process_extract_ae.R
+++ b/R/process_extract_ae.R
@@ -196,6 +196,8 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
     col_type = readr::cols(
       "ED Arrival Date" = readr::col_date(format = "%Y/%m/%d %T"),
       "ED Arrival Time" = readr::col_time(""),
+      "ED Discharge Date" = readr::col_date(format = "%Y/%m/%d %T"),
+      "ED Discharge Time" = readr::col_time(""),
       "ED Case Reference Number [C]" = readr::col_character(),
       "CUP Marker" = readr::col_double(),
       "CUP Pathway Name" = readr::col_character()
@@ -205,6 +207,8 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
     dplyr::rename(
       record_keydate1 = "ED Arrival Date",
       keytime1 = "ED Arrival Time",
+      record_keydate2 = "ED Discharge Date",
+      keytime2 = "ED Discharge Time",
       case_ref_number = "ED Case Reference Number [C]",
       cup_marker = "CUP Marker",
       cup_pathway = "CUP Pathway Name"
@@ -217,6 +221,8 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
     # Remove any duplicates
     dplyr::distinct(.data$record_keydate1,
       .data$keytime1,
+      .data$record_keydate2,
+      .data$keytime2,
       .data$case_ref_number,
       .keep_all = TRUE
     )
@@ -225,9 +231,12 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) {
   # Join data--------------------------------------------
 
   matched_ae_data <- outfile %>%
-    dplyr::left_join(
-      ae_cup_clean,
-      by = c("record_keydate1", "keytime1", "case_ref_number")
+    dplyr::left_join(ae_cup_clean,
+      by = c(
+        "record_keydate1", "keytime1",
+        "record_keydate2", "keytime2",
+        "case_ref_number"
+      )
     ) %>%
     dplyr::arrange(
       .data$chi,
diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R
index 9d47dd5f0..0769197cd 100644
--- a/R/process_extract_alarms_telecare.R
+++ b/R/process_extract_alarms_telecare.R
@@ -41,7 +41,6 @@ process_extract_alarms_telecare <- function(
       "smrtype",
       "chi",
       "dob",
-      # "person_id",
       "gender",
       "postcode",
       "sc_send_lca",
diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R
index dbf817af4..bdaba4767 100644
--- a/R/process_extract_care_home.R
+++ b/R/process_extract_care_home.R
@@ -115,7 +115,6 @@ process_extract_care_home <- function(
       "recid",
       "smrtype",
       "chi",
-      # "person_id",
       "dob",
       "gender",
       "postcode",
diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R
index e8d07b9e8..1f98a35fe 100644
--- a/R/process_extract_gp_ooh.R
+++ b/R/process_extract_gp_ooh.R
@@ -6,6 +6,7 @@
 #'
 #' @param year The year to process, in FY format.
 #' @param data_list A list containing the extracts.
+#' @param gp_ooh_cup_path path to gp ooh cup data
 #' @param write_to_disk (optional) Should the data be written to disk default is
 #' `TRUE` i.e. write the data to disk.
 #'
@@ -88,7 +89,6 @@ process_extract_gp_ooh <- function(year,
     )
 
   # Keep the location descriptions as a lookup.
-  # TODO write the GP OoH lookup out using some functions
   location_lookup <- ooh_clean %>%
     dplyr::group_by(.data$location) %>%
     dplyr::summarise(
@@ -159,8 +159,8 @@ process_extract_gp_ooh <- function(year,
       "cost_total_net",
       tidyselect::ends_with("_cost"),
       "ooh_case_id",
-      cup_marker,
-      cup_pathway
+      "cup_marker",
+      "cup_pathway"
     ) %>%
     slfhelper::get_anon_chi()
 
diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R
index 651be172d..8f866e9f0 100644
--- a/R/process_extract_home_care.R
+++ b/R/process_extract_home_care.R
@@ -95,8 +95,7 @@ process_extract_home_care <- function(
       tidyselect::starts_with("hc_cost_"),
       "cost_total_net",
       "hc_provider",
-      "hc_reablement",
-      # "person_id"
+      "hc_reablement"
     ) %>%
     slfhelper::get_anon_chi()
 
diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R
index 49bf3935e..86324e043 100644
--- a/R/process_extract_homelessness.R
+++ b/R/process_extract_homelessness.R
@@ -154,7 +154,6 @@ process_extract_homelessness <- function(
     dplyr::rename(hl1_completeness = "pct_complete_all") %>%
     dplyr::mutate(hl1_completeness = round(.data$hl1_completeness, 1))
 
-  # TODO - Include person_id (from client_id)
   final_data <- hl1_data %>%
     dplyr::select(
       "year",
diff --git a/R/process_extract_consultations.R b/R/process_extract_ooh_consultations.R
similarity index 100%
rename from R/process_extract_consultations.R
rename to R/process_extract_ooh_consultations.R
diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R
index f8e5f8579..bb2c648ea 100644
--- a/R/process_extract_sds.R
+++ b/R/process_extract_sds.R
@@ -41,7 +41,6 @@ process_extract_sds <- function(
       "smrtype",
       "chi",
       "dob",
-      # "person_id",
       "gender",
       "postcode",
       "sc_send_lca",
diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R
index 30772383e..c1adfed0d 100644
--- a/R/process_lookup_homelessness.R
+++ b/R/process_lookup_homelessness.R
@@ -13,8 +13,6 @@
 create_homelessness_lookup <- function(
     year,
     homelessness_data = read_file(get_source_extract_path(year, "homelessness")) %>% slfhelper::get_chi()) {
-  cli::cli_alert_info("Create homelessness lookup function started at {Sys.time()}")
-
   # Specify years available for running
   if (year < "1617") {
     return(NULL)
@@ -24,6 +22,8 @@ create_homelessness_lookup <- function(
     tidyr::drop_na(.data$chi) %>%
     dplyr::mutate(hl1_in_fy = 1L)
 
+  cli::cli_alert_info("Create homelessness lookup function finished at {Sys.time()}")
+
   return(homelessness_lookup)
 }
 
@@ -41,8 +41,6 @@ create_homelessness_lookup <- function(
 #' @export
 add_homelessness_flag <- function(data, year,
                                   lookup = create_homelessness_lookup(year)) {
-  cli::cli_alert_info("Add homelessness flag function started at {Sys.time()}")
-
   if (!check_year_valid(year, type = "homelessness")) {
     data <- data
     return(data)
@@ -57,6 +55,8 @@ add_homelessness_flag <- function(data, year,
     ) %>%
     dplyr::mutate(hl1_in_fy = tidyr::replace_na(.data$hl1_in_fy, 0L))
 
+  cli::cli_alert_info("Add homelessness flag function finished at {Sys.time()}")
+
   return(data)
 }
 
@@ -72,8 +72,6 @@ add_homelessness_flag <- function(data, year,
 #' @return the final data as a [tibble][tibble::tibble-package].
 #' @export
 add_homelessness_date_flags <- function(data, year, lookup = create_homelessness_lookup(year)) {
-  cli::cli_alert_info("Add homelessness date flags function started at {Sys.time()}")
-
   if (!check_year_valid(year, type = "homelessness")) {
     data <- data
     return(data)
@@ -133,5 +131,7 @@ add_homelessness_date_flags <- function(data, year, lookup = create_homelessness
       )
     )
 
+  cli::cli_alert_info("Add homelessness date flags function finished at {Sys.time()}")
+
   return(data)
 }
diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R
index 91c08632d..9b5f2bded 100644
--- a/R/process_lookup_sc_client.R
+++ b/R/process_lookup_sc_client.R
@@ -32,8 +32,9 @@ process_lookup_sc_client <-
         by = c("sending_location", "social_care_id")
       ) %>%
       # need period for the replace sc id with latest function
-      dplyr::mutate(period = ifelse(!(is.na(.data$financial_quarter)), paste0(.data$financial_year, "Q", financial_quarter),
-        financial_year
+      dplyr::mutate(period = ifelse(!(is.na(.data$financial_quarter)),
+        paste0(.data$financial_year, "Q", .data$financial_quarter),
+        .data$financial_year
       )) %>%
       replace_sc_id_with_latest() %>%
       # remove cases with no data in client
diff --git a/R/process_refined_death.R b/R/process_refined_death.R
index dc7663221..f49b30619 100644
--- a/R/process_refined_death.R
+++ b/R/process_refined_death.R
@@ -44,13 +44,18 @@ process_refined_death <- function(
       .data$death_date_chi,
       .data$record_keydate1
     )) %>%
-    dplyr::select(anon_chi, death_date) %>%
+    dplyr::select("anon_chi", "death_date") %>%
     # add fy when death happened
     dplyr::mutate(
-      fy = phsmethods::extract_fin_year(death_date),
+      fy = phsmethods::extract_fin_year(.data$death_date),
       fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7)))
-    )
-  # TODO: check distinct death data by chi while keeping chi==NA records
+    ) %>%
+    # no need to keep NA
+    dplyr::filter(!is.na(.data$anon_chi)) %>%
+    dplyr::group_by(.data$anon_chi) %>%
+    dplyr::arrange(.data$death_date) %>%
+    dplyr::distinct(.data$anon_chi, .keep_all = TRUE) %>%
+    dplyr::ungroup()
 
   if (write_to_disk) {
     write_file(
diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R
index aafc3d727..cd20f412b 100644
--- a/R/process_sc_all_alarms_telecare.R
+++ b/R/process_sc_all_alarms_telecare.R
@@ -87,11 +87,6 @@ process_sc_all_alarms_telecare <- function(
   # Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function)
   data <- replace_sc_id_with_latest(data)
 
-  # data$person_id <- paste0(
-  #   data$sending_location,
-  #   "-",
-  #   data$social_care_id
-  # )
 
   # Deal with episodes that have a package across quarters
   data[, pkg_count := seq_len(.N), by = list(
@@ -125,7 +120,6 @@ process_sc_all_alarms_telecare <- function(
     dob = data.table::last(dob),
     postcode = data.table::last(postcode),
     recid = data.table::last(recid),
-    # person_id = data.table::last(person_id),
     sc_send_lca = data.table::last(sc_send_lca)
   ), by = list(
     sending_location,
diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R
index 5478d50cc..cebcea25c 100644
--- a/R/process_sc_all_care_home.R
+++ b/R/process_sc_all_care_home.R
@@ -47,7 +47,7 @@ process_sc_all_care_home <- function(
       by = c("sending_location", "social_care_id")
     ) %>%
     replace_sc_id_with_latest() %>%
-    dplyr::select(-latest_flag, -latest_sc_id)
+    dplyr::select(-"latest_flag", -"latest_sc_id")
 
 
   # cleaning and matching care home names
@@ -58,7 +58,13 @@ process_sc_all_care_home <- function(
   )
 
   fixed_ch_provider <- name_postcode_clean %>%
-    dplyr::select(-ch_name_validated, -open_interval, -latest_close_date, -ch_name_old, -ch_postcode_old) %>%
+    dplyr::select(
+      -"ch_name_validated",
+      -"open_interval",
+      -"latest_close_date",
+      -"ch_name_old",
+      -"ch_postcode_old"
+    ) %>%
     dplyr::mutate(
       ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) # (n = 2)
     ) %>%
@@ -126,21 +132,21 @@ process_sc_all_care_home <- function(
     # We want to keep the nursing provision changes when we merge cases that have the same admission date
     dplyr::mutate(previous_nursing_care_provision = dplyr::lag(.data[["nursing_care_provision"]])) %>%
     # create a T/F flag for if nursing provision was the same as previous record with same admission date
-    dplyr::mutate(split_episode = tidyr::replace_na(.data[["previous_nursing_care_provision"]] != nursing_care_provision, TRUE)) %>%
+    dplyr::mutate(split_episode = tidyr::replace_na(.data[["previous_nursing_care_provision"]] != .data$nursing_care_provision, TRUE)) %>%
     dplyr::group_by(
       .data[["social_care_id"]],
       .data[["sending_location"]],
       .data[["split_episode"]]
     ) %>%
     # create a count of each time the nursing provision changes between records with the same admission date
-    dplyr::mutate(split_episode_counter = ifelse(split_episode == TRUE, dplyr::row_number(), NA)) %>%
+    dplyr::mutate(split_episode_counter = ifelse(.data$split_episode == TRUE, dplyr::row_number(), NA)) %>%
     dplyr::group_by(
       .data[["social_care_id"]],
       .data[["sending_location"]]
     ) %>%
     # fill split episode counter. This will create a new id number for each different nursing provision within an episode
-    tidyr::fill(split_episode_counter, .direction = c("down")) %>%
-    dplyr::select(-previous_nursing_care_provision, -split_episode)
+    tidyr::fill(.data$split_episode_counter, .direction = c("down")) %>%
+    dplyr::select(-"previous_nursing_care_provision", -"split_episode")
 
 
   # Merge records to a single row per episode where admission is the same
@@ -196,7 +202,7 @@ process_sc_all_care_home <- function(
       )
     ) %>%
     dplyr::ungroup() %>%
-    dplyr::select(-period_start_date, -split_episode_counter)
+    dplyr::select(-"period_start_date", -"split_episode_counter")
 
 
   # Compare to Deaths Data
@@ -248,7 +254,7 @@ process_sc_all_care_home <- function(
       previous_discharge_date_chi = dplyr::lag(.data[["ch_discharge_date"]]) +
         lubridate::days(1L),
       # if the first row is NA, set this to the ch_discharge_date
-      previous_discharge_date_chi = dplyr::if_else(row_number == 1, .data[["ch_discharge_date"]],
+      previous_discharge_date_chi = dplyr::if_else(.data$row_number == 1, .data[["ch_discharge_date"]],
         .data[["previous_discharge_date_chi"]]
       )
     ) %>%
@@ -263,11 +269,11 @@ process_sc_all_care_home <- function(
       ),
       # if there is more than 1 day between (or the last ep for the individual) flag as new ep (Y)
       # if there is < 1 day (i.e. a pause of up to 1 day or stays overlap flag as same ep (N))
-      new_episode = dplyr::if_else(is.na(days_to_next_rec) | days_to_next_rec > 1, "Y", "N")
+      new_episode = dplyr::if_else(is.na(.data$days_to_next_rec) | .data$days_to_next_rec > 1, "Y", "N")
     ) %>%
     # create continuous marker using flag for new stay
     dplyr::mutate(
-      ch_chi_cis = purrr::accumulate(new_episode[-1],
+      ch_chi_cis = purrr::accumulate(.data$new_episode[-1],
         .init = 1,
         ~ if (.y == "Y") {
           .x + 1
@@ -295,7 +301,7 @@ process_sc_all_care_home <- function(
       previous_discharge_date_sc = dplyr::lag(.data[["ch_discharge_date"]]) +
         lubridate::days(1L),
       # if the first row is NA, set this to the ch_discharge_date
-      previous_discharge_date_sc = dplyr::if_else(row_number == 1, .data[["ch_discharge_date"]],
+      previous_discharge_date_sc = dplyr::if_else(.data$row_number == 1, .data[["ch_discharge_date"]],
         .data[["previous_discharge_date_sc"]]
       )
     ) %>%
@@ -310,11 +316,11 @@ process_sc_all_care_home <- function(
       ),
       # if there is more than 1 day between (or the last ep for the individual) flag as new ep (Y)
       # if there is < 1 day (i.e. a pause of up to 1 day or stays overlap flag as same ep (N))
-      new_episode = dplyr::if_else(is.na(days_to_next_rec) | days_to_next_rec > 1, "Y", "N")
+      new_episode = dplyr::if_else(is.na(.data$days_to_next_rec) | .data$days_to_next_rec > 1, "Y", "N")
     ) %>%
     # create continuous marker using flag for new stay
     dplyr::mutate(
-      ch_sc_id_cis = purrr::accumulate(new_episode[-1],
+      ch_sc_id_cis = purrr::accumulate(.data$new_episode[-1],
         .init = 1,
         ~ if (.y == "Y") {
           .x + 1
@@ -326,8 +332,11 @@ process_sc_all_care_home <- function(
     dplyr::ungroup() %>%
     # remove variables no longer needed
     dplyr::select(
-      -previous_discharge_date_chi, -previous_discharge_date_sc, -row_number,
-      -days_to_next_rec, -new_episode
+      -"previous_discharge_date_chi",
+      -"previous_discharge_date_sc",
+      -"row_number",
+      -"days_to_next_rec",
+      -"new_episode"
     )
 
 
@@ -364,7 +373,7 @@ process_sc_all_care_home <- function(
         .data[["type_of_admission"]]
       )
     ) %>%
-    dplyr::select(-ch_ep_start, -ch_ep_end, -stay_los, -stay_respite)
+    dplyr::select(-"ch_ep_start", -"ch_ep_end", -"stay_los", -"stay_respite")
 
 
   ch_data_final <- adm_reason_recoded %>%
@@ -385,7 +394,6 @@ process_sc_all_care_home <- function(
     )) %>%
     dplyr::select(
       "chi",
-      # "person_id",
       "gender",
       "dob",
       "postcode",
diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R
index c5b7d43eb..b48be79ad 100644
--- a/R/process_sc_all_sds.R
+++ b/R/process_sc_all_sds.R
@@ -128,11 +128,6 @@ process_sc_all_sds <- function(
       "SDS",
       convert_sc_sending_location_to_lca(sending_location)
     )]
-  # sds_full_clean_long$person_id <- paste0(
-  #   sds_full_clean_long$sending_location,
-  #   "-",
-  #   sds_full_clean_long$social_care_id
-  # )
 
   # Group, arrange and create flags for episodes
   sds_full_clean_long[,
@@ -176,7 +171,6 @@ process_sc_all_sds <- function(
     dob = data.table::last(dob),
     postcode = data.table::last(postcode),
     recid = data.table::last(recid),
-    # person_id = data.table::last(person_id),
     sc_send_lca = data.table::last(sc_send_lca)
   ), by = list(sending_location, social_care_id, smrtype, episode_counter)]
   rm(sds_full_clean_long)
diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R
index c45992938..7a8f957ca 100644
--- a/R/process_tests_episode_file.R
+++ b/R/process_tests_episode_file.R
@@ -31,9 +31,13 @@ process_tests_episode_file <- function(data, year) {
     recid = TRUE
   ) %>%
     dplyr::arrange(.data[["recid"]]) %>%
-    write_tests_xlsx(sheet_name = stringr::str_glue({
-      "ep_file_{year}"
-    }), workbook_name = "ep_file")
+    write_tests_xlsx(
+      sheet_name = stringr::str_glue({
+        "ep_file_{year}"
+      }),
+      year = year,
+      workbook_name = "ep_file"
+    )
 
   return(comparison)
 }
diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R
index 900ce7f03..ae664c09b 100644
--- a/R/process_tests_individual_file.R
+++ b/R/process_tests_individual_file.R
@@ -34,9 +34,13 @@ process_tests_individual_file <- function(data, year) {
     old_data = produce_individual_file_tests(old_data),
     new_data = produce_individual_file_tests(data)
   ) %>%
-    write_tests_xlsx(sheet_name = stringr::str_glue({
-      "indiv_file_{year}"
-    }), workbook_name = "indiv_file")
+    write_tests_xlsx(
+      sheet_name = stringr::str_glue({
+        "indiv_file_{year}"
+      }),
+      year = year,
+      workbook_name = "indiv_file"
+    )
 
   return(comparison)
 }
diff --git a/R/process_tests_it_chi_deaths.R b/R/process_tests_it_chi_deaths.R
index 33ef1c5ff..7c04e3a14 100644
--- a/R/process_tests_it_chi_deaths.R
+++ b/R/process_tests_it_chi_deaths.R
@@ -4,14 +4,12 @@
 #'
 #' @export
 process_tests_it_chi_deaths <- function(data, update = previous_update()) {
-  data <- data %>%
-    slfhelper::get_chi()
-
   comparison <- produce_test_comparison(
     old_data = produce_it_chi_deaths_tests(
-      read_file(get_slf_chi_deaths_path(update = update))
+      read_file(get_slf_chi_deaths_path(update = update)) %>%
+        slfhelper::get_chi()
     ),
-    new_data = produce_it_chi_deaths_tests(data)
+    new_data = produce_it_chi_deaths_tests(data %>% slfhelper::get_chi())
   ) %>%
     write_tests_xlsx(sheet_name = "it_chi_deaths", workbook_name = "lookup")
 
@@ -41,10 +39,8 @@ produce_it_chi_deaths_tests <- function(data) {
     dplyr::mutate(
       n_chi = 1L,
       n_valid_chi = phsmethods::chi_check(.data$chi) == "Valid CHI",
-      n_death_date_nrs = is.na(.data$death_date_nrs),
       n_death_date_chi = is.na(.data$death_date_chi),
-      n_death_date = is.na(.data$death_date),
-      death_year = lubridate::year(.data$death_date),
+      death_year = lubridate::year(.data$death_date_chi),
       "n_deaths_{current_year_0}" := .data$death_year == current_year_0,
       "n_deaths_{current_year_1}" := .data$death_year == current_year_1,
       "n_deaths_{current_year_2}" := .data$death_year == current_year_2,
diff --git a/R/process_tests_sc_demographics.R b/R/process_tests_sc_demographics.R
index c4a81d776..b246bf23a 100644
--- a/R/process_tests_sc_demographics.R
+++ b/R/process_tests_sc_demographics.R
@@ -15,12 +15,15 @@ process_tests_sc_demographics <- function(data) {
 
   comparison <- produce_test_comparison(
     old_data = produce_sc_demog_lookup_tests(
-      read_file(get_sc_demog_lookup_path(update = previous_update()))
+      read_file(get_sc_demog_lookup_path(update = previous_update())) %>%
+        slfhelper::get_chi()
     ),
     new_data = produce_sc_demog_lookup_tests(
       data
     )
-  ) %>%
+  )
+
+  comparison %>%
     write_tests_xlsx(sheet_name = "sc_demographics", workbook_name = "lookup")
 
   return(comparison)
@@ -53,7 +56,8 @@ produce_sc_demog_lookup_tests <- function(data) {
         "chi",
         "gender",
         "dob",
-        "postcode"
+        "postcode",
+        "date_of_death"
       )
     ) %>%
     # use function to sum new test flags
diff --git a/R/produce_homelessness_completeness.R b/R/produce_homelessness_completeness.R
index 9e157df24..df89b5e62 100644
--- a/R/produce_homelessness_completeness.R
+++ b/R/produce_homelessness_completeness.R
@@ -40,9 +40,9 @@ produce_homelessness_completeness <- function(
   sg_all_assessments_annual <-
     openxlsx::read.xlsx(
       sg_pub_path,
-      sheet = "Table 2",
+      sheet = "Table 1", # Manual change - check sheet name
       rows = 8L:39L,
-      cols = 1L:29L,
+      cols = 1L:33L, # Manual change - check workbook for col number for latest year
       colNames = FALSE
     ) %>%
     dplyr::rename_with(~ c(
@@ -53,7 +53,9 @@ produce_homelessness_completeness <- function(
       paste0(paste0("q", 1L:4L), "_", rep(2019L, 4L)),
       paste0(paste0("q", 1L:4L), "_", rep(2020L, 4L)),
       paste0(paste0("q", 1L:4L), "_", rep(2021L, 4L)),
-      paste0(paste0("q", 1L:4L), "_", rep(2022L, 4L))
+      paste0(paste0("q", 1L:4L), "_", rep(2022L, 4L)),
+      paste0(paste0("q", 1L:4L), "_", rep(2023L, 4L))
+      ## Manual change - Add new row here when new year is available in publication
     )) %>%
     tidyr::pivot_longer(
       !"CAName",
@@ -125,7 +127,7 @@ produce_homelessness_completeness <- function(
 get_sg_homelessness_pub_path <- function(...) {
   path <- get_file_path(
     directory = fs::path(get_slf_dir(), "Homelessness"),
-    file_name = "2024.02.07- PHS - Total assessment decisions by LA by Qtr.xlsx",
+    file_name = "2024.11.08 - PHS - Total assessment decisions by LA by Qtr.xlsx",
     ...
   )
 
diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R
index 6128a1be5..bc7dc99f1 100644
--- a/R/read_lookup_sc_client.R
+++ b/R/read_lookup_sc_client.R
@@ -16,9 +16,6 @@ read_lookup_sc_client <- function(fyyear,
 
   # read in data - social care 2 client
   client_data <- dplyr::tbl(sc_dvprod_connection, dbplyr::in_schema("social_care_2", "client")) %>%
-    dplyr::collect()
-
-  client_data <- client_data %>%
     dplyr::select(
       "sending_location",
       "social_care_id",
@@ -79,7 +76,14 @@ read_lookup_sc_client <- function(fyyear,
       .data$financial_year,
       .data$financial_quarter
     ) %>%
-    dplyr::rename("mental_health_disorders" = "mental_health_problems")
+    dplyr::rename("mental_health_disorders" = "mental_health_problems") %>%
+    dplyr::collect()
+
+  latest_quarter <- client_data %>%
+    dplyr::arrange(dplyr::desc(.data$financial_quarter)) %>%
+    dplyr::pull(.data$financial_quarter) %>%
+    utils::head(1)
+  cli::cli_alert_info(stringr::str_glue("Social care client data for Year {fyyear} is available up to Q{latest_quarter}."))
 
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "client", year = fyyear))) {
diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R
index 1d97e7c30..76b4969df 100644
--- a/R/read_lookup_sc_demographics.R
+++ b/R/read_lookup_sc_demographics.R
@@ -10,9 +10,6 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection
     sc_dvprod_connection,
     dbplyr::in_schema("social_care_2", "demographic_snapshot")
   ) %>%
-    dplyr::collect()
-
-  sc_demog <- sc_demog %>%
     dplyr::select(
       "latest_record_flag",
       "period",
@@ -25,7 +22,14 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection
       "chi_postcode",
       "submitted_postcode",
       "chi_gender_code"
-    )
+    ) %>%
+    dplyr::collect()
+
+  latest_quarter <- sc_demog %>%
+    dplyr::arrange(dplyr::desc(.data$period)) %>%
+    dplyr::pull(.data$period) %>%
+    utils::head(1)
+  cli::cli_alert_info(stringr::str_glue("Demographics data is available up to {latest_quarter}."))
 
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "demographics"))) {
diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R
index 0106bcbda..336c9d2f5 100644
--- a/R/read_sc_all_alarms_telecare.R
+++ b/R/read_sc_all_alarms_telecare.R
@@ -13,9 +13,7 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
   at_full_data <- dplyr::tbl(
     sc_dvprod_connection,
     dbplyr::in_schema("social_care_2", "equipment_snapshot")
-  ) %>% dplyr::collect()
-
-  at_full_data <- at_full_data %>%
+  ) %>%
     dplyr::select(
       "sending_location",
       "social_care_id",
@@ -27,7 +25,14 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection
       "service_end_date",
       "service_start_date_after_period_end_date"
     ) %>%
-    dplyr::distinct()
+    dplyr::distinct() %>%
+    dplyr::collect()
+
+  latest_quarter <- at_full_data %>%
+    dplyr::arrange(dplyr::desc(.data$period)) %>%
+    dplyr::pull(.data$period) %>%
+    utils::head(1)
+  cli::cli_alert_info(stringr::str_glue("Alarm Telecare data is available up to {latest_quarter}."))
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "at"))) {
     at_full_data %>%
diff --git a/R/read_sc_all_care_home.R b/R/read_sc_all_care_home.R
index b11879487..f758cbd17 100644
--- a/R/read_sc_all_care_home.R
+++ b/R/read_sc_all_care_home.R
@@ -10,9 +10,7 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn =
   ch_data <- dplyr::tbl(
     sc_dvprod_connection,
     dbplyr::in_schema("social_care_2", "carehome_snapshot")
-  ) %>% dplyr::collect()
-
-  ch_data <- ch_data %>%
+  ) %>%
     dplyr::select(
       "ch_name",
       "ch_postcode",
@@ -29,7 +27,14 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn =
       "ch_discharge_date",
       "age"
     ) %>%
-    dplyr::distinct()
+    dplyr::distinct() %>%
+    dplyr::collect()
+
+  latest_quarter <- ch_data %>%
+    dplyr::arrange(dplyr::desc(.data$period)) %>%
+    dplyr::pull(.data$period) %>%
+    utils::head(1)
+  cli::cli_alert_info(stringr::str_glue("Care Home data is available up to {latest_quarter}."))
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "ch"))) {
     ch_data %>%
diff --git a/R/read_sc_all_home_care.R b/R/read_sc_all_home_care.R
index 2349cf1cd..ce371cbb9 100644
--- a/R/read_sc_all_home_care.R
+++ b/R/read_sc_all_home_care.R
@@ -10,9 +10,7 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn =
   home_care_data <- dplyr::tbl(
     sc_dvprod_connection,
     dbplyr::in_schema("social_care_2", "homecare_snapshot")
-  ) %>% dplyr::collect()
-
-  home_care_data <- home_care_data %>%
+  ) %>%
     dplyr::select(
       "sending_location",
       "sending_location_name",
@@ -46,7 +44,14 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn =
       .data$period
     )) %>%
     # drop rows start date after end date
-    dplyr::distinct()
+    dplyr::distinct() %>%
+    dplyr::collect()
+
+  latest_quarter <- home_care_data %>%
+    dplyr::arrange(dplyr::desc(.data$period)) %>%
+    dplyr::pull(.data$period) %>%
+    utils::head(1)
+  cli::cli_alert_info(stringr::str_glue("Home Care data is available up to {latest_quarter}."))
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "hc"))) {
     home_care_data %>%
diff --git a/R/read_sc_all_sds.R b/R/read_sc_all_sds.R
index 54d3d31ed..d34fa6d1f 100644
--- a/R/read_sc_all_sds.R
+++ b/R/read_sc_all_sds.R
@@ -11,9 +11,6 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
     sc_dvprod_connection,
     dbplyr::in_schema("social_care_2", "sds_snapshot")
   ) %>%
-    dplyr::collect()
-
-  sds_full_data <- sds_full_data %>%
     dplyr::select(
       "sending_location",
       "social_care_id",
@@ -28,7 +25,14 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR
       "sds_start_date_after_end_date", # get fixed
       "sds_start_date_after_period_end_date" # get removed
     ) %>%
-    dplyr::distinct()
+    dplyr::distinct() %>%
+    dplyr::collect()
+
+  latest_quarter <- sds_full_data %>%
+    dplyr::arrange(dplyr::desc(.data$period)) %>%
+    dplyr::pull(.data$period) %>%
+    utils::head(1)
+  cli::cli_alert_info(stringr::str_glue("SDS data is available up to {latest_quarter}."))
 
   if (!fs::file_exists(get_sandpit_extract_path(type = "sds"))) {
     sds_full_data %>%
diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R
index 9478ebefe..1a396bcb6 100644
--- a/R/replace_sc_id_with_latest.R
+++ b/R/replace_sc_id_with_latest.R
@@ -48,6 +48,8 @@ replace_sc_id_with_latest <- function(data) {
         .data$latest_sc_id,
         .data$social_care_id
       )
-    )
+    ) %>%
+    dplyr::filter(!is.na(.data$period))
+
   return(return_data)
 }
diff --git a/R/write_temp_data.R b/R/write_temp_data.R
new file mode 100644
index 000000000..d5708c049
--- /dev/null
+++ b/R/write_temp_data.R
@@ -0,0 +1,62 @@
+#' Write a temp data to disk in parquet format for debugging purpose
+#'
+#' @description Write a temp data in parquet format to disk for debugging purpose.
+#' @param data The data to be written
+#' @param year year variable
+#' @param file_name The file name to be written
+#' @param write_temp_to_disk Boolean type, write temp data to disk or not
+#'
+#' @return the data for next step as a [tibble][tibble::tibble-package].
+#' @export
+write_temp_data <-
+  function(data, year, file_name, write_temp_to_disk) {
+    if (write_temp_to_disk) {
+      full_file_name <- stringr::str_glue("{file_name}.parquet")
+      file_path <- file.path(
+        get_year_dir(year),
+        full_file_name
+      )
+
+      write_file(data,
+        path = file_path
+      )
+      cli::cli_alert_info(stringr::str_glue("Writing {full_file_name} to disk finished at {Sys.time()}"))
+    }
+    return(data)
+  }
+
+
+#' Read a temp data from disk for debugging purpose
+#'
+#' @description Read a temp data to disk for debugging purpose.
+#' @param year year variable
+#' @param file_name The file name to be read
+#'
+#' @return the data for next step as a [tibble][tibble::tibble-package].
+#' @export
+read_temp_data <- function(year, file_name) {
+  full_file_name <- stringr::str_glue("{file_name}.parquet")
+  file_path <- file.path(
+    get_year_dir(year),
+    full_file_name
+  )
+
+  return(read_file(file_path))
+}
+
+#' Clean temp data from disk
+#'
+#' @description Clean temp data from disk to save storage.
+#' @param year year variable
+#' @param file_type ep or ind files
+#'
+#' @return the data for next step as a [tibble][tibble::tibble-package].
+#' @export
+clean_temp_data <- function(year, file_type = c("ep", "ind")) {
+  list.files(
+    path = get_year_dir(year),
+    pattern = stringr::str_glue("^{file_type}_temp"),
+    full.names = TRUE
+  ) %>%
+    file.remove()
+}
diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R
index a1b53f971..2241ce519 100644
--- a/R/write_tests_xlsx.R
+++ b/R/write_tests_xlsx.R
@@ -25,16 +25,12 @@ write_tests_xlsx <- function(comparison_data,
                              )) {
   # Set up the workbook ----
   if (workbook_name == "ep_file") {
-    if (is.null(year)) {
-      tests_workbook_name <-
-        stringr::str_glue(latest_update(), "_ep_file_tests")
-    }
+    tests_workbook_name <-
+      stringr::str_glue(latest_update(), "_{year}_ep_file_tests")
   }
   if (workbook_name == "indiv_file") {
-    if (is.null(year)) {
-      tests_workbook_name <-
-        stringr::str_glue(latest_update(), "_indiv_file_tests")
-    }
+    tests_workbook_name <-
+      stringr::str_glue(latest_update(), "_{year}_indiv_file_tests")
   }
   if (workbook_name == "lookup") {
     if (is.null(year)) {
@@ -53,17 +49,16 @@ write_tests_xlsx <- function(comparison_data,
     }
   }
   if (workbook_name == "extract") {
-    if (is.null(year)) {
-    } else {
-      tests_workbook_name <-
-        stringr::str_glue(latest_update(), "_{year}_extract_tests")
-    }
+    tests_workbook_name <-
+      stringr::str_glue(latest_update(), "_{year}_extract_tests")
   }
 
 
   tests_workbook_path <- fs::path(
     get_slf_dir(),
     "Tests",
+    fy(),
+    qtr(),
     tests_workbook_name,
     ext = "xlsx"
   )
diff --git a/Run_SLF_Files_manually/run_episode_file_1415.R b/Run_SLF_Files_manually/run_episode_file_1415.R
index b5a2eab38..32a817813 100644
--- a/Run_SLF_Files_manually/run_episode_file_1415.R
+++ b/Run_SLF_Files_manually/run_episode_file_1415.R
@@ -73,7 +73,7 @@ processed_data_list <- list(
 )
 
 # Run episode file
-create_episode_file(processed_data_list, year = year) %>%
+create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_episode_file(year = year)
 
 ## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_1516.R b/Run_SLF_Files_manually/run_episode_file_1516.R
index 59c7ddc63..899720f5a 100644
--- a/Run_SLF_Files_manually/run_episode_file_1516.R
+++ b/Run_SLF_Files_manually/run_episode_file_1516.R
@@ -73,7 +73,7 @@ processed_data_list <- list(
 )
 
 # Run episode file
-create_episode_file(processed_data_list, year = year) ## %>%
+create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) ## %>%
 # process_tests_episode_file(year = year)
 
 ## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_1617.R b/Run_SLF_Files_manually/run_episode_file_1617.R
index b10372be6..156848f1a 100644
--- a/Run_SLF_Files_manually/run_episode_file_1617.R
+++ b/Run_SLF_Files_manually/run_episode_file_1617.R
@@ -73,7 +73,7 @@ processed_data_list <- list(
 )
 
 # Run episode file
-create_episode_file(processed_data_list, year = year) ## %>%
+create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) ## %>%
 # process_tests_episode_file(year = year)
 
 ## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R
index b405b5b6e..a78912e58 100644
--- a/Run_SLF_Files_manually/run_episode_file_1718.R
+++ b/Run_SLF_Files_manually/run_episode_file_1718.R
@@ -69,7 +69,7 @@ processed_data_list <- list(
 )
 
 # Run episode file
-create_episode_file(processed_data_list, year = year) %>%
+create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_episode_file(year = year)
 
 ## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R
index fb3227512..7d1f8de94 100644
--- a/Run_SLF_Files_manually/run_episode_file_1819.R
+++ b/Run_SLF_Files_manually/run_episode_file_1819.R
@@ -69,7 +69,7 @@ processed_data_list <- list(
 )
 
 # Run episode file
-create_episode_file(processed_data_list, year = year) %>%
+create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_episode_file(year = year)
 
 ## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R
index e2e21bdac..903c0c550 100644
--- a/Run_SLF_Files_manually/run_episode_file_1920.R
+++ b/Run_SLF_Files_manually/run_episode_file_1920.R
@@ -69,7 +69,7 @@ processed_data_list <- list(
 )
 
 # Run episode file
-create_episode_file(processed_data_list, year = year) %>%
+create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_episode_file(year = year)
 
 ## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R
index cf98e80de..9c579bd0e 100644
--- a/Run_SLF_Files_manually/run_episode_file_2021.R
+++ b/Run_SLF_Files_manually/run_episode_file_2021.R
@@ -69,7 +69,7 @@ processed_data_list <- list(
 )
 
 # Run episode file
-create_episode_file(processed_data_list, year = year) %>%
+create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_episode_file(year = year)
 
 ## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R
index 3bcbf2466..881e4aa5c 100644
--- a/Run_SLF_Files_manually/run_episode_file_2122.R
+++ b/Run_SLF_Files_manually/run_episode_file_2122.R
@@ -69,7 +69,7 @@ processed_data_list <- list(
 )
 
 # Run episode file
-create_episode_file(processed_data_list, year = year) %>%
+create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_episode_file(year = year)
 
 ## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R
index af0447eed..c2034020c 100644
--- a/Run_SLF_Files_manually/run_episode_file_2223.R
+++ b/Run_SLF_Files_manually/run_episode_file_2223.R
@@ -69,7 +69,7 @@ processed_data_list <- list(
 )
 
 # Run episode file
-create_episode_file(processed_data_list, year = year) %>%
+create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_episode_file(year = year)
 
 ## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R
index bdf16e0f8..7951c97c2 100644
--- a/Run_SLF_Files_manually/run_episode_file_2324.R
+++ b/Run_SLF_Files_manually/run_episode_file_2324.R
@@ -69,7 +69,7 @@ processed_data_list <- list(
 )
 
 # Run episode file
-create_episode_file(processed_data_list, year = year) %>%
+create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_episode_file(year = year)
 
 ## End of Script ##
diff --git a/Run_SLF_Files_manually/run_episode_file_2425.R b/Run_SLF_Files_manually/run_episode_file_2425.R
index 699c197b3..25897ab67 100644
--- a/Run_SLF_Files_manually/run_episode_file_2425.R
+++ b/Run_SLF_Files_manually/run_episode_file_2425.R
@@ -69,7 +69,7 @@ processed_data_list <- list(
 )
 
 # Run episode file
-create_episode_file(processed_data_list, year = year) %>%
+create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_episode_file(year = year)
 
 ## End of Script ##
diff --git a/Run_SLF_Files_manually/run_individual_file_1415.R b/Run_SLF_Files_manually/run_individual_file_1415.R
index 70aa2bfca..c8a09e5d4 100644
--- a/Run_SLF_Files_manually/run_individual_file_1415.R
+++ b/Run_SLF_Files_manually/run_individual_file_1415.R
@@ -2,8 +2,10 @@ library(createslf)
 
 year <- "1415"
 
+clean_temp_data(year, "ep")
+
 episode_file <- arrow::read_parquet(get_slf_episode_path(year))
 
 # Run individual file
-create_individual_file(episode_file, year = year) %>%
+create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_1516.R b/Run_SLF_Files_manually/run_individual_file_1516.R
index 8e8dae906..8a4f0da37 100644
--- a/Run_SLF_Files_manually/run_individual_file_1516.R
+++ b/Run_SLF_Files_manually/run_individual_file_1516.R
@@ -2,8 +2,10 @@ library(createslf)
 
 year <- "1516"
 
+clean_temp_data(year, "ep")
+
 episode_file <- arrow::read_parquet(get_slf_episode_path(year))
 
 # Run individual file
-create_individual_file(episode_file, year = year) %>%
+create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_1617.R b/Run_SLF_Files_manually/run_individual_file_1617.R
index 255e4e674..6b3f7e670 100644
--- a/Run_SLF_Files_manually/run_individual_file_1617.R
+++ b/Run_SLF_Files_manually/run_individual_file_1617.R
@@ -2,8 +2,10 @@ library(createslf)
 
 year <- "1617"
 
+clean_temp_data(year, "ep")
+
 episode_file <- arrow::read_parquet(get_slf_episode_path(year))
 
 # Run individual file
-create_individual_file(episode_file, year = year) %>%
+create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_1718.R b/Run_SLF_Files_manually/run_individual_file_1718.R
index 777948fc7..12fa4c7fc 100644
--- a/Run_SLF_Files_manually/run_individual_file_1718.R
+++ b/Run_SLF_Files_manually/run_individual_file_1718.R
@@ -2,8 +2,10 @@ library(createslf)
 
 year <- "1718"
 
+clean_temp_data(year, "ep")
+
 episode_file <- arrow::read_parquet(get_slf_episode_path(year))
 
 # Run individual file
-create_individual_file(episode_file, year = year) %>%
+create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_1819.R b/Run_SLF_Files_manually/run_individual_file_1819.R
index 18839b2ea..f979f2052 100644
--- a/Run_SLF_Files_manually/run_individual_file_1819.R
+++ b/Run_SLF_Files_manually/run_individual_file_1819.R
@@ -2,8 +2,10 @@ library(createslf)
 
 year <- "1819"
 
+clean_temp_data(year, "ep")
+
 episode_file <- arrow::read_parquet(get_slf_episode_path(year))
 
 # Run individual file
-create_individual_file(episode_file, year = year) %>%
+create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_1920.R b/Run_SLF_Files_manually/run_individual_file_1920.R
index 3567d5c5d..4deffd8bc 100644
--- a/Run_SLF_Files_manually/run_individual_file_1920.R
+++ b/Run_SLF_Files_manually/run_individual_file_1920.R
@@ -2,8 +2,10 @@ library(createslf)
 
 year <- "1920"
 
+clean_temp_data(year, "ep")
+
 episode_file <- arrow::read_parquet(get_slf_episode_path(year))
 
 # Run individual file
-create_individual_file(episode_file, year = year) %>%
+create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_2021.R b/Run_SLF_Files_manually/run_individual_file_2021.R
index 8a78924b3..8df4228a7 100644
--- a/Run_SLF_Files_manually/run_individual_file_2021.R
+++ b/Run_SLF_Files_manually/run_individual_file_2021.R
@@ -2,8 +2,10 @@ library(createslf)
 
 year <- "2021"
 
+clean_temp_data(year, "ep")
+
 episode_file <- arrow::read_parquet(get_slf_episode_path(year))
 
 # Run individual file
-create_individual_file(episode_file, year = year) %>%
+create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_2122.R b/Run_SLF_Files_manually/run_individual_file_2122.R
index 9ceaa571c..66e23df5f 100644
--- a/Run_SLF_Files_manually/run_individual_file_2122.R
+++ b/Run_SLF_Files_manually/run_individual_file_2122.R
@@ -2,8 +2,10 @@ library(createslf)
 
 year <- "2122"
 
+clean_temp_data(year, "ep")
+
 episode_file <- arrow::read_parquet(get_slf_episode_path(year))
 
 # Run individual file
-create_individual_file(episode_file, year = year) %>%
+create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_2223.R b/Run_SLF_Files_manually/run_individual_file_2223.R
index b83507dbc..5884cde13 100644
--- a/Run_SLF_Files_manually/run_individual_file_2223.R
+++ b/Run_SLF_Files_manually/run_individual_file_2223.R
@@ -2,8 +2,10 @@ library(createslf)
 
 year <- "2223"
 
+clean_temp_data(year, "ep")
+
 episode_file <- arrow::read_parquet(get_slf_episode_path(year))
 
 # Run individual file
-create_individual_file(episode_file, year = year) %>%
+create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_2324.R b/Run_SLF_Files_manually/run_individual_file_2324.R
index 3f6cf0fba..3f3c5023f 100644
--- a/Run_SLF_Files_manually/run_individual_file_2324.R
+++ b/Run_SLF_Files_manually/run_individual_file_2324.R
@@ -2,8 +2,10 @@ library(createslf)
 
 year <- "2324"
 
+clean_temp_data(year, "ep")
+
 episode_file <- arrow::read_parquet(get_slf_episode_path(year))
 
 # Run individual file
-create_individual_file(episode_file, year = year) %>%
+create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_manually/run_individual_file_2425.R b/Run_SLF_Files_manually/run_individual_file_2425.R
index 843eb505c..15d598c2e 100644
--- a/Run_SLF_Files_manually/run_individual_file_2425.R
+++ b/Run_SLF_Files_manually/run_individual_file_2425.R
@@ -2,8 +2,10 @@ library(createslf)
 
 year <- "2425"
 
+clean_temp_data(year, "ep")
+
 episode_file <- arrow::read_parquet(get_slf_episode_path(year))
 
 # Run individual file
-create_individual_file(episode_file, year = year) %>%
+create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>%
   process_tests_individual_file(year = year)
diff --git a/Run_SLF_Files_targets/run_all_targets.R b/Run_SLF_Files_targets/run_all_targets.R
index fb5b94fab..ab973b04e 100644
--- a/Run_SLF_Files_targets/run_all_targets.R
+++ b/Run_SLF_Files_targets/run_all_targets.R
@@ -1,4 +1,5 @@
 library(targets)
+library(createslf)
 
 # use tar_make_future() to run targets for all years
 # This will run everything needed for creating the episode file.
diff --git a/_SPSS_archived/All_years/04-Social_Care/00-Social_Care_functions.R b/_SPSS_archived/All_years/04-Social_Care/00-Social_Care_functions.R
index 9e924e474..47d2fe0a2 100644
--- a/_SPSS_archived/All_years/04-Social_Care/00-Social_Care_functions.R
+++ b/_SPSS_archived/All_years/04-Social_Care/00-Social_Care_functions.R
@@ -50,7 +50,6 @@ phs_db_connection <- function(dsn, username = Sys.getenv("USER")) {
 }
 
 
-# TODO- check R conversion for SC demog lookup. This may differ
 #' Social Care Demographic Lookup File Path
 #'
 #' @description Get the file path for the Social Care Demographic lookup file
diff --git a/_SPSS_archived/All_years/04-Social_Care/02-Home_care_data.R b/_SPSS_archived/All_years/04-Social_Care/02-Home_care_data.R
index bd35a0d89..69945ce2b 100644
--- a/_SPSS_archived/All_years/04-Social_Care/02-Home_care_data.R
+++ b/_SPSS_archived/All_years/04-Social_Care/02-Home_care_data.R
@@ -14,7 +14,6 @@ source("All_years/04-Social_Care/00-Social_Care_functions.R")
 sc_con <- phs_db_connection(dsn = "DVPROD")
 
 # Read demographic file
-# TODO replace the demographic file with R code
 demog_file <- read_demog_file(
   social_care_dir = social_care_dir,
   latest_update = latest_update()
diff --git a/_targets.R b/_targets.R
index 0377e487b..3ac733e1a 100644
--- a/_targets.R
+++ b/_targets.R
@@ -22,6 +22,8 @@ tar_option_set(
 years_to_run <- createslf::years_to_run()
 
 list(
+  ## Phase I, all years ----
+  tar_rds(test_mode, TRUE),
   tar_rds(write_to_disk, TRUE),
   tar_rds(
     file_path_ext_clean,
@@ -68,6 +70,7 @@ list(
     priority = 0.9
   ),
   tar_target(
+    # Tests, LOOKUP series
     tests_sc_demog_lookup,
     process_tests_sc_demographics(sc_demog_lookup)
   ),
@@ -80,8 +83,13 @@ list(
     priority = 0.9
   ),
   tar_target(
+    # Tests, LOOKUP series
     tests_it_chi_deaths,
-    process_tests_it_chi_deaths(it_chi_deaths_data)
+    # use anonymous function to sequence writing tests to excel
+    # to avoid conflicts of writing to the same file at the same time
+    (\(it_chi_deaths_data, tests_sc_demog_lookup) {
+      process_tests_it_chi_deaths(it_chi_deaths_data)
+    })(it_chi_deaths_data, tests_sc_demog_lookup)
   ),
   tar_target(
     source_gp_lookup,
@@ -94,8 +102,11 @@ list(
     priority = 0.9
   ),
   tar_target(
+    # Tests, LOOKUP series
     tests_source_gp_lookup,
-    process_tests_lookup_gpprac(source_gp_lookup)
+    (\(source_gp_lookup, tests_it_chi_deaths) {
+      process_tests_lookup_gpprac(source_gp_lookup)
+    })(source_gp_lookup, tests_it_chi_deaths)
   ),
   tar_target(
     source_pc_lookup,
@@ -108,15 +119,18 @@ list(
     priority = 0.9
   ),
   tar_target(
+    # Tests, LOOKUP series
     tests_source_pc_lookup,
-    process_tests_lookup_pc(source_pc_lookup)
+    (\(source_pc_lookup, tests_source_gp_lookup) {
+      process_tests_lookup_pc(source_pc_lookup)
+    })(source_pc_lookup, tests_source_gp_lookup)
   ),
-  ## Cost Lookups ##
+  ### Cost Lookups ----
   tar_target(ch_cost_lookup, process_costs_ch_rmd(), priority = 0.8),
   tar_target(dn_cost_lookup, process_costs_dn_rmd(), priority = 0.8),
   tar_target(hc_cost_lookup, process_costs_hc_rmd(), priority = 0.8),
   tar_target(gp_ooh_cost_lookup, process_costs_gp_ooh_rmd()),
-  ## Social Care - 'All' data ##
+  ### Social Care - 'All' data ----
   tar_target(
     all_at_extract,
     read_sc_all_alarms_telecare(),
@@ -135,8 +149,11 @@ list(
     priority = 0.5
   ),
   tar_target(
+    # Tests, LOOKUP series
     tests_sc_all_at,
-    process_tests_sc_all_at_episodes(all_at)
+    (\(all_at, tests_source_pc_lookup) {
+      process_tests_sc_all_at_episodes(all_at)
+    })(all_at, tests_source_pc_lookup)
   ),
   tar_target(
     all_home_care_extract,
@@ -156,8 +173,11 @@ list(
     priority = 0.5
   ),
   tar_target(
+    # Tests, LOOKUP series
     tests_sc_all_home_care,
-    process_tests_sc_all_hc_episodes(all_home_care)
+    (\(all_home_care, tests_sc_all_at) {
+      process_tests_sc_all_hc_episodes(all_home_care)
+    })(all_home_care, tests_sc_all_at)
   ),
   tar_target(
     all_care_home_extract,
@@ -187,8 +207,11 @@ list(
     priority = 0.5
   ),
   tar_target(
+    # Tests, LOOKUP series
     tests_all_care_home,
-    process_tests_sc_all_ch_episodes(all_care_home)
+    (\(all_care_home, tests_sc_all_home_care) {
+      process_tests_sc_all_ch_episodes(all_care_home)
+    })(all_care_home, tests_sc_all_home_care)
   ),
   tar_target(
     all_sds_extract,
@@ -208,11 +231,14 @@ list(
     priority = 0.5
   ),
   tar_target(
+    # Tests, LOOKUP series
     tests_sc_all_sds,
-    process_tests_sc_all_sds_episodes(all_sds)
+    (\(all_sds, tests_all_care_home) {
+      process_tests_sc_all_sds_episodes(all_sds)
+    })(all_sds, tests_all_care_home)
   ),
 
-  # Phase II
+  ## Phase II, year specific ----
   tar_map(
     list(year = years_to_run),
     tar_rds(
@@ -309,7 +335,7 @@ list(
         consultations_data_path
       )
     ),
-    ### Target source processed extracts ###
+    ### Target source processed extracts ----
     tar_target(source_acute_extract, process_extract_acute(
       acute_data,
       year,
@@ -317,6 +343,7 @@ list(
       write_to_disk = write_to_disk
     )),
     tar_target(
+      # Tests, EXTRACT series
       tests_source_acute_extract,
       process_tests_acute(
         source_acute_extract,
@@ -329,11 +356,14 @@ list(
       write_to_disk = write_to_disk
     )),
     tar_target(
+      # Tests, EXTRACT series
       tests_source_ae_extract,
-      process_tests_ae(
-        source_ae_extract,
-        year
-      )
+      (\(source_ae_extract, year, tests_source_acute_extract) {
+        process_tests_ae(
+          source_ae_extract,
+          year
+        )
+      })(source_ae_extract, year, tests_source_acute_extract)
     ),
     tar_target(source_cmh_extract, process_extract_cmh(
       cmh_data,
@@ -341,11 +371,14 @@ list(
       write_to_disk = write_to_disk
     )),
     tar_target(
+      # Tests, EXTRACT series
       tests_source_cmh_extract,
-      process_tests_cmh(
-        source_cmh_extract,
-        year
-      )
+      (\(source_cmh_extract, year, tests_source_ae_extract){
+        process_tests_cmh(
+          source_cmh_extract,
+          year
+        )
+      })(source_cmh_extract, year, tests_source_ae_extract)
     ),
     tar_target(source_dd_extract, process_extract_delayed_discharges(
       dd_data,
@@ -353,11 +386,14 @@ list(
       write_to_disk = write_to_disk
     )),
     tar_target(
+      # Tests, EXTRACT series
       tests_source_dd_extract,
-      process_tests_delayed_discharges(
-        source_dd_extract,
-        year
-      )
+      (\(source_dd_extract, year, tests_source_cmh_extract) {
+        process_tests_delayed_discharges(
+          source_dd_extract,
+          year
+        )
+      })(source_dd_extract, year, tests_source_cmh_extract)
     ),
     tar_target(source_dn_extract, process_extract_district_nursing(
       dn_data,
@@ -366,11 +402,14 @@ list(
       write_to_disk = write_to_disk
     )),
     tar_target(
+      # Tests, EXTRACT series
       tests_source_dn_extract,
-      process_tests_district_nursing(
-        source_dn_extract,
-        year
-      )
+      (\(source_dn_extract, year, tests_source_dd_extract){
+        process_tests_district_nursing(
+          source_dn_extract,
+          year
+        )
+      })(source_dn_extract, year, tests_source_dd_extract)
     ),
     tar_target(
       source_homelessness_extract,
@@ -382,11 +421,14 @@ list(
       )
     ),
     tar_target(
+      # Tests, EXTRACT series
       tests_source_homelessness_extract,
-      process_tests_homelessness(
-        source_homelessness_extract,
-        year
-      )
+      (\(source_homelessness_extract, year, tests_source_dn_extract){
+        process_tests_homelessness(
+          source_homelessness_extract,
+          year
+        )
+      })(source_homelessness_extract, year, tests_source_dn_extract)
     ),
     tar_target(source_ltc_lookup, process_lookup_ltc(
       ltc_data,
@@ -394,11 +436,14 @@ list(
       write_to_disk = write_to_disk
     )),
     tar_target(
+      # Tests, EXTRACT series
       tests_ltc,
-      process_tests_ltcs(
-        source_ltc_lookup,
-        year
-      )
+      (\(source_ltc_lookup, year, tests_source_homelessness_extract){
+        process_tests_ltcs(
+          source_ltc_lookup,
+          year
+        )
+      })(source_ltc_lookup, year, tests_source_homelessness_extract)
     ),
     tar_target(source_maternity_extract, process_extract_maternity(
       maternity_data,
@@ -406,11 +451,14 @@ list(
       write_to_disk = write_to_disk
     )),
     tar_target(
+      # Tests, EXTRACT series
       tests_source_maternity_extract,
-      process_tests_maternity(
-        source_maternity_extract,
-        year
-      )
+      (\(source_maternity_extract, year, tests_ltc){
+        process_tests_maternity(
+          source_maternity_extract,
+          year
+        )
+      })(source_maternity_extract, year, tests_ltc)
     ),
     tar_target(
       source_mental_health_extract,
@@ -421,11 +469,14 @@ list(
       )
     ),
     tar_target(
+      # Tests, EXTRACT series
       tests_source_mental_health_extract,
-      process_tests_mental_health(
-        source_mental_health_extract,
-        year
-      )
+      (\(source_mental_health_extract, year, tests_source_maternity_extract){
+        process_tests_mental_health(
+          source_mental_health_extract,
+          year
+        )
+      })(source_mental_health_extract, year, tests_source_maternity_extract)
     ),
     # tar_target(source_nrs_deaths_extract, process_extract_nrs_deaths(
     #   nrs_deaths_data,
@@ -434,19 +485,22 @@ list(
     # )),
     tar_target(
       source_nrs_deaths_extract,
-      # use this anomymous function with redundant but necessary refined_death
+      # use this anonymous function with redundant but necessary refined_death
       # to make sure reading year-specific nrs deaths extracts after it is produced
-      (\(year, refined_death_datas) {
+      (\(year, refined_death_data) {
         read_file(get_source_extract_path(year, "deaths")) %>%
           as.data.frame()
       })(year, refined_death_data)
     ),
     tar_target(
+      # Tests, EXTRACT series
       tests_source_nrs_deaths_extract,
-      process_tests_nrs_deaths(
-        source_nrs_deaths_extract,
-        year
-      )
+      (\(source_nrs_deaths_extract, year, tests_source_mental_health_extract){
+        process_tests_nrs_deaths(
+          source_nrs_deaths_extract,
+          year
+        )
+      })(source_nrs_deaths_extract, year, tests_source_mental_health_extract)
     ),
     tar_target(source_ooh_extract, process_extract_gp_ooh(
       year,
@@ -455,11 +509,14 @@ list(
       write_to_disk = write_to_disk
     )),
     tar_target(
+      # Tests, EXTRACT series
       tests_source_ooh_extract,
-      process_tests_gp_ooh(
-        source_ooh_extract,
-        year
-      )
+      (\(source_ooh_extract, year, tests_source_nrs_deaths_extract){
+        process_tests_gp_ooh(
+          source_ooh_extract,
+          year
+        )
+      })(source_ooh_extract, year, tests_source_nrs_deaths_extract)
     ),
     tar_target(source_outpatients_extract, process_extract_outpatients(
       outpatients_data,
@@ -467,11 +524,14 @@ list(
       write_to_disk = write_to_disk
     )),
     tar_target(
+      # Tests, EXTRACT series
       tests_source_outpatients_extract,
-      process_tests_outpatients(
-        source_outpatients_extract,
-        year
-      )
+      (\(source_outpatients_extract, year, tests_source_ooh_extract){
+        process_tests_outpatients(
+          source_outpatients_extract,
+          year
+        )
+      })(source_outpatients_extract, year, tests_source_ooh_extract)
     ),
     tar_target(source_prescribing_extract, process_extract_prescribing(
       prescribing_data,
@@ -479,13 +539,16 @@ list(
       write_to_disk = write_to_disk
     )),
     tar_target(
+      # Tests, EXTRACT series
       tests_prescribing,
-      process_tests_prescribing(
-        source_prescribing_extract,
-        year
-      )
+      (\(source_prescribing_extract, year, tests_source_outpatients_extract){
+        process_tests_prescribing(
+          source_prescribing_extract,
+          year
+        )
+      })(source_prescribing_extract, year, tests_source_outpatients_extract)
     ),
-    ### Target process year specific social care ###
+    ### Target process year specific social care ----
     tar_target(
       sc_client_data,
       read_lookup_sc_client(fyyear = year)
@@ -502,8 +565,11 @@ list(
       )
     ),
     tar_target(
+      # Tests, EXTRACT series
       tests_sc_client_lookup,
-      process_tests_sc_client_lookup(sc_client_lookup, year = year)
+      (\(sc_client_lookup, year, tests_prescribing){
+        process_tests_sc_client_lookup(sc_client_lookup, year = year)
+      })(sc_client_lookup, year, tests_prescribing)
     ),
     tar_target(
       source_sc_alarms_tele,
@@ -514,11 +580,14 @@ list(
       )
     ),
     tar_target(
+      # Tests, EXTRACT series
       tests_alarms_telecare,
-      process_tests_alarms_telecare(
-        data = source_sc_alarms_tele,
-        year = year
-      )
+      (\(source_sc_alarms_tele, year, tests_sc_client_lookup){
+        process_tests_alarms_telecare(
+          data = source_sc_alarms_tele,
+          year = year
+        )
+      })(source_sc_alarms_tele, year, tests_sc_client_lookup)
     ),
     tar_target(
       source_sc_care_home,
@@ -530,11 +599,14 @@ list(
       )
     ),
     tar_target(
+      # Tests, EXTRACT series
       tests_care_home,
-      process_tests_care_home(
-        data = source_sc_care_home,
-        year = year
-      )
+      (\(source_sc_care_home, year, tests_alarms_telecare){
+        process_tests_care_home(
+          data = source_sc_care_home,
+          year = year
+        )
+      })(source_sc_care_home, year, tests_alarms_telecare)
     ),
     tar_target(
       source_sc_home_care,
@@ -545,11 +617,14 @@ list(
       )
     ),
     tar_target(
+      # Tests, EXTRACT series
       tests_home_care,
-      process_tests_home_care(
-        data = source_sc_home_care,
-        year = year
-      )
+      (\(source_sc_home_care, year, tests_care_home){
+        process_tests_home_care(
+          data = source_sc_home_care,
+          year = year
+        )
+      })(source_sc_home_care, year, tests_care_home)
     ),
     tar_target(
       source_sc_sds,
@@ -560,11 +635,14 @@ list(
       )
     ),
     tar_target(
+      # Tests, EXTRACT series
       tests_sds,
-      process_tests_sds(
-        data = source_sc_sds,
-        year = year
-      )
+      (\(source_sc_sds, year, tests_home_care){
+        process_tests_sds(
+          data = source_sc_sds,
+          year = year
+        )
+      })(source_sc_sds, year, tests_home_care)
     ),
     tar_target(
       slf_deaths_lookup,
@@ -590,7 +668,7 @@ list(
 )
 ## End of Targets pipeline ##
 
-################################################################################
+## Phase III, create ep file and ind file----
 ## Redundant code which may still be useful for including ep/indiv files.
 # tar_qs(
 #   processed_data_list,
diff --git a/inst/WORDLIST b/inst/WORDLIST
index 7edd722c5..88dc043fe 100644
--- a/inst/WORDLIST
+++ b/inst/WORDLIST
@@ -86,6 +86,7 @@ funs
 fy
 fyear
 fyyear
+gp
 gpprac
 hb
 hbpraccode
@@ -94,10 +95,12 @@ hc
 hscdiip
 hscp
 inc
+ind
 interzone
 lca
 lgl
 normal’
+nrs
 old’
 opendata
 overcounting
@@ -109,7 +112,6 @@ reablement
 recid
 recids
 sc
-scoial
 sds
 slf
 slfhelper
diff --git a/man/add_deceased_flag.Rd b/man/add_deceased_flag.Rd
deleted file mode 100644
index c84568522..000000000
--- a/man/add_deceased_flag.Rd
+++ /dev/null
@@ -1,32 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/add_deceased_flag.R
-\name{add_deceased_flag}
-\alias{add_deceased_flag}
-\title{Create the SLF Deaths lookup}
-\usage{
-add_deceased_flag(
-  year,
-  refined_death = read_file(get_combined_slf_deaths_lookup_path()) \%>\%
-    slfhelper::get_chi(),
-  write_to_disk = TRUE
-)
-}
-\arguments{
-\item{year}{The year to process, in FY format.}
-
-\item{write_to_disk}{(optional) Should the data be written to disk default is
-\code{TRUE} i.e. write the data to disk.}
-
-\item{nrs_deaths_data}{NRS deaths data.}
-
-\item{chi_deaths_data}{IT CHI deaths data.}
-}
-\value{
-a \link[tibble:tibble-package]{tibble} containing the episode file
-}
-\description{
-Currently this just uses the NRS death dates 'as is', with no
-corrections or modifications, it is expected that this will be expanded to
-use the CHI deaths extract from IT as well as taking into account data in
-the episode file to assess the validity of a death date.
-}
diff --git a/man/check_year_valid.Rd b/man/check_year_valid.Rd
index 59960da30..7b704d2ec 100644
--- a/man/check_year_valid.Rd
+++ b/man/check_year_valid.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/check_year_valid.R
+% Please edit documentation in R/00-update_refs.R
 \name{check_year_valid}
 \alias{check_year_valid}
 \title{Check data exists for a year}
diff --git a/man/clean_temp_data.Rd b/man/clean_temp_data.Rd
new file mode 100644
index 000000000..c26dcde8e
--- /dev/null
+++ b/man/clean_temp_data.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/write_temp_data.R
+\name{clean_temp_data}
+\alias{clean_temp_data}
+\title{Clean temp data from disk}
+\usage{
+clean_temp_data(year, file_type = c("ep", "ind"))
+}
+\arguments{
+\item{year}{year variable}
+
+\item{file_type}{ep or ind files}
+}
+\value{
+the data for next step as a \link[tibble:tibble-package]{tibble}.
+}
+\description{
+Clean temp data from disk to save storage.
+}
diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd
index d6bd6d526..a1bf6878b 100644
--- a/man/create_episode_file.Rd
+++ b/man/create_episode_file.Rd
@@ -18,7 +18,8 @@ create_episode_file(
     slfhelper::get_chi(),
   sc_client = read_file(get_sc_client_lookup_path(year)) \%>\% slfhelper::get_chi(),
   write_to_disk = TRUE,
-  anon_chi_out = TRUE
+  anon_chi_out = TRUE,
+  write_temp_to_disk = FALSE
 )
 }
 \arguments{
@@ -40,13 +41,15 @@ create_episode_file(
 
 \item{slf_deaths_lookup}{The SLF deaths lookup.}
 
-\item{sc_client}{scoial care lookup file}
+\item{sc_client}{social care lookup file}
 
 \item{write_to_disk}{(optional) Should the data be written to disk default is
 \code{TRUE} i.e. write the data to disk.}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
 (instead of chi)}
+
+\item{write_temp_to_disk}{write intermediate data for investigation or debug}
 }
 \value{
 a \link[tibble:tibble-package]{tibble} containing the episode file
diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd
index bfa584d54..9cb57cff0 100644
--- a/man/create_individual_file.Rd
+++ b/man/create_individual_file.Rd
@@ -10,7 +10,8 @@ create_individual_file(
   homelessness_lookup = create_homelessness_lookup(year),
   write_to_disk = TRUE,
   anon_chi_in = TRUE,
-  anon_chi_out = TRUE
+  anon_chi_out = TRUE,
+  write_temp_to_disk
 )
 }
 \arguments{
@@ -28,6 +29,8 @@ create_individual_file(
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
 (instead of chi)}
+
+\item{write_temp_to_disk}{write intermediate data for investigation or debug}
 }
 \value{
 The processed individual file
diff --git a/man/end_date.Rd b/man/end_date.Rd
new file mode 100644
index 000000000..9d617ac10
--- /dev/null
+++ b/man/end_date.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/00-update_refs.R
+\name{end_date}
+\alias{end_date}
+\title{End date}
+\usage{
+end_date()
+}
+\value{
+Get the end date of the latest update period
+}
+\description{
+End date
+}
diff --git a/man/fy.Rd b/man/fy.Rd
new file mode 100644
index 000000000..21c4b47f1
--- /dev/null
+++ b/man/fy.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/00-update_refs.R
+\name{fy}
+\alias{fy}
+\title{Extract latest FY from end_date}
+\usage{
+fy()
+}
+\value{
+fy in format "2024"
+}
+\description{
+Extract latest FY from end_date
+}
diff --git a/man/process_combined_deaths_lookup.Rd b/man/process_combined_deaths_lookup.Rd
deleted file mode 100644
index 7d0a75fc7..000000000
--- a/man/process_combined_deaths_lookup.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/add_activity_after_death_flag.R
-\name{process_combined_deaths_lookup}
-\alias{process_combined_deaths_lookup}
-\title{Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts}
-\usage{
-process_combined_deaths_lookup(
-  update = latest_update(),
-  write_to_disk = TRUE,
-  ...
-)
-}
-\arguments{
-\item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})}
-
-\item{write_to_disk}{(optional) Should the data be written to disk default is
-\code{TRUE} i.e. write the data to disk.}
-
-\item{...}{additional arguments passed to \code{\link[=get_slf_deaths_lookup_path]{get_slf_deaths_lookup_path()}}}
-}
-\value{
-the final data as a \link[tibble:tibble-package]{tibble}.
-}
-\description{
-The BOXI NRS deaths extract lookup should be created after the extract files for all years have been processed,
-}
diff --git a/man/process_extract_acute.Rd b/man/process_extract_acute.Rd
index fae9c7bab..772a7bcec 100644
--- a/man/process_extract_acute.Rd
+++ b/man/process_extract_acute.Rd
@@ -16,6 +16,8 @@ process_extract_acute(
 
 \item{year}{The year to process, in FY format.}
 
+\item{acute_cup_path}{path to acute_cup data}
+
 \item{write_to_disk}{(optional) Should the data be written to disk default is
 \code{TRUE} i.e. write the data to disk.}
 }
diff --git a/man/process_extract_gp_ooh.Rd b/man/process_extract_gp_ooh.Rd
index 5c68c35dd..14ddb346b 100644
--- a/man/process_extract_gp_ooh.Rd
+++ b/man/process_extract_gp_ooh.Rd
@@ -16,6 +16,8 @@ process_extract_gp_ooh(
 
 \item{data_list}{A list containing the extracts.}
 
+\item{gp_ooh_cup_path}{path to gp ooh cup data}
+
 \item{write_to_disk}{(optional) Should the data be written to disk default is
 \code{TRUE} i.e. write the data to disk.}
 }
diff --git a/man/process_extract_ooh_consultations.Rd b/man/process_extract_ooh_consultations.Rd
index ae4265823..22159c2ed 100644
--- a/man/process_extract_ooh_consultations.Rd
+++ b/man/process_extract_ooh_consultations.Rd
@@ -1,5 +1,5 @@
 % Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/process_extract_consultations.R
+% Please edit documentation in R/process_extract_ooh_consultations.R
 \name{process_extract_ooh_consultations}
 \alias{process_extract_ooh_consultations}
 \title{Process the GP OOH Consultations extract}
diff --git a/man/qtr.Rd b/man/qtr.Rd
new file mode 100644
index 000000000..79629f858
--- /dev/null
+++ b/man/qtr.Rd
@@ -0,0 +1,14 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/00-update_refs.R
+\name{qtr}
+\alias{qtr}
+\title{Extract latest quarter from end_date}
+\usage{
+qtr()
+}
+\value{
+qtr in format "Q1"
+}
+\description{
+Extract latest quarter from end_date
+}
diff --git a/man/read_temp_data.Rd b/man/read_temp_data.Rd
new file mode 100644
index 000000000..709d8362d
--- /dev/null
+++ b/man/read_temp_data.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/write_temp_data.R
+\name{read_temp_data}
+\alias{read_temp_data}
+\title{Read a temp data from disk for debugging purpose}
+\usage{
+read_temp_data(year, file_name)
+}
+\arguments{
+\item{year}{year variable}
+
+\item{file_name}{The file name to be read}
+}
+\value{
+the data for next step as a \link[tibble:tibble-package]{tibble}.
+}
+\description{
+Read a temp data to disk for debugging purpose.
+}
diff --git a/man/write_temp_data.Rd b/man/write_temp_data.Rd
new file mode 100644
index 000000000..6ee32e7a2
--- /dev/null
+++ b/man/write_temp_data.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/write_temp_data.R
+\name{write_temp_data}
+\alias{write_temp_data}
+\title{Write a temp data to disk in parquet format for debugging purpose}
+\usage{
+write_temp_data(data, year, file_name, write_temp_to_disk)
+}
+\arguments{
+\item{data}{The data to be written}
+
+\item{year}{year variable}
+
+\item{file_name}{The file name to be written}
+
+\item{write_temp_to_disk}{Boolean type, write temp data to disk or not}
+}
+\value{
+the data for next step as a \link[tibble:tibble-package]{tibble}.
+}
+\description{
+Write a temp data in parquet format to disk for debugging purpose.
+}
diff --git a/run_targets_1718.R b/run_targets_1718.R
deleted file mode 100644
index 488918e1d..000000000
--- a/run_targets_1718.R
+++ /dev/null
@@ -1,20 +0,0 @@
-library(targets)
-
-Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
-
-year <- "1718"
-
-# use targets for the process until testing episode files
-tar_make_future(
-  # it does not recognise `contains(year)`
-  names = (targets::contains("1718"))
-)
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/run_targets_1819.R b/run_targets_1819.R
deleted file mode 100644
index 7c63807e8..000000000
--- a/run_targets_1819.R
+++ /dev/null
@@ -1,20 +0,0 @@
-library(targets)
-
-Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
-
-year <- "1819"
-
-# use targets for the process until testing episode files
-tar_make_future(
-  # it does not recognise `contains(year)`
-  names = (targets::contains("1819"))
-)
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/run_targets_1920.R b/run_targets_1920.R
deleted file mode 100644
index d3361a34c..000000000
--- a/run_targets_1920.R
+++ /dev/null
@@ -1,20 +0,0 @@
-library(targets)
-
-Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
-
-year <- "1920"
-
-# use targets for the process until testing episode files
-tar_make_future(
-  # it does not recognise `contains(year)`
-  names = (targets::contains("1920"))
-)
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/run_targets_2021.R b/run_targets_2021.R
deleted file mode 100644
index efcfaed7a..000000000
--- a/run_targets_2021.R
+++ /dev/null
@@ -1,20 +0,0 @@
-library(targets)
-
-Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
-
-year <- "2021"
-
-# use targets for the process until testing episode files
-tar_make_future(
-  # it does not recognise `contains(year)`
-  names = (targets::contains("2021"))
-)
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/run_targets_2122.R b/run_targets_2122.R
deleted file mode 100644
index e92d75c7d..000000000
--- a/run_targets_2122.R
+++ /dev/null
@@ -1,20 +0,0 @@
-library(targets)
-
-Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
-
-year <- "2122"
-
-# use targets for the process until testing episode files
-tar_make_future(
-  # it does not recognise `contains(year)`
-  names = (targets::contains("2122"))
-)
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/run_targets_2223.R b/run_targets_2223.R
deleted file mode 100644
index f5c93ee2f..000000000
--- a/run_targets_2223.R
+++ /dev/null
@@ -1,20 +0,0 @@
-library(targets)
-
-Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
-
-year <- "2223"
-
-# use targets for the process until testing episode files
-tar_make_future(
-  # it does not recognise `contains(year)`
-  names = (targets::contains("2223"))
-)
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/run_targets_2324.R b/run_targets_2324.R
deleted file mode 100644
index 5e3885bc2..000000000
--- a/run_targets_2324.R
+++ /dev/null
@@ -1,20 +0,0 @@
-library(targets)
-
-Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf")
-
-year <- "2324"
-
-# use targets for the process until testing episode files
-tar_make_future(
-  # it does not recognise `contains(year)`
-  names = (targets::contains("2324"))
-)
-
-# use targets to create individual files due to RAM limit
-library(createslf)
-
-episode_file <- arrow::read_parquet(get_slf_episode_path(year))
-
-# Run individual file
-create_individual_file(episode_file, year = year) %>%
-  process_tests_individual_file(year = year)
diff --git a/tests/testthat/_snaps/get_dd_path.md b/tests/testthat/_snaps/get_dd_path.md
index 4f7740e79..8f8be2c48 100644
--- a/tests/testthat/_snaps/get_dd_path.md
+++ b/tests/testthat/_snaps/get_dd_path.md
@@ -3,7 +3,7 @@
     Code
       dplyr::glimpse(latest_dd_file, width = 0)
     Output
-      Rows: 206,029
+      Rows: 219,954
       Columns: 14
       $ cennum                 <dbl> ~
       $ MONTHFLAG              <chr> ~
diff --git a/tests/testthat/test-check_year_valid.R b/tests/testthat/test-check_year_valid.R
index 134e2d6b4..c139a43f0 100644
--- a/tests/testthat/test-check_year_valid.R
+++ b/tests/testthat/test-check_year_valid.R
@@ -50,7 +50,8 @@ test_that("Check year valid works for specific datasets ", {
   expect_true(check_year_valid("2021", "nsu"))
   expect_true(check_year_valid("2122", "nsu"))
   expect_true(check_year_valid("2223", "nsu"))
-  expect_false(check_year_valid("2324", "nsu"))
+  expect_true(check_year_valid("2324", "nsu"))
+  expect_false(check_year_valid("2425", "nsu"))
 
   # SPARRA
   expect_false(check_year_valid("1415", "sparra"))
@@ -64,6 +65,7 @@ test_that("Check year valid works for specific datasets ", {
   expect_true(check_year_valid("2122", "sparra"))
   expect_true(check_year_valid("2223", "sparra"))
   expect_true(check_year_valid("2324", "sparra"))
+  expect_true(check_year_valid("2425", "sparra"))
 
   # HHG
   expect_false(check_year_valid("1415", "hhg"))
diff --git a/tests/testthat/test-read_file.R b/tests/testthat/test-read_file.R
index ecd39acec..642ff69cf 100644
--- a/tests/testthat/test-read_file.R
+++ b/tests/testthat/test-read_file.R
@@ -3,9 +3,6 @@ skip_on_ci()
 test_that("read_file works", {
   rds_path <- tempfile(fileext = ".rds")
   rds_gz_path <- tempfile(fileext = ".rds.gz")
-  fst_path <- tempfile(fileext = ".fst")
-  sav_path <- tempfile(fileext = ".sav")
-  zsav_path <- tempfile(fileext = ".zsav")
   csv_path <- tempfile(fileext = ".csv")
   csv_gz_path <- tempfile(fileext = ".csv.gz")
   parquet_path <- tempfile(fileext = ".parquet")
@@ -14,18 +11,12 @@ test_that("read_file works", {
 
   readr::write_rds(aq_data, rds_path)
   readr::write_rds(aq_data, rds_gz_path)
-  fst::write_fst(aq_data, fst_path)
-  haven::write_sav(aq_data, sav_path)
-  haven::write_sav(aq_data, zsav_path, compress = "zsav")
   readr::write_csv(aq_data, csv_path)
   readr::write_csv(aq_data, csv_gz_path)
   arrow::write_parquet(aq_data, parquet_path)
 
   expect_equal(aq_data, read_file(rds_path))
   expect_equal(aq_data, read_file(rds_gz_path))
-  expect_equal(aq_data, read_file(fst_path))
-  expect_equal(aq_data, haven::zap_formats(read_file(sav_path)))
-  expect_equal(aq_data, haven::zap_formats(read_file(zsav_path)))
   expect_equal(aq_data, read_file(csv_gz_path))
   expect_equal(aq_data, read_file(parquet_path))
 })