diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R
index c5a26da12..00260bb8e 100644
--- a/R/add_nsu_cohort.R
+++ b/R/add_nsu_cohort.R
@@ -2,13 +2,17 @@
 #'
 #' @param data The input data frame
 #' @param year The year being processed
+#' @param nsu_cohort The NSU data for the year
 #'
 #' @return A data frame containing the Non-Service Users as additional rows
 #' @export
 #'
 #' @family episode file
 #' @seealso [get_nsu_path()]
-add_nsu_cohort <- function(data, year) {
+add_nsu_cohort <- function(
+    data,
+    year,
+    nsu_cohort = read_file(get_nsu_path(year))) {
   year_param <- year
 
   if (!check_year_valid(year, "NSU")) {
@@ -29,9 +33,9 @@ add_nsu_cohort <- function(data, year) {
     )
   )
 
-  matched <- dplyr::full_join(data,
-    # NSU cohort file
-    read_file(get_nsu_path(year)) %>%
+  matched <- dplyr::full_join(
+    data,
+    nsu_cohort %>%
       dplyr::mutate(
         dob = as.Date(.data[["dob"]]),
         gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]])
@@ -110,7 +114,6 @@ add_nsu_cohort <- function(data, year) {
         .data[["chi"]]
       )
     ) %>%
-    # Remove the additional columns
     dplyr::select(-dplyr::contains("_nsu"), -"has_chi")
 
   return(return_df)
diff --git a/R/create_episode_file.R b/R/create_episode_file.R
index bad42be5e..3dc33e193 100644
--- a/R/create_episode_file.R
+++ b/R/create_episode_file.R
@@ -1,17 +1,32 @@
-#' Create the Source Episode file
+#' Produce the Source Episode file
 #'
 #' @param processed_data_list containing data from processed extracts.
 #' @param year The year to process, in FY format.
 #' @param write_to_disk (optional) Should the data be written to disk default is
 #' `TRUE` i.e. write the data to disk.
+#' @inheritParams add_nsu_cohort
+#' @inheritParams fill_geographies
+#' @inheritParams join_cohort_lookups
+#' @inheritParams join_deaths_data
+#' @inheritParams match_on_ltcs
+#' @inheritParams link_delayed_discharge_eps
 #' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output
-#' (instead of chi).
+#' (instead of chi)
 #'
-#' @return the Source Episode file as a [tibble][tibble::tibble-package].
+#' @return a [tibble][tibble::tibble-package] containing the episode file
 #' @export
 create_episode_file <- function(
     processed_data_list,
     year,
+    dd_data = read_file(get_source_extract_path(year, "DD")),
+    nsu_cohort = read_file(get_nsu_path(year)),
+    ltc_data = read_file(get_ltcs_path(year)),
+    slf_pc_lookup = read_file(get_slf_postcode_path()),
+    slf_gpprac_lookup = read_file(
+      get_slf_gpprac_path(),
+      col_select = c("gpprac", "cluster", "hbpraccode")
+    ),
+    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)),
     write_to_disk = TRUE,
     anon_chi_out = TRUE) {
   episode_file <- dplyr::bind_rows(processed_data_list) %>%
@@ -99,15 +114,21 @@ create_episode_file <- function(
     correct_cij_vars() %>%
     fill_missing_cij_markers() %>%
     add_ppa_flag() %>%
-    link_delayed_discharge_eps(year) %>%
-    add_nsu_cohort(year) %>%
-    match_on_ltcs(year) %>%
+    link_delayed_discharge_eps(year, dd_data) %>%
+    add_nsu_cohort(year, nsu_cohort) %>%
+    match_on_ltcs(year, ltc_data) %>%
     correct_demographics(year) %>%
     create_cohort_lookups(year) %>%
     join_cohort_lookups(year) %>%
     join_sparra_hhg(year) %>%
-    fill_geographies() %>%
-    join_deaths_data(year) %>%
+    fill_geographies(
+      slf_pc_lookup,
+      slf_gpprac_lookup
+    ) %>%
+    join_deaths_data(
+      year,
+      slf_deaths_lookup
+    ) %>%
     load_ep_file_vars(year)
 
   if (anon_chi_out) {
@@ -354,22 +375,28 @@ create_cohort_lookups <- function(data, year, update = latest_update()) {
 #'
 #' @inheritParams store_ep_file_vars
 #' @inheritParams get_demographic_cohorts_path
+#' @param demographic_cohort,service_use_cohort The cohort data
 #'
 #' @return The data including the Demographic and Service Use lookups.
-join_cohort_lookups <- function(data, year, update = latest_update()) {
+join_cohort_lookups <- function(
+    data,
+    year,
+    update = latest_update(),
+    demographic_cohort = read_file(
+      get_demographic_cohorts_path(year, update),
+      col_select = c("chi", "demographic_cohort")
+    ),
+    service_use_cohort = read_file(
+      get_service_use_cohorts_path(year, update),
+      col_select = c("chi", "service_use_cohort")
+    )) {
   join_cohort_lookups <- data %>%
     dplyr::left_join(
-      read_file(
-        get_demographic_cohorts_path(year, update),
-        col_select = c("chi", "demographic_cohort")
-      ),
+      demographic_cohort,
       by = "chi"
     ) %>%
     dplyr::left_join(
-      read_file(
-        get_service_use_cohorts_path(year, update),
-        col_select = c("chi", "service_use_cohort")
-      ),
+      service_use_cohort,
       by = "chi"
     )
 
diff --git a/R/fill_geographies.R b/R/fill_geographies.R
index 8f4a470e8..c9aee6355 100644
--- a/R/fill_geographies.R
+++ b/R/fill_geographies.R
@@ -4,10 +4,18 @@
 #' then use the lookups to match on additional variables.
 #'
 #' @param data the SLF
+#' @param slf_pc_lookup The SLF Postcode lookup
+#' @param slf_gpprac_lookup The SLF GP Practice lookup
 #'
 #' @return a [tibble][tibble::tibble-package] of the SLF with improved
 #' Postcode and GP Practice details.
-fill_geographies <- function(data) {
+fill_geographies <- function(
+    data,
+    slf_pc_lookup = read_file(get_slf_postcode_path()),
+    slf_gpprac_lookup = read_file(
+      get_slf_gpprac_path(),
+      col_select = c("gpprac", "cluster", "hbpraccode")
+    )) {
   check_variables_exist(data, c(
     "chi",
     "postcode",
@@ -21,8 +29,15 @@ fill_geographies <- function(data) {
   ))
 
   data %>%
-    fill_postcode_geogs() %>%
-    fill_gpprac_geographies()
+    fill_postcode_geogs(
+      slf_pc_lookup = read_file(get_slf_postcode_path())
+    ) %>%
+    fill_gpprac_geographies(
+      slf_gpprac_lookup = read_file(
+        get_slf_gpprac_path(),
+        col_select = c("gpprac", "cluster", "hbpraccode")
+      )
+    )
 }
 
 #' Make a postcode lookup for filling to most recent postcodes based on CHI
@@ -86,9 +101,9 @@ make_gpprac_lookup <- function(data) {
   return(gpprac_lookup)
 }
 
-fill_postcode_geogs <- function(data) {
-  slf_pc_lookup <- read_file(get_slf_postcode_path())
-
+fill_postcode_geogs <- function(
+    data,
+    slf_pc_lookup) {
   filled_postcodes <- dplyr::left_join(
     data,
     make_postcode_lookup(data),
@@ -123,17 +138,20 @@ fill_postcode_geogs <- function(data) {
       lca = dplyr::coalesce(.data$lca, .data$lca_old),
       datazone2011 = dplyr::coalesce(.data$datazone2011, .data$datazone2011_old)
     ) %>%
-    dplyr::select(!c("hb2018", "hscp", "lca_old", "datazone2011_old", "most_recent_postcode"))
+    dplyr::select(!c(
+      "hb2018",
+      "hscp",
+      "lca_old",
+      "datazone2011_old",
+      "most_recent_postcode"
+    ))
 
   return(filled_postcodes)
 }
 
-fill_gpprac_geographies <- function(data) {
-  gpprac_ref <- read_file(
-    get_slf_gpprac_path(),
-    col_select = c("gpprac", "cluster", "hbpraccode")
-  )
-
+fill_gpprac_geographies <- function(
+    data,
+    slf_gpprac_lookup) {
   filled_gpprac <- dplyr::left_join(
     data,
     make_gpprac_lookup(data),
@@ -147,7 +165,12 @@ fill_gpprac_geographies <- function(data) {
         .data$gpprac
       )
     ) %>%
-    dplyr::left_join(gpprac_ref, by = "gpprac", suffix = c("_old", "")) %>%
+    dplyr::left_join(
+      slf_gpprac_lookup %>%
+        dplyr::select("gpprac", "cluster", "hbpraccode"),
+      by = "gpprac",
+      suffix = c("_old", "")
+    ) %>%
     dplyr::mutate(
       hbpraccode = dplyr::coalesce(.data$hbpraccode, .data$hbpraccode_old)
     ) %>%
diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R
index 4cb5eef44..37ed545cf 100644
--- a/R/get_source_extract_path.R
+++ b/R/get_source_extract_path.R
@@ -41,34 +41,34 @@ get_source_extract_path <- function(
   type <- match.arg(type)
 
   if (!check_year_valid(year, type)) {
-    return(NA)
+    return(get_dummy_boxi_extract_path())
   }
 
   file_name <- dplyr::case_match(
     type,
     "Acute" ~ "acute_for_source",
-    "AE" ~ "a&e_for_source",
-    "AT" ~ "Alarms-Telecare-for-source",
+    "AE" ~ "a_and_e_for_source",
+    "AT" ~ "alarms-telecare-for-source",
     "CH" ~ "care_home_for_source",
-    "CMH" ~ "CMH_for_source",
+    "CMH" ~ "cmh_for_source",
     "Client" ~ "client_for_source",
-    "DD" ~ "DD_for_source",
+    "DD" ~ "delayed_discharge_for_source",
     "Deaths" ~ "deaths_for_source",
-    "DN" ~ "DN_for_source",
-    "GPOoH" ~ "GP_OOH_for_source",
-    "HC" ~ "Home_Care_for_source",
+    "DN" ~ "district_nursing_for_source",
+    "GPOoH" ~ "gp_ooh_for_source",
+    "HC" ~ "home_care_for_source",
     "Homelessness" ~ "homelessness_for_source",
     "Maternity" ~ "maternity_for_source",
     "MH" ~ "mental_health_for_source",
-    "DD" ~ "DD_for_source",
     "Outpatients" ~ "outpatients_for_source",
-    "PIS" ~ "prescribing_file_for_source",
-    "SDS" ~ "SDS-for-source"
-  )
+    "PIS" ~ "prescribing_for_source",
+    "SDS" ~ "sds_for_source"
+  ) %>%
+    stringr::str_glue("-{year}.parquet")
 
   source_extract_path <- get_file_path(
     directory = get_year_dir(year),
-    file_name = stringr::str_glue("{file_name}-20{year}.parquet"),
+    file_name = file_name,
     ...
   )
 
diff --git a/R/get_sparra_hhg_paths.R b/R/get_sparra_hhg_paths.R
index 2fd1a69f9..157160ed4 100644
--- a/R/get_sparra_hhg_paths.R
+++ b/R/get_sparra_hhg_paths.R
@@ -10,6 +10,10 @@
 #' @family extract file paths
 #' @seealso [get_file_path()] for the generic function.
 get_hhg_path <- function(year, ...) {
+  if (!check_year_valid(year, "HHG")) {
+    return(get_dummy_boxi_extract_path())
+  }
+
   hhg_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "HHG"),
     file_name = stringr::str_glue("HHG-20{year}.parquet"),
@@ -31,6 +35,10 @@ get_hhg_path <- function(year, ...) {
 #' @family extract file paths
 #' @seealso [get_file_path()] for the generic function.
 get_sparra_path <- function(year, ...) {
+  if (!check_year_valid(year, "SPARRA")) {
+    return(get_dummy_boxi_extract_path())
+  }
+
   sparra_file_path <- get_file_path(
     directory = fs::path(get_slf_dir(), "SPARRA"),
     file_name = stringr::str_glue("SPARRA-20{year}.parquet"),
diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R
index 694d2e2b9..89bcbbe13 100644
--- a/R/join_deaths_data.R
+++ b/R/join_deaths_data.R
@@ -2,16 +2,14 @@
 #'
 #' @param data Episode file data
 #' @param year financial year, e.g. '1920'
-#' @param slf_deaths_lookup_path Path to slf deaths lookup.
+#' @param slf_deaths_lookup The SLF deaths lookup.
 #'
 #' @return The data including the deaths lookup matched
 #'         on to the episode file.
 join_deaths_data <- function(
     data,
     year,
-    slf_deaths_lookup_path = get_slf_deaths_lookup_path(year)) {
-  slf_deaths_lookup <- read_file(slf_deaths_lookup_path)
-
+    slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))) {
   return(
     data %>%
       dplyr::left_join(
diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R
index 42345655a..f83f31325 100644
--- a/R/match_on_ltcs.R
+++ b/R/match_on_ltcs.R
@@ -5,13 +5,17 @@
 #'
 #' @param data episode files
 #' @param year financial year, e.g. '1920'
+#' @param ltc_data The LTC data for the year
 #'
 #' @return data matched with long term conditions
-match_on_ltcs <- function(data, year) {
+match_on_ltcs <- function(
+    data,
+    year,
+    ltc_data = read_file(get_ltcs_path(year))) {
   # Match on LTC lookup
   matched <- dplyr::left_join(
     data,
-    read_file(get_ltcs_path(year)),
+    ltc_data,
     by = "chi",
     suffix = c("", "_ltc")
   ) %>%
diff --git a/R/read_file.R b/R/read_file.R
index 2941b62ed..be0a6fc65 100644
--- a/R/read_file.R
+++ b/R/read_file.R
@@ -27,6 +27,11 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) {
     "parquet"
   )
 
+  # Return an empty tibble if trying to read the dummy path
+  if (path == get_dummy_boxi_extract_path()) {
+    return(tibble::tibble())
+  }
+
   ext <- fs::path_ext(path)
 
   if (ext == "gz") {
diff --git a/_targets.R b/_targets.R
index db26477ef..a9fa80d7a 100644
--- a/_targets.R
+++ b/_targets.R
@@ -543,11 +543,18 @@ list(
         source_sc_alarms_tele
       )
     ),
+    tar_file_read(nsu_cohort, get_nsu_path(year), read_file(!!.x)),
     tar_target(
       episode_file,
       create_episode_file(
         processed_data_list,
         year,
+        dd_data = source_dd_extract,
+        nsu_cohort = nsu_cohort,
+        ltc_data = source_ltc_lookup,
+        slf_pc_lookup = source_pc_lookup,
+        slf_gpprac_lookup = source_gp_lookup,
+        slf_deaths_lookup = slf_deaths_lookup,
         write_to_disk
       )
     ),
diff --git a/man/add_nsu_cohort.Rd b/man/add_nsu_cohort.Rd
index 723c105e1..4ea9324e0 100644
--- a/man/add_nsu_cohort.Rd
+++ b/man/add_nsu_cohort.Rd
@@ -4,12 +4,14 @@
 \alias{add_nsu_cohort}
 \title{Add NSU cohort to working file}
 \usage{
-add_nsu_cohort(data, year)
+add_nsu_cohort(data, year, nsu_cohort = read_file(get_nsu_path(year)))
 }
 \arguments{
 \item{data}{The input data frame}
 
 \item{year}{The year being processed}
+
+\item{nsu_cohort}{The NSU data for the year}
 }
 \value{
 A data frame containing the Non-Service Users as additional rows
diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd
index e1bda32b9..c1ce0e063 100644
--- a/man/create_episode_file.Rd
+++ b/man/create_episode_file.Rd
@@ -2,11 +2,18 @@
 % Please edit documentation in R/create_episode_file.R
 \name{create_episode_file}
 \alias{create_episode_file}
-\title{Create the Source Episode file}
+\title{Produce the Source Episode file}
 \usage{
 create_episode_file(
   processed_data_list,
   year,
+  dd_data = read_file(get_source_extract_path(year, "DD")),
+  nsu_cohort = read_file(get_nsu_path(year)),
+  ltc_data = read_file(get_ltcs_path(year)),
+  slf_pc_lookup = read_file(get_slf_postcode_path()),
+  slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac",
+    "cluster", "hbpraccode")),
+  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)),
   write_to_disk = TRUE,
   anon_chi_out = TRUE
 )
@@ -16,15 +23,27 @@ create_episode_file(
 
 \item{year}{The year to process, in FY format.}
 
+\item{dd_data}{The processed DD extract}
+
+\item{nsu_cohort}{The NSU data for the year}
+
+\item{ltc_data}{The LTC data for the year}
+
+\item{slf_pc_lookup}{The SLF Postcode lookup}
+
+\item{slf_gpprac_lookup}{The SLF GP Practice lookup}
+
+\item{slf_deaths_lookup}{The SLF deaths lookup.}
+
 \item{write_to_disk}{(optional) Should the data be written to disk default is
 \code{TRUE} i.e. write the data to disk.}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
-(instead of chi).}
+(instead of chi)}
 }
 \value{
-the Source Episode file as a \link[tibble:tibble-package]{tibble}.
+a \link[tibble:tibble-package]{tibble} containing the episode file
 }
 \description{
-Create the Source Episode file
+Produce the Source Episode file
 }
diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd
index 4fd9a4a53..c4502e5ae 100644
--- a/man/create_individual_file.Rd
+++ b/man/create_individual_file.Rd
@@ -24,7 +24,7 @@ create_individual_file(
 (instead of chi).}
 
 \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output
-(instead of chi).}
+(instead of chi)}
 }
 \value{
 The processed individual file
diff --git a/man/fill_geographies.Rd b/man/fill_geographies.Rd
index 5308fd8d0..bb619405b 100644
--- a/man/fill_geographies.Rd
+++ b/man/fill_geographies.Rd
@@ -4,10 +4,19 @@
 \alias{fill_geographies}
 \title{Fill postcode and GP practice geographies}
 \usage{
-fill_geographies(data)
+fill_geographies(
+  data,
+  slf_pc_lookup = read_file(get_slf_postcode_path()),
+  slf_gpprac_lookup = read_file(get_slf_gpprac_path(), col_select = c("gpprac",
+    "cluster", "hbpraccode"))
+)
 }
 \arguments{
 \item{data}{the SLF}
+
+\item{slf_pc_lookup}{The SLF Postcode lookup}
+
+\item{slf_gpprac_lookup}{The SLF GP Practice lookup}
 }
 \value{
 a \link[tibble:tibble-package]{tibble} of the SLF with improved
diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd
index 15a860a36..3ef549cc3 100644
--- a/man/join_cohort_lookups.Rd
+++ b/man/join_cohort_lookups.Rd
@@ -4,7 +4,15 @@
 \alias{join_cohort_lookups}
 \title{Join cohort lookups}
 \usage{
-join_cohort_lookups(data, year, update = latest_update())
+join_cohort_lookups(
+  data,
+  year,
+  update = latest_update(),
+  demographic_cohort = read_file(get_demographic_cohorts_path(year, update), col_select =
+    c("chi", "demographic_cohort")),
+  service_use_cohort = read_file(get_service_use_cohorts_path(year, update), col_select =
+    c("chi", "service_use_cohort"))
+)
 }
 \arguments{
 \item{data}{The in-progress episode file data.}
@@ -12,6 +20,8 @@ join_cohort_lookups(data, year, update = latest_update())
 \item{year}{The year to process, in FY format.}
 
 \item{update}{The update to use}
+
+\item{demographic_cohort, service_use_cohort}{The cohort data}
 }
 \value{
 The data including the Demographic and Service Use lookups.
diff --git a/man/join_deaths_data.Rd b/man/join_deaths_data.Rd
index 6508d7893..f3b68fe1a 100644
--- a/man/join_deaths_data.Rd
+++ b/man/join_deaths_data.Rd
@@ -7,7 +7,7 @@
 join_deaths_data(
   data,
   year,
-  slf_deaths_lookup_path = get_slf_deaths_lookup_path(year)
+  slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year))
 )
 }
 \arguments{
@@ -15,7 +15,7 @@ join_deaths_data(
 
 \item{year}{financial year, e.g. '1920'}
 
-\item{slf_deaths_lookup_path}{Path to slf deaths lookup.}
+\item{slf_deaths_lookup}{The SLF deaths lookup.}
 }
 \value{
 The data including the deaths lookup matched
diff --git a/man/match_on_ltcs.Rd b/man/match_on_ltcs.Rd
index 0c7e7fb53..e0def00cc 100644
--- a/man/match_on_ltcs.Rd
+++ b/man/match_on_ltcs.Rd
@@ -4,12 +4,14 @@
 \alias{match_on_ltcs}
 \title{Match on LTC DoB and dates of LTC incidence}
 \usage{
-match_on_ltcs(data, year)
+match_on_ltcs(data, year, ltc_data = read_file(get_ltcs_path(year)))
 }
 \arguments{
 \item{data}{episode files}
 
 \item{year}{financial year, e.g. '1920'}
+
+\item{ltc_data}{The LTC data for the year}
 }
 \value{
 data matched with long term conditions