diff --git a/NAMESPACE b/NAMESPACE index 6f1c88841..2d8e52a46 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,6 +7,7 @@ export(add_homelessness_flag) export(add_hri_variables) export(add_nsu_cohort) export(check_year_format) +export(clean_temp_data) export(clean_up_free_text) export(compute_mid_year_age) export(convert_ca_to_lca) @@ -178,12 +179,14 @@ export(read_sc_all_alarms_telecare) export(read_sc_all_care_home) export(read_sc_all_home_care) export(read_sc_all_sds) +export(read_temp_data) export(rename_hscp) export(setup_keyring) export(start_fy) export(start_fy_quarter) export(start_next_fy_quarter) export(write_file) +export(write_temp_data) export(years_to_run) importFrom(data.table,.N) importFrom(data.table,.SD) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 2d560449b..a1d5672af 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -32,13 +32,15 @@ create_episode_file <- function( slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi(), sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(), write_to_disk = TRUE, - anon_chi_out = TRUE) { + anon_chi_out = TRUE, + write_temp_to_disk = FALSE) { cli::cli_alert_info("Create episode file function started at {Sys.time()}") processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble())) episode_file <- dplyr::bind_rows(processed_data_list) %>% slfhelper::get_chi() %>% + write_temp_data(year, file_name = "ep_temp1", write_temp_to_disk) %>% create_cost_inc_dna() %>% apply_cost_uplift() %>% store_ep_file_vars( @@ -122,15 +124,18 @@ create_episode_file <- function( # PC8 format may still be used. Ensure here that all datasets are in PC7 format. 
postcode = phsmethods::format_postcode(.data$postcode, "pc7") ) %>% + write_temp_data(year, file_name = "ep_temp2", write_temp_to_disk) %>% correct_cij_vars() %>% fill_missing_cij_markers() %>% add_homelessness_flag(year, lookup = homelessness_lookup) %>% add_homelessness_date_flags(year, lookup = homelessness_lookup) %>% add_ppa_flag() %>% + write_temp_data(year, file_name = "ep_temp3", write_temp_to_disk) %>% link_delayed_discharge_eps(year, dd_data) %>% add_nsu_cohort(year, nsu_cohort) %>% match_on_ltcs(year, ltc_data) %>% correct_demographics(year) %>% + write_temp_data(year, file_name = "ep_temp4", write_temp_to_disk) %>% create_cohort_lookups(year) %>% join_cohort_lookups(year) %>% join_sparra_hhg(year) %>% @@ -142,11 +147,13 @@ create_episode_file <- function( year, slf_deaths_lookup ) %>% + write_temp_data(year, file_name = "ep_temp5", write_temp_to_disk) %>% add_activity_after_death_flag(year, deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi() ) %>% - load_ep_file_vars(year) + load_ep_file_vars(year) %>% + write_temp_data(year, file_name = "ep_temp6", write_temp_to_disk) if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { episode_file <- episode_file %>% diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 8079bc948..751874ec9 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -17,7 +17,8 @@ create_individual_file <- function( homelessness_lookup = create_homelessness_lookup(year), write_to_disk = TRUE, anon_chi_in = TRUE, - anon_chi_out = TRUE) { + anon_chi_out = TRUE, + write_temp_to_disk = FALSE) { cli::cli_alert_info("Create individual file function finished at {Sys.time()}") if (anon_chi_in) { @@ -76,30 +77,36 @@ create_individual_file <- function( ))) %>% remove_blank_chi() %>% add_cij_columns() %>% - add_all_columns(year = year) + add_all_columns(year = year) %>% + write_temp_data(year, file_name = "indiv_temp1", write_temp_to_disk) if (!check_year_valid(year, 
type = c("ch", "hc", "at", "sds"))) { individual_file <- individual_file %>% - aggregate_by_chi(year = year, exclude_sc_var = TRUE) + aggregate_by_chi(year = year, exclude_sc_var = TRUE) %>% + write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk) } else { individual_file <- individual_file %>% aggregate_ch_episodes() %>% clean_up_ch(year) %>% - aggregate_by_chi(year = year, exclude_sc_var = FALSE) + aggregate_by_chi(year = year, exclude_sc_var = FALSE) %>% + write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk) } individual_file <- individual_file %>% recode_gender() %>% clean_individual_file(year) %>% join_cohort_lookups(year) %>% + write_temp_data(year, file_name = "indiv_temp3", write_temp_to_disk) %>% add_homelessness_flag(year, lookup = homelessness_lookup) %>% match_on_ltcs(year) %>% join_deaths_data(year) %>% join_sparra_hhg(year) %>% + write_temp_data(year, file_name = "indiv_temp4", write_temp_to_disk) %>% join_slf_lookup_vars() %>% dplyr::mutate(year = year) %>% add_hri_variables(chi_variable = "chi") %>% add_keep_population_flag(year) %>% + write_temp_data(year, file_name = "indiv_temp5", write_temp_to_disk) %>% join_sc_client(year, file_type = "individual") if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { diff --git a/R/write_temp_data.R b/R/write_temp_data.R new file mode 100644 index 000000000..f714e66da --- /dev/null +++ b/R/write_temp_data.R @@ -0,0 +1,62 @@ +#' Write temp data to disk in parquet format for debugging purposes +#' +#' @description Optionally write the data to `{file_name}.parquet` in the year directory, then return the data unchanged so the call can sit inside a pipe. +#' @param data The data to be written (and returned) +#' @param year Year passed to `get_year_dir()` to locate the output directory +#' @param file_name The file name to be written, without extension; `.parquet` is appended +#' @param write_temp_to_disk Logical; the data is written to disk only when this is `TRUE` +#' +#' @return `data`, unchanged, for the next step in the pipe. 
+#' @export +write_temp_data <- + function(data, year, file_name, write_temp_to_disk) { + if (write_temp_to_disk) { + full_file_name <- stringr::str_glue("{file_name}.parquet") + file_path <- file.path( + get_year_dir(year), + full_file_name + ) + + cli::cli_alert_info(stringr::str_glue("Writing {full_file_name} to disk started at {Sys.time()}")) + + write_file(data, + path = file_path + ) + } + return(data) + } + + +#' Read temp data from disk for debugging purposes +#' +#' @description Read the temporary file `{file_name}.parquet` from the year directory for debugging purposes. +#' @param year Year passed to `get_year_dir()` to locate the directory holding the file +#' @param file_name The file name to be read, without the `.parquet` extension +#' +#' @return The data read from the file, as returned by `read_file()`. +#' @export +read_temp_data <- function(year, file_name) { + full_file_name <- stringr::str_glue("{file_name}.parquet") + file_path <- file.path( + get_year_dir(year), + full_file_name + ) + + return(read_file(file_path)) +} + +#' Clean temp data from disk +#' +#' @description Remove temp files from disk to save storage. +#' @param year Year passed to `get_year_dir()` to locate the directory that is searched for temp files +#' @param file_type Which temp files to remove, `"ep"` or `"ind"`; used to build the file-name pattern +#' +#' @return The logical vector returned by `file.remove()`, one element per file it was asked to remove. 
+#' @export +clean_temp_data <- function(year, file_type = c("ep", "ind")) { + file_type <- match.arg(file_type) # the two-element default resolves to "ep" + # full.names: file.remove() needs full paths; "(iv)?" lets "ind" match indiv_temp* + list.files(get_year_dir(year), + pattern = stringr::str_glue("^{file_type}(iv)?_temp"), full.names = TRUE + ) %>% file.remove() +} diff --git a/Run_SLF_Files_manually/run_individual_file_1415.R b/Run_SLF_Files_manually/run_individual_file_1415.R index 70aa2bfca..37bf7fe24 100644 --- a/Run_SLF_Files_manually/run_individual_file_1415.R +++ b/Run_SLF_Files_manually/run_individual_file_1415.R @@ -2,6 +2,8 @@ library(createslf) year <- "1415" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_1516.R b/Run_SLF_Files_manually/run_individual_file_1516.R index 8e8dae906..8c6cc48e6 100644 --- a/Run_SLF_Files_manually/run_individual_file_1516.R +++ b/Run_SLF_Files_manually/run_individual_file_1516.R @@ -2,6 +2,8 @@ library(createslf) year <- "1516" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_1617.R b/Run_SLF_Files_manually/run_individual_file_1617.R index 255e4e674..5105ef393 100644 --- a/Run_SLF_Files_manually/run_individual_file_1617.R +++ b/Run_SLF_Files_manually/run_individual_file_1617.R @@ -2,6 +2,8 @@ library(createslf) year <- "1617" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_1718.R b/Run_SLF_Files_manually/run_individual_file_1718.R index 777948fc7..328ef78aa 100644 --- a/Run_SLF_Files_manually/run_individual_file_1718.R +++ b/Run_SLF_Files_manually/run_individual_file_1718.R @@ -2,6 +2,8 @@ library(createslf) year <- "1718" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_1819.R 
b/Run_SLF_Files_manually/run_individual_file_1819.R index 18839b2ea..db9d56455 100644 --- a/Run_SLF_Files_manually/run_individual_file_1819.R +++ b/Run_SLF_Files_manually/run_individual_file_1819.R @@ -2,6 +2,8 @@ library(createslf) year <- "1819" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_1920.R b/Run_SLF_Files_manually/run_individual_file_1920.R index 3567d5c5d..80b8f15fb 100644 --- a/Run_SLF_Files_manually/run_individual_file_1920.R +++ b/Run_SLF_Files_manually/run_individual_file_1920.R @@ -2,6 +2,8 @@ library(createslf) year <- "1920" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_2021.R b/Run_SLF_Files_manually/run_individual_file_2021.R index 8a78924b3..7b60a2afe 100644 --- a/Run_SLF_Files_manually/run_individual_file_2021.R +++ b/Run_SLF_Files_manually/run_individual_file_2021.R @@ -2,6 +2,8 @@ library(createslf) year <- "2021" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_2122.R b/Run_SLF_Files_manually/run_individual_file_2122.R index 9ceaa571c..623e54aac 100644 --- a/Run_SLF_Files_manually/run_individual_file_2122.R +++ b/Run_SLF_Files_manually/run_individual_file_2122.R @@ -2,6 +2,8 @@ library(createslf) year <- "2122" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_2223.R b/Run_SLF_Files_manually/run_individual_file_2223.R index b83507dbc..b16c672cb 100644 --- a/Run_SLF_Files_manually/run_individual_file_2223.R +++ b/Run_SLF_Files_manually/run_individual_file_2223.R @@ -2,6 +2,8 @@ library(createslf) year <- "2223" +clean_temp_data(year, "ep") + 
episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_2324.R b/Run_SLF_Files_manually/run_individual_file_2324.R index 3f6cf0fba..9b27d33ad 100644 --- a/Run_SLF_Files_manually/run_individual_file_2324.R +++ b/Run_SLF_Files_manually/run_individual_file_2324.R @@ -2,6 +2,8 @@ library(createslf) year <- "2324" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/Run_SLF_Files_manually/run_individual_file_2425.R b/Run_SLF_Files_manually/run_individual_file_2425.R index 843eb505c..4a4f25762 100644 --- a/Run_SLF_Files_manually/run_individual_file_2425.R +++ b/Run_SLF_Files_manually/run_individual_file_2425.R @@ -2,6 +2,8 @@ library(createslf) year <- "2425" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file diff --git a/_targets.R b/_targets.R index 206d375a4..3ac733e1a 100644 --- a/_targets.R +++ b/_targets.R @@ -23,6 +23,7 @@ years_to_run <- createslf::years_to_run() list( ## Phase I, all years ---- + tar_rds(test_mode, TRUE), tar_rds(write_to_disk, TRUE), tar_rds( file_path_ext_clean, diff --git a/man/clean_temp_data.Rd b/man/clean_temp_data.Rd new file mode 100644 index 000000000..c26dcde8e --- /dev/null +++ b/man/clean_temp_data.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_temp_data.R +\name{clean_temp_data} +\alias{clean_temp_data} +\title{Clean temp data from disk} +\usage{ +clean_temp_data(year, file_type = c("ep", "ind")) +} +\arguments{ +\item{year}{year variable} + +\item{file_type}{ep or ind files} +} +\value{ +the data for next step as a \link[tibble:tibble-package]{tibble}. +} +\description{ +Clean temp data from disk to save storage. 
+} diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd index d6bd6d526..0657a8cbb 100644 --- a/man/create_episode_file.Rd +++ b/man/create_episode_file.Rd @@ -18,7 +18,8 @@ create_episode_file( slfhelper::get_chi(), sc_client = read_file(get_sc_client_lookup_path(year)) \%>\% slfhelper::get_chi(), write_to_disk = TRUE, - anon_chi_out = TRUE + anon_chi_out = TRUE, + write_temp_to_disk = FALSE ) } \arguments{ diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd index bfa584d54..902c3e9bd 100644 --- a/man/create_individual_file.Rd +++ b/man/create_individual_file.Rd @@ -10,7 +10,8 @@ create_individual_file( homelessness_lookup = create_homelessness_lookup(year), write_to_disk = TRUE, anon_chi_in = TRUE, - anon_chi_out = TRUE + anon_chi_out = TRUE, + write_temp_to_disk ) } \arguments{ diff --git a/man/read_temp_data.Rd b/man/read_temp_data.Rd new file mode 100644 index 000000000..709d8362d --- /dev/null +++ b/man/read_temp_data.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_temp_data.R +\name{read_temp_data} +\alias{read_temp_data} +\title{Read a temp data from disk for debugging purpose} +\usage{ +read_temp_data(year, file_name) +} +\arguments{ +\item{year}{year variable} + +\item{file_name}{The file name to be read} +} +\value{ +the data for next step as a \link[tibble:tibble-package]{tibble}. +} +\description{ +Read a temp data to disk for debugging purpose. 
+} diff --git a/man/write_temp_data.Rd b/man/write_temp_data.Rd new file mode 100644 index 000000000..6ee32e7a2 --- /dev/null +++ b/man/write_temp_data.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_temp_data.R +\name{write_temp_data} +\alias{write_temp_data} +\title{Write a temp data to disk in parquet format for debugging purpose} +\usage{ +write_temp_data(data, year, file_name, write_temp_to_disk) +} +\arguments{ +\item{data}{The data to be written} + +\item{year}{year variable} + +\item{file_name}{The file name to be written} + +\item{write_temp_to_disk}{Boolean type, write temp data to disk or not} +} +\value{ +the data for next step as a \link[tibble:tibble-package]{tibble}. +} +\description{ +Write a temp data in parquet format to disk for debugging purpose. +}