Skip to content

Commit

Permalink
Write temp data (#1014)
Browse files Browse the repository at this point in the history
* add_test_to_filename and write_temp_data function

* Update documentation

* remove test_mode default

* Update documentation

* add read_temp_data

* Style code

* Update documentation

* change test_mode to write_temp_to_disk and add clean temp function

* Update documentation

* Style code

* Style code

* Include extra temp file

---------

Co-authored-by: lizihao-anu <[email protected]>
Co-authored-by: Jennit07 <[email protected]>
Co-authored-by: Jennit07 <[email protected]>
Co-authored-by: Jennifer Thom <[email protected]>
  • Loading branch information
5 people authored Oct 16, 2024
1 parent e61f1af commit 0be1daa
Show file tree
Hide file tree
Showing 21 changed files with 173 additions and 8 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(add_homelessness_flag)
export(add_hri_variables)
export(add_nsu_cohort)
export(check_year_format)
export(clean_temp_data)
export(clean_up_free_text)
export(compute_mid_year_age)
export(convert_ca_to_lca)
Expand Down Expand Up @@ -178,12 +179,14 @@ export(read_sc_all_alarms_telecare)
export(read_sc_all_care_home)
export(read_sc_all_home_care)
export(read_sc_all_sds)
export(read_temp_data)
export(rename_hscp)
export(setup_keyring)
export(start_fy)
export(start_fy_quarter)
export(start_next_fy_quarter)
export(write_file)
export(write_temp_data)
export(years_to_run)
importFrom(data.table,.N)
importFrom(data.table,.SD)
Expand Down
11 changes: 9 additions & 2 deletions R/create_episode_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@ create_episode_file <- function(
slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi(),
sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(),
write_to_disk = TRUE,
anon_chi_out = TRUE) {
anon_chi_out = TRUE,
write_temp_to_disk = FALSE) {
cli::cli_alert_info("Create episode file function started at {Sys.time()}")

processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble()))

episode_file <- dplyr::bind_rows(processed_data_list) %>%
slfhelper::get_chi() %>%
write_temp_data(year, file_name = "ep_temp1", write_temp_to_disk) %>%
create_cost_inc_dna() %>%
apply_cost_uplift() %>%
store_ep_file_vars(
Expand Down Expand Up @@ -122,15 +124,18 @@ create_episode_file <- function(
# PC8 format may still be used. Ensure here that all datasets are in PC7 format.
postcode = phsmethods::format_postcode(.data$postcode, "pc7")
) %>%
write_temp_data(year, file_name = "ep_temp2", write_temp_to_disk) %>%
correct_cij_vars() %>%
fill_missing_cij_markers() %>%
add_homelessness_flag(year, lookup = homelessness_lookup) %>%
add_homelessness_date_flags(year, lookup = homelessness_lookup) %>%
add_ppa_flag() %>%
write_temp_data(year, file_name = "ep_temp3", write_temp_to_disk) %>%
link_delayed_discharge_eps(year, dd_data) %>%
add_nsu_cohort(year, nsu_cohort) %>%
match_on_ltcs(year, ltc_data) %>%
correct_demographics(year) %>%
write_temp_data(year, file_name = "ep_temp4", write_temp_to_disk) %>%
create_cohort_lookups(year) %>%
join_cohort_lookups(year) %>%
join_sparra_hhg(year) %>%
Expand All @@ -142,11 +147,13 @@ create_episode_file <- function(
year,
slf_deaths_lookup
) %>%
write_temp_data(year, file_name = "ep_temp5", write_temp_to_disk) %>%
add_activity_after_death_flag(year,
deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>%
slfhelper::get_chi()
) %>%
load_ep_file_vars(year)
load_ep_file_vars(year) %>%
write_temp_data(year, file_name = "ep_temp6", write_temp_to_disk)

if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
episode_file <- episode_file %>%
Expand Down
15 changes: 11 additions & 4 deletions R/create_individual_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ create_individual_file <- function(
homelessness_lookup = create_homelessness_lookup(year),
write_to_disk = TRUE,
anon_chi_in = TRUE,
anon_chi_out = TRUE) {
anon_chi_out = TRUE,
write_temp_to_disk) {
cli::cli_alert_info("Create individual file function finished at {Sys.time()}")

if (anon_chi_in) {
Expand Down Expand Up @@ -76,30 +77,36 @@ create_individual_file <- function(
))) %>%
remove_blank_chi() %>%
add_cij_columns() %>%
add_all_columns(year = year)
add_all_columns(year = year) %>%
write_temp_data(year, file_name = "indiv_temp1", write_temp_to_disk)

if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
individual_file <- individual_file %>%
aggregate_by_chi(year = year, exclude_sc_var = TRUE)
aggregate_by_chi(year = year, exclude_sc_var = TRUE) %>%
write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk)
} else {
individual_file <- individual_file %>%
aggregate_ch_episodes() %>%
clean_up_ch(year) %>%
aggregate_by_chi(year = year, exclude_sc_var = FALSE)
aggregate_by_chi(year = year, exclude_sc_var = FALSE) %>%
write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk)
}

individual_file <- individual_file %>%
recode_gender() %>%
clean_individual_file(year) %>%
join_cohort_lookups(year) %>%
write_temp_data(year, file_name = "indiv_temp3", write_temp_to_disk) %>%
add_homelessness_flag(year, lookup = homelessness_lookup) %>%
match_on_ltcs(year) %>%
join_deaths_data(year) %>%
join_sparra_hhg(year) %>%
write_temp_data(year, file_name = "indiv_temp4", write_temp_to_disk) %>%
join_slf_lookup_vars() %>%
dplyr::mutate(year = year) %>%
add_hri_variables(chi_variable = "chi") %>%
add_keep_population_flag(year) %>%
write_temp_data(year, file_name = "indiv_temp5", write_temp_to_disk) %>%
join_sc_client(year, file_type = "individual")

if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
Expand Down
62 changes: 62 additions & 0 deletions R/write_temp_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#' Write temporary data to disk in parquet format for debugging
#'
#' @description Optionally writes `data` to `<file_name>.parquet` inside the
#' directory returned by `get_year_dir(year)`, then returns `data` unchanged so
#' the call can sit in the middle of a pipeline.
#'
#' @param data The data to be written
#' @param year Year, passed to `get_year_dir()` to locate the output directory
#' @param file_name The file name to write, without the `.parquet` extension
#' @param write_temp_to_disk Boolean; when `TRUE` the data is written to disk,
#' when `FALSE` nothing is written
#'
#' @return `data`, unchanged, for the next step of the pipeline.
#' @export
write_temp_data <-
  function(data, year, file_name, write_temp_to_disk) {
    if (write_temp_to_disk) {
      full_file_name <- stringr::str_glue("{file_name}.parquet")
      file_path <- file.path(
        get_year_dir(year),
        full_file_name
      )

      # cli interpolates the template itself. Pre-formatting the message with
      # str_glue() would make cli parse the finished text a second time and
      # fail on any literal braces in the file name.
      cli::cli_alert_info(
        "Writing {full_file_name} to disk started at {Sys.time()}"
      )

      write_file(data, path = file_path)
    }

    data
  }


#' Read temporary data back from disk for debugging
#'
#' @description Reads `<file_name>.parquet` from the directory returned by
#' `get_year_dir(year)` using `read_file()`.
#' @param year Year, passed to `get_year_dir()` to locate the directory
#' @param file_name The file name to read, without the `.parquet` extension
#'
#' @return Whatever `read_file()` returns for that path.
#' @export
read_temp_data <- function(year, file_name) {
  parquet_name <- stringr::str_glue("{file_name}.parquet")

  read_file(file.path(get_year_dir(year), parquet_name))
}

#' Clean temp data from disk
#'
#' @description Deletes temporary debugging files from the directory returned
#' by `get_year_dir(year)` to save storage.
#' @param year Year, passed to `get_year_dir()` to locate the directory
#' @param file_type Which temp files to delete: `"ep"` removes files starting
#' with `ep_temp`, `"ind"` (or `"indiv"`) removes files starting with
#' `indiv_temp`. Defaults to `"ep"`.
#'
#' @return A logical vector, returned invisibly, with one element per matching
#' file: `TRUE` where the file was removed. Zero-length when nothing matched.
#' @export
clean_temp_data <- function(year, file_type = c("ep", "ind")) {
  # Reduce the vector default to one validated choice. Only the first element
  # is considered, so the untouched default resolves to "ep".
  file_type <- match.arg(file_type[1L], choices = c("ep", "ind", "indiv"))

  # The individual-file temp files are named "indiv_temp*", so the "ind"
  # option has to map to the "indiv" prefix or nothing would ever match.
  prefix <- switch(file_type,
    ep = "ep",
    "indiv"
  )

  # full.names = TRUE is required: file.remove() resolves bare file names
  # against the working directory, not against the year directory.
  temp_files <- list.files(
    path = get_year_dir(year),
    pattern = paste0("^", prefix, "_temp"),
    full.names = TRUE
  )

  invisible(file.remove(temp_files))
}
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1415.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1415"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1516.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1516"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1617.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1617"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1718.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1718"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1819.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1819"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1920.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1920"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2021.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2021"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2122.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2122"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2223.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2223"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2324.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2324"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2425.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2425"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
1 change: 1 addition & 0 deletions _targets.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ tar_option_set(
years_to_run <- createslf::years_to_run()

list(
tar_rds(test_mode, TRUE),
tar_rds(write_to_disk, TRUE),
tar_rds(
file_path_ext_clean,
Expand Down
19 changes: 19 additions & 0 deletions man/clean_temp_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/create_episode_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/create_individual_file.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

19 changes: 19 additions & 0 deletions man/read_temp_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 23 additions & 0 deletions man/write_temp_data.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 0be1daa

Please sign in to comment.