diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 298d28e3c..48ab3c8e3 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -17,7 +17,7 @@ jobs: strategy: fail-fast: false matrix: - r_version: ['4.0.2', '4.1.2', 'release'] + r_version: ['4.1.2', 'release'] env: GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} diff --git a/NAMESPACE b/NAMESPACE index 6f1c88841..05ee51fb0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,12 +1,12 @@ # Generated by roxygen2: do not edit by hand export("%>%") -export(add_deceased_flag) export(add_homelessness_date_flags) export(add_homelessness_flag) export(add_hri_variables) export(add_nsu_cohort) export(check_year_format) +export(clean_temp_data) export(clean_up_free_text) export(compute_mid_year_age) export(convert_ca_to_lca) @@ -21,10 +21,12 @@ export(create_episode_file) export(create_homelessness_lookup) export(create_individual_file) export(create_service_use_cohorts) +export(end_date) export(end_fy) export(end_fy_quarter) export(end_next_fy_quarter) export(find_latest_file) +export(fy) export(fy_interval) export(get_boxi_extract_path) export(get_ch_costs_path) @@ -89,7 +91,6 @@ export(midpoint_fy) export(next_fy) export(phs_db_connection) export(previous_update) -export(process_combined_deaths_lookup) export(process_costs_ch_rmd) export(process_costs_dn_rmd) export(process_costs_gp_ooh_rmd) @@ -156,6 +157,7 @@ export(produce_episode_file_tests) export(produce_sc_sandpit_tests) export(produce_source_extract_tests) export(produce_test_comparison) +export(qtr) export(read_dev_slf_file) export(read_extract_acute) export(read_extract_ae) @@ -178,12 +180,14 @@ export(read_sc_all_alarms_telecare) export(read_sc_all_care_home) export(read_sc_all_home_care) export(read_sc_all_sds) +export(read_temp_data) export(rename_hscp) export(setup_keyring) export(start_fy) export(start_fy_quarter) export(start_next_fy_quarter) export(write_file) +export(write_temp_data) export(years_to_run) importFrom(data.table,.N) importFrom(data.table,.SD) diff --git a/NEWS.md b/NEWS.md index 5a07b8266..c4c9b9e5d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,12 @@ -# September 2024 Update - Unreleased +# December 2024 Update - released 10-Dec-24 +* 24/25 files have been updated, containing data up to September 2024. +* 17/18 - 23/24 files have been updated. +* Homelessness completeness flag is now available in 23/24 files. +* Substance misuse flag updated. +* Mid-2023 & Mid-2022 population estimates for Scotland have been updated. +* Mid-2022 Small Area Population Estimates for 2011 Data Zones have been updated. + +# September 2024 Update - released 13-Sep-24 * New 24/25 files created * New NSU cohort for 23/24 available * New SPARRA scores calculated from April 24/25 diff --git a/00_Sort_BI_Extracts.R b/Pre_processing_scripts/00_Sort_BI_Extracts.R similarity index 100% rename from 00_Sort_BI_Extracts.R rename to Pre_processing_scripts/00_Sort_BI_Extracts.R diff --git a/copy_to_hscdiip.R b/Pre_processing_scripts/copy_to_hscdiip.R similarity index 100% rename from copy_to_hscdiip.R rename to Pre_processing_scripts/copy_to_hscdiip.R diff --git a/extract_new_nsu_cohort/filter_nsu_duplicates.R b/Pre_processing_scripts/extract_new_nsu_cohort/filter_nsu_duplicates.R similarity index 100% rename from extract_new_nsu_cohort/filter_nsu_duplicates.R rename to Pre_processing_scripts/extract_new_nsu_cohort/filter_nsu_duplicates.R diff --git a/extract_new_nsu_cohort/get_service_use_cohort.R b/Pre_processing_scripts/extract_new_nsu_cohort/get_service_use_cohort.R similarity index 100% rename from extract_new_nsu_cohort/get_service_use_cohort.R rename to Pre_processing_scripts/extract_new_nsu_cohort/get_service_use_cohort.R diff --git a/Pre_processing_scripts/write_anon_chi_files.R b/Pre_processing_scripts/write_anon_chi_files.R new file mode 100644 index 000000000..1ff39d273 --- /dev/null +++ b/Pre_processing_scripts/write_anon_chi_files.R @@ -0,0 +1,70 @@ +################################################################################ +# Name of file - Write_anon_chi_files.R +# +# Original Authors - Jennifer Thom, Zihao Li +# Original Date - July 2024 +# Written/run on - R Posit +# Version of R - 4.1.2 +# +# Description: Run this script in stages to convert chi to anon chi and save files. +# By default this is set up to take the delayed discharges file +# convert the chi to anon_chi and save to disk. Important for +# ensuring we do not save chi anywhere on disk. +# +################################################################################ + +## Stage 1 - Setup environment +#------------------------------------------------------------------------------- + +# Set up directory +source_dir <- "/conf/hscdiip/SLF_Extracts/Delayed_Discharges" + +# Specify type of files e.g parquet, rds, csv +pattern <- ".parquet" +cat(stringr::str_glue("Looking in '{source_dir}' for parquet files.")) + +# List all files in the directory +parquet_files <- list.files(source_dir, pattern = ".parquet", full.names = TRUE) +print(stringr::str_glue("Found {length(parquet_files)} parquet files to process.")) + +# Create a function to read variable names and check if CHI is in the file +is_chi_in_file <- function(filename) { + data <- arrow::read_parquet(filename, nrow = 5) + return(grepl("chi", names(data)) %>% any()) +} + + +# Stage 2 - In each file, convert chi to anon_chi and save to disk +#------------------------------------------------------------------------------- + +# create a loop for converting to anon chi in all listed files +for (data_file in parquet_files) { + # specify new name and new file path + save_file_path <- file.path(source_dir, paste0("anon-", basename(data_file))) + chi_in_file <- is_chi_in_file(data_file) + + # If chi is in the file, convert to anon_chi + if (chi_in_file) { + read_file(data_file) %>% + slfhelper::get_anon_chi() %>% + write_file(save_file_path) + + cat("Replaced chi with anon chi:", data_file, "to", save_file_path, "\n") + } else { + read_file(data_file) %>% + write_file(save_file_path) + cat("renamed file with anon chi:", data_file, "to", save_file_path, "\n") + } +} + + +# Stage 3 - Remove files with CHI +#------------------------------------------------------------------------------- + +# Create a loop for removing the old files with CHI +for (data_file in parquet_files) { + file.remove(data_file) + cat("Removed chi files:", data_file, "in", source_dir, "\n") +} + +# End of Script # diff --git a/R/00-update_refs.R b/R/00-update_refs.R index 33022edf6..7ff216c92 100644 --- a/R/00-update_refs.R +++ b/R/00-update_refs.R @@ -1,3 +1,109 @@ +################################################################################ +# # Name of file - 00-update_refs.R +# Original Authors - Jennifer Thom, Zihao Li +# Original Date - August 2021 +# Update - Oct 2024 +# +# Written/run on - RStudio Server +# Version of R - 4.1.2 +# +# Description - Use this script to update references needed for the SLF update. +# +# Manual changes needed to the following Essential Functions: +# # End_date +# # Check_year_valid +# # Delayed_discharges_period +# # Latest_update +# +################################################################################ + +#' End date +#' +#' @return Get the end date of the latest update period +#' @export +#' +end_date <- function() { + ## UPDATE ## + # Specify update by indicating end of quarter date + # Q1 June = 30062024 + # Q2 September = 30092024 + # Q3 December = 31122024 + # Q4 March = 31032024 + lubridate::dmy(31122024) +} + + +#' Check data exists for a year +#' +#' @description Check there is data available for a given year +#' as some extracts are year dependent. E.g Homelessness +#' is only available from 2016/17 onwards. +#' +#' @param year Financial year +#' @param type name of extract +#' +#' @return A logical TRUE/FALSE +check_year_valid <- function( + year, + type = c( + "acute", + "ae", + "at", + "ch", + "client", + "cmh", + "cost_dna", + "dd", + "deaths", + "dn", + "gpooh", + "hc", + "homelessness", + "hhg", + "maternity", + "mh", + "nsu", + "outpatients", + "pis", + "sds", + "sparra" + )) { + if (year <= "1415" && type %in% c("dn", "sparra")) { + return(FALSE) + } else if (year <= "1516" && type %in% c("cmh", "homelessness", "dd")) { + return(FALSE) + } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at", "client", "cost_dna")) { + return(FALSE) + } else if (year <= "1718" && type %in% "hhg") { + return(FALSE) + } else if (year >= "2122" && type %in% c("cmh", "dn")) { + return(FALSE) + } else if (year >= "2324" && type %in% "hhg") { + return(FALSE) + } else if (year >= "2425" && type %in% c("nsu", "sds")) { + return(FALSE) + } else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at", "sparra")) { + return(FALSE) + } + + return(TRUE) +} + + +#' Delayed Discharge period +#' +#' @description Get the period for Delayed Discharge +#' +#' @return The period for the Delayed Discharge file +#' as MMMYY_MMMYY +#' @export +#' +#' @family initialisation +get_dd_period <- function() { + "Jul16_Sep24" +} + + #' Latest update #' #' @description Get the date of the latest update, e.g 'Jun_2022' @@ -7,9 +113,10 @@ #' #' @family initialisation latest_update <- function() { - "Sep_2024" + "Dec_2024" } + #' Previous update #' #' @param months_ago Number of months since the previous update @@ -51,19 +158,33 @@ previous_update <- function(months_ago = 3L, override = NULL) { return(previous_update) } -#' Delayed Discharge period + +#' Extract latest FY from end_date #' -#' @description Get the period for Delayed Discharge +#' @return fy in format "2024" +#' @export #' -#' @return The period for the Delayed Discharge file -#' as MMMYY_MMMYY +fy <- function() { + # Latest FY + fy <- phsmethods::extract_fin_year(end_date()) %>% substr(1, 4) +} + + +#' Extract latest quarter from end_date +#' +#' @return qtr in format "Q1" #' @export #' -#' @family initialisation -get_dd_period <- function() { - "Jul16_Jun24" +qtr <- function() { + # Latest Quarter + qtr <- lubridate::quarter(end_date(), fiscal_start = 4) + + qtr <- stringr::str_glue("Q{qtr}") + + return(qtr) } + #' The year list for slf to update #' #' @description Get the vector of years to update slf diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index 5e800c80b..abf53fdd7 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -13,52 +13,48 @@ add_activity_after_death_flag <- function( year, deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi()) { - cli::cli_alert_info("Add activity after death flag function started at {Sys.time()}") - # to skip warnings no visible binding for global variable ‘.’ . <- NULL + data <- data %>% + dplyr::mutate(ep_row_id_death = dplyr::row_number()) + death_joined <- data %>% - dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$death_date, .data$deceased) %>% - dplyr::filter(!is.na(.data$chi) | .data$chi != "") %>% - dplyr::left_join( - deaths_data, + dplyr::select( + "year", + "chi", + "recid", + "record_keydate1", + "record_keydate2", + "death_date", + "deceased", + "ep_row_id_death" + ) %>% + dplyr::filter(!is.na(.data$chi) & .data$chi != "") %>% + dplyr::left_join(deaths_data, by = "chi", - suffix = c("", "_boxi") + suffix = c("", "_refined") ) %>% dplyr::filter(.data$deceased == TRUE) %>% dplyr::distinct() - - # Check and print error message for records which already have a death_date in the episode file, but this doesn't match the BOXI death date - check_death_date_match <- death_joined %>% - dplyr::filter(.data$death_date != .data$death_date_boxi) - - if (nrow(check_death_date_match) != 0) { - warning("There were records in the episode file which already have a death_date, but does not match the BOXI NRS death date.") - } - - - # Check and print error message for records which have a record_keydate1 after their BOXI death date - check_keydate1_death_date <- death_joined %>% - dplyr::filter(.data$record_keydate1 > .data$death_date_boxi) - - if (nrow(check_death_date_match) != 0) { - warning("There were records in the episode file which have a record_keydate1 after the BOXI NRS death date.") - } - - flag_data <- death_joined %>% dplyr::mutate( - flag_keydate1 = dplyr::if_else(.data$record_keydate1 > .data$death_date_boxi, 1, 0), - flag_keydate2 = dplyr::if_else(.data$record_keydate2 > .data$death_date_boxi, 1, 0), + flag_keydate1 = dplyr::if_else(.data$record_keydate1 > .data$death_date_refined, 1, 0), + flag_keydate2 = dplyr::if_else(.data$record_keydate2 > .data$death_date_refined, 1, 0), # Next flag records with 'ongoing' activity after date of death (available from BOXI) if keydate2 is missing and the death date occurs in # in the current or a previous financial year. - flag_keydate2_missing = dplyr::if_else(((is.na(.data$record_keydate2) | .data$record_keydate2 == "") & (.data$death_date_boxi <= paste0("20", substr(.data$year, 3, 4), "-03-31"))), 1, 0), + flag_keydate2_missing = dplyr::if_else(((is.na(.data$record_keydate2) | + .data$record_keydate2 == "") & + (.data$death_date_refined <= paste0("20", substr(.data$year, 3, 4), "-03-31")) + ), 1, 0), # Also flag records without a death_date in the episode file, but the BOXI death date occurs in the current or a previous financial year. - flag_deathdate_missing = dplyr::if_else(((is.na(.data$death_date) | .data$death_date == "") & (.data$death_date_boxi <= paste0("20", substr(.data$year, 3, 4), "-03-31"))), 1, 0) + flag_deathdate_missing = dplyr::if_else(((is.na(.data$death_date) | + .data$death_date == "") & + (.data$death_date_refined <= paste0("20", substr(.data$year, 3, 4), "-03-31")) + ), 1, 0) ) %>% # These should be flagged by one of the two lines of code above, but in these cases, we will also fill in the blank death date if appropriate @@ -69,116 +65,50 @@ add_activity_after_death_flag <- function( ~ any(grepl("^1$", c(...)), na.rm = TRUE ) * 1 - )) - - - # Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried - # over from previous financial years - flag_data <- flag_data %>% + )) %>% + # Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried + # over from previous financial years dplyr::filter(.data$activity_after_death == 1) %>% # Remove temporary flag variables used to create activity after death flag and fill in missing death_date - dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$activity_after_death, .data$death_date_boxi) %>% + dplyr::select( + "year", + "chi", + "recid", + "record_keydate1", + "record_keydate2", + "activity_after_death", + "death_date_refined", + "ep_row_id_death" + ) %>% dplyr::distinct() # Match activity after death flag back to episode file final_data <- data %>% dplyr::left_join( flag_data, - # TODO: this join_by is not 100% accurate. Consider use ep_file_row_id to join - by = c("year", "chi", "record_keydate1", "record_keydate2"), + # this join_by is now 100% accurate. + by = c( + "year", + "chi", + "recid", + "record_keydate1", + "record_keydate2", + "ep_row_id_death" + ), na_matches = "never" ) %>% - dplyr::mutate(death_date = lubridate::as_date(ifelse(is.na(death_date) & !(is.na(death_date_boxi)), - death_date_boxi, death_date + dplyr::mutate(death_date = lubridate::as_date(ifelse( + is.na(.data$death_date) & !(is.na(.data$death_date_refined)), + .data$death_date_refined, .data$death_date ))) %>% - dplyr::select(-death_date_boxi) %>% - dplyr::distinct() - + dplyr::select(-"death_date_refined", -"ep_row_id_death") %>% + dplyr::distinct() %>% + dplyr::mutate(dplyr::if_else(is.na(.data$activity_after_death), + 0, + .data$activity_after_death + )) + cli::cli_alert_info("Add activity after death flag function finished at {Sys.time()}") return(final_data) } - - -#' Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts -#' -#' @description The BOXI NRS deaths extract lookup should be created after the extract files for all years have been processed, -# but before an episode file has been produced. Therefore, all BOXI NRS years should be run before running episode files. -#' -#' @param ... additional arguments passed to [get_slf_deaths_lookup_path()] -#' @param update the update month (defaults to use [latest_update()]) -#' -#' @param write_to_disk (optional) Should the data be written to disk default is -#' `TRUE` i.e. write the data to disk. -#' -#' @return the final data as a [tibble][tibble::tibble-package]. -#' @export -#' -#' -#' -# Read data------------------------------------------------ - -process_combined_deaths_lookup <- function(update = latest_update(), - write_to_disk = TRUE, ...) { - dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths" - file_names <- list.files(dir_folder, - pattern = "^anon-slf_deaths_lookup_.*parquet", - full.names = TRUE - ) - - # read all year specific deaths lookups and bind them together - all_boxi_deaths <- lapply(file_names, arrow::read_parquet) %>% - data.table::rbindlist() %>% - # convert to chi for processing - slfhelper::get_chi() %>% - # Remove rows with missing or blank CHI number - could also use na.omit? - # na.omit(all_boxi_deaths) - dplyr::filter(!is.na(.data$chi) | .data$chi != "") - - # Check all CHI numbers are valid - chi_check <- all_boxi_deaths %>% - dplyr::pull(.data$chi) %>% - phsmethods::chi_check() - - if (!all(chi_check %in% c("Valid CHI", "Missing (Blank)", "Missing (NA)"))) { - # There are some Missing (NA) values in the extracts, but I have excluded them above as they cannot be matched to episode file - stop("There were bad CHI numbers in the BOXI NRS file") - } - - # Check and print error message for chi numbers with more than one death date - duplicates <- all_boxi_deaths %>% - janitor::get_dupes(.data$chi) - - if (nrow(duplicates) != 0) { - # There are some Missing (NA) values in the extracts, but I have excluded them above as they cannot be matched to episode file - warning("There were duplicate death dates in the BOXI NRS file.") - } - - - # We decided to include duplicates as unable to determine which is correct date (unless IT can tell us, however, they don't seem to know - # the process well enough), and overall impact will be negligible - # Get anon_chi and use this to match onto episode file later - all_boxi_deaths <- all_boxi_deaths %>% - slfhelper::get_anon_chi() - - # Save out duplicates for further investigation if needed (as anon_chi) - if (!missing(duplicates)) { - write_file( - duplicates, - fs::path(get_slf_dir(), "Deaths", - file_name = stringr::str_glue("slf_deaths_duplicates_{update}.parquet") - ) - ) - } - - # Maybe save as its own function - # Write the all BOXI NRS deaths lookup file to disk, so this can be used to populate activity after death flag in each episode file - if (write_to_disk) { - write_file( - all_boxi_deaths, - get_combined_slf_deaths_lookup_path() - ) - } - - return(all_boxi_deaths) -} diff --git a/R/add_deceased_flag.R b/R/add_deceased_flag.R deleted file mode 100644 index f3be216cf..000000000 --- a/R/add_deceased_flag.R +++ /dev/null @@ -1,40 +0,0 @@ -#' Create the SLF Deaths lookup -#' -#' @description Currently this just uses the NRS death dates 'as is', with no -#' corrections or modifications, it is expected that this will be expanded to -#' use the CHI deaths extract from IT as well as taking into account data in -#' the episode file to assess the validity of a death date. -#' -#' @param year The year to process, in FY format. -#' @param nrs_deaths_data NRS deaths data. -#' @param chi_deaths_data IT CHI deaths data. -#' @param write_to_disk (optional) Should the data be written to disk default is -#' `TRUE` i.e. write the data to disk. -#' -#' @return a [tibble][tibble::tibble-package] containing the episode file -#' @export -add_deceased_flag <- function( - year, - refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(), - write_to_disk = TRUE) { - # create slf deaths lookup - - dplyr::mutate( - death_date = dplyr::if_else(is.na(.data$record_keydate1), - .data$death_date_chi, .data$record_keydate1 - ), - deceased = TRUE, - .keep = "unused" - ) %>% - # save anon chi on disk - slfhelper::get_anon_chi() - - if (write_to_disk) { - write_file( - slf_deaths_lookup, - get_slf_deaths_lookup_path(year, check_mode = "write") - ) - } - - return(slf_deaths_lookup) -} diff --git a/R/add_hri_variables.R b/R/add_hri_variables.R index 3765e1d4d..fd37fba19 100644 --- a/R/add_hri_variables.R +++ b/R/add_hri_variables.R @@ -46,6 +46,8 @@ flag_non_scottish_residents <- function( ) %>% dplyr::select(-"dummy_postcode", -"eng_prac") + cli::cli_alert_info("Add HRI variables function finished at {Sys.time()}") + return(return_data) } diff --git a/R/add_keep_population_flag.R b/R/add_keep_population_flag.R index 6f2470f53..2f665c954 100644 --- a/R/add_keep_population_flag.R +++ b/R/add_keep_population_flag.R @@ -138,6 +138,10 @@ add_keep_population_flag <- function(individual_file, year) { ) ) } + + cli::cli_alert_info("Add keep population function finished at {Sys.time()}") + + return(individual_file) } diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index bf6216e57..b573c14ac 100644 --- a/R/add_nsu_cohort.R +++ b/R/add_nsu_cohort.R @@ -13,8 +13,6 @@ add_nsu_cohort <- function( data, year, nsu_cohort = read_file(get_nsu_path(year)) %>% slfhelper::get_chi()) { - cli::cli_alert_info("Add NSU cohort function started at {Sys.time()}") - year_param <- year if (!check_year_valid(year, "nsu")) { @@ -118,5 +116,7 @@ add_nsu_cohort <- function( ) %>% dplyr::select(-dplyr::contains("_nsu"), -"has_chi") + cli::cli_alert_info("Add NSU cohort function finished at {Sys.time()}") + return(return_df) } diff --git a/R/add_ppa_flag.R b/R/add_ppa_flag.R index 1d5f9739d..3d3c7eeaf 100644 --- a/R/add_ppa_flag.R +++ b/R/add_ppa_flag.R @@ -8,8 +8,6 @@ #' @return A data frame to use as a lookup of PPAs #' @family episode_file add_ppa_flag <- function(data) { - cli::cli_alert_info("Add PPA flag function started at {Sys.time()}") - check_variables_exist( data, variables = c( @@ -227,5 +225,7 @@ add_ppa_flag <- function(data) { .data$cij_ppa )) + cli::cli_alert_info("Add PPA flag function finished at {Sys.time()}") + return(ppa_cij_data) } diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R index ff5cbaad1..010ff5bad 100644 --- a/R/aggregate_by_chi.R +++ b/R/aggregate_by_chi.R @@ -10,8 +10,6 @@ #' #' @inheritParams create_individual_file aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) { - cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") - # recommended by `data.table` team to tackle the issue # "no visible binding for global variable" gender <- @@ -199,6 +197,8 @@ aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) { ) individual_file <- individual_file[, year := year] + cli::cli_alert_info("Aggregate by CHI function finished at {Sys.time()}") + # convert back to tibble return(dplyr::as_tibble(individual_file)) } @@ -246,8 +246,6 @@ vars_contain <- function(data, vars, ignore_case = FALSE) { #' #' @inheritParams create_individual_file aggregate_ch_episodes <- function(episode_file) { - cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") - # recommended by `data.table` team to tackle the issue # "no visible binding for global variable" ch_no_cost <- @@ -274,5 +272,7 @@ aggregate_ch_episodes <- function(episode_file) { # Convert back to tibble if needed episode_file <- tibble::as_tibble(episode_file) + cli::cli_alert_info("Aggregate ch episodes function finished at {Sys.time()}") + return(episode_file) } diff --git a/R/check_year_valid.R b/R/check_year_valid.R deleted file mode 100644 index da257ff4c..000000000 --- a/R/check_year_valid.R +++ /dev/null @@ -1,55 +0,0 @@ -#' Check data exists for a year -#' -#' @description Check there is data available for a given year -#' as some extracts are year dependent. E.g Homelessness -#' is only available from 2016/17 onwards. -#' -#' @param year Financial year -#' @param type name of extract -#' -#' @return A logical TRUE/FALSE -check_year_valid <- function( - year, - type = c( - "acute", - "ae", - "at", - "ch", - "client", - "cmh", - "cost_dna", - "dd", - "deaths", - "dn", - "gpooh", - "hc", - "homelessness", - "hhg", - "maternity", - "mh", - "nsu", - "outpatients", - "pis", - "sds", - "sparra" - )) { - if (year <= "1415" && type %in% c("dn", "sparra")) { - return(FALSE) - } else if (year <= "1516" && type %in% c("cmh", "homelessness", "dd")) { - return(FALSE) - } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at", "client", "cost_dna")) { - return(FALSE) - } else if (year <= "1718" && type %in% "hhg") { - return(FALSE) - } else if (year >= "2122" && type %in% c("cmh", "dn")) { - return(FALSE) - } else if (year >= "2324" && type %in% c("nsu", "hhg")) { - return(FALSE) - } else if (year >= "2425" && type %in% "sparra") { - return(FALSE) - } else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at")) { - return(FALSE) - } - - return(TRUE) -} diff --git a/R/correct_demographics.R b/R/correct_demographics.R index d221c25ab..d7322b164 100644 --- a/R/correct_demographics.R +++ b/R/correct_demographics.R @@ -7,8 +7,6 @@ #' #' @return episode files with updated date of birth and ages correct_demographics <- function(data, year) { - cli::cli_alert_info("Correct demographics function started at {Sys.time()}") - # keep episodes with missing chi data_no_chi <- data %>% dplyr::filter(is_missing(.data$chi)) @@ -102,5 +100,7 @@ correct_demographics <- function(data, year) { data_chi ) + cli::cli_alert_info("Correct demographics function finished at {Sys.time()}") + return(data) } diff --git a/R/cost_uplift.R b/R/cost_uplift.R index abbbd9b5a..ea3df4abe 100644 --- a/R/cost_uplift.R +++ b/R/cost_uplift.R @@ -5,8 +5,6 @@ #' @return episode data with uplifted costs #' @family episode_file apply_cost_uplift <- function(data) { - cli::cli_alert_info("Apply cost uplift function started at {Sys.time()}") - data <- data %>% # attach a uplift scale as the last column lookup_uplift() %>% @@ -29,6 +27,8 @@ apply_cost_uplift <- function(data) { # remove the last uplift column dplyr::select(-"uplift") + cli::cli_alert_info("Apply cost uplift function finished at {Sys.time()}") + return(data) } diff --git a/R/create_episode_file.R b/R/create_episode_file.R index ecb6fc126..6c95ea5c2 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -3,11 +3,12 @@ #' @param processed_data_list containing data from processed extracts. #' @param year The year to process, in FY format. #' @param homelessness_lookup the lookup file for homelessness -#' @param sc_client scoial care lookup file +#' @param sc_client social care lookup file #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. #' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output #' (instead of chi) +#' @param write_temp_to_disk write intermediate data for investigation or debug #' @inheritParams add_nsu_cohort #' @inheritParams fill_geographies #' @inheritParams join_cohort_lookups @@ -32,11 +33,19 @@ create_episode_file <- function( slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi(), sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(), write_to_disk = TRUE, - anon_chi_out = TRUE) { + anon_chi_out = TRUE, + write_temp_to_disk = FALSE) { + cli::cli_alert_info("Create episode file function started at {Sys.time()}") + processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble())) episode_file <- dplyr::bind_rows(processed_data_list) %>% slfhelper::get_chi() %>% + write_temp_data(year, file_name = "ep_temp1", write_temp_to_disk) %>% + add_homelessness_flag(year, lookup = homelessness_lookup) %>% + add_homelessness_date_flags(year, lookup = homelessness_lookup) %>% + link_delayed_discharge_eps(year, dd_data) %>% + write_temp_data(year, file_name = "ep_temp1-2", write_temp_to_disk) %>% create_cost_inc_dna() %>% apply_cost_uplift() %>% store_ep_file_vars( @@ -120,15 +129,15 @@ create_episode_file <- function( # PC8 format may still be used. Ensure here that all datasets are in PC7 format. postcode = phsmethods::format_postcode(.data$postcode, "pc7") ) %>% + write_temp_data(year, file_name = "ep_temp2", write_temp_to_disk) %>% correct_cij_vars() %>% fill_missing_cij_markers() %>% - add_homelessness_flag(year, lookup = homelessness_lookup) %>% - add_homelessness_date_flags(year, lookup = homelessness_lookup) %>% add_ppa_flag() %>% - link_delayed_discharge_eps(year, dd_data) %>% + write_temp_data(year, file_name = "ep_temp3", write_temp_to_disk) %>% add_nsu_cohort(year, nsu_cohort) %>% match_on_ltcs(year, ltc_data) %>% correct_demographics(year) %>% + write_temp_data(year, file_name = "ep_temp4", write_temp_to_disk) %>% create_cohort_lookups(year) %>% join_cohort_lookups(year) %>% join_sparra_hhg(year) %>% @@ -140,11 +149,13 @@ create_episode_file <- function( year, slf_deaths_lookup ) %>% + write_temp_data(year, file_name = "ep_temp5", write_temp_to_disk) %>% add_activity_after_death_flag(year, deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi() ) %>% - load_ep_file_vars(year) + load_ep_file_vars(year) %>% + write_temp_data(year, file_name = "ep_temp6", write_temp_to_disk) if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { episode_file <- episode_file %>% @@ -265,8 +276,6 @@ create_episode_file <- function( #' #' @return `data` with only the `vars_to_keep` kept store_ep_file_vars <- function(data, year, vars_to_keep) { - cli::cli_alert_info("Store episode file variables function started at {Sys.time()}") - tempfile_path <- get_file_path( directory = get_year_dir(year), file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"), @@ -289,6 +298,8 @@ store_ep_file_vars <- function(data, year, vars_to_keep) { path = tempfile_path ) + cli::cli_alert_info("Store episode file variables function finished at {Sys.time()}") + return( dplyr::select( data, @@ -304,8 +315,6 @@ store_ep_file_vars <- function(data, year, vars_to_keep) { #' #' @return The full SLF data. load_ep_file_vars <- function(data, year) { - cli::cli_alert_info("Load episode file variable function started at {Sys.time()}") - tempfile_path <- get_file_path( directory = get_year_dir(year), file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"), @@ -324,6 +333,8 @@ load_ep_file_vars <- function(data, year) { fs::file_delete(tempfile_path) + cli::cli_alert_info("Load episode file variable function finished at {Sys.time()}") + return(full_data) } @@ -333,8 +344,6 @@ load_ep_file_vars <- function(data, year) { #' #' @return A data frame with CIJ markers filled in for those missing. fill_missing_cij_markers <- function(data) { - cli::cli_alert_info("Fill missing cij markers function started at {Sys.time()}") - fixable_data <- data %>% dplyr::filter( .data[["recid"]] %in% c("01B", "04B", "GLS", "02B", "DD") & !is.na(.data[["chi"]]) @@ -380,6 +389,8 @@ fill_missing_cij_markers <- function(data) { return_data <- dplyr::bind_rows(non_fixable_data, fixed_data) + cli::cli_alert_info("Fill missing cij markers function finished at {Sys.time()}") + return(return_data) } @@ -389,14 +400,12 @@ fill_missing_cij_markers <- function(data) { #' #' @return The data with CIJ variables corrected. correct_cij_vars <- function(data) { - cli::cli_alert_info("Correct cij variables function started at {Sys.time()}") - check_variables_exist( data, c("chi", "recid", "cij_admtype", "cij_pattype_code") ) - data %>% + data <- data %>% # Change some values of cij_pattype_code based on cij_admtype dplyr::mutate( cij_admtype = dplyr::if_else( @@ -424,6 +433,10 @@ correct_cij_vars <- function(data) { 9L ~ "Other" ) ) + + cli::cli_alert_info("Correct cij variables function finished at {Sys.time()}") + + return(data) } #' Create cost total net inc DNA @@ -432,13 +445,11 @@ correct_cij_vars <- function(data) { #' #' @return The data with cost including dna. create_cost_inc_dna <- function(data) { - cli::cli_alert_info("Create cost inc dna function started at {Sys.time()}") - check_variables_exist(data, c("cost_total_net", "attendance_status")) # Create cost including DNAs and modify costs # not including DNAs using cattend - data %>% + data <- data %>% dplyr::mutate( cost_total_net_inc_dnas = .data$cost_total_net, # In the Cost_Total_Net column set the cost for @@ -449,6 +460,10 @@ create_cost_inc_dna <- function(data) { .data$cost_total_net ) ) + + cli::cli_alert_info("Create cost inc dna function finished at {Sys.time()}") + + return(data) } #' Create the cohort lookups @@ -458,8 +473,6 @@ create_cost_inc_dna <- function(data) { #' #' @return The data unchanged (the cohorts are written to disk) create_cohort_lookups <- function(data, year, update = latest_update()) { - cli::cli_alert_info("Create cohort lookups function started at {Sys.time()}") - create_demographic_cohorts( data, year, @@ -474,6 +487,7 @@ create_cohort_lookups <- function(data, year, update = latest_update()) { write_to_disk = TRUE ) + cli::cli_alert_info("Create cohort lookups function finished at {Sys.time()}") return(data) } @@ -499,8 +513,6 @@ join_cohort_lookups <- function( col_select = c("anon_chi", "service_use_cohort") ) %>% slfhelper::get_chi()) { - cli::cli_alert_info("Join cohort lookups function started at {Sys.time()}") - join_cohort_lookups <- data %>% dplyr::left_join( demographic_cohort, @@ -511,6 +523,8 @@ join_cohort_lookups <- function( by = "chi" ) + cli::cli_alert_info("Join cohort lookups function finished at {Sys.time()}") + return(join_cohort_lookups) } @@ -527,8 +541,6 @@ join_sc_client <- function(data, year, sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(), file_type = c("episode", "individual")) { - cli::cli_alert_info("Join social care client function started at {Sys.time()}") - if (!check_year_valid(year, type = "client")) { data_file <- data return(data_file) @@ -551,5 +563,7 @@ join_sc_client <- function(data, ) } + cli::cli_alert_info("Join social care client function finished at {Sys.time()}") + return(data_file) } diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 273761efc..8fd930fb1 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -6,6 +6,7 @@ #' @param homelessness_lookup the lookup file for homelessness #' @param anon_chi_in (Default:TRUE) Is `anon_chi` used in the input #' (instead of chi). +#' @param write_temp_to_disk write intermediate data for investigation or debug #' @inheritParams create_episode_file #' #' @return The processed individual file @@ -17,7 +18,10 @@ create_individual_file <- function( homelessness_lookup = create_homelessness_lookup(year), write_to_disk = TRUE, anon_chi_in = TRUE, - anon_chi_out = TRUE) { + anon_chi_out = TRUE, + write_temp_to_disk) { + cli::cli_alert_info("Create individual file function started at {Sys.time()}") + if (anon_chi_in) { episode_file <- slfhelper::get_chi( episode_file, @@ -74,30 +78,36 @@ create_individual_file <- function( ))) %>% remove_blank_chi() %>% add_cij_columns() %>% - add_all_columns(year = year) + add_all_columns(year = year) %>% + write_temp_data(year, file_name = "indiv_temp1", write_temp_to_disk) if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { individual_file <- individual_file %>% - aggregate_by_chi(year = year, exclude_sc_var = TRUE) + aggregate_by_chi(year = year, exclude_sc_var = TRUE) %>% + write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk) } else { individual_file <- individual_file %>% aggregate_ch_episodes() %>% clean_up_ch(year) %>% - aggregate_by_chi(year = year, exclude_sc_var = FALSE) + aggregate_by_chi(year = year, exclude_sc_var = FALSE) %>% + write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk) } individual_file <- individual_file %>% recode_gender() %>% clean_individual_file(year) %>% join_cohort_lookups(year) %>% + write_temp_data(year, file_name = "indiv_temp3", write_temp_to_disk) %>% add_homelessness_flag(year, lookup = homelessness_lookup) %>% match_on_ltcs(year) %>% join_deaths_data(year) %>% join_sparra_hhg(year) %>% + write_temp_data(year, file_name = "indiv_temp4", write_temp_to_disk) %>% join_slf_lookup_vars() %>% dplyr::mutate(year = year) %>% add_hri_variables(chi_variable = "chi") %>% add_keep_population_flag(year) %>% + write_temp_data(year, file_name = "indiv_temp5", write_temp_to_disk) %>% join_sc_client(year, file_type = "individual") if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { @@ -177,11 +187,13 @@ create_individual_file <- function( #' @family individual_file #' @inheritParams create_individual_file remove_blank_chi <- function(episode_file) { - cli::cli_alert_info("Remove blank CHI function started at {Sys.time()}") - - episode_file %>% + episode_file <- episode_file %>% dplyr::mutate(chi = dplyr::na_if(.data$chi, "")) %>% dplyr::filter(!is.na(.data$chi)) + + cli::cli_alert_info("Remove blank CHI function finished at {Sys.time()}") + + return(episode_file) } @@ -191,9 +203,7 @@ remove_blank_chi <- function(episode_file) { #' @family individual_file #' @inheritParams create_individual_file add_cij_columns <- function(episode_file) { - cli::cli_alert_info("Add cij columns function started at {Sys.time()}") - - episode_file %>% + episode_file <- episode_file %>% dplyr::mutate( cij_non_el = dplyr::if_else( .data$cij_pattype_code == 0L, @@ -221,6 +231,10 @@ add_cij_columns <- function(episode_file) { NA_integer_ ) ) + + cli::cli_alert_info("Add cij columns function finished at {Sys.time()}") + + return(episode_file) } #' Add all columns @@ -230,8 +244,6 @@ add_cij_columns <- function(episode_file) { #' @family individual_file #' @inheritParams create_individual_file add_all_columns <- function(episode_file, year) { - cli::cli_alert_info("Add all columns function started at {Sys.time()}") - episode_file <- episode_file %>% add_acute_columns("Acute", (.data$smrtype == "Acute-DC" | .data$smrtype == "Acute-IP") & .data$cij_pattype != "Maternity") %>% add_mat_columns("Mat", .data$recid == "02B" | .data$cij_pattype == "Maternity") %>% @@ -277,6 +289,10 @@ add_all_columns <- function(episode_file, year) { .data$OP_cost_dnas ) ) + + cli::cli_alert_info("Add all columns function finished at {Sys.time()}") + + return(episode_file) } #' Add Acute columns @@ -286,12 +302,14 @@ add_all_columns <- function(episode_file, year) { #' @param condition Condition to create new columns based on #' @family individual_file add_acute_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add acute columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% add_ipdc_cols(prefix, condition) + + cli::cli_alert_info("Add acute columns function finished at {Sys.time()}") + + return(episode_file) } #' Add Mat columns @@ -299,12 +317,14 @@ add_acute_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_mat_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add maternity columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% add_ipdc_cols(prefix, condition, elective = FALSE) + + cli::cli_alert_info("Add maternity columns function finished at {Sys.time()}") + + return(episode_file) } #' Add MH columns @@ -312,12 +332,14 @@ add_mat_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_mh_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add mental health columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% add_ipdc_cols(prefix, condition, ipdc_d = FALSE) + + cli::cli_alert_info("Add mental health columns function finished at {Sys.time()}") + + return(episode_file) } #' Add GLS columns @@ -325,12 +347,14 @@ add_mh_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_gls_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add geriatric long stay columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% add_ipdc_cols(prefix, condition, ipdc_d = FALSE) + + cli::cli_alert_info("Add geriatric long stay columns function finished at {Sys.time()}") + + return(episode_file) } #' Add OP columns @@ -338,8 +362,6 @@ add_gls_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_op_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add outpatient columns function started at {Sys.time()}") - condition <- substitute(condition) episode_file <- episode_file %>% add_standard_cols(prefix, condition) @@ -355,6 +377,9 @@ add_op_columns <- function(episode_file, prefix, condition) { "{prefix}_newcons_dnas" := dplyr::if_else(eval(condition_5_8), 1L, NA_integer_), "{prefix}_cost_dnas" := dplyr::if_else(eval(condition_5_8), .data$cost_total_net_inc_dnas, NA_real_) ) + + cli::cli_alert_info("Add outpatient columns function finished at {Sys.time()}") + return(episode_file) } @@ -363,12 +388,14 @@ add_op_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_ae_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add A&E columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% dplyr::mutate("{prefix}_attendances" := dplyr::if_else(eval(condition), 1L, NA_integer_)) + + cli::cli_alert_info("Add A&E columns function finished at {Sys.time()}") + + return(episode_file) } #' Add PIS columns @@ -376,12 +403,13 @@ add_ae_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_pis_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add prescribing columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% dplyr::mutate("{prefix}_paid_items" := dplyr::if_else(eval(condition), .data$no_paid_items, NA_integer_)) + cli::cli_alert_info("Add prescribing columns function finished at {Sys.time()}") + + return(episode_file) } #' Add OoH columns @@ -389,8 +417,6 @@ add_pis_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_ooh_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add out of hours columns function started at {Sys.time()}") - condition <- substitute(condition) episode_file <- episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% @@ -418,6 +444,8 @@ add_ooh_columns <- function(episode_file, prefix, condition) { ) ) + cli::cli_alert_info("Add out of hours columns function finished at {Sys.time()}") + return(episode_file) } @@ -426,11 +454,9 @@ add_ooh_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_dn_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add district nursing columns function started at {Sys.time()}") - condition <- substitute(condition) if ("total_no_dn_contacts" %in% names(episode_file)) { - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% dplyr::mutate( "{prefix}_contacts" := dplyr::if_else( @@ -440,10 +466,14 @@ add_dn_columns <- function(episode_file, prefix, condition) { ) ) } else { - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% dplyr::mutate("{prefix}_contacts" := NA_integer_) } + + cli::cli_alert_info("Add district nursing columns function finished at {Sys.time()}") + + return(episode_file) } #' Add CMH columns @@ -451,12 +481,14 @@ add_dn_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_cmh_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add communicty mental health columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate("{prefix}_contacts" := dplyr::if_else(eval(condition), 1L, NA_integer_)) + + cli::cli_alert_info("Add communicty mental health columns function finished at {Sys.time()}") + + return(episode_file) } #' Add DD columns @@ -464,8 +496,6 @@ add_cmh_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_dd_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add delayed discharges columns function started at {Sys.time()}") - condition <- substitute(condition) condition_delay <- substitute(condition & primary_delay_reason != "9") episode_file <- episode_file %>% @@ -479,6 +509,9 @@ add_dd_columns <- function(episode_file, prefix, condition) { "{prefix}_Code9_episodes" := dplyr::if_else(eval(condition_delay_9), 1L, NA_integer_), "{prefix}_Code9_beddays" := dplyr::if_else(eval(condition_delay_9), .data$yearstay, NA_real_) ) + + cli::cli_alert_info("Add delayed discharges columns function finished at {Sys.time()}") + return(episode_file) } @@ -487,12 +520,14 @@ add_dd_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_nsu_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add non service users columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1L, NA_integer_)) + + cli::cli_alert_info("Add non service users columns function finished at {Sys.time()}") + + return(episode_file) } #' Add NRS columns @@ -500,12 +535,14 @@ add_nsu_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_nrs_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add nrs columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate("{prefix}" := dplyr::if_else(eval(condition), 1L, NA_integer_)) + + cli::cli_alert_info("Add nrs columns function finished at {Sys.time()}") + + return(episode_file) } #' Add HL1 columns @@ -513,11 +550,13 @@ add_nrs_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_hl1_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add homelessness columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition) + + cli::cli_alert_info("Add homelessness columns function finished at {Sys.time()}") + + return(episode_file) } #' Add CH columns @@ -525,10 +564,8 @@ add_hl1_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_ch_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add care home columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate( ch_cost_per_day = dplyr::if_else( @@ -550,6 +587,10 @@ add_ch_columns <- function(episode_file, prefix, condition) { .data$ch_ep_end ) ) + + cli::cli_alert_info("Add care home columns function finished at {Sys.time()}") + + return(episode_file) } #' Add HC columns @@ -557,8 +598,6 @@ add_ch_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_hc_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add home care columns function started at {Sys.time()}") - condition <- substitute(condition) episode_file <- episode_file %>% @@ -596,6 +635,10 @@ add_hc_columns <- function(episode_file, prefix, condition) { "{prefix}_reablement_hours" := dplyr::if_else(eval(condition_reabl), .data$hc_hours_annual, NA_real_), "{prefix}_reablement_hours_cost" := dplyr::if_else(eval(condition_reabl), .data$cost_total_net, NA_real_) ) + + cli::cli_alert_info("Add home care columns function finished at {Sys.time()}") + + return(episode_file) } #' Add AT columns @@ -603,15 +646,17 @@ add_hc_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_at_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add alarms telecare columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate( "{prefix}_alarms" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Alarm", 1L, NA_integer_), "{prefix}_telecare" := dplyr::if_else(eval(condition) & .data$smrtype == "AT-Tele", 1L, NA_integer_) ) + + cli::cli_alert_info("Add alarms telecare columns function finished at {Sys.time()}") + + return(episode_file) } #' Add SDS columns @@ -619,10 +664,8 @@ add_at_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_sds_columns <- function(episode_file, prefix, condition) { - cli::cli_alert_info("Add SDS columns function started at {Sys.time()}") - condition <- substitute(condition) - episode_file %>% + episode_file <- episode_file %>% add_standard_cols(prefix, condition) %>% dplyr::mutate( "{prefix}_option_1" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-1", 1L, NA_integer_), @@ -630,6 +673,10 @@ add_sds_columns <- function(episode_file, prefix, condition) { "{prefix}_option_3" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-3", 1L, NA_integer_), "{prefix}_option_4" := dplyr::if_else(eval(condition) & .data$smrtype == "SDS-4", 1L, NA_integer_) ) + + cli::cli_alert_info("Add SDS columns function finished at {Sys.time()}") + + return(episode_file) } #' Add columns based on IPDC @@ -643,8 +690,6 @@ add_sds_columns <- function(episode_file, prefix, condition) { #' cij_pattype (lgl) #' @family individual_file add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) { - cli::cli_alert_info("Add ipdc columns function started at {Sys.time()}") - condition_i <- substitute(eval(condition) & ipdc == "I") episode_file <- episode_file %>% dplyr::mutate( @@ -676,6 +721,9 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi "{prefix}_daycase_cost" := dplyr::if_else(eval(condition_d), .data$cost_total_net, NA_real_) ) } + + cli::cli_alert_info("Add ipdc columns function finished at {Sys.time()}") + return(episode_file) } @@ -689,14 +737,15 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi #' @param cost Whether to create prefix_cost col, e.g. "Acute_cost" #' @family individual_file add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE, cost = FALSE) { - cli::cli_alert_info("Add standard columns function started at {Sys.time()}") - if (episode) { episode_file <- dplyr::mutate(episode_file, "{prefix}_episodes" := dplyr::if_else(eval(condition), 1L, NA_integer_)) } if (cost) { episode_file <- dplyr::mutate(episode_file, "{prefix}_cost" := dplyr::if_else(eval(condition), .data$cost_total_net, NA_real_)) } + + cli::cli_alert_info("Add standard columns function finished at {Sys.time()}") + return(episode_file) } @@ -707,9 +756,7 @@ add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE, #' @inheritParams create_individual_file #' @family individual_file clean_up_ch <- function(episode_file, year) { - cli::cli_alert_info("Clean up CH function started at {Sys.time()}") - - episode_file %>% + episode_file <- episode_file %>% dplyr::mutate( fy_end = end_fy(year), fy_start = start_fy(year) @@ -741,6 +788,10 @@ clean_up_ch <- function(episode_file, year) { ) ) %>% dplyr::select(-c("fy_end", "fy_start", "term_1", "term_2")) + + cli::cli_alert_info("Clean up CH function finished at {Sys.time()}") + + return(episode_file) } #' Recode gender @@ -750,9 +801,7 @@ clean_up_ch <- function(episode_file, year) { #' @inheritParams create_individual_file #' @family individual_file recode_gender <- function(episode_file) { - cli::cli_alert_info("Recode Gender function started at {Sys.time()}") - - episode_file %>% + episode_file <- episode_file %>% dplyr::mutate( gender = dplyr::if_else( .data$gender %in% c(0L, 9L), @@ -760,6 +809,10 @@ recode_gender <- function(episode_file) { .data$gender ) ) + + cli::cli_alert_info("Recode Gender function finished at {Sys.time()}") + + return(episode_file) } #' Condition columns @@ -769,11 +822,12 @@ recode_gender <- function(episode_file) { #' "dementia" and "dementia_date" #' @family individual_file condition_cols <- function() { - cli::cli_alert_info("Return condition columns function started at {Sys.time()}") - conditions <- slfhelper::ltc_vars date_cols <- paste0(conditions, "_date") all_cols <- c(conditions, date_cols) + + cli::cli_alert_info("Return condition columns function finished at {Sys.time()}") + return(all_cols) } @@ -808,9 +862,7 @@ min_no_inf <- function(x) { #' @param individual_file Individual file where each row represents a unique CHI #' @param year Financial year e.g 1718 clean_individual_file <- function(individual_file, year) { - cli::cli_alert_info("Clean individual file function started at {Sys.time()}") - - individual_file %>% + individual_file <- individual_file %>% dplyr::select(!dplyr::any_of(c( "ch_no_cost", "no_paid_items", @@ -819,6 +871,10 @@ clean_individual_file <- function(individual_file, year) { ))) %>% clean_up_gender() %>% dplyr::mutate(age = compute_mid_year_age(year, .data$dob)) + + cli::cli_alert_info("Clean individual file function finished at {Sys.time()}") + + return(individual_file) } #' Clean up gender column @@ -827,15 +883,16 @@ clean_individual_file <- function(individual_file, year) { #' #' @inheritParams clean_individual_file clean_up_gender <- function(individual_file) { - cli::cli_alert_info("Clean up gender column function started at {Sys.time()}") - - individual_file %>% + individual_file <- individual_file %>% dplyr::mutate( gender = dplyr::case_when( .data$gender != 1.5 ~ round(.data$gender), .default = phsmethods::sex_from_chi(.data$chi, chi_check = FALSE) ) ) + + cli::cli_alert_info("Clean up gender column function finished at {Sys.time()}") + return(individual_file) } #' Join slf lookup variables @@ -855,8 +912,6 @@ join_slf_lookup_vars <- function(individual_file, col_select = c("gpprac", "cluster", "hbpraccode") ), hbrescode_var = "hb2018") { - cli::cli_alert_info("Join slf lookup variables function started at {Sys.time()}") - individual_file <- individual_file %>% dplyr::left_join( slf_postcode_lookup, @@ -868,5 +923,7 @@ join_slf_lookup_vars <- function(individual_file, ) %>% dplyr::rename(hbrescode = hbrescode_var) + cli::cli_alert_info("Join slf lookup variables function finished at {Sys.time()}") + return(individual_file) } diff --git a/R/fill_geographies.R b/R/fill_geographies.R index 5638c8758..9b7721391 100644 --- a/R/fill_geographies.R +++ b/R/fill_geographies.R @@ -16,8 +16,6 @@ fill_geographies <- function( get_slf_gpprac_path(), col_select = c("gpprac", "cluster", "hbpraccode") )) { - cli::cli_alert_info("Fill geographies function started at {Sys.time()}") - check_variables_exist(data, c( "chi", "postcode", @@ -30,7 +28,7 @@ fill_geographies <- function( "gpprac" )) - data %>% + data <- data %>% fill_postcode_geogs( slf_pc_lookup = read_file(get_slf_postcode_path()) ) %>% @@ -40,6 +38,10 @@ fill_geographies <- function( col_select = c("gpprac", "cluster", "hbpraccode") ) ) + + cli::cli_alert_info("Fill geographies function finished at {Sys.time()}") + + return(data) } #' Make a postcode lookup for filling to most recent postcodes based on CHI @@ -189,7 +191,6 @@ fill_gpprac_geographies <- function( #' #' @return data with matched HSCP and LCA codes cascade_geographies <- function(data) { - # TODO rework this function into a series of smaller functions which operate on vectors # e.g. cascade_hscp_lca <- function(hscp, lca) {...} # Would take HSCP and populate any missing LCA using it data <- data %>% diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R index 5e61a2082..f503af703 100644 --- a/R/join_deaths_data.R +++ b/R/join_deaths_data.R @@ -10,16 +10,16 @@ join_deaths_data <- function( data, year, slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi()) { - cli::cli_alert_info("Join deaths data function started at {Sys.time()}") + data <- data %>% + dplyr::left_join( + slf_deaths_lookup %>% + dplyr::distinct(.data$chi, .keep_all = TRUE), + by = "chi", + na_matches = "never", + relationship = "many-to-one" + ) - return( - data %>% - dplyr::left_join( - slf_deaths_lookup %>% - dplyr::distinct(chi, .keep_all = TRUE), - by = "chi", - na_matches = "never", - relationship = "many-to-one" - ) - ) + cli::cli_alert_info("Join deaths data function finished at {Sys.time()}") + + return(data) } diff --git a/R/join_sparra_hhg.R b/R/join_sparra_hhg.R index c22e1a9c3..3218d6be7 100644 --- a/R/join_sparra_hhg.R +++ b/R/join_sparra_hhg.R @@ -5,8 +5,6 @@ #' @return The data including the SPARRA and HHG variables matched #' on to the episode file. join_sparra_hhg <- function(data, year) { - cli::cli_alert_info("Join SPARRA and HHG function started at {Sys.time()}") - if (check_year_valid(year, "sparra")) { data <- dplyr::left_join( data, @@ -63,5 +61,7 @@ join_sparra_hhg <- function(data, year) { data <- dplyr::mutate(data, hhg_end_fy = NA_integer_) } + cli::cli_alert_info("Join SPARRA and HHG function finished at {Sys.time()}") + return(data) } diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index d4162b619..bdd656791 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -12,8 +12,6 @@ link_delayed_discharge_eps <- function( episode_file, year, dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi()) { - cli::cli_alert_info("Link delayed discharge to episode file function started at {Sys.time()}") - if (!check_year_valid(year, type = "dd")) { episode_file <- episode_file return(episode_file) @@ -292,7 +290,7 @@ link_delayed_discharge_eps <- function( )) %>% dplyr::group_by(.data$chi, .data$cij_marker) %>% dplyr::mutate(cij_delay = max(.data$has_delay)) %>% - dplyr::mutate(cij_delay = dplyr::if_else(cij_delay == "0", + dplyr::mutate(cij_delay = dplyr::if_else(.data$cij_delay == "0", FALSE, TRUE, missing = NA @@ -313,7 +311,6 @@ link_delayed_discharge_eps <- function( # keep variables from ep files dplyr::select( -c( - "ep_file_row_id", "year", "recid", "record_keydate1", @@ -364,17 +361,19 @@ link_delayed_discharge_eps <- function( ) ) %>% # populate cij_delay dd details back to ep - dplyr::group_by(chi, cij_marker) %>% + dplyr::group_by(.data$chi, .data$cij_marker) %>% dplyr::mutate( - has_dd = any(recid == "DD"), - delay_dd = any(cij_delay) + has_dd = any(.data$recid == "DD"), + delay_dd = any(.data$cij_delay) ) %>% dplyr::ungroup() %>% - dplyr::mutate(cij_delay = dplyr::if_else(has_dd, - delay_dd, - cij_delay + dplyr::mutate(cij_delay = dplyr::if_else(.data$has_dd, + .data$delay_dd, + .data$cij_delay )) %>% dplyr::select(-c("has_dd", "delay_dd", "original_admission_date", "amended_dates")) + cli::cli_alert_info("Link delayed discharge to episode file function finished at {Sys.time()}") + return(linked_data) } diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R index f0522c00d..51c280966 100644 --- a/R/match_on_ltcs.R +++ b/R/match_on_ltcs.R @@ -12,8 +12,6 @@ match_on_ltcs <- function( data, year, ltc_data = read_file(get_ltcs_path(year)) %>% slfhelper::get_chi()) { - cli::cli_alert_info("Match on LTCs function started at {Sys.time()}") - # Match on LTC lookup matched <- dplyr::left_join( data, @@ -32,5 +30,7 @@ match_on_ltcs <- function( ) %>% dplyr::select(-tidyselect::ends_with("_ltc")) + cli::cli_alert_info("Match on LTCs function finished at {Sys.time()}") + return(matched) } diff --git a/R/process_costs_rmd.R b/R/process_costs_rmd.R index bca00871d..bd986fb35 100644 --- a/R/process_costs_rmd.R +++ b/R/process_costs_rmd.R @@ -23,7 +23,8 @@ process_costs_rmd <- function(file_name) { output_dir <- fs::path( get_slf_dir(), - "Tests" + "Tests", + "cost_tests" ) input_file <- get_file_path( @@ -44,13 +45,19 @@ process_costs_rmd <- function(file_name) { check_mode = "write" ) - rmarkdown::render( - input = input_file, - output_file = output_file, - output_format = "html_document", - envir = new.env(), - quiet = TRUE - ) + if (fs::file_exists(output_file)) { + # Do not write file if it already exists + output <- NULL + } else { + # If file does not exist, create it + rmarkdown::render( + input = input_file, + output_file = output_file, + output_format = "html_document", + envir = new.env(), + quiet = TRUE + ) + } if (fs::file_info(output_file)$user == Sys.getenv("USER")) { # Set the correct permissions diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R index c46f175c7..ca5206965 100644 --- a/R/process_extract_acute.R +++ b/R/process_extract_acute.R @@ -6,6 +6,7 @@ #' #' @param data The extract to process #' @param year The year to process, in FY format. +#' @param acute_cup_path path to acute_cup data #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. #' diff --git a/R/process_extract_ae.R b/R/process_extract_ae.R index 9a5829cc5..196f6567c 100644 --- a/R/process_extract_ae.R +++ b/R/process_extract_ae.R @@ -196,6 +196,8 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { col_type = readr::cols( "ED Arrival Date" = readr::col_date(format = "%Y/%m/%d %T"), "ED Arrival Time" = readr::col_time(""), + "ED Discharge Date" = readr::col_date(format = "%Y/%m/%d %T"), + "ED Discharge Time" = readr::col_time(""), "ED Case Reference Number [C]" = readr::col_character(), "CUP Marker" = readr::col_double(), "CUP Pathway Name" = readr::col_character() @@ -205,6 +207,8 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { dplyr::rename( record_keydate1 = "ED Arrival Date", keytime1 = "ED Arrival Time", + record_keydate2 = "ED Discharge Date", + keytime2 = "ED Discharge Time", case_ref_number = "ED Case Reference Number [C]", cup_marker = "CUP Marker", cup_pathway = "CUP Pathway Name" @@ -217,6 +221,8 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { # Remove any duplicates dplyr::distinct(.data$record_keydate1, .data$keytime1, + .data$record_keydate2, + .data$keytime2, .data$case_ref_number, .keep_all = TRUE ) @@ -225,9 +231,12 @@ process_extract_ae <- function(data, year, write_to_disk = TRUE) { # Join data-------------------------------------------- matched_ae_data <- outfile %>% - dplyr::left_join( - ae_cup_clean, - by = c("record_keydate1", "keytime1", "case_ref_number") + dplyr::left_join(ae_cup_clean, + by = c( + "record_keydate1", "keytime1", + "record_keydate2", "keytime2", + "case_ref_number" + ) ) %>% dplyr::arrange( .data$chi, diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R index 9d47dd5f0..0769197cd 100644 --- a/R/process_extract_alarms_telecare.R +++ b/R/process_extract_alarms_telecare.R @@ -41,7 +41,6 @@ process_extract_alarms_telecare <- function( "smrtype", "chi", "dob", - # "person_id", "gender", "postcode", "sc_send_lca", diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R index dbf817af4..bdaba4767 100644 --- a/R/process_extract_care_home.R +++ b/R/process_extract_care_home.R @@ -115,7 +115,6 @@ process_extract_care_home <- function( "recid", "smrtype", "chi", - # "person_id", "dob", "gender", "postcode", diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R index e8d07b9e8..1f98a35fe 100644 --- a/R/process_extract_gp_ooh.R +++ b/R/process_extract_gp_ooh.R @@ -6,6 +6,7 @@ #' #' @param year The year to process, in FY format. #' @param data_list A list containing the extracts. +#' @param gp_ooh_cup_path path to gp ooh cup data #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. #' @@ -88,7 +89,6 @@ process_extract_gp_ooh <- function(year, ) # Keep the location descriptions as a lookup. - # TODO write the GP OoH lookup out using some functions location_lookup <- ooh_clean %>% dplyr::group_by(.data$location) %>% dplyr::summarise( @@ -159,8 +159,8 @@ process_extract_gp_ooh <- function(year, "cost_total_net", tidyselect::ends_with("_cost"), "ooh_case_id", - cup_marker, - cup_pathway + "cup_marker", + "cup_pathway" ) %>% slfhelper::get_anon_chi() diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R index 651be172d..8f866e9f0 100644 --- a/R/process_extract_home_care.R +++ b/R/process_extract_home_care.R @@ -95,8 +95,7 @@ process_extract_home_care <- function( tidyselect::starts_with("hc_cost_"), "cost_total_net", "hc_provider", - "hc_reablement", - # "person_id" + "hc_reablement" ) %>% slfhelper::get_anon_chi() diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R index 49bf3935e..86324e043 100644 --- a/R/process_extract_homelessness.R +++ b/R/process_extract_homelessness.R @@ -154,7 +154,6 @@ process_extract_homelessness <- function( dplyr::rename(hl1_completeness = "pct_complete_all") %>% dplyr::mutate(hl1_completeness = round(.data$hl1_completeness, 1)) - # TODO - Include person_id (from client_id) final_data <- hl1_data %>% dplyr::select( "year", diff --git a/R/process_extract_consultations.R b/R/process_extract_ooh_consultations.R similarity index 100% rename from R/process_extract_consultations.R rename to R/process_extract_ooh_consultations.R diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R index f8e5f8579..bb2c648ea 100644 --- a/R/process_extract_sds.R +++ b/R/process_extract_sds.R @@ -41,7 +41,6 @@ process_extract_sds <- function( "smrtype", "chi", "dob", - # "person_id", "gender", "postcode", "sc_send_lca", diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R index 30772383e..c1adfed0d 100644 --- a/R/process_lookup_homelessness.R +++ b/R/process_lookup_homelessness.R @@ -13,8 +13,6 @@ create_homelessness_lookup <- function( year, homelessness_data = read_file(get_source_extract_path(year, "homelessness")) %>% slfhelper::get_chi()) { - cli::cli_alert_info("Create homelessness lookup function started at {Sys.time()}") - # Specify years available for running if (year < "1617") { return(NULL) @@ -24,6 +22,8 @@ create_homelessness_lookup <- function( tidyr::drop_na(.data$chi) %>% dplyr::mutate(hl1_in_fy = 1L) + cli::cli_alert_info("Create homelessness lookup function finished at {Sys.time()}") + return(homelessness_lookup) } @@ -41,8 +41,6 @@ create_homelessness_lookup <- function( #' @export add_homelessness_flag <- function(data, year, lookup = create_homelessness_lookup(year)) { - cli::cli_alert_info("Add homelessness flag function started at {Sys.time()}") - if (!check_year_valid(year, type = "homelessness")) { data <- data return(data) @@ -57,6 +55,8 @@ add_homelessness_flag <- function(data, year, ) %>% dplyr::mutate(hl1_in_fy = tidyr::replace_na(.data$hl1_in_fy, 0L)) + cli::cli_alert_info("Add homelessness flag function finished at {Sys.time()}") + return(data) } @@ -72,8 +72,6 @@ add_homelessness_flag <- function(data, year, #' @return the final data as a [tibble][tibble::tibble-package]. #' @export add_homelessness_date_flags <- function(data, year, lookup = create_homelessness_lookup(year)) { - cli::cli_alert_info("Add homelessness date flags function started at {Sys.time()}") - if (!check_year_valid(year, type = "homelessness")) { data <- data return(data) @@ -133,5 +131,7 @@ add_homelessness_date_flags <- function(data, year, lookup = create_homelessness ) ) + cli::cli_alert_info("Add homelessness date flags function finished at {Sys.time()}") + return(data) } diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index 91c08632d..9b5f2bded 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -32,8 +32,9 @@ process_lookup_sc_client <- by = c("sending_location", "social_care_id") ) %>% # need period for the replace sc id with latest function - dplyr::mutate(period = ifelse(!(is.na(.data$financial_quarter)), paste0(.data$financial_year, "Q", financial_quarter), - financial_year + dplyr::mutate(period = ifelse(!(is.na(.data$financial_quarter)), + paste0(.data$financial_year, "Q", .data$financial_quarter), + .data$financial_year )) %>% replace_sc_id_with_latest() %>% # remove cases with no data in client diff --git a/R/process_refined_death.R b/R/process_refined_death.R index dc7663221..f49b30619 100644 --- a/R/process_refined_death.R +++ b/R/process_refined_death.R @@ -44,13 +44,18 @@ process_refined_death <- function( .data$death_date_chi, .data$record_keydate1 )) %>% - dplyr::select(anon_chi, death_date) %>% + dplyr::select("anon_chi", "death_date") %>% # add fy when death happened dplyr::mutate( - fy = phsmethods::extract_fin_year(death_date), + fy = phsmethods::extract_fin_year(.data$death_date), fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7))) - ) - # TODO: check distinct death data by chi while keeping chi==NA records + ) %>% + # no need to keep NA + dplyr::filter(!is.na(.data$anon_chi)) %>% + dplyr::group_by(.data$anon_chi) %>% + dplyr::arrange(.data$death_date) %>% + dplyr::distinct(.data$anon_chi, .keep_all = TRUE) %>% + dplyr::ungroup() if (write_to_disk) { write_file( diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R index aafc3d727..cd20f412b 100644 --- a/R/process_sc_all_alarms_telecare.R +++ b/R/process_sc_all_alarms_telecare.R @@ -87,11 +87,6 @@ process_sc_all_alarms_telecare <- function( # Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function) data <- replace_sc_id_with_latest(data) - # data$person_id <- paste0( - # data$sending_location, - # "-", - # data$social_care_id - # ) # Deal with episodes that have a package across quarters data[, pkg_count := seq_len(.N), by = list( @@ -125,7 +120,6 @@ process_sc_all_alarms_telecare <- function( dob = data.table::last(dob), postcode = data.table::last(postcode), recid = data.table::last(recid), - # person_id = data.table::last(person_id), sc_send_lca = data.table::last(sc_send_lca) ), by = list( sending_location, diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 5478d50cc..cebcea25c 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -47,7 +47,7 @@ process_sc_all_care_home <- function( by = c("sending_location", "social_care_id") ) %>% replace_sc_id_with_latest() %>% - dplyr::select(-latest_flag, -latest_sc_id) + dplyr::select(-"latest_flag", -"latest_sc_id") # cleaning and matching care home names @@ -58,7 +58,13 @@ process_sc_all_care_home <- function( ) fixed_ch_provider <- name_postcode_clean %>% - dplyr::select(-ch_name_validated, -open_interval, -latest_close_date, -ch_name_old, -ch_postcode_old) %>% + dplyr::select( + -"ch_name_validated", + -"open_interval", + -"latest_close_date", + -"ch_name_old", + -"ch_postcode_old" + ) %>% dplyr::mutate( ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) # (n = 2) ) %>% @@ -126,21 +132,21 @@ process_sc_all_care_home <- function( # We want to keep the nursing provision changes when we merge cases that have the same admission date dplyr::mutate(previous_nursing_care_provision = dplyr::lag(.data[["nursing_care_provision"]])) %>% # create a T/F flag for if nursing provision was the same as previous record with same admission date - dplyr::mutate(split_episode = tidyr::replace_na(.data[["previous_nursing_care_provision"]] != nursing_care_provision, TRUE)) %>% + dplyr::mutate(split_episode = tidyr::replace_na(.data[["previous_nursing_care_provision"]] != .data$nursing_care_provision, TRUE)) %>% dplyr::group_by( .data[["social_care_id"]], .data[["sending_location"]], .data[["split_episode"]] ) %>% # create a count of each time the nursing provision changes between records with the same admission date - dplyr::mutate(split_episode_counter = ifelse(split_episode == TRUE, dplyr::row_number(), NA)) %>% + dplyr::mutate(split_episode_counter = ifelse(.data$split_episode == TRUE, dplyr::row_number(), NA)) %>% dplyr::group_by( .data[["social_care_id"]], .data[["sending_location"]] ) %>% # fill split episode counter. This will create a new id number for each different nursing provision within an episode - tidyr::fill(split_episode_counter, .direction = c("down")) %>% - dplyr::select(-previous_nursing_care_provision, -split_episode) + tidyr::fill(.data$split_episode_counter, .direction = c("down")) %>% + dplyr::select(-"previous_nursing_care_provision", -"split_episode") # Merge records to a single row per episode where admission is the same @@ -196,7 +202,7 @@ process_sc_all_care_home <- function( ) ) %>% dplyr::ungroup() %>% - dplyr::select(-period_start_date, -split_episode_counter) + dplyr::select(-"period_start_date", -"split_episode_counter") # Compare to Deaths Data @@ -248,7 +254,7 @@ process_sc_all_care_home <- function( previous_discharge_date_chi = dplyr::lag(.data[["ch_discharge_date"]]) + lubridate::days(1L), # if the first row is NA, set this to the ch_discharge_date - previous_discharge_date_chi = dplyr::if_else(row_number == 1, .data[["ch_discharge_date"]], + previous_discharge_date_chi = dplyr::if_else(.data$row_number == 1, .data[["ch_discharge_date"]], .data[["previous_discharge_date_chi"]] ) ) %>% @@ -263,11 +269,11 @@ process_sc_all_care_home <- function( ), # if there is more than 1 day between (or the last ep for the individual) flag as new ep (Y) # if there is < 1 day (i.e. a pause of up to 1 day or stays overlap flag as same ep (N)) - new_episode = dplyr::if_else(is.na(days_to_next_rec) | days_to_next_rec > 1, "Y", "N") + new_episode = dplyr::if_else(is.na(.data$days_to_next_rec) | .data$days_to_next_rec > 1, "Y", "N") ) %>% # create continuous marker using flag for new stay dplyr::mutate( - ch_chi_cis = purrr::accumulate(new_episode[-1], + ch_chi_cis = purrr::accumulate(.data$new_episode[-1], .init = 1, ~ if (.y == "Y") { .x + 1 @@ -295,7 +301,7 @@ process_sc_all_care_home <- function( previous_discharge_date_sc = dplyr::lag(.data[["ch_discharge_date"]]) + lubridate::days(1L), # if the first row is NA, set this to the ch_discharge_date - previous_discharge_date_sc = dplyr::if_else(row_number == 1, .data[["ch_discharge_date"]], + previous_discharge_date_sc = dplyr::if_else(.data$row_number == 1, .data[["ch_discharge_date"]], .data[["previous_discharge_date_sc"]] ) ) %>% @@ -310,11 +316,11 @@ process_sc_all_care_home <- function( ), # if there is more than 1 day between (or the last ep for the individual) flag as new ep (Y) # if there is < 1 day (i.e. a pause of up to 1 day or stays overlap flag as same ep (N)) - new_episode = dplyr::if_else(is.na(days_to_next_rec) | days_to_next_rec > 1, "Y", "N") + new_episode = dplyr::if_else(is.na(.data$days_to_next_rec) | .data$days_to_next_rec > 1, "Y", "N") ) %>% # create continuous marker using flag for new stay dplyr::mutate( - ch_sc_id_cis = purrr::accumulate(new_episode[-1], + ch_sc_id_cis = purrr::accumulate(.data$new_episode[-1], .init = 1, ~ if (.y == "Y") { .x + 1 @@ -326,8 +332,11 @@ process_sc_all_care_home <- function( dplyr::ungroup() %>% # remove variables no longer needed dplyr::select( - -previous_discharge_date_chi, -previous_discharge_date_sc, -row_number, - -days_to_next_rec, -new_episode + -"previous_discharge_date_chi", + -"previous_discharge_date_sc", + -"row_number", + -"days_to_next_rec", + -"new_episode" ) @@ -364,7 +373,7 @@ process_sc_all_care_home <- function( .data[["type_of_admission"]] ) ) %>% - dplyr::select(-ch_ep_start, -ch_ep_end, -stay_los, -stay_respite) + dplyr::select(-"ch_ep_start", -"ch_ep_end", -"stay_los", -"stay_respite") ch_data_final <- adm_reason_recoded %>% @@ -385,7 +394,6 @@ process_sc_all_care_home <- function( )) %>% dplyr::select( "chi", - # "person_id", "gender", "dob", "postcode", diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R index c5b7d43eb..b48be79ad 100644 --- a/R/process_sc_all_sds.R +++ b/R/process_sc_all_sds.R @@ -128,11 +128,6 @@ process_sc_all_sds <- function( "SDS", convert_sc_sending_location_to_lca(sending_location) )] - # sds_full_clean_long$person_id <- paste0( - # sds_full_clean_long$sending_location, - # "-", - # sds_full_clean_long$social_care_id - # ) # Group, arrange and create flags for episodes sds_full_clean_long[, @@ -176,7 +171,6 @@ process_sc_all_sds <- function( dob = data.table::last(dob), postcode = data.table::last(postcode), recid = data.table::last(recid), - # person_id = data.table::last(person_id), sc_send_lca = data.table::last(sc_send_lca) ), by = list(sending_location, social_care_id, smrtype, episode_counter)] rm(sds_full_clean_long) diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R index c45992938..7a8f957ca 100644 --- a/R/process_tests_episode_file.R +++ b/R/process_tests_episode_file.R @@ -31,9 +31,13 @@ process_tests_episode_file <- function(data, year) { recid = TRUE ) %>% dplyr::arrange(.data[["recid"]]) %>% - write_tests_xlsx(sheet_name = stringr::str_glue({ - "ep_file_{year}" - }), workbook_name = "ep_file") + write_tests_xlsx( + sheet_name = stringr::str_glue({ + "ep_file_{year}" + }), + year = year, + workbook_name = "ep_file" + ) return(comparison) } diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 900ce7f03..ae664c09b 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -34,9 +34,13 @@ process_tests_individual_file <- function(data, year) { old_data = produce_individual_file_tests(old_data), new_data = produce_individual_file_tests(data) ) %>% - write_tests_xlsx(sheet_name = stringr::str_glue({ - "indiv_file_{year}" - }), workbook_name = "indiv_file") + write_tests_xlsx( + sheet_name = stringr::str_glue({ + "indiv_file_{year}" + }), + year = year, + workbook_name = "indiv_file" + ) return(comparison) } diff --git a/R/process_tests_it_chi_deaths.R b/R/process_tests_it_chi_deaths.R index 33ef1c5ff..7c04e3a14 100644 --- a/R/process_tests_it_chi_deaths.R +++ b/R/process_tests_it_chi_deaths.R @@ -4,14 +4,12 @@ #' #' @export process_tests_it_chi_deaths <- function(data, update = previous_update()) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_it_chi_deaths_tests( - read_file(get_slf_chi_deaths_path(update = update)) + read_file(get_slf_chi_deaths_path(update = update)) %>% + slfhelper::get_chi() ), - new_data = produce_it_chi_deaths_tests(data) + new_data = produce_it_chi_deaths_tests(data %>% slfhelper::get_chi()) ) %>% write_tests_xlsx(sheet_name = "it_chi_deaths", workbook_name = "lookup") @@ -41,10 +39,8 @@ produce_it_chi_deaths_tests <- function(data) { dplyr::mutate( n_chi = 1L, n_valid_chi = phsmethods::chi_check(.data$chi) == "Valid CHI", - n_death_date_nrs = is.na(.data$death_date_nrs), n_death_date_chi = is.na(.data$death_date_chi), - n_death_date = is.na(.data$death_date), - death_year = lubridate::year(.data$death_date), + death_year = lubridate::year(.data$death_date_chi), "n_deaths_{current_year_0}" := .data$death_year == current_year_0, "n_deaths_{current_year_1}" := .data$death_year == current_year_1, "n_deaths_{current_year_2}" := .data$death_year == current_year_2, diff --git a/R/process_tests_sc_demographics.R b/R/process_tests_sc_demographics.R index c4a81d776..b246bf23a 100644 --- a/R/process_tests_sc_demographics.R +++ b/R/process_tests_sc_demographics.R @@ -15,12 +15,15 @@ process_tests_sc_demographics <- function(data) { comparison <- produce_test_comparison( old_data = produce_sc_demog_lookup_tests( - read_file(get_sc_demog_lookup_path(update = previous_update())) + read_file(get_sc_demog_lookup_path(update = previous_update())) %>% + slfhelper::get_chi() ), new_data = produce_sc_demog_lookup_tests( data ) - ) %>% + ) + + comparison %>% write_tests_xlsx(sheet_name = "sc_demographics", workbook_name = "lookup") return(comparison) @@ -53,7 +56,8 @@ produce_sc_demog_lookup_tests <- function(data) { "chi", "gender", "dob", - "postcode" + "postcode", + "date_of_death" ) ) %>% # use function to sum new test flags diff --git a/R/produce_homelessness_completeness.R b/R/produce_homelessness_completeness.R index 9e157df24..df89b5e62 100644 --- a/R/produce_homelessness_completeness.R +++ b/R/produce_homelessness_completeness.R @@ -40,9 +40,9 @@ produce_homelessness_completeness <- function( sg_all_assessments_annual <- openxlsx::read.xlsx( sg_pub_path, - sheet = "Table 2", + sheet = "Table 1", # Manual change - check sheet name rows = 8L:39L, - cols = 1L:29L, + cols = 1L:33L, # Manual change - check workbook for col number for latest year colNames = FALSE ) %>% dplyr::rename_with(~ c( @@ -53,7 +53,9 @@ produce_homelessness_completeness <- function( paste0(paste0("q", 1L:4L), "_", rep(2019L, 4L)), paste0(paste0("q", 1L:4L), "_", rep(2020L, 4L)), paste0(paste0("q", 1L:4L), "_", rep(2021L, 4L)), - paste0(paste0("q", 1L:4L), "_", rep(2022L, 4L)) + paste0(paste0("q", 1L:4L), "_", rep(2022L, 4L)), + paste0(paste0("q", 1L:4L), "_", rep(2023L, 4L)) + ## Manual change - Add new row here when new year is available in publication )) %>% tidyr::pivot_longer( !"CAName", @@ -125,7 +127,7 @@ produce_homelessness_completeness <- function( get_sg_homelessness_pub_path <- function(...) { path <- get_file_path( directory = fs::path(get_slf_dir(), "Homelessness"), - file_name = "2024.02.07- PHS - Total assessment decisions by LA by Qtr.xlsx", + file_name = "2024.11.08 - PHS - Total assessment decisions by LA by Qtr.xlsx", ... ) diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R index 6128a1be5..bc7dc99f1 100644 --- a/R/read_lookup_sc_client.R +++ b/R/read_lookup_sc_client.R @@ -16,9 +16,6 @@ read_lookup_sc_client <- function(fyyear, # read in data - social care 2 client client_data <- dplyr::tbl(sc_dvprod_connection, dbplyr::in_schema("social_care_2", "client")) %>% - dplyr::collect() - - client_data <- client_data %>% dplyr::select( "sending_location", "social_care_id", @@ -79,7 +76,14 @@ read_lookup_sc_client <- function(fyyear, .data$financial_year, .data$financial_quarter ) %>% - dplyr::rename("mental_health_disorders" = "mental_health_problems") + dplyr::rename("mental_health_disorders" = "mental_health_problems") %>% + dplyr::collect() + + latest_quarter <- client_data %>% + dplyr::arrange(dplyr::desc(.data$financial_quarter)) %>% + dplyr::pull(.data$financial_quarter) %>% + utils::head(1) + cli::cli_alert_info(stringr::str_glue("Social care client data for Year {fyyear} is available up to Q{latest_quarter}.")) if (!fs::file_exists(get_sandpit_extract_path(type = "client", year = fyyear))) { diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R index 1d97e7c30..76b4969df 100644 --- a/R/read_lookup_sc_demographics.R +++ b/R/read_lookup_sc_demographics.R @@ -10,9 +10,6 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection sc_dvprod_connection, dbplyr::in_schema("social_care_2", "demographic_snapshot") ) %>% - dplyr::collect() - - sc_demog <- sc_demog %>% dplyr::select( "latest_record_flag", "period", @@ -25,7 +22,14 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection "chi_postcode", "submitted_postcode", "chi_gender_code" - ) + ) %>% + dplyr::collect() + + latest_quarter <- sc_demog %>% + dplyr::arrange(dplyr::desc(.data$period)) %>% + dplyr::pull(.data$period) %>% + utils::head(1) + cli::cli_alert_info(stringr::str_glue("Demographics data is available up to {latest_quarter}.")) if (!fs::file_exists(get_sandpit_extract_path(type = "demographics"))) { diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R index 0106bcbda..336c9d2f5 100644 --- a/R/read_sc_all_alarms_telecare.R +++ b/R/read_sc_all_alarms_telecare.R @@ -13,9 +13,7 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection at_full_data <- dplyr::tbl( sc_dvprod_connection, dbplyr::in_schema("social_care_2", "equipment_snapshot") - ) %>% dplyr::collect() - - at_full_data <- at_full_data %>% + ) %>% dplyr::select( "sending_location", "social_care_id", @@ -27,7 +25,14 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection "service_end_date", "service_start_date_after_period_end_date" ) %>% - dplyr::distinct() + dplyr::distinct() %>% + dplyr::collect() + + latest_quarter <- at_full_data %>% + dplyr::arrange(dplyr::desc(.data$period)) %>% + dplyr::pull(.data$period) %>% + utils::head(1) + cli::cli_alert_info(stringr::str_glue("Alarm Telecare data is available up to {latest_quarter}.")) if (!fs::file_exists(get_sandpit_extract_path(type = "at"))) { at_full_data %>% diff --git a/R/read_sc_all_care_home.R b/R/read_sc_all_care_home.R index b11879487..f758cbd17 100644 --- a/R/read_sc_all_care_home.R +++ b/R/read_sc_all_care_home.R @@ -10,9 +10,7 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn = ch_data <- dplyr::tbl( sc_dvprod_connection, dbplyr::in_schema("social_care_2", "carehome_snapshot") - ) %>% dplyr::collect() - - ch_data <- ch_data %>% + ) %>% dplyr::select( "ch_name", "ch_postcode", @@ -29,7 +27,14 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn = "ch_discharge_date", "age" ) %>% - dplyr::distinct() + dplyr::distinct() %>% + dplyr::collect() + + latest_quarter <- ch_data %>% + dplyr::arrange(dplyr::desc(.data$period)) %>% + dplyr::pull(.data$period) %>% + utils::head(1) + cli::cli_alert_info(stringr::str_glue("Care Home data is available up to {latest_quarter}.")) if (!fs::file_exists(get_sandpit_extract_path(type = "ch"))) { ch_data %>% diff --git a/R/read_sc_all_home_care.R b/R/read_sc_all_home_care.R index 2349cf1cd..ce371cbb9 100644 --- a/R/read_sc_all_home_care.R +++ b/R/read_sc_all_home_care.R @@ -10,9 +10,7 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn = home_care_data <- dplyr::tbl( sc_dvprod_connection, dbplyr::in_schema("social_care_2", "homecare_snapshot") - ) %>% dplyr::collect() - - home_care_data <- home_care_data %>% + ) %>% dplyr::select( "sending_location", "sending_location_name", @@ -46,7 +44,14 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn = .data$period )) %>% # drop rows start date after end date - dplyr::distinct() + dplyr::distinct() %>% + dplyr::collect() + + latest_quarter <- home_care_data %>% + dplyr::arrange(dplyr::desc(.data$period)) %>% + dplyr::pull(.data$period) %>% + utils::head(1) + cli::cli_alert_info(stringr::str_glue("Home Care data is available up to {latest_quarter}.")) if (!fs::file_exists(get_sandpit_extract_path(type = "hc"))) { home_care_data %>% diff --git a/R/read_sc_all_sds.R b/R/read_sc_all_sds.R index 54d3d31ed..d34fa6d1f 100644 --- a/R/read_sc_all_sds.R +++ b/R/read_sc_all_sds.R @@ -11,9 +11,6 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR sc_dvprod_connection, dbplyr::in_schema("social_care_2", "sds_snapshot") ) %>% - dplyr::collect() - - sds_full_data <- sds_full_data %>% dplyr::select( "sending_location", "social_care_id", @@ -28,7 +25,14 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR "sds_start_date_after_end_date", # get fixed "sds_start_date_after_period_end_date" # get removed ) %>% - dplyr::distinct() + dplyr::distinct() %>% + dplyr::collect() + + latest_quarter <- sds_full_data %>% + dplyr::arrange(dplyr::desc(.data$period)) %>% + dplyr::pull(.data$period) %>% + utils::head(1) + cli::cli_alert_info(stringr::str_glue("SDS data is available up to {latest_quarter}.")) if (!fs::file_exists(get_sandpit_extract_path(type = "sds"))) { sds_full_data %>% diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R index 9478ebefe..1a396bcb6 100644 --- a/R/replace_sc_id_with_latest.R +++ b/R/replace_sc_id_with_latest.R @@ -48,6 +48,8 @@ replace_sc_id_with_latest <- function(data) { .data$latest_sc_id, .data$social_care_id ) - ) + ) %>% + dplyr::filter(!is.na(.data$period)) + return(return_data) } diff --git a/R/write_temp_data.R b/R/write_temp_data.R new file mode 100644 index 000000000..d5708c049 --- /dev/null +++ b/R/write_temp_data.R @@ -0,0 +1,62 @@ +#' Write a temp data to disk in parquet format for debugging purpose +#' +#' @description Write a temp data in parquet format to disk for debugging purpose. +#' @param data The data to be written +#' @param year year variable +#' @param file_name The file name to be written +#' @param write_temp_to_disk Boolean type, write temp data to disk or not +#' +#' @return the data for next step as a [tibble][tibble::tibble-package]. +#' @export +write_temp_data <- + function(data, year, file_name, write_temp_to_disk) { + if (write_temp_to_disk) { + full_file_name <- stringr::str_glue("{file_name}.parquet") + file_path <- file.path( + get_year_dir(year), + full_file_name + ) + + write_file(data, + path = file_path + ) + cli::cli_alert_info(stringr::str_glue("Writing {full_file_name} to disk finished at {Sys.time()}")) + } + return(data) + } + + +#' Read a temp data from disk for debugging purpose +#' +#' @description Read a temp data to disk for debugging purpose. +#' @param year year variable +#' @param file_name The file name to be read +#' +#' @return the data for next step as a [tibble][tibble::tibble-package]. +#' @export +read_temp_data <- function(year, file_name) { + full_file_name <- stringr::str_glue("{file_name}.parquet") + file_path <- file.path( + get_year_dir(year), + full_file_name + ) + + return(read_file(file_path)) +} + +#' Clean temp data from disk +#' +#' @description Clean temp data from disk to save storage. +#' @param year year variable +#' @param file_type ep or ind files +#' +#' @return the data for next step as a [tibble][tibble::tibble-package]. +#' @export +clean_temp_data <- function(year, file_type = c("ep", "ind")) { + list.files( + path = get_year_dir(year), + pattern = stringr::str_glue("^{file_type}_temp"), + full.names = TRUE + ) %>% + file.remove() +} diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index a1b53f971..2241ce519 100644 --- a/R/write_tests_xlsx.R +++ b/R/write_tests_xlsx.R @@ -25,16 +25,12 @@ write_tests_xlsx <- function(comparison_data, )) { # Set up the workbook ---- if (workbook_name == "ep_file") { - if (is.null(year)) { - tests_workbook_name <- - stringr::str_glue(latest_update(), "_ep_file_tests") - } + tests_workbook_name <- + stringr::str_glue(latest_update(), "_{year}_ep_file_tests") } if (workbook_name == "indiv_file") { - if (is.null(year)) { - tests_workbook_name <- - stringr::str_glue(latest_update(), "_indiv_file_tests") - } + tests_workbook_name <- + stringr::str_glue(latest_update(), "_{year}_indiv_file_tests") } if (workbook_name == "lookup") { if (is.null(year)) { @@ -53,17 +49,16 @@ write_tests_xlsx <- function(comparison_data, } } if (workbook_name == "extract") { - if (is.null(year)) { - } else { - tests_workbook_name <- - stringr::str_glue(latest_update(), "_{year}_extract_tests") - } + tests_workbook_name <- + stringr::str_glue(latest_update(), "_{year}_extract_tests") } tests_workbook_path <- fs::path( get_slf_dir(), "Tests", + fy(), + qtr(), tests_workbook_name, ext = "xlsx" ) diff --git a/Run_SLF_Files_manually/run_episode_file_1415.R b/Run_SLF_Files_manually/run_episode_file_1415.R index b5a2eab38..32a817813 100644 --- a/Run_SLF_Files_manually/run_episode_file_1415.R +++ b/Run_SLF_Files_manually/run_episode_file_1415.R @@ -73,7 +73,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) %>% +create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>% process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1516.R b/Run_SLF_Files_manually/run_episode_file_1516.R index 59c7ddc63..899720f5a 100644 --- a/Run_SLF_Files_manually/run_episode_file_1516.R +++ b/Run_SLF_Files_manually/run_episode_file_1516.R @@ -73,7 +73,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) ## %>% +create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) ## %>% # process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1617.R b/Run_SLF_Files_manually/run_episode_file_1617.R index b10372be6..156848f1a 100644 --- a/Run_SLF_Files_manually/run_episode_file_1617.R +++ b/Run_SLF_Files_manually/run_episode_file_1617.R @@ -73,7 +73,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) ## %>% +create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) ## %>% # process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index b405b5b6e..a78912e58 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -69,7 +69,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) %>% +create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>% process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index fb3227512..7d1f8de94 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -69,7 +69,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) %>% +create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>% process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index e2e21bdac..903c0c550 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -69,7 +69,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) %>% +create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>% process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index cf98e80de..9c579bd0e 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -69,7 +69,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) %>% +create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>% process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 3bcbf2466..881e4aa5c 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -69,7 +69,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) %>% +create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>% process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index af0447eed..c2034020c 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -69,7 +69,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) %>% +create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>% process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index bdf16e0f8..7951c97c2 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -69,7 +69,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) %>% +create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>% process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2425.R b/Run_SLF_Files_manually/run_episode_file_2425.R index 699c197b3..25897ab67 100644 --- a/Run_SLF_Files_manually/run_episode_file_2425.R +++ b/Run_SLF_Files_manually/run_episode_file_2425.R @@ -69,7 +69,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) %>% +create_episode_file(processed_data_list, year = year, write_temp_to_disk = FALSE) %>% process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_individual_file_1415.R b/Run_SLF_Files_manually/run_individual_file_1415.R index 70aa2bfca..c8a09e5d4 100644 --- a/Run_SLF_Files_manually/run_individual_file_1415.R +++ b/Run_SLF_Files_manually/run_individual_file_1415.R @@ -2,8 +2,10 @@ library(createslf) year <- "1415" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% +create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>% process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1516.R b/Run_SLF_Files_manually/run_individual_file_1516.R index 8e8dae906..8a4f0da37 100644 --- a/Run_SLF_Files_manually/run_individual_file_1516.R +++ b/Run_SLF_Files_manually/run_individual_file_1516.R @@ -2,8 +2,10 @@ library(createslf) year <- "1516" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% +create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>% process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1617.R b/Run_SLF_Files_manually/run_individual_file_1617.R index 255e4e674..6b3f7e670 100644 --- a/Run_SLF_Files_manually/run_individual_file_1617.R +++ b/Run_SLF_Files_manually/run_individual_file_1617.R @@ -2,8 +2,10 @@ library(createslf) year <- "1617" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% +create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>% process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1718.R b/Run_SLF_Files_manually/run_individual_file_1718.R index 777948fc7..12fa4c7fc 100644 --- a/Run_SLF_Files_manually/run_individual_file_1718.R +++ b/Run_SLF_Files_manually/run_individual_file_1718.R @@ -2,8 +2,10 @@ library(createslf) year <- "1718" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% +create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>% process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1819.R b/Run_SLF_Files_manually/run_individual_file_1819.R index 18839b2ea..f979f2052 100644 --- a/Run_SLF_Files_manually/run_individual_file_1819.R +++ b/Run_SLF_Files_manually/run_individual_file_1819.R @@ -2,8 +2,10 @@ library(createslf) year <- "1819" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% +create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>% process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1920.R b/Run_SLF_Files_manually/run_individual_file_1920.R index 3567d5c5d..4deffd8bc 100644 --- a/Run_SLF_Files_manually/run_individual_file_1920.R +++ b/Run_SLF_Files_manually/run_individual_file_1920.R @@ -2,8 +2,10 @@ library(createslf) year <- "1920" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% +create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>% process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_2021.R b/Run_SLF_Files_manually/run_individual_file_2021.R index 8a78924b3..8df4228a7 100644 --- a/Run_SLF_Files_manually/run_individual_file_2021.R +++ b/Run_SLF_Files_manually/run_individual_file_2021.R @@ -2,8 +2,10 @@ library(createslf) year <- "2021" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% +create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>% process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_2122.R b/Run_SLF_Files_manually/run_individual_file_2122.R index 9ceaa571c..66e23df5f 100644 --- a/Run_SLF_Files_manually/run_individual_file_2122.R +++ b/Run_SLF_Files_manually/run_individual_file_2122.R @@ -2,8 +2,10 @@ library(createslf) year <- "2122" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% +create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>% process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_2223.R b/Run_SLF_Files_manually/run_individual_file_2223.R index b83507dbc..5884cde13 100644 --- a/Run_SLF_Files_manually/run_individual_file_2223.R +++ b/Run_SLF_Files_manually/run_individual_file_2223.R @@ -2,8 +2,10 @@ library(createslf) year <- "2223" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% +create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>% process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_2324.R b/Run_SLF_Files_manually/run_individual_file_2324.R index 3f6cf0fba..3f3c5023f 100644 --- a/Run_SLF_Files_manually/run_individual_file_2324.R +++ b/Run_SLF_Files_manually/run_individual_file_2324.R @@ -2,8 +2,10 @@ library(createslf) year <- "2324" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% +create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>% process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_2425.R b/Run_SLF_Files_manually/run_individual_file_2425.R index 843eb505c..15d598c2e 100644 --- a/Run_SLF_Files_manually/run_individual_file_2425.R +++ b/Run_SLF_Files_manually/run_individual_file_2425.R @@ -2,8 +2,10 @@ library(createslf) year <- "2425" +clean_temp_data(year, "ep") + episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% +create_individual_file(episode_file, year = year, write_temp_to_disk = FALSE) %>% process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_all_targets.R b/Run_SLF_Files_targets/run_all_targets.R index fb5b94fab..ab973b04e 100644 --- a/Run_SLF_Files_targets/run_all_targets.R +++ b/Run_SLF_Files_targets/run_all_targets.R @@ -1,4 +1,5 @@ library(targets) +library(createslf) # use tar_make_future() to run targets for all years # This will run everything needed for creating the episode file. diff --git a/_SPSS_archived/All_years/04-Social_Care/00-Social_Care_functions.R b/_SPSS_archived/All_years/04-Social_Care/00-Social_Care_functions.R index 9e924e474..47d2fe0a2 100644 --- a/_SPSS_archived/All_years/04-Social_Care/00-Social_Care_functions.R +++ b/_SPSS_archived/All_years/04-Social_Care/00-Social_Care_functions.R @@ -50,7 +50,6 @@ phs_db_connection <- function(dsn, username = Sys.getenv("USER")) { } -# TODO- check R conversion for SC demog lookup. This may differ #' Social Care Demographic Lookup File Path #' #' @description Get the file path for the Social Care Demographic lookup file diff --git a/_SPSS_archived/All_years/04-Social_Care/02-Home_care_data.R b/_SPSS_archived/All_years/04-Social_Care/02-Home_care_data.R index bd35a0d89..69945ce2b 100644 --- a/_SPSS_archived/All_years/04-Social_Care/02-Home_care_data.R +++ b/_SPSS_archived/All_years/04-Social_Care/02-Home_care_data.R @@ -14,7 +14,6 @@ source("All_years/04-Social_Care/00-Social_Care_functions.R") sc_con <- phs_db_connection(dsn = "DVPROD") # Read demographic file -# TODO replace the demographic file with R code demog_file <- read_demog_file( social_care_dir = social_care_dir, latest_update = latest_update() diff --git a/_targets.R b/_targets.R index 0377e487b..3ac733e1a 100644 --- a/_targets.R +++ b/_targets.R @@ -22,6 +22,8 @@ tar_option_set( years_to_run <- createslf::years_to_run() list( + ## Phase I, all years ---- + tar_rds(test_mode, TRUE), tar_rds(write_to_disk, TRUE), tar_rds( file_path_ext_clean, @@ -68,6 +70,7 @@ list( priority = 0.9 ), tar_target( + # Tests, LOOKUP series tests_sc_demog_lookup, process_tests_sc_demographics(sc_demog_lookup) ), @@ -80,8 +83,13 @@ list( priority = 0.9 ), tar_target( + # Tests, LOOKUP series tests_it_chi_deaths, - process_tests_it_chi_deaths(it_chi_deaths_data) + # use anonymous function to sequence writing tests to excel + # to avoid conflicts of writing to the same file at the same time + (\(it_chi_deaths_data, tests_sc_demog_lookup) { + process_tests_it_chi_deaths(it_chi_deaths_data) + })(it_chi_deaths_data, tests_sc_demog_lookup) ), tar_target( source_gp_lookup, @@ -94,8 +102,11 @@ list( priority = 0.9 ), tar_target( + # Tests, LOOKUP series tests_source_gp_lookup, - process_tests_lookup_gpprac(source_gp_lookup) + (\(source_gp_lookup, tests_it_chi_deaths) { + process_tests_lookup_gpprac(source_gp_lookup) + })(source_gp_lookup, tests_it_chi_deaths) ), tar_target( source_pc_lookup, @@ -108,15 +119,18 @@ list( priority = 0.9 ), tar_target( + # Tests, LOOKUP series tests_source_pc_lookup, - process_tests_lookup_pc(source_pc_lookup) + (\(source_pc_lookup, tests_source_gp_lookup) { + process_tests_lookup_pc(source_pc_lookup) + })(source_pc_lookup, tests_source_gp_lookup) ), - ## Cost Lookups ## + ### Cost Lookups ---- tar_target(ch_cost_lookup, process_costs_ch_rmd(), priority = 0.8), tar_target(dn_cost_lookup, process_costs_dn_rmd(), priority = 0.8), tar_target(hc_cost_lookup, process_costs_hc_rmd(), priority = 0.8), tar_target(gp_ooh_cost_lookup, process_costs_gp_ooh_rmd()), - ## Social Care - 'All' data ## + ### Social Care - 'All' data ---- tar_target( all_at_extract, read_sc_all_alarms_telecare(), @@ -135,8 +149,11 @@ list( priority = 0.5 ), tar_target( + # Tests, LOOKUP series tests_sc_all_at, - process_tests_sc_all_at_episodes(all_at) + (\(all_at, tests_source_pc_lookup) { + process_tests_sc_all_at_episodes(all_at) + })(all_at, tests_source_pc_lookup) ), tar_target( all_home_care_extract, @@ -156,8 +173,11 @@ list( priority = 0.5 ), tar_target( + # Tests, LOOKUP series tests_sc_all_home_care, - process_tests_sc_all_hc_episodes(all_home_care) + (\(all_home_care, tests_sc_all_at) { + process_tests_sc_all_hc_episodes(all_home_care) + })(all_home_care, tests_sc_all_at) ), tar_target( all_care_home_extract, @@ -187,8 +207,11 @@ list( priority = 0.5 ), tar_target( + # Tests, LOOKUP series tests_all_care_home, - process_tests_sc_all_ch_episodes(all_care_home) + (\(all_care_home, tests_sc_all_home_care) { + process_tests_sc_all_ch_episodes(all_care_home) + })(all_care_home, tests_sc_all_home_care) ), tar_target( all_sds_extract, @@ -208,11 +231,14 @@ list( priority = 0.5 ), tar_target( + # Tests, LOOKUP series tests_sc_all_sds, - process_tests_sc_all_sds_episodes(all_sds) + (\(all_sds, tests_all_care_home) { + process_tests_sc_all_sds_episodes(all_sds) + })(all_sds, tests_all_care_home) ), - # Phase II + ## Phase II, year specific ---- tar_map( list(year = years_to_run), tar_rds( @@ -309,7 +335,7 @@ list( consultations_data_path ) ), - ### Target source processed extracts ### + ### Target source processed extracts ---- tar_target(source_acute_extract, process_extract_acute( acute_data, year, @@ -317,6 +343,7 @@ list( write_to_disk = write_to_disk )), tar_target( + # Tests, EXTRACT series tests_source_acute_extract, process_tests_acute( source_acute_extract, @@ -329,11 +356,14 @@ list( write_to_disk = write_to_disk )), tar_target( + # Tests, EXTRACT series tests_source_ae_extract, - process_tests_ae( - source_ae_extract, - year - ) + (\(source_ae_extract, year, tests_source_acute_extract) { + process_tests_ae( + source_ae_extract, + year + ) + })(source_ae_extract, year, tests_source_acute_extract) ), tar_target(source_cmh_extract, process_extract_cmh( cmh_data, @@ -341,11 +371,14 @@ list( write_to_disk = write_to_disk )), tar_target( + # Tests, EXTRACT series tests_source_cmh_extract, - process_tests_cmh( - source_cmh_extract, - year - ) + (\(source_cmh_extract, year, tests_source_ae_extract){ + process_tests_cmh( + source_cmh_extract, + year + ) + })(source_cmh_extract, year, tests_source_ae_extract) ), tar_target(source_dd_extract, process_extract_delayed_discharges( dd_data, @@ -353,11 +386,14 @@ list( write_to_disk = write_to_disk )), tar_target( + # Tests, EXTRACT series tests_source_dd_extract, - process_tests_delayed_discharges( - source_dd_extract, - year - ) + (\(source_dd_extract, year, tests_source_cmh_extract) { + process_tests_delayed_discharges( + source_dd_extract, + year + ) + })(source_dd_extract, year, tests_source_cmh_extract) ), tar_target(source_dn_extract, process_extract_district_nursing( dn_data, @@ -366,11 +402,14 @@ list( write_to_disk = write_to_disk )), tar_target( + # Tests, EXTRACT series tests_source_dn_extract, - process_tests_district_nursing( - source_dn_extract, - year - ) + (\(source_dn_extract, year, tests_source_dd_extract){ + process_tests_district_nursing( + source_dn_extract, + year + ) + })(source_dn_extract, year, tests_source_dd_extract) ), tar_target( source_homelessness_extract, @@ -382,11 +421,14 @@ list( ) ), tar_target( + # Tests, EXTRACT series tests_source_homelessness_extract, - process_tests_homelessness( - source_homelessness_extract, - year - ) + (\(source_homelessness_extract, year, tests_source_dn_extract){ + process_tests_homelessness( + source_homelessness_extract, + year + ) + })(source_homelessness_extract, year, tests_source_dn_extract) ), tar_target(source_ltc_lookup, process_lookup_ltc( ltc_data, @@ -394,11 +436,14 @@ list( write_to_disk = write_to_disk )), tar_target( + # Tests, EXTRACT series tests_ltc, - process_tests_ltcs( - source_ltc_lookup, - year - ) + (\(source_ltc_lookup, year, tests_source_homelessness_extract){ + process_tests_ltcs( + source_ltc_lookup, + year + ) + })(source_ltc_lookup, year, tests_source_homelessness_extract) ), tar_target(source_maternity_extract, process_extract_maternity( maternity_data, @@ -406,11 +451,14 @@ list( write_to_disk = write_to_disk )), tar_target( + # Tests, EXTRACT series tests_source_maternity_extract, - process_tests_maternity( - source_maternity_extract, - year - ) + (\(source_maternity_extract, year, tests_ltc){ + process_tests_maternity( + source_maternity_extract, + year + ) + })(source_maternity_extract, year, tests_ltc) ), tar_target( source_mental_health_extract, @@ -421,11 +469,14 @@ list( ) ), tar_target( + # Tests, EXTRACT series tests_source_mental_health_extract, - process_tests_mental_health( - source_mental_health_extract, - year - ) + (\(source_mental_health_extract, year, tests_source_maternity_extract){ + process_tests_mental_health( + source_mental_health_extract, + year + ) + })(source_mental_health_extract, year, tests_source_maternity_extract) ), # tar_target(source_nrs_deaths_extract, process_extract_nrs_deaths( # nrs_deaths_data, @@ -434,19 +485,22 @@ list( # )), tar_target( source_nrs_deaths_extract, - # use this anomymous function with redundant but necessary refined_death + # use this anonymous function with redundant but necessary refined_death # to make sure reading year-specific nrs deaths extracts after it is produced - (\(year, refined_death_datas) { + (\(year, refined_death_data) { read_file(get_source_extract_path(year, "deaths")) %>% as.data.frame() })(year, refined_death_data) ), tar_target( + # Tests, EXTRACT series tests_source_nrs_deaths_extract, - process_tests_nrs_deaths( - source_nrs_deaths_extract, - year - ) + (\(source_nrs_deaths_extract, year, tests_source_mental_health_extract){ + process_tests_nrs_deaths( + source_nrs_deaths_extract, + year + ) + })(source_nrs_deaths_extract, year, tests_source_mental_health_extract) ), tar_target(source_ooh_extract, process_extract_gp_ooh( year, @@ -455,11 +509,14 @@ list( write_to_disk = write_to_disk )), tar_target( + # Tests, EXTRACT series tests_source_ooh_extract, - process_tests_gp_ooh( - source_ooh_extract, - year - ) + (\(source_ooh_extract, year, tests_source_nrs_deaths_extract){ + process_tests_gp_ooh( + source_ooh_extract, + year + ) + })(source_ooh_extract, year, tests_source_nrs_deaths_extract) ), tar_target(source_outpatients_extract, process_extract_outpatients( outpatients_data, @@ -467,11 +524,14 @@ list( write_to_disk = write_to_disk )), tar_target( + # Tests, EXTRACT series tests_source_outpatients_extract, - process_tests_outpatients( - source_outpatients_extract, - year - ) + (\(source_outpatients_extract, year, tests_source_ooh_extract){ + process_tests_outpatients( + source_outpatients_extract, + year + ) + })(source_outpatients_extract, year, tests_source_ooh_extract) ), tar_target(source_prescribing_extract, process_extract_prescribing( prescribing_data, @@ -479,13 +539,16 @@ list( write_to_disk = write_to_disk )), tar_target( + # Tests, EXTRACT series tests_prescribing, - process_tests_prescribing( - source_prescribing_extract, - year - ) + (\(source_prescribing_extract, year, tests_source_outpatients_extract){ + process_tests_prescribing( + source_prescribing_extract, + year + ) + })(source_prescribing_extract, year, tests_source_outpatients_extract) ), - ### Target process year specific social care ### + ### Target process year specific social care ---- tar_target( sc_client_data, read_lookup_sc_client(fyyear = year) @@ -502,8 +565,11 @@ list( ) ), tar_target( + # Tests, EXTRACT series tests_sc_client_lookup, - process_tests_sc_client_lookup(sc_client_lookup, year = year) + (\(sc_client_lookup, year, tests_prescribing){ + process_tests_sc_client_lookup(sc_client_lookup, year = year) + })(sc_client_lookup, year, tests_prescribing) ), tar_target( source_sc_alarms_tele, @@ -514,11 +580,14 @@ list( ) ), tar_target( + # Tests, EXTRACT series tests_alarms_telecare, - process_tests_alarms_telecare( - data = source_sc_alarms_tele, - year = year - ) + (\(source_sc_alarms_tele, year, tests_sc_client_lookup){ + process_tests_alarms_telecare( + data = source_sc_alarms_tele, + year = year + ) + })(source_sc_alarms_tele, year, tests_sc_client_lookup) ), tar_target( source_sc_care_home, @@ -530,11 +599,14 @@ list( ) ), tar_target( + # Tests, EXTRACT series tests_care_home, - process_tests_care_home( - data = source_sc_care_home, - year = year - ) + (\(source_sc_care_home, year, tests_alarms_telecare){ + process_tests_care_home( + data = source_sc_care_home, + year = year + ) + })(source_sc_care_home, year, tests_alarms_telecare) ), tar_target( source_sc_home_care, @@ -545,11 +617,14 @@ list( ) ), tar_target( + # Tests, EXTRACT series tests_home_care, - process_tests_home_care( - data = source_sc_home_care, - year = year - ) + (\(source_sc_home_care, year, tests_care_home){ + process_tests_home_care( + data = source_sc_home_care, + year = year + ) + })(source_sc_home_care, year, tests_care_home) ), tar_target( source_sc_sds, @@ -560,11 +635,14 @@ list( ) ), tar_target( + # Tests, EXTRACT series tests_sds, - process_tests_sds( - data = source_sc_sds, - year = year - ) + (\(source_sc_sds, year, tests_home_care){ + process_tests_sds( + data = source_sc_sds, + year = year + ) + })(source_sc_sds, year, tests_home_care) ), tar_target( slf_deaths_lookup, @@ -590,7 +668,7 @@ list( ) ## End of Targets pipeline ## -################################################################################ +## Phase III, create ep file and ind file---- ## Redundant code which may still be useful for including ep/indiv files. # tar_qs( # processed_data_list, diff --git a/inst/WORDLIST b/inst/WORDLIST index 7edd722c5..88dc043fe 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -86,6 +86,7 @@ funs fy fyear fyyear +gp gpprac hb hbpraccode @@ -94,10 +95,12 @@ hc hscdiip hscp inc +ind interzone lca lgl normal’ +nrs old’ opendata overcounting @@ -109,7 +112,6 @@ reablement recid recids sc -scoial sds slf slfhelper diff --git a/man/add_deceased_flag.Rd b/man/add_deceased_flag.Rd deleted file mode 100644 index c84568522..000000000 --- a/man/add_deceased_flag.Rd +++ /dev/null @@ -1,32 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_deceased_flag.R -\name{add_deceased_flag} -\alias{add_deceased_flag} -\title{Create the SLF Deaths lookup} -\usage{ -add_deceased_flag( - year, - refined_death = read_file(get_combined_slf_deaths_lookup_path()) \%>\% - slfhelper::get_chi(), - write_to_disk = TRUE -) -} -\arguments{ -\item{year}{The year to process, in FY format.} - -\item{write_to_disk}{(optional) Should the data be written to disk default is -\code{TRUE} i.e. write the data to disk.} - -\item{nrs_deaths_data}{NRS deaths data.} - -\item{chi_deaths_data}{IT CHI deaths data.} -} -\value{ -a \link[tibble:tibble-package]{tibble} containing the episode file -} -\description{ -Currently this just uses the NRS death dates 'as is', with no -corrections or modifications, it is expected that this will be expanded to -use the CHI deaths extract from IT as well as taking into account data in -the episode file to assess the validity of a death date. -} diff --git a/man/check_year_valid.Rd b/man/check_year_valid.Rd index 59960da30..7b704d2ec 100644 --- a/man/check_year_valid.Rd +++ b/man/check_year_valid.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/check_year_valid.R +% Please edit documentation in R/00-update_refs.R \name{check_year_valid} \alias{check_year_valid} \title{Check data exists for a year} diff --git a/man/clean_temp_data.Rd b/man/clean_temp_data.Rd new file mode 100644 index 000000000..c26dcde8e --- /dev/null +++ b/man/clean_temp_data.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_temp_data.R +\name{clean_temp_data} +\alias{clean_temp_data} +\title{Clean temp data from disk} +\usage{ +clean_temp_data(year, file_type = c("ep", "ind")) +} +\arguments{ +\item{year}{year variable} + +\item{file_type}{ep or ind files} +} +\value{ +the data for next step as a \link[tibble:tibble-package]{tibble}. +} +\description{ +Clean temp data from disk to save storage. +} diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd index d6bd6d526..a1bf6878b 100644 --- a/man/create_episode_file.Rd +++ b/man/create_episode_file.Rd @@ -18,7 +18,8 @@ create_episode_file( slfhelper::get_chi(), sc_client = read_file(get_sc_client_lookup_path(year)) \%>\% slfhelper::get_chi(), write_to_disk = TRUE, - anon_chi_out = TRUE + anon_chi_out = TRUE, + write_temp_to_disk = FALSE ) } \arguments{ @@ -40,13 +41,15 @@ create_episode_file( \item{slf_deaths_lookup}{The SLF deaths lookup.} -\item{sc_client}{scoial care lookup file} +\item{sc_client}{social care lookup file} \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. write the data to disk.} \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output (instead of chi)} + +\item{write_temp_to_disk}{write intermediate data for investigation or debug} } \value{ a \link[tibble:tibble-package]{tibble} containing the episode file diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd index bfa584d54..9cb57cff0 100644 --- a/man/create_individual_file.Rd +++ b/man/create_individual_file.Rd @@ -10,7 +10,8 @@ create_individual_file( homelessness_lookup = create_homelessness_lookup(year), write_to_disk = TRUE, anon_chi_in = TRUE, - anon_chi_out = TRUE + anon_chi_out = TRUE, + write_temp_to_disk ) } \arguments{ @@ -28,6 +29,8 @@ create_individual_file( \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output (instead of chi)} + +\item{write_temp_to_disk}{write intermediate data for investigation or debug} } \value{ The processed individual file diff --git a/man/end_date.Rd b/man/end_date.Rd new file mode 100644 index 000000000..9d617ac10 --- /dev/null +++ b/man/end_date.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/00-update_refs.R +\name{end_date} +\alias{end_date} +\title{End date} +\usage{ +end_date() +} +\value{ +Get the end date of the latest update period +} +\description{ +End date +} diff --git a/man/fy.Rd b/man/fy.Rd new file mode 100644 index 000000000..21c4b47f1 --- /dev/null +++ b/man/fy.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/00-update_refs.R +\name{fy} +\alias{fy} +\title{Extract latest FY from end_date} +\usage{ +fy() +} +\value{ +fy in format "2024" +} +\description{ +Extract latest FY from end_date +} diff --git a/man/process_combined_deaths_lookup.Rd b/man/process_combined_deaths_lookup.Rd deleted file mode 100644 index 7d0a75fc7..000000000 --- a/man/process_combined_deaths_lookup.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/add_activity_after_death_flag.R -\name{process_combined_deaths_lookup} -\alias{process_combined_deaths_lookup} -\title{Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts} -\usage{ -process_combined_deaths_lookup( - update = latest_update(), - write_to_disk = TRUE, - ... -) -} -\arguments{ -\item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})} - -\item{write_to_disk}{(optional) Should the data be written to disk default is -\code{TRUE} i.e. write the data to disk.} - -\item{...}{additional arguments passed to \code{\link[=get_slf_deaths_lookup_path]{get_slf_deaths_lookup_path()}}} -} -\value{ -the final data as a \link[tibble:tibble-package]{tibble}. -} -\description{ -The BOXI NRS deaths extract lookup should be created after the extract files for all years have been processed, -} diff --git a/man/process_extract_acute.Rd b/man/process_extract_acute.Rd index fae9c7bab..772a7bcec 100644 --- a/man/process_extract_acute.Rd +++ b/man/process_extract_acute.Rd @@ -16,6 +16,8 @@ process_extract_acute( \item{year}{The year to process, in FY format.} +\item{acute_cup_path}{path to acute_cup data} + \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. write the data to disk.} } diff --git a/man/process_extract_gp_ooh.Rd b/man/process_extract_gp_ooh.Rd index 5c68c35dd..14ddb346b 100644 --- a/man/process_extract_gp_ooh.Rd +++ b/man/process_extract_gp_ooh.Rd @@ -16,6 +16,8 @@ process_extract_gp_ooh( \item{data_list}{A list containing the extracts.} +\item{gp_ooh_cup_path}{path to gp ooh cup data} + \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. write the data to disk.} } diff --git a/man/process_extract_ooh_consultations.Rd b/man/process_extract_ooh_consultations.Rd index ae4265823..22159c2ed 100644 --- a/man/process_extract_ooh_consultations.Rd +++ b/man/process_extract_ooh_consultations.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/process_extract_consultations.R +% Please edit documentation in R/process_extract_ooh_consultations.R \name{process_extract_ooh_consultations} \alias{process_extract_ooh_consultations} \title{Process the GP OOH Consultations extract} diff --git a/man/qtr.Rd b/man/qtr.Rd new file mode 100644 index 000000000..79629f858 --- /dev/null +++ b/man/qtr.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/00-update_refs.R +\name{qtr} +\alias{qtr} +\title{Extract latest quarter from end_date} +\usage{ +qtr() +} +\value{ +qtr in format "Q1" +} +\description{ +Extract latest quarter from end_date +} diff --git a/man/read_temp_data.Rd b/man/read_temp_data.Rd new file mode 100644 index 000000000..709d8362d --- /dev/null +++ b/man/read_temp_data.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_temp_data.R +\name{read_temp_data} +\alias{read_temp_data} +\title{Read a temp data from disk for debugging purpose} +\usage{ +read_temp_data(year, file_name) +} +\arguments{ +\item{year}{year variable} + +\item{file_name}{The file name to be read} +} +\value{ +the data for next step as a \link[tibble:tibble-package]{tibble}. +} +\description{ +Read a temp data to disk for debugging purpose. +} diff --git a/man/write_temp_data.Rd b/man/write_temp_data.Rd new file mode 100644 index 000000000..6ee32e7a2 --- /dev/null +++ b/man/write_temp_data.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/write_temp_data.R +\name{write_temp_data} +\alias{write_temp_data} +\title{Write a temp data to disk in parquet format for debugging purpose} +\usage{ +write_temp_data(data, year, file_name, write_temp_to_disk) +} +\arguments{ +\item{data}{The data to be written} + +\item{year}{year variable} + +\item{file_name}{The file name to be written} + +\item{write_temp_to_disk}{Boolean type, write temp data to disk or not} +} +\value{ +the data for next step as a \link[tibble:tibble-package]{tibble}. +} +\description{ +Write a temp data in parquet format to disk for debugging purpose. +} diff --git a/run_targets_1718.R b/run_targets_1718.R deleted file mode 100644 index 488918e1d..000000000 --- a/run_targets_1718.R +++ /dev/null @@ -1,20 +0,0 @@ -library(targets) - -Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") - -year <- "1718" - -# use targets for the process until testing episode files -tar_make_future( - # it does not recognise `contains(year)` - names = (targets::contains("1718")) -) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/run_targets_1819.R b/run_targets_1819.R deleted file mode 100644 index 7c63807e8..000000000 --- a/run_targets_1819.R +++ /dev/null @@ -1,20 +0,0 @@ -library(targets) - -Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") - -year <- "1819" - -# use targets for the process until testing episode files -tar_make_future( - # it does not recognise `contains(year)` - names = (targets::contains("1819")) -) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/run_targets_1920.R b/run_targets_1920.R deleted file mode 100644 index d3361a34c..000000000 --- a/run_targets_1920.R +++ /dev/null @@ -1,20 +0,0 @@ -library(targets) - -Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") - -year <- "1920" - -# use targets for the process until testing episode files -tar_make_future( - # it does not recognise `contains(year)` - names = (targets::contains("1920")) -) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/run_targets_2021.R b/run_targets_2021.R deleted file mode 100644 index efcfaed7a..000000000 --- a/run_targets_2021.R +++ /dev/null @@ -1,20 +0,0 @@ -library(targets) - -Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") - -year <- "2021" - -# use targets for the process until testing episode files -tar_make_future( - # it does not recognise `contains(year)` - names = (targets::contains("2021")) -) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/run_targets_2122.R b/run_targets_2122.R deleted file mode 100644 index e92d75c7d..000000000 --- a/run_targets_2122.R +++ /dev/null @@ -1,20 +0,0 @@ -library(targets) - -Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") - -year <- "2122" - -# use targets for the process until testing episode files -tar_make_future( - # it does not recognise `contains(year)` - names = (targets::contains("2122")) -) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/run_targets_2223.R b/run_targets_2223.R deleted file mode 100644 index f5c93ee2f..000000000 --- a/run_targets_2223.R +++ /dev/null @@ -1,20 +0,0 @@ -library(targets) - -Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") - -year <- "2223" - -# use targets for the process until testing episode files -tar_make_future( - # it does not recognise `contains(year)` - names = (targets::contains("2223")) -) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/run_targets_2324.R b/run_targets_2324.R deleted file mode 100644 index 5e3885bc2..000000000 --- a/run_targets_2324.R +++ /dev/null @@ -1,20 +0,0 @@ -library(targets) - -Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") - -year <- "2324" - -# use targets for the process until testing episode files -tar_make_future( - # it does not recognise `contains(year)` - names = (targets::contains("2324")) -) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/tests/testthat/_snaps/get_dd_path.md b/tests/testthat/_snaps/get_dd_path.md index 4f7740e79..8f8be2c48 100644 --- a/tests/testthat/_snaps/get_dd_path.md +++ b/tests/testthat/_snaps/get_dd_path.md @@ -3,7 +3,7 @@ Code dplyr::glimpse(latest_dd_file, width = 0) Output - Rows: 206,029 + Rows: 219,954 Columns: 14 $ cennum ~ $ MONTHFLAG ~ diff --git a/tests/testthat/test-check_year_valid.R b/tests/testthat/test-check_year_valid.R index 134e2d6b4..c139a43f0 100644 --- a/tests/testthat/test-check_year_valid.R +++ b/tests/testthat/test-check_year_valid.R @@ -50,7 +50,8 @@ test_that("Check year valid works for specific datasets ", { expect_true(check_year_valid("2021", "nsu")) expect_true(check_year_valid("2122", "nsu")) expect_true(check_year_valid("2223", "nsu")) - expect_false(check_year_valid("2324", "nsu")) + expect_true(check_year_valid("2324", "nsu")) + expect_false(check_year_valid("2425", "nsu")) # SPARRA expect_false(check_year_valid("1415", "sparra")) @@ -64,6 +65,7 @@ test_that("Check year valid works for specific datasets ", { expect_true(check_year_valid("2122", "sparra")) expect_true(check_year_valid("2223", "sparra")) expect_true(check_year_valid("2324", "sparra")) + expect_true(check_year_valid("2425", "sparra")) # HHG expect_false(check_year_valid("1415", "hhg")) diff --git a/tests/testthat/test-read_file.R b/tests/testthat/test-read_file.R index ecd39acec..642ff69cf 100644 --- a/tests/testthat/test-read_file.R +++ b/tests/testthat/test-read_file.R @@ -3,9 +3,6 @@ skip_on_ci() test_that("read_file works", { rds_path <- tempfile(fileext = ".rds") rds_gz_path <- tempfile(fileext = ".rds.gz") - fst_path <- tempfile(fileext = ".fst") - sav_path <- tempfile(fileext = ".sav") - zsav_path <- tempfile(fileext = ".zsav") csv_path <- tempfile(fileext = ".csv") csv_gz_path <- tempfile(fileext = ".csv.gz") parquet_path <- tempfile(fileext = ".parquet") @@ -14,18 +11,12 @@ test_that("read_file works", { readr::write_rds(aq_data, rds_path) readr::write_rds(aq_data, rds_gz_path) - fst::write_fst(aq_data, fst_path) - haven::write_sav(aq_data, sav_path) - haven::write_sav(aq_data, zsav_path, compress = "zsav") readr::write_csv(aq_data, csv_path) readr::write_csv(aq_data, csv_gz_path) arrow::write_parquet(aq_data, parquet_path) expect_equal(aq_data, read_file(rds_path)) expect_equal(aq_data, read_file(rds_gz_path)) - expect_equal(aq_data, read_file(fst_path)) - expect_equal(aq_data, haven::zap_formats(read_file(sav_path))) - expect_equal(aq_data, haven::zap_formats(read_file(zsav_path))) expect_equal(aq_data, read_file(csv_gz_path)) expect_equal(aq_data, read_file(parquet_path)) })