diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 7b21f93d1..1abb216ea 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -6,6 +6,7 @@ admtype ADPE adtf alstr +anomymous arrivalmode arth atlassian @@ -40,6 +41,7 @@ CNWs codecov Comhairle commhosp +communicty congen copd costinc @@ -53,6 +55,7 @@ customise cvd dataframe datamart +datas datazone datediff dateformat @@ -169,8 +172,8 @@ lubridate magrittr markdownguide Matern -Mcbride -mcmahon +mcnicol +megan microsoft MIU MMMYY @@ -203,6 +206,7 @@ parkinsons patflow pattype PCEC +pcs PERTH PHIBCS phs @@ -215,6 +219,7 @@ PLICS popluation Posix postcodes +Postcodesio PPAs prac praccode @@ -222,6 +227,7 @@ ptypes purrr quickstart rankdir +rbindlist rcmdcheck rdd rdname @@ -232,13 +238,13 @@ readr readxl reasonwait recid +recordlinkage refailure reflectoring refsource reftype relaint renviron -returnsthe rlang rmarkdown Rnw diff --git a/DESCRIPTION b/DESCRIPTION index 3d731a0af..28a8303ff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,12 +3,8 @@ Title: Create the Source Linkage Files Version: 0.0.0.9000 Authors@R: c( person("Public Health Scotland", , , "phs.source@phs.scot", role = c("cre", "cph")), + person("Megan", "McNicol", , "megan.mcnicol2@phs.scot", role = "aut"), person("Jennifer", "Thom", , "jennifer.thom@phs.scot", role = "aut"), - person("James", "McMahon", , "james.mcmahon@phs.scot", role = "aut", - comment = c(ORCID = "0000-0002-5380-2029")), - person("Catherine", "Holland", , "catherine.holland@phs.scot", role = "aut", - comment = c(ORCID = "0000-0002-3259-5745")), - person("Bateman", "McBride", , "Bateman.Mcbride@phs.scot", role = "aut"), person("Zihao", "Li", , "zihao.li@phs.scot", role = "aut", comment = c(ORCID = "0000-0002-5178-2124")) ) @@ -25,25 +21,19 @@ Imports: data.table (>= 1.14.6), dbplyr (>= 2.3.1), dplyr (>= 1.1.1), - dtplyr (>= 1.3.0), fs (>= 1.6.1), - fst (>= 0.9.8), future (>= 1.33.0), future.callr (>= 0.8.1), - glue (>= 1.6.2), - haven (>= 2.5.2), hms (>= 1.1.0), janitor (>= 2.2.0), keyring (>= 1.3.0), lubridate (>= 1.9.2), magrittr (>= 2.0.3), odbc (>= 1.3.1), - openssl (>= 2.0.5), openxlsx (>= 4.2.5), phsmethods (>= 0.2.2), phsopendata (>= 0.0.1.0), purrr (>= 1.0.1), - qs (>= 0.25.5), R.utils (>= 2.12.2), readr (>= 2.1.0), rlang (>= 1.1.0), @@ -73,5 +63,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 - +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 0475603bf..6f1c88841 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(add_deceased_flag) export(add_homelessness_date_flags) export(add_homelessness_flag) export(add_hri_variables) @@ -25,9 +26,9 @@ export(end_fy_quarter) export(end_next_fy_quarter) export(find_latest_file) export(fy_interval) -export(get_all_slf_deaths_lookup_path) export(get_boxi_extract_path) export(get_ch_costs_path) +export(get_combined_slf_deaths_lookup_path) export(get_dd_path) export(get_dd_period) export(get_demographic_cohorts_path) @@ -88,11 +89,11 @@ export(midpoint_fy) export(next_fy) export(phs_db_connection) export(previous_update) +export(process_combined_deaths_lookup) export(process_costs_ch_rmd) export(process_costs_dn_rmd) export(process_costs_gp_ooh_rmd) export(process_costs_hc_rmd) -export(process_deaths_lookup) export(process_extract_acute) export(process_extract_ae) export(process_extract_alarms_telecare) @@ -115,6 +116,7 @@ export(process_lookup_ltc) export(process_lookup_postcode) export(process_lookup_sc_client) export(process_lookup_sc_demographics) +export(process_refined_death) export(process_sc_all_alarms_telecare) export(process_sc_all_care_home) export(process_sc_all_home_care) @@ -182,6 +184,7 @@ export(start_fy) export(start_fy_quarter) export(start_next_fy_quarter) export(write_file) +export(years_to_run) importFrom(data.table,.N) importFrom(data.table,.SD) importFrom(magrittr,"%>%") diff --git a/NEWS.md b/NEWS.md index d5aea7364..5a07b8266 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,18 @@ -# June 2024 Update - Unreleased +# September 2024 Update - Unreleased +* New 24/25 files created +* New NSU cohort for 23/24 available +* New SPARRA scores calculated from April 24/25 +* Death dates attached to activity after death flag +* Care home methodology updated +* New cup marker for Acute and GP OOH +* Bug fix: + * person id for SDS and client + * DD data: + * The variable `cij_delay` is only attached to the `recid = DD` rows, not all the rows in the CIJ as we'd expect. + * The `cij_delay` variable is showing as 1 against the Delay records (NA otherwise) - We expect this to be `TRUE/FALSE` to match the other flags. + * The `cij_ppa` variable isn't attaching to the `recid = DD` rows. + +# June 2024 Update - released 06-Jun-24 * Update of 2017/18 onwards to include bug fixes within the files. * Removal of extra variable caused by the LTCs not matching properly. * New NRS mid-2022 population estimates. diff --git a/R/00-update_refs.R b/R/00-update_refs.R index 6106f17cf..33022edf6 100644 --- a/R/00-update_refs.R +++ b/R/00-update_refs.R @@ -7,7 +7,7 @@ #' #' @family initialisation latest_update <- function() { - "Jun_2024" + "Sep_2024" } #' Previous update @@ -61,18 +61,24 @@ previous_update <- function(months_ago = 3L, override = NULL) { #' #' @family initialisation get_dd_period <- function() { - "Jul16_Mar24" + "Jul16_Jun24" } -#' The latest financial year for Cost uplift setting +#' The year list for slf to update #' -#' @description Get the latest year for cost uplift +#' @description Get the vector of years to update slf #' -#' @return The financial year format +#' @return The vector of financial years #' #' @export #' #' @family initialisation -latest_cost_year <- function() { - "2223" +years_to_run <- function() { + fy_start_2digit <- 17 + fy_end_2digit <- 24 + years_to_run <- paste0( + fy_start_2digit:fy_end_2digit, + (fy_start_2digit + 1):(fy_end_2digit + 1) + ) + return(years_to_run) } diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index 0842b47ec..5e800c80b 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -11,8 +11,10 @@ add_activity_after_death_flag <- function( data, year, - deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>% + deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Add activity after death flag function started at {Sys.time()}") + # to skip warnings no visible binding for global variable ‘.’ . <- NULL @@ -24,7 +26,7 @@ add_activity_after_death_flag <- function( by = "chi", suffix = c("", "_boxi") ) %>% - dplyr::filter(.data$deceased == TRUE | .data$deceased_boxi == TRUE) %>% + dplyr::filter(.data$deceased == TRUE) %>% dplyr::distinct() @@ -70,31 +72,28 @@ add_activity_after_death_flag <- function( )) - # Check and print error message for records which already are TRUE for the deceased variable in the episode file, but this doesn't match the - # BOXI deceased variable - check_deceased_match <- flag_data %>% - dplyr::filter(.data$deceased != .data$deceased_boxi) - - if (nrow(check_deceased_match) != 0) { - warning("There were records in the episode file which have a deceased variable which does not match the BOXI NRS deceased variable") - } - - # Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried # over from previous financial years flag_data <- flag_data %>% dplyr::filter(.data$activity_after_death == 1) %>% # Remove temporary flag variables used to create activity after death flag and fill in missing death_date - dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$activity_after_death) %>% + dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$activity_after_death, .data$death_date_boxi) %>% dplyr::distinct() # Match activity after death flag back to episode file final_data <- data %>% dplyr::left_join( flag_data, + # TODO: this join_by is not 100% accurate. Consider use ep_file_row_id to join by = c("year", "chi", "record_keydate1", "record_keydate2"), na_matches = "never" - ) + ) %>% + dplyr::mutate(death_date = lubridate::as_date(ifelse(is.na(death_date) & !(is.na(death_date_boxi)), + death_date_boxi, death_date + ))) %>% + dplyr::select(-death_date_boxi) %>% + dplyr::distinct() + return(final_data) @@ -118,19 +117,19 @@ add_activity_after_death_flag <- function( #' #' # Read data------------------------------------------------ -process_deaths_lookup <- function(update = latest_update(), - write_to_disk = TRUE, ...) { - all_boxi_deaths <- read_file(get_slf_deaths_lookup_path("1415")) %>% - rbind(read_file(get_slf_deaths_lookup_path("1516"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1617"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1718"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1819"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1920"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2021"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2122"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2223"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2324"))) %>% - # TODO: make this automated to pick up files starting with name "get_slf_deaths_lookup_path" + +process_combined_deaths_lookup <- function(update = latest_update(), + write_to_disk = TRUE, ...) { + dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths" + file_names <- list.files(dir_folder, + pattern = "^anon-slf_deaths_lookup_.*parquet", + full.names = TRUE + ) + + # read all year specific deaths lookups and bind them together + all_boxi_deaths <- lapply(file_names, arrow::read_parquet) %>% + data.table::rbindlist() %>% + # convert to chi for processing slfhelper::get_chi() %>% # Remove rows with missing or blank CHI number - could also use na.omit? # na.omit(all_boxi_deaths) @@ -177,9 +176,7 @@ process_deaths_lookup <- function(update = latest_update(), if (write_to_disk) { write_file( all_boxi_deaths, - fs::path(get_slf_dir(), "Deaths", - file_name = stringr::str_glue("anon-all_slf_deaths_lookup_{update}.parquet") - ) + get_combined_slf_deaths_lookup_path() ) } diff --git a/R/add_deceased_flag.R b/R/add_deceased_flag.R new file mode 100644 index 000000000..f3be216cf --- /dev/null +++ b/R/add_deceased_flag.R @@ -0,0 +1,40 @@ +#' Create the SLF Deaths lookup +#' +#' @description Currently this just uses the NRS death dates 'as is', with no +#' corrections or modifications, it is expected that this will be expanded to +#' use the CHI deaths extract from IT as well as taking into account data in +#' the episode file to assess the validity of a death date. +#' +#' @param year The year to process, in FY format. +#' @param nrs_deaths_data NRS deaths data. +#' @param chi_deaths_data IT CHI deaths data. +#' @param write_to_disk (optional) Should the data be written to disk default is +#' `TRUE` i.e. write the data to disk. +#' +#' @return a [tibble][tibble::tibble-package] containing the episode file +#' @export +add_deceased_flag <- function( + year, + refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(), + write_to_disk = TRUE) { + # create slf deaths lookup + + dplyr::mutate( + death_date = dplyr::if_else(is.na(.data$record_keydate1), + .data$death_date_chi, .data$record_keydate1 + ), + deceased = TRUE, + .keep = "unused" + ) %>% + # save anon chi on disk + slfhelper::get_anon_chi() + + if (write_to_disk) { + write_file( + slf_deaths_lookup, + get_slf_deaths_lookup_path(year, check_mode = "write") + ) + } + + return(slf_deaths_lookup) +} diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index 15d5d4e01..bf6216e57 100644 --- a/R/add_nsu_cohort.R +++ b/R/add_nsu_cohort.R @@ -13,6 +13,8 @@ add_nsu_cohort <- function( data, year, nsu_cohort = read_file(get_nsu_path(year)) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Add NSU cohort function started at {Sys.time()}") + year_param <- year if (!check_year_valid(year, "nsu")) { diff --git a/R/add_ppa_flag.R b/R/add_ppa_flag.R index bb99f0543..1d5f9739d 100644 --- a/R/add_ppa_flag.R +++ b/R/add_ppa_flag.R @@ -8,6 +8,8 @@ #' @return A data frame to use as a lookup of PPAs #' @family episode_file add_ppa_flag <- function(data) { + cli::cli_alert_info("Add PPA flag function started at {Sys.time()}") + check_variables_exist( data, variables = c( diff --git a/R/check_year_valid.R b/R/check_year_valid.R index 2197d8c0e..da257ff4c 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -17,6 +17,7 @@ check_year_valid <- function( "ch", "client", "cmh", + "cost_dna", "dd", "deaths", "dn", @@ -34,9 +35,9 @@ check_year_valid <- function( )) { if (year <= "1415" && type %in% c("dn", "sparra")) { return(FALSE) - } else if (year <= "1516" && type %in% c("cmh", "homelessness")) { + } else if (year <= "1516" && type %in% c("cmh", "homelessness", "dd")) { return(FALSE) - } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at")) { + } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at", "client", "cost_dna")) { return(FALSE) } else if (year <= "1718" && type %in% "hhg") { return(FALSE) @@ -46,7 +47,7 @@ check_year_valid <- function( return(FALSE) } else if (year >= "2425" && type %in% "sparra") { return(FALSE) - } else if (year >= "2425" && type %in% c("ch", "hc", "sds", "at")) { + } else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at")) { return(FALSE) } diff --git a/R/correct_demographics.R b/R/correct_demographics.R index d7ef6f469..d221c25ab 100644 --- a/R/correct_demographics.R +++ b/R/correct_demographics.R @@ -7,6 +7,8 @@ #' #' @return episode files with updated date of birth and ages correct_demographics <- function(data, year) { + cli::cli_alert_info("Correct demographics function started at {Sys.time()}") + # keep episodes with missing chi data_no_chi <- data %>% dplyr::filter(is_missing(.data$chi)) diff --git a/R/cost_uplift.R b/R/cost_uplift.R index e554c2505..abbbd9b5a 100644 --- a/R/cost_uplift.R +++ b/R/cost_uplift.R @@ -5,6 +5,8 @@ #' @return episode data with uplifted costs #' @family episode_file apply_cost_uplift <- function(data) { + cli::cli_alert_info("Apply cost uplift function started at {Sys.time()}") + data <- data %>% # attach a uplift scale as the last column lookup_uplift() %>% @@ -84,3 +86,21 @@ lookup_uplift <- function(data) { return(data) } + +#' The latest financial year for Cost uplift setting +#' +#' @description Get the latest year for cost uplift +#' latest_cost_year() is hard coded in cost_uplift(). +#' 2223 is not changed automatically with time passes. +#' It is changed only when we get a new instruction from somewhere about cost uplift. +#' Do not change unless specific instructions. +#' Changing this means that we need to change cost_uplift(). +#' +#' @return The financial year format +#' +#' @export +#' +#' @family initialisation +latest_cost_year <- function() { + "2223" +} diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 0bb804c7d..ecb6fc126 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -141,7 +141,7 @@ create_episode_file <- function( slf_deaths_lookup ) %>% add_activity_after_death_flag(year, - deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>% + deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi() ) %>% load_ep_file_vars(year) @@ -175,7 +175,73 @@ create_episode_file <- function( sc_social_worker = NA, sc_type_of_housing = NA, sc_meals = NA, - sc_day_care = NA + sc_day_care = NA, + social_care_id = NA, + sc_dementia = NA, + sc_learning_disability = NA, + sc_mental_health_disorders = NA, + sc_physical_and_sensory_disability = NA, + sc_drugs = NA, + sc_alcohol = NA, + sc_palliative_care = NA, + sc_carer = NA, + sc_elderly_frail = NA, + sc_neurological_condition = NA, + sc_autism = NA, + sc_other_vulnerable_groups = NA, + ch_provider_description = NA + ) + } + + if (!check_year_valid(year, type = "homelessness")) { + episode_file <- episode_file %>% + dplyr::mutate( + hl1_12_months_post_app = NA, + hl1_12_months_pre_app = NA, + hl1_6after_ep = NA, + hl1_6before_ep = NA, + hl1_application_ref = NA, + hl1_completeness = NA, + hl1_during_ep = NA, + hl1_in_fy = NA, + hl1_property_type = NA, + hl1_reason_ftm = NA, + hl1_sending_lca = NA + ) + } + + if (!check_year_valid(year, type = "dd")) { + episode_file <- episode_file %>% + dplyr::mutate( + cij_delay = NA, + dd_quality = NA, + dd_responsible_lca = NA, + delay_end_reason = NA, + primary_delay_reason = NA, + secondary_delay_reason = NA, + ) + } + + if (!check_year_valid(year, type = "dn")) { + episode_file <- episode_file %>% + dplyr::mutate( + ccm = NA, + total_no_dn_contacts = NA + ) + } + + if (!check_year_valid(year, type = "cost_dna")) { + episode_file <- episode_file %>% + dplyr::mutate( + cost_total_net_inc_dnas = NA + ) + } + + if (!check_year_valid(year, type = "dn")) { + episode_file <- episode_file %>% + dplyr::mutate( + ccm = NA, + total_no_dn_contacts = NA ) } @@ -199,6 +265,8 @@ create_episode_file <- function( #' #' @return `data` with only the `vars_to_keep` kept store_ep_file_vars <- function(data, year, vars_to_keep) { + cli::cli_alert_info("Store episode file variables function started at {Sys.time()}") + tempfile_path <- get_file_path( directory = get_year_dir(year), file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"), @@ -236,6 +304,8 @@ store_ep_file_vars <- function(data, year, vars_to_keep) { #' #' @return The full SLF data. load_ep_file_vars <- function(data, year) { + cli::cli_alert_info("Load episode file variable function started at {Sys.time()}") + tempfile_path <- get_file_path( directory = get_year_dir(year), file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"), @@ -263,6 +333,8 @@ load_ep_file_vars <- function(data, year) { #' #' @return A data frame with CIJ markers filled in for those missing. fill_missing_cij_markers <- function(data) { + cli::cli_alert_info("Fill missing cij markers function started at {Sys.time()}") + fixable_data <- data %>% dplyr::filter( .data[["recid"]] %in% c("01B", "04B", "GLS", "02B", "DD") & !is.na(.data[["chi"]]) @@ -317,6 +389,8 @@ fill_missing_cij_markers <- function(data) { #' #' @return The data with CIJ variables corrected. correct_cij_vars <- function(data) { + cli::cli_alert_info("Correct cij variables function started at {Sys.time()}") + check_variables_exist( data, c("chi", "recid", "cij_admtype", "cij_pattype_code") @@ -358,6 +432,8 @@ correct_cij_vars <- function(data) { #' #' @return The data with cost including dna. create_cost_inc_dna <- function(data) { + cli::cli_alert_info("Create cost inc dna function started at {Sys.time()}") + check_variables_exist(data, c("cost_total_net", "attendance_status")) # Create cost including DNAs and modify costs @@ -382,6 +458,8 @@ create_cost_inc_dna <- function(data) { #' #' @return The data unchanged (the cohorts are written to disk) create_cohort_lookups <- function(data, year, update = latest_update()) { + cli::cli_alert_info("Create cohort lookups function started at {Sys.time()}") + create_demographic_cohorts( data, year, @@ -421,6 +499,8 @@ join_cohort_lookups <- function( col_select = c("anon_chi", "service_use_cohort") ) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Join cohort lookups function started at {Sys.time()}") + join_cohort_lookups <- data %>% dplyr::left_join( demographic_cohort, @@ -447,6 +527,13 @@ join_sc_client <- function(data, year, sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(), file_type = c("episode", "individual")) { + cli::cli_alert_info("Join social care client function started at {Sys.time()}") + + if (!check_year_valid(year, type = "client")) { + data_file <- data + return(data_file) + } + if (file_type == "episode") { # Match on client variables by chi data_file <- data %>% diff --git a/R/create_individual_file.R b/R/create_individual_file.R index c98531310..273761efc 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -78,7 +78,7 @@ create_individual_file <- function( if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { individual_file <- individual_file %>% - aggregate_by_chi(exclude_sc_var = TRUE) + aggregate_by_chi(year = year, exclude_sc_var = TRUE) } else { individual_file <- individual_file %>% aggregate_ch_episodes() %>% @@ -115,6 +115,9 @@ create_individual_file <- function( hc_personal_hours = NA, hc_non_personal_hours = NA, hc_reablement_hours = NA, + hc_non_personal_hours_cost = NA, + hc_personal_hours_cost = NA, + hc_reablement_hours_cost = NA, at_alarms = NA, at_telecare = NA, sds_option_1 = NA, @@ -125,10 +128,33 @@ create_individual_file <- function( sc_support_from_unpaid_carer = NA, sc_social_worker = NA, sc_meals = NA, - sc_day_care = NA + sc_day_care = NA, + sc_type_of_housing = NA, + count_not_known = NA, + sc_latest_submission = NA, + social_care_id = NA, + person_id = NA, + sc_alcohol = NA, + sc_autism = NA, + sc_carer = NA, + sc_dementia = NA, + sc_drugs = NA, + sc_elderly_frail = NA, + sc_learning_disability = NA, + sc_mental_health_disorders = NA, + sc_neurological_condition = NA, + sc_other_vulnerable_groups = NA, + sc_palliative_care = NA, + sc_physical_and_sensory_disability = NA ) } + if (!check_year_valid(year, type = "homelessness")) { + individual_file <- individual_file %>% + dplyr::mutate(hl1_in_fy = NA) + } + + if (anon_chi_out) { individual_file <- individual_file %>% tidyr::replace_na(list(chi = "")) %>% @@ -260,6 +286,8 @@ add_all_columns <- function(episode_file, year) { #' @param condition Condition to create new columns based on #' @family individual_file add_acute_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add acute columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% @@ -271,6 +299,8 @@ add_acute_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_mat_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add maternity columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% @@ -282,6 +312,8 @@ add_mat_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_mh_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add mental health columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% @@ -293,6 +325,8 @@ add_mh_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_gls_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add geriatric long stay columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% @@ -304,6 +338,8 @@ add_gls_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_op_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add outpatient columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file <- episode_file %>% add_standard_cols(prefix, condition) @@ -327,6 +363,8 @@ add_op_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_ae_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add A&E columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% @@ -338,6 +376,8 @@ add_ae_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_pis_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add prescribing columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% @@ -349,6 +389,8 @@ add_pis_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_ooh_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add out of hours columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file <- episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% @@ -384,6 +426,8 @@ add_ooh_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_dn_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add district nursing columns function started at {Sys.time()}") + condition <- substitute(condition) if ("total_no_dn_contacts" %in% names(episode_file)) { episode_file %>% @@ -407,6 +451,8 @@ add_dn_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_cmh_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add communicty mental health columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -418,6 +464,8 @@ add_cmh_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_dd_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add delayed discharges columns function started at {Sys.time()}") + condition <- substitute(condition) condition_delay <- substitute(condition & primary_delay_reason != "9") episode_file <- episode_file %>% @@ -439,6 +487,8 @@ add_dd_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_nsu_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add non service users columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -450,6 +500,8 @@ add_nsu_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_nrs_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add nrs columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -461,6 +513,8 @@ add_nrs_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_hl1_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add homelessness columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) @@ -471,6 +525,8 @@ add_hl1_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_ch_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add care home columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -501,6 +557,8 @@ add_ch_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_hc_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add home care columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file <- episode_file %>% @@ -545,6 +603,8 @@ add_hc_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_at_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add alarms telecare columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -559,6 +619,8 @@ add_at_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_sds_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add SDS columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -581,6 +643,8 @@ add_sds_columns <- function(episode_file, prefix, condition) { #' cij_pattype (lgl) #' @family individual_file add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) { + cli::cli_alert_info("Add ipdc columns function started at {Sys.time()}") + condition_i <- substitute(eval(condition) & ipdc == "I") episode_file <- episode_file %>% dplyr::mutate( @@ -625,6 +689,8 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi #' @param cost Whether to create prefix_cost col, e.g. "Acute_cost" #' @family individual_file add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE, cost = FALSE) { + cli::cli_alert_info("Add standard columns function started at {Sys.time()}") + if (episode) { episode_file <- dplyr::mutate(episode_file, "{prefix}_episodes" := dplyr::if_else(eval(condition), 1L, NA_integer_)) } @@ -703,6 +769,8 @@ recode_gender <- function(episode_file) { #' "dementia" and "dementia_date" #' @family individual_file condition_cols <- function() { + cli::cli_alert_info("Return condition columns function started at {Sys.time()}") + conditions <- slfhelper::ltc_vars date_cols <- paste0(conditions, "_date") all_cols <- c(conditions, date_cols) @@ -759,6 +827,8 @@ clean_individual_file <- function(individual_file, year) { #' #' @inheritParams clean_individual_file clean_up_gender <- function(individual_file) { + cli::cli_alert_info("Clean up gender column function started at {Sys.time()}") + individual_file %>% dplyr::mutate( gender = dplyr::case_when( @@ -785,6 +855,8 @@ join_slf_lookup_vars <- function(individual_file, col_select = c("gpprac", "cluster", "hbpraccode") ), hbrescode_var = "hb2018") { + cli::cli_alert_info("Join slf lookup variables function started at {Sys.time()}") + individual_file <- individual_file %>% dplyr::left_join( slf_postcode_lookup, diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R index cd8d18677..b6aa85bb7 100644 --- a/R/fill_ch_names.R +++ b/R/fill_ch_names.R @@ -213,7 +213,7 @@ fill_ch_names <- function(ch_data, "match_mean2", # "open_interval", "ch_admission_date", - "qtr_start", + "period_start_date", "ch_date_registered", "latest_close_date", "ch_active", @@ -305,7 +305,6 @@ fill_ch_names <- function(ch_data, "unique_identifier", "matching_quality_indicator", "sending_location", - "latest_sc_id", "chi", "ch_name", "ch_postcode", @@ -320,9 +319,6 @@ fill_ch_names <- function(ch_data, "ch_admission_date", "ch_discharge_date", "age", - "record_date", - "qtr_start", - "latest_flag", "gender", "dob", "postcode", @@ -763,7 +759,6 @@ fill_ch_names <- function(ch_data, ## produce output ---- col_output <- c( "sending_location", - "latest_sc_id", "chi", "ch_name", "ch_postcode", @@ -778,9 +773,6 @@ fill_ch_names <- function(ch_data, "ch_admission_date", "ch_discharge_date", "age", - "record_date", - "qtr_start", - "latest_flag", "gender", "dob", "postcode", diff --git a/R/fill_geographies.R b/R/fill_geographies.R index c9aee6355..5638c8758 100644 --- a/R/fill_geographies.R +++ b/R/fill_geographies.R @@ -16,6 +16,8 @@ fill_geographies <- function( get_slf_gpprac_path(), col_select = c("gpprac", "cluster", "hbpraccode") )) { + cli::cli_alert_info("Fill geographies function started at {Sys.time()}") + check_variables_exist(data, c( "chi", "postcode", diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index 2752b634a..9c21cabe9 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -16,12 +16,14 @@ get_boxi_extract_path <- function( "ae", "ae_cup", "acute", + "acute_cup", "cmh", "deaths", "dn", "gp_ooh-c", "gp_ooh-d", "gp_ooh-o", + "gp_ooh_cup", "homelessness", "maternity", "mh", @@ -44,11 +46,13 @@ get_boxi_extract_path <- function( "ae" ~ "anon-A&E-episode-level-extract", "ae_cup" ~ "anon-A&E-UCD-CUP-extract", "acute" ~ "anon-Acute-episode-level-extract", + "acute_cup" ~ "anon-Acute-CUP-extract", "cmh" ~ "anon-Community-MH-contact-level-extract", "dn" ~ "anon-District-Nursing-contact-level-extract", "gp_ooh-c" ~ "anon-GP-OoH-consultations-extract", "gp_ooh-d" ~ "anon-GP-OoH-diagnosis-extract", "gp_ooh-o" ~ "anon-GP-OoH-outcomes-extract", + "gp_ooh_cup" ~ "anon-GP-OoH-CUP-extract", "homelessness" ~ "anon-Homelessness-extract", "maternity" ~ "anon-Maternity-episode-level-extract", "mh" ~ "anon-Mental-Health-episode-level-extract", @@ -82,9 +86,11 @@ get_boxi_extract_path <- function( #' #' @return an [fs::path()] to a dummy file which can be used with targets. get_dummy_boxi_extract_path <- function() { - get_file_path( + dummy_path <- get_file_path( directory = get_dev_dir(), file_name = ".dummy", create = TRUE ) + + return(dummy_path) } diff --git a/R/get_sc_lookup_paths.R b/R/get_sc_lookup_paths.R index d201f416f..90a08e7e1 100644 --- a/R/get_sc_lookup_paths.R +++ b/R/get_sc_lookup_paths.R @@ -38,11 +38,18 @@ get_sc_demog_lookup_path <- function(update = latest_update(), ...) { #' @family social care lookup file paths #' @seealso [get_file_path()] for the generic function. get_sc_client_lookup_path <- function(year, update = latest_update(), ...) { - sc_client_lookup_path <- get_file_path( - directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_client_lookup"), - file_name = stringr::str_glue("anon-sc_client_lookup_{year}_{update}.parquet"), - ... - ) - - return(sc_client_lookup_path) + if (!check_year_valid(year, type = "client")) { + return(get_dummy_boxi_extract_path()) + } else { + sc_client_lookup_path <- get_file_path( + directory = fs::path( + get_slf_dir(), + "Social_care", + "processed_sc_client_lookup" + ), + file_name = stringr::str_glue("anon-sc_client_lookup_{year}_{update}.parquet"), + ... + ) + return(sc_client_lookup_path) + } } diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R index 390a27a5a..e06627e54 100644 --- a/R/get_slf_lookup_paths.R +++ b/R/get_slf_lookup_paths.R @@ -73,6 +73,10 @@ get_slf_deaths_lookup_path <- function(year, ...) { #' SLF death dates File Path #' #' @description Get the full path to the BOXI NRS Deaths lookup file for all financial years +#' Note this name is very similar to the existing slf_deaths_lookup_path +#' which returns the path for the refined_death with deceased flag for each financial year. +#' This function will return the combined financial years lookup +#' i.e. all years put together. #' #' @param ... additional arguments passed to [get_file_path()] #' @param update the update month (defaults to use [latest_update()]) @@ -80,17 +84,17 @@ get_slf_deaths_lookup_path <- function(year, ...) { #' @export #' @family slf lookup file path #' @seealso [get_file_path()] for the generic function. - -get_all_slf_deaths_lookup_path <- function(update = latest_update(), ...) { - # Note this name is very similar to the existing slf_deaths_lookup_path which returnsthe path for - # the processed BOXI extract for each financial year. This function will return the combined financial +get_combined_slf_deaths_lookup_path <- function(update = latest_update(), ...) { + # Note this name is very similar to the existing slf_deaths_lookup_path which returns the path for + # the refined_death with deceased flag for each financial year. + # This function will return the combined financial # years lookup i.e. all years put together. - all_slf_deaths_lookup_path <- get_file_path( + combined_slf_deaths_lookup_path <- get_file_path( directory = fs::path(get_slf_dir(), "Deaths"), - file_name = stringr::str_glue("anon-all_slf_deaths_lookup_{update}.parquet"), + file_name = stringr::str_glue("anon-combined_slf_deaths_lookup_{update}.parquet"), ... ) - return(all_slf_deaths_lookup_path) + return(combined_slf_deaths_lookup_path) } diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R index 7fbf203d0..5e61a2082 100644 --- a/R/join_deaths_data.R +++ b/R/join_deaths_data.R @@ -10,10 +10,13 @@ join_deaths_data <- function( data, year, slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Join deaths data function started at {Sys.time()}") + return( data %>% dplyr::left_join( - slf_deaths_lookup, + slf_deaths_lookup %>% + dplyr::distinct(chi, .keep_all = TRUE), by = "chi", na_matches = "never", relationship = "many-to-one" diff --git a/R/join_sparra_hhg.R b/R/join_sparra_hhg.R index dafaca867..c22e1a9c3 100644 --- a/R/join_sparra_hhg.R +++ b/R/join_sparra_hhg.R @@ -5,6 +5,8 @@ #' @return The data including the SPARRA and HHG variables matched #' on to the episode file. join_sparra_hhg <- function(data, year) { + cli::cli_alert_info("Join SPARRA and HHG function started at {Sys.time()}") + if (check_year_valid(year, "sparra")) { data <- dplyr::left_join( data, diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index ef4aa4754..d4162b619 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -12,6 +12,14 @@ link_delayed_discharge_eps <- function( episode_file, year, dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Link delayed discharge to episode file function started at {Sys.time()}") + + if (!check_year_valid(year, type = "dd")) { + episode_file <- episode_file + return(episode_file) + } + + names_ep <- names(episode_file) episode_file <- episode_file %>% dplyr::mutate( # remember to revoke the cij_end_date with dummy_cij_end @@ -284,6 +292,11 @@ link_delayed_discharge_eps <- function( )) %>% dplyr::group_by(.data$chi, .data$cij_marker) %>% dplyr::mutate(cij_delay = max(.data$has_delay)) %>% + dplyr::mutate(cij_delay = dplyr::if_else(cij_delay == "0", + FALSE, + TRUE, + missing = NA + )) %>% dplyr::ungroup() %>% # add yearstay and monthly beddays # count_last = TRUE because DD counts last day and not the first @@ -297,37 +310,43 @@ link_delayed_discharge_eps <- function( yearstay = rowSums(dplyr::pick(dplyr::ends_with("_beddays"))) ) %>% # tidy up and rename columns to match the format of episode files + # keep variables from ep files dplyr::select( + -c( + "ep_file_row_id", + "year", + "recid", + "record_keydate1", + "record_keydate2", + "postcode", + "hbtreatcode", + "location", + "spec", + ## following are dummy variables + "cij_start_date_lower", + "cij_end_date_upper", + "cij_end_month", + "is_dummy_cij_start", + "dummy_cij_start", + "is_dummy_cij_end", + "dummy_cij_end", + "datediff_start", + "datediff_end", + "has_delay", + "is_dummy_keydate2", + "dummy_keydate2", + "dummy_id" + ) + ) %>% + dplyr::rename( "year" = "year_dd", "recid" = "recid_dd", "record_keydate1" = "record_keydate1_dd", "record_keydate2" = "record_keydate2_dd", - "smrtype", - "chi", - "gender", - "dob", - "age", - "gpprac", "postcode" = "postcode_dd", - "dd_responsible_lca", "hbtreatcode" = "hbtreatcode_dd", - "delay_end_reason", - "primary_delay_reason", - "secondary_delay_reason", - "cij_marker", - "cij_start_date", - "cij_end_date", - "cij_pattype_code", - "cij_ipdc", - "cij_admtype", - "cij_adm_spec", - "cij_dis_spec", - "cij_delay", - "location", "spec" = "spec_dd", - "dd_quality", - dplyr::ends_with("_beddays"), - "yearstay" + "location" = "location_dd" ) %>% # Combine DD with episode data dplyr::bind_rows( @@ -343,7 +362,19 @@ link_delayed_discharge_eps <- function( "dummy_cij_end" ) ) - ) + ) %>% + # populate cij_delay dd details back to ep + dplyr::group_by(chi, cij_marker) %>% + dplyr::mutate( + has_dd = any(recid == "DD"), + delay_dd = any(cij_delay) + ) %>% + dplyr::ungroup() %>% + dplyr::mutate(cij_delay = dplyr::if_else(has_dd, + delay_dd, + cij_delay + )) %>% + dplyr::select(-c("has_dd", "delay_dd", "original_admission_date", "amended_dates")) return(linked_data) } diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R index 3ed052be8..f0522c00d 100644 --- a/R/match_on_ltcs.R +++ b/R/match_on_ltcs.R @@ -12,6 +12,8 @@ match_on_ltcs <- function( data, year, ltc_data = read_file(get_ltcs_path(year)) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Match on LTCs function started at {Sys.time()}") + # Match on LTC lookup matched <- dplyr::left_join( data, diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R index d91a77ab3..c46f175c7 100644 --- a/R/process_extract_acute.R +++ b/R/process_extract_acute.R @@ -12,7 +12,10 @@ #' @return the final data as a [tibble][tibble::tibble-package]. #' @export #' @family process extracts -process_extract_acute <- function(data, year, write_to_disk = TRUE) { +process_extract_acute <- function(data, + year, + acute_cup_path = get_boxi_extract_path(year, "acute_cup"), + write_to_disk = TRUE) { # Only run for a single year stopifnot(length(year) == 1L) @@ -59,7 +62,48 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) { # Add oldtadm as a factor with labels dplyr::mutate(oldtadm = factor(.data$oldtadm, levels = 0L:8L - )) + )) %>% + dplyr::mutate( + unique_row_num = dplyr::row_number() + ) + + acute_cup <- read_file( + path = acute_cup_path, + col_type = readr::cols( + "anon_chi" = readr::col_character(), + "Acute Admission Date" = readr::col_date(format = "%Y/%m/%d %T"), + "Acute Discharge Date" = readr::col_date(format = "%Y/%m/%d %T"), + "Acute Admission Type Code" = readr::col_character(), + "Acute Discharge Type Code" = readr::col_character(), + "Case Reference Number [C]" = readr::col_character(), + "CUP Marker" = readr::col_integer(), + "CUP Pathway Name" = readr::col_character() + ) + ) %>% + dplyr::select( + anon_chi = "anon_chi", + case_reference_number = "Case Reference Number [C]", + record_keydate1 = "Acute Admission Date", + record_keydate2 = "Acute Discharge Date", + tadm = "Acute Admission Type Code", + disch = "Acute Discharge Type Code", + cup_marker = "CUP Marker", + cup_pathway = "CUP Pathway Name" + ) %>% + dplyr::distinct() %>% + slfhelper::get_chi() + + acute_clean <- acute_clean %>% + dplyr::left_join(acute_cup, + by = c( + "record_keydate1", + "record_keydate2", + "case_reference_number", + "chi", + "tadm", + "disch" + ) + ) acute_processed <- acute_clean %>% dplyr::select( @@ -106,7 +150,9 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) { "cost_total_net", tidyselect::ends_with("_beddays"), tidyselect::ends_with("_cost"), - "uri" + "uri", + "cup_marker", + "cup_pathway" ) %>% dplyr::arrange(.data$chi, .data$record_keydate1) %>% slfhelper::get_anon_chi() diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R index d7b063ace..9d47dd5f0 100644 --- a/R/process_extract_alarms_telecare.R +++ b/R/process_extract_alarms_telecare.R @@ -41,12 +41,12 @@ process_extract_alarms_telecare <- function( "smrtype", "chi", "dob", + # "person_id", "gender", "postcode", "sc_send_lca", "record_keydate1", "record_keydate2", - "person_id", "sc_latest_submission" ) %>% slfhelper::get_anon_chi() diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R index 54789eac9..dbf817af4 100644 --- a/R/process_extract_care_home.R +++ b/R/process_extract_care_home.R @@ -39,11 +39,11 @@ process_extract_care_home <- function( is_date_in_fyyear(year, .data$record_keydate1, .data$record_keydate2) ) %>% # remove any episodes where the latest submission was before the current year + # this is what stops cases being in future files dplyr::filter( substr(.data$sc_latest_submission, 1L, 4L) >= convert_fyyear_to_year(year) ) - # Data Cleaning --------------------------------------- source_ch_clean <- ch_data %>% # create variables @@ -115,7 +115,7 @@ process_extract_care_home <- function( "recid", "smrtype", "chi", - "person_id", + # "person_id", "dob", "gender", "postcode", diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R index c889c3db7..e8d07b9e8 100644 --- a/R/process_extract_gp_ooh.R +++ b/R/process_extract_gp_ooh.R @@ -12,7 +12,10 @@ #' @return the final data as a [tibble][tibble::tibble-package]. #' @export #' @family process extracts -process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) { +process_extract_gp_ooh <- function(year, + data_list, + gp_ooh_cup_path = get_boxi_extract_path(year, "gp_ooh_cup"), + write_to_disk = TRUE) { diagnosis_extract <- process_extract_ooh_diagnosis(data_list[["diagnosis"]], year) outcomes_extract <- process_extract_ooh_outcomes(data_list[["outcomes"]], year) consultations_extract <- process_extract_ooh_consultations(data_list[["consultations"]], year) @@ -93,6 +96,39 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) { ) %>% dplyr::ungroup() + ## Link CUP Marker ----- + gp_ooh_cup_file <- read_file( + path = gp_ooh_cup_path, + col_type = readr::cols( + "GP OOH Consultation Start Date" = readr::col_date(format = "%Y/%m/%d %T"), + "GP OOH Consultation Start Time" = readr::col_time(""), + "GUID" = readr::col_character(), + "CUP Marker" = readr::col_integer(), + "CUP Pathway Name" = readr::col_character() + ) + ) %>% + dplyr::select( + record_keydate1 = "GP OOH Consultation Start Date", + keytime1 = "GP OOH Consultation Start Time", + ooh_case_id = "GUID", + cup_marker = "CUP Marker", + cup_pathway = "CUP Pathway Name" + ) %>% + dplyr::distinct( + .data$record_keydate1, + .data$keytime1, + .data$ooh_case_id, + .keep_all = TRUE + ) + + ooh_clean <- ooh_clean %>% + dplyr::left_join(gp_ooh_cup_file, + by = dplyr::join_by( + "ooh_case_id", + "record_keydate1", + "keytime1" + ) + ) ## Save Outfile ------------------------------------- @@ -122,7 +158,9 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) { tidyselect::starts_with("ooh_outcome"), "cost_total_net", tidyselect::ends_with("_cost"), - "ooh_case_id" + "ooh_case_id", + cup_marker, + cup_pathway ) %>% slfhelper::get_anon_chi() diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R index 831496bd2..651be172d 100644 --- a/R/process_extract_home_care.R +++ b/R/process_extract_home_care.R @@ -96,7 +96,7 @@ process_extract_home_care <- function( "cost_total_net", "hc_provider", "hc_reablement", - "person_id" + # "person_id" ) %>% slfhelper::get_anon_chi() diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R index be53f35ff..f8e5f8579 100644 --- a/R/process_extract_sds.R +++ b/R/process_extract_sds.R @@ -41,11 +41,13 @@ process_extract_sds <- function( "smrtype", "chi", "dob", + # "person_id", "gender", "postcode", + "sc_send_lca", "record_keydate1", "record_keydate2", - "sc_send_lca" + "sc_latest_submission" ) %>% slfhelper::get_anon_chi() diff --git a/R/process_it_chi_deaths.R b/R/process_it_chi_deaths.R index 85354880b..cfca32e0f 100644 --- a/R/process_it_chi_deaths.R +++ b/R/process_it_chi_deaths.R @@ -17,10 +17,9 @@ process_it_chi_deaths <- function(data, write_to_disk = TRUE) { dplyr::desc(.data$death_date_chi) ) %>% dplyr::distinct(.data$chi, .keep_all = TRUE) %>% - # Use the NRS death_date unless it isn't there - dplyr::mutate( - death_date = dplyr::coalesce(.data$death_date_nrs, .data$death_date_chi) - ) %>% + # remove death_date_nrs as this is the nrs weekly unvalidated data and we should not use this. + # the boxi nrs death date is more reliable as this is provided monthly and is validated. + dplyr::select(.data$chi, .data$death_date_chi) %>% slfhelper::get_anon_chi() if (write_to_disk) { diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index 9a5c21974..edc083cd2 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -1,36 +1,29 @@ #' Create the SLF Deaths lookup #' -#' @description Currently this just uses the NRS death dates 'as is', with no -#' corrections or modifications, it is expected that this will be expanded to -#' use the CHI deaths extract from IT as well as taking into account data in -#' the episode file to assess the validity of a death date. +#' @description Use all-year refined death data to produce year-specific +#' slf_deaths_lookup with deceased flag added. #' #' @param year The year to process, in FY format. -#' @param nrs_deaths_data NRS deaths data. -#' @param chi_deaths_data IT CHI deaths data. +#' @param refined_death refined death date combining nrs and it_chi. #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. #' -#' @return a [tibble][tibble::tibble-package] containing the episode file +#' @return a [tibble][tibble::tibble-package] add deceased flag to deaths #' @export process_slf_deaths_lookup <- function( year, - nrs_deaths_data = read_file( - get_source_extract_path(year, "deaths"), - col_select = c("chi", "record_keydate1") - ), - chi_deaths_data = read_file(get_slf_chi_deaths_path()), + refined_death = read_file(get_combined_slf_deaths_lookup_path()), write_to_disk = TRUE) { - slf_deaths_lookup <- nrs_deaths_data %>% - # Only modification over 'raw' NRS is to keep the earliest death date - dplyr::select("chi", "record_keydate1") %>% - dplyr::arrange(.data$record_keydate1) %>% - dplyr::distinct(.data$chi, .keep_all = TRUE) %>% + # create slf deaths lookup + slf_deaths_lookup <- refined_death %>% + slfhelper::get_chi() %>% + # Filter the chi death dates to the FY as the lookup is by FY + dplyr::filter(fy == year) %>% + # use the BOXI NRS death date by default, but if it's missing, use the chi death date. dplyr::mutate( - death_date = .data$record_keydate1, - deceased = TRUE, - .keep = "unused" + deceased = TRUE ) %>% + # save anon chi on disk slfhelper::get_anon_chi() if (write_to_disk) { diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R index b0dc30d51..30772383e 100644 --- a/R/process_lookup_homelessness.R +++ b/R/process_lookup_homelessness.R @@ -13,6 +13,12 @@ create_homelessness_lookup <- function( year, homelessness_data = read_file(get_source_extract_path(year, "homelessness")) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Create homelessness lookup function started at {Sys.time()}") + + # Specify years available for running + if (year < "1617") { + return(NULL) + } homelessness_lookup <- homelessness_data %>% dplyr::distinct(.data$chi, .data$record_keydate1, .data$record_keydate2) %>% tidyr::drop_na(.data$chi) %>% @@ -35,6 +41,13 @@ create_homelessness_lookup <- function( #' @export add_homelessness_flag <- function(data, year, lookup = create_homelessness_lookup(year)) { + cli::cli_alert_info("Add homelessness flag function started at {Sys.time()}") + + if (!check_year_valid(year, type = "homelessness")) { + data <- data + return(data) + } + data <- data %>% dplyr::left_join( lookup %>% @@ -59,6 +72,13 @@ add_homelessness_flag <- function(data, year, #' @return the final data as a [tibble][tibble::tibble-package]. #' @export add_homelessness_date_flags <- function(data, year, lookup = create_homelessness_lookup(year)) { + cli::cli_alert_info("Add homelessness date flags function started at {Sys.time()}") + + if (!check_year_valid(year, type = "homelessness")) { + data <- data + return(data) + } + lookup <- lookup %>% dplyr::filter(!(is.na(.data$record_keydate2))) %>% dplyr::rename( diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index 69818def3..91c08632d 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -18,30 +18,36 @@ process_lookup_sc_client <- year, sc_demographics = read_file(get_sc_demog_lookup_path()) %>% slfhelper::get_chi() %>% - dplyr::select(c("sending_location", "social_care_id", "chi")), + dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")), write_to_disk = TRUE) { - client_clean <- data %>% - # Replace 'unknown' responses with NA - dplyr::mutate( - dplyr::across( - c( - "support_from_unpaid_carer", - "social_worker", - "meals", - "living_alone", - "day_care" - ), - dplyr::na_if, - 9L - ), - type_of_housing = dplyr::na_if(.data$type_of_housing, 6L) + # Specify years available for running + if (year < "1718") { + return(NULL) + } + + # Match to demographics lookup to get CHI + sc_client_demographics <- data %>% + dplyr::right_join( + sc_demographics, + by = c("sending_location", "social_care_id") ) %>% - dplyr::group_by(.data$sending_location, .data$social_care_id) %>% + # need period for the replace sc id with latest function + dplyr::mutate(period = ifelse(!(is.na(.data$financial_quarter)), paste0(.data$financial_year, "Q", financial_quarter), + financial_year + )) %>% + replace_sc_id_with_latest() %>% + # remove cases with no data in client + dplyr::filter(!(is.na(.data$financial_year))) %>% + dplyr::select(-.data$latest_sc_id, -.data$latest_flag, -.data$period) + + + client_clean <- sc_client_demographics %>% + dplyr::group_by(.data$sending_location, .data$social_care_id, .data$chi) %>% # summarise to take last submission dplyr::summarise(dplyr::across( c( "dementia", - "mental_health_problems", + "mental_health_disorders", "learning_disability", "physical_and_sensory_disability", "drugs", @@ -70,19 +76,9 @@ process_lookup_sc_client <- "social_worker", "meals", "living_alone", - "day_care" - ), - tidyr::replace_na, - 9L - ), - type_of_housing = tidyr::replace_na(.data$type_of_housing, 6L) - ) %>% - # factor labels - dplyr::mutate( - dplyr::across( - c( + "day_care", "dementia", - "mental_health_problems", + "mental_health_disorders", "learning_disability", "physical_and_sensory_disability", "drugs", @@ -92,37 +88,64 @@ process_lookup_sc_client <- "elderly_frail", "neurological_condition", "autism", - "other_vulnerable_groups" + "other_vulnerable_groups", + "type_of_housing" ), - factor, - levels = c(0L, 1L), - labels = c("No", "Yes") - ), + tidyr::replace_na, 9L + ) + ) %>% + # factor labels + dplyr::mutate( dplyr::across( c( "living_alone", "support_from_unpaid_carer", "social_worker", "meals", - "day_care" + "day_care", + "dementia", + "mental_health_disorders", + "learning_disability", + "physical_and_sensory_disability", + "drugs", + "alcohol", + "palliative_care", + "carer", + "elderly_frail", + "neurological_condition", + "autism", + "other_vulnerable_groups" ), factor, levels = c(0L, 1L, 9L), labels = c("No", "Yes", "Not Known") ), type_of_housing = factor(.data$type_of_housing, - levels = 1L:6L + levels = 1L:9L, + labels = c( + "Mainstream", # 1 + "Supported", # 2 + "Long Stay Care Home", # 3 + "Hospital or other medical establishment", # 4 + "Homeless", # 5 + "Penal Institutions", # 6 + "Not Known", # 7 + "Other", # 8 + "Not Known" # 9 + ) ) ) %>% # rename variables dplyr::rename_with( - .cols = -c("sending_location", "social_care_id"), + .cols = -c("sending_location", "social_care_id", "chi"), .fn = ~ paste0("sc_", .x) ) + sc_client_lookup <- client_clean %>% # reorder dplyr::select( + "chi", "sending_location", "social_care_id", "sc_living_alone", @@ -130,15 +153,23 @@ process_lookup_sc_client <- "sc_social_worker", "sc_type_of_housing", "sc_meals", - "sc_day_care" - ) + "sc_day_care", + "sc_dementia", + "sc_learning_disability", + "sc_mental_health_disorders", + "sc_physical_and_sensory_disability", + "sc_drugs", + "sc_alcohol", + "sc_palliative_care", + "sc_carer", + "sc_elderly_frail", + "sc_neurological_condition", + "sc_autism", + "sc_other_vulnerable_groups" + ) %>% + create_person_id() + - # Match to demographics lookup to get CHI - sc_client_lookup <- sc_client_lookup %>% - dplyr::left_join( - sc_demographics, - by = c("sending_location", "social_care_id") - ) sc_client_lookup <- dplyr::mutate(sc_client_lookup, count_not_known = rowSums( @@ -147,8 +178,21 @@ process_lookup_sc_client <- "sc_living_alone", "sc_support_from_unpaid_carer", "sc_social_worker", + "sc_type_of_housing", "sc_meals", - "sc_day_care" + "sc_day_care", + "sc_dementia", + "sc_learning_disability", + "sc_mental_health_disorders", + "sc_physical_and_sensory_disability", + "sc_drugs", + "sc_alcohol", + "sc_palliative_care", + "sc_carer", + "sc_elderly_frail", + "sc_neurological_condition", + "sc_autism", + "sc_other_vulnerable_groups" ) )) == "Not Known", na.rm = TRUE @@ -156,7 +200,7 @@ process_lookup_sc_client <- ) %>% dplyr::arrange(.data$chi, .data$count_not_known) %>% dplyr::distinct(.data$chi, .keep_all = TRUE) %>% - dplyr::select(-.data$sending_location) %>% + dplyr::select(-.data$sending_location, -.data$count_not_known) %>% slfhelper::get_anon_chi() if (write_to_disk) { diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R index d6e24c87f..1b29c414c 100644 --- a/R/process_lookup_sc_demographics.R +++ b/R/process_lookup_sc_demographics.R @@ -143,8 +143,8 @@ process_lookup_sc_demographics <- function( dplyr::ungroup() # check to make sure all cases of chi are still there - dplyr::n_distinct(sc_demog_lookup$chi) # 524810 - dplyr::n_distinct(sc_demog_lookup$social_care_id) # 636404 + dplyr::n_distinct(sc_demog_lookup$chi) # 525,834 + dplyr::n_distinct(sc_demog_lookup$social_care_id) # 637,422 sc_demog_lookup <- sc_demog_lookup %>% slfhelper::get_anon_chi() diff --git a/R/process_refined_death.R b/R/process_refined_death.R new file mode 100644 index 000000000..dc7663221 --- /dev/null +++ b/R/process_refined_death.R @@ -0,0 +1,63 @@ +#' Process the refined death data +#' +#' @description This will process +#' year-specific BOXI NRS death file (written to disk), and +#' combine them together to get all years NRS file (Not written to disk). +#' Then join all NRS deaths with IT CHI death data +#' to get an all-year refined death file (written to disk). +#' +#' @param it_chi_deaths it chi death data +#' @param write_to_disk write the result to disk or not. +#' +#' @return refined_death The processed lookup of deaths combining NRS and IT_CHI. +#' @export +#' @family process extracts +process_refined_death <- function( + it_chi_deaths = read_file(get_slf_chi_deaths_path()), + write_to_disk = TRUE) { + years_list <- years_to_run() + + nrs_all_years <- lapply(years_list, (\(year) { + read_extract_nrs_deaths( + year, + get_boxi_extract_path(year, type = "deaths") + ) %>% + process_extract_nrs_deaths(year, + write_to_disk = write_to_disk + ) + })) %>% + data.table::rbindlist() + + it_chi_deaths <- it_chi_deaths %>% + dplyr::select(c( + "anon_chi", + "death_date_chi" + )) %>% + dplyr::arrange(.data$anon_chi, .keep_all = TRUE) + + refined_death <- nrs_all_years %>% + dplyr::arrange(.data$anon_chi, .keep_all = TRUE) %>% + dplyr::full_join(it_chi_deaths, by = "anon_chi") %>% + # use the BOXI NRS death date by default, but if it's missing, use the chi death date. + dplyr::mutate(death_date = dplyr::if_else( + is.na(.data$record_keydate1), + .data$death_date_chi, + .data$record_keydate1 + )) %>% + dplyr::select(anon_chi, death_date) %>% + # add fy when death happened + dplyr::mutate( + fy = phsmethods::extract_fin_year(death_date), + fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7))) + ) + # TODO: check distinct death data by chi while keeping chi==NA records + + if (write_to_disk) { + write_file( + refined_death, + get_combined_slf_deaths_lookup_path(create = TRUE) + ) + } + + return(refined_death) +} diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R index c583fa8a7..aafc3d727 100644 --- a/R/process_sc_all_alarms_telecare.R +++ b/R/process_sc_all_alarms_telecare.R @@ -87,11 +87,11 @@ process_sc_all_alarms_telecare <- function( # Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function) data <- replace_sc_id_with_latest(data) - data$person_id <- paste0( - data$sending_location, - "-", - data$social_care_id - ) + # data$person_id <- paste0( + # data$sending_location, + # "-", + # data$social_care_id + # ) # Deal with episodes that have a package across quarters data[, pkg_count := seq_len(.N), by = list( @@ -125,7 +125,7 @@ process_sc_all_alarms_telecare <- function( dob = data.table::last(dob), postcode = data.table::last(postcode), recid = data.table::last(recid), - person_id = data.table::last(person_id), + # person_id = data.table::last(person_id), sc_send_lca = data.table::last(sc_send_lca) ), by = list( sending_location, diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index b7c29fbc7..5478d50cc 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -7,8 +7,8 @@ #' @param data The extract to process #' @param sc_demog_lookup The Social Care Demographics lookup produced by #' [process_lookup_sc_demographics()]. -#' @param it_chi_deaths_data The processed lookup of deaths from IT produced -#' with [process_it_chi_deaths()]. +#' @param refined_death The processed lookup of deaths from IT produced +#' with [process_refined_death()]. #' @param ch_name_lookup_path Path to the Care Home name Lookup Excel workbook. #' @param spd_path (Optional) Path the Scottish Postcode Directory, default is #' to use [get_spd_path()]. @@ -19,38 +19,38 @@ #' @family process extracts #' #' @export -#' process_sc_all_care_home <- function( data, sc_demog_lookup = read_file(get_sc_demog_lookup_path()) %>% slfhelper::get_chi(), - it_chi_deaths_data = read_file(get_slf_chi_deaths_path()), - ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()), - spd_path = read_file(get_spd_path()), + refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(), + ch_name_lookup_path = get_slf_ch_name_lookup_path(), + spd_path = get_spd_path(), write_to_disk = TRUE) { ## Data Cleaning----------------------------------------------------- + ch_clean <- data %>% dplyr::mutate( - record_date = end_fy_quarter(.data[["period"]]), - qtr_start = start_fy_quarter(.data[["period"]]), - # Set missing admission date to start of the submitted quarter + # Set missing admission date to start of the submitted quarter (n = 2) ch_admission_date = dplyr::if_else( is.na(.data[["ch_admission_date"]]), - .data[["qtr_start"]], + .data[["period_start_date"]], .data[["ch_admission_date"]] ), - # TODO check if we should set the dis date to the end of the period? - # If the dis date is before admission, remove the dis date + # If the dis date is before admission, remove the dis date (n = 5) ch_discharge_date = dplyr::if_else( .data[["ch_admission_date"]] > .data[["ch_discharge_date"]], lubridate::NA_Date_, .data[["ch_discharge_date"]] ) ) %>% - dplyr::left_join(sc_demog_lookup, + dplyr::right_join(sc_demog_lookup, # this is the correct join. by = c("sending_location", "social_care_id") ) %>% - replace_sc_id_with_latest() + replace_sc_id_with_latest() %>% + dplyr::select(-latest_flag, -latest_sc_id) + + # cleaning and matching care home names name_postcode_clean <- fill_ch_names( ch_data = ch_clean, ch_name_lookup_path = ch_name_lookup_path, @@ -58,23 +58,27 @@ process_sc_all_care_home <- function( ) fixed_ch_provider <- name_postcode_clean %>% + dplyr::select(-ch_name_validated, -open_interval, -latest_close_date, -ch_name_old, -ch_postcode_old) %>% dplyr::mutate( - ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) + ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) # (n = 2) ) %>% # sort data dplyr::arrange( - "sending_location", - "social_care_id", - "ch_admission_date", - "period" + .data[["sending_location"]], + .data[["social_care_id"]], + .data[["period"]], + .data[["ch_admission_date"]] ) %>% dplyr::group_by( .data[["sending_location"]], .data[["social_care_id"]] ) %>% + # work out the min and max ch provider in an episode dplyr::mutate( min_ch_provider = min(.data[["ch_provider"]]), max_ch_provider = max(.data[["ch_provider"]]), + # if care home provider is different across cases, set to "6". + # tidy up ch_provider using 6 when disagreeing values ch_provider = dplyr::if_else( .data[["min_ch_provider"]] != .data[["max_ch_provider"]], 6L, @@ -85,12 +89,9 @@ process_sc_all_care_home <- function( -"min_ch_provider", -"max_ch_provider" ) %>% - # tidy up ch_provider using 6 when disagreeing values - tidyr::fill(.data[["ch_provider"]], .direction = "downup") %>% dplyr::ungroup() - fixed_nursing_provision <- fixed_ch_provider %>% dplyr::group_by( .data[["sending_location"]], @@ -98,7 +99,7 @@ process_sc_all_care_home <- function( .data[["ch_admission_date"]] ) %>% # fill in nursing care provision when missing - # but present in the following entry + # but present in the following entry (n = 0) dplyr::mutate( nursing_care_provision = dplyr::na_if(.data[["nursing_care_provision"]], 9L) ) %>% @@ -106,26 +107,44 @@ process_sc_all_care_home <- function( ready_to_merge <- fixed_nursing_provision %>% - # remove any duplicate records before merging for speed and simplicity - dplyr::distinct() %>% + # remove any duplicate records before merging + dplyr::distinct() %>% # (n = 3) + # sort data + dplyr::arrange( + .data[["sending_location"]], + .data[["social_care_id"]], + .data[["ch_admission_date"]], + .data[["period"]] + ) %>% + dplyr::group_by( + .data[["sending_location"]], + .data[["social_care_id"]], + .data[["ch_admission_date"]] + ) %>% # counter for split episodes - dplyr::mutate( - split_episode = tidyr::replace_na( - .data[["nursing_care_provision"]] != dplyr::lag( - .data[["nursing_care_provision"]] - ), - TRUE - ), - split_episode_counter = cumsum(.data[["split_episode"]]) + # a split episode is an episode where the admission date is the same but the nursing provider has changed. + # We want to keep the nursing provision changes when we merge cases that have the same admission date + dplyr::mutate(previous_nursing_care_provision = dplyr::lag(.data[["nursing_care_provision"]])) %>% + # create a T/F flag for if nursing provision was the same as previous record with same admission date + dplyr::mutate(split_episode = tidyr::replace_na(.data[["previous_nursing_care_provision"]] != nursing_care_provision, TRUE)) %>% + dplyr::group_by( + .data[["social_care_id"]], + .data[["sending_location"]], + .data[["split_episode"]] ) %>% - dplyr::ungroup() + # create a count of each time the nursing provision changes between records with the same admission date + dplyr::mutate(split_episode_counter = ifelse(split_episode == TRUE, dplyr::row_number(), NA)) %>% + dplyr::group_by( + .data[["social_care_id"]], + .data[["sending_location"]] + ) %>% + # fill split episode counter. This will create a new id number for each different nursing provision within an episode + tidyr::fill(split_episode_counter, .direction = c("down")) %>% + dplyr::select(-previous_nursing_care_provision, -split_episode) - # Merge records to a single row per episode - # where admission is the same + + # Merge records to a single row per episode where admission is the same ch_episode <- ready_to_merge %>% - # when nursing_care_provision is different on - # records within the episode, split the episode - # at this point. dplyr::group_by( .data[["chi"]], .data[["sending_location"]], @@ -138,8 +157,8 @@ process_sc_all_care_home <- function( dplyr::desc(.data[["period"]]), dplyr::desc(.data[["ch_discharge_date"]]), dplyr::desc(.data[["ch_provider"]]), - dplyr::desc(.data[["record_date"]]), - dplyr::desc(.data[["qtr_start"]]), + dplyr::desc(.data[["period_end_date"]]), + dplyr::desc(.data[["period_start_date"]]), dplyr::desc(.data[["ch_name"]]), dplyr::desc(.data[["ch_postcode"]]), dplyr::desc(.data[["reason_for_admission"]]), @@ -150,65 +169,42 @@ process_sc_all_care_home <- function( ) %>% dplyr::summarise( sc_latest_submission = dplyr::first(.data[["period"]]), - dplyr::across( - c( - "ch_discharge_date", - "ch_provider", - "record_date", - "qtr_start", - "ch_name", - "ch_postcode", - "reason_for_admission", - "type_of_admission" - ), - dplyr::first - ), + dplyr::across(c( + "ch_discharge_date", + "ch_provider", + "period_end_date", + "period_start_date", + "ch_name", + "ch_postcode", + "reason_for_admission", + "type_of_admission" + ), dplyr::first), dplyr::across(c("gender", "dob", "postcode"), dplyr::first) ) %>% - dplyr::ungroup() %>% - # Amend dates for split episodes - # Change the start and end date as appropriate when an episode is split, - # using the start / end date of the submission quarter - dplyr::group_by( - .data[["chi"]], - .data[["sending_location"]], - .data[["social_care_id"]], - .data[["ch_admission_date"]] - ) %>% - # counter for latest submission - # TODO check if this is the same as split_episode_counter? - dplyr::mutate( - latest_submission_counter = tidyr::replace_na( - .data[["sc_latest_submission"]] != dplyr::lag( - .data[["sc_latest_submission"]] - ), - TRUE - ), - sum_latest_submission = cumsum(.data[["latest_submission_counter"]]) - ) %>% + # If the admission date is missing use the period start date + # otherwise use the start of the quarter dplyr::mutate( - # If it's the first episode(s) then keep the admission date(s), - # otherwise use the start of the quarter - ch_admission_date = dplyr::if_else( - .data[["sum_latest_submission"]] == min(.data[["sum_latest_submission"]]), - .data[["ch_admission_date"]], - .data[["qtr_start"]] + ch_admission_date = dplyr::if_else(is.na(.data[["ch_admission_date"]]), + .data[["period_start_date"]], + .data[["ch_admission_date"]] ), # If it's the last episode(s) then keep the discharge date(s), otherwise # use the end of the quarter - ch_discharge_date = dplyr::if_else( - .data[["sum_latest_submission"]] == max(.data[["sum_latest_submission"]]), - .data[["ch_discharge_date"]], - .data[["record_date"]] + ch_discharge_date = dplyr::if_else(is.na(.data[["ch_discharge_date"]]), + .data[["period_end_date"]], + .data[["ch_discharge_date"]] ) ) %>% - dplyr::ungroup() + dplyr::ungroup() %>% + dplyr::select(-period_start_date, -split_episode_counter) + # Compare to Deaths Data # match ch_episode data with deaths data matched_deaths_data <- ch_episode %>% - dplyr::left_join(it_chi_deaths_data, - by = "chi" + dplyr::left_join(refined_death, + by = "chi", + na_matches = "never" ) %>% # compare discharge date with NRS and CHI death date # if either of the dates are 5 or fewer days before discharge @@ -228,7 +224,7 @@ process_sc_all_care_home <- function( dplyr::ungroup() %>% # remove any episodes where discharge is now before admission, # i.e. death was before admission - dplyr::filter( + dplyr::filter( # (n = 67) !tidyr::replace_na( .data[["ch_discharge_date"]] < .data[["ch_admission_date"]], FALSE @@ -237,39 +233,106 @@ process_sc_all_care_home <- function( # Continuous Care Home Stays # Stay will be continuous as long as the admission date is the next day or - # earlier than the previous discharge date - - ch_markers <- matched_deaths_data %>% - # ch_chi_cis + # earlier than the previous discharge date. + # creates a CIS flag for CHI across all of scotland + # and a CIS for social care ID and sending location for just that LA + ch_chi_markers <- matched_deaths_data %>% + # Group the data by chi dplyr::group_by(.data[["chi"]]) %>% + # Set up previous_discharge_date + # The lag function will set the first row to NA. dplyr::mutate( - continuous_stay_chi = tidyr::replace_na( - .data[["ch_admission_date"]] <= dplyr::lag( - .data[["ch_discharge_date"]] - ) + lubridate::days(1L), - TRUE + # We want to flag the first episode per chi with row_number + row_number = dplyr::row_number(), + # create variable for previous discharge date + 1 day + previous_discharge_date_chi = dplyr::lag(.data[["ch_discharge_date"]]) + + lubridate::days(1L), + # if the first row is NA, set this to the ch_discharge_date + previous_discharge_date_chi = dplyr::if_else(row_number == 1, .data[["ch_discharge_date"]], + .data[["previous_discharge_date_chi"]] + ) + ) %>% + # flag continuous stays and create marker + # calculate number of days between start_date and end_date on the previous episode + dplyr::mutate( + days_to_next_rec = floor( + lubridate::time_length(lubridate::interval( + .data[["previous_discharge_date_chi"]], + .data[["ch_admission_date"]] + ), "days") ), - ch_chi_cis = cumsum(.data[["continuous_stay_chi"]]) + # if there is more than 1 day between (or the last ep for the individual) flag as new ep (Y) + # if there is < 1 day (i.e. a pause of up to 1 day or stays overlap flag as same ep (N)) + new_episode = dplyr::if_else(is.na(days_to_next_rec) | days_to_next_rec > 1, "Y", "N") ) %>% - dplyr::ungroup() %>% - # ch_sc_id_cis - # uses the social care id and sending location so can be used for - # episodes that are not attached to a CHI number - # This will restrict continuous stays to each Local Authority + # create continuous marker using flag for new stay + dplyr::mutate( + ch_chi_cis = purrr::accumulate(new_episode[-1], + .init = 1, + ~ if (.y == "Y") { + .x + 1 + } else { + .x + } + ) + ) %>% + dplyr::ungroup() + + + # This is the same but uses the social care id and sending location so can be used for + # episodes that are not attached to a CHI number + # This will restrict continuous stays to each Local Authority + sc_ch_id_markers <- ch_chi_markers %>% + # uses social_care_id and sending_location to flag continuous stays. + # Will flag cases even if in another LA dplyr::group_by(.data[["social_care_id"]], .data[["sending_location"]]) %>% + # Set up previous_discharge_date + # The lag function will set the first row to NA. + dplyr::mutate( + # We want to flag the first episode per sc id and sending_location with row_number + row_number = dplyr::row_number(), + # create variable for previous discharge date + 1 day + previous_discharge_date_sc = dplyr::lag(.data[["ch_discharge_date"]]) + + lubridate::days(1L), + # if the first row is NA, set this to the ch_discharge_date + previous_discharge_date_sc = dplyr::if_else(row_number == 1, .data[["ch_discharge_date"]], + .data[["previous_discharge_date_sc"]] + ) + ) %>% + # flag continuous stays and create marker + # calculate number of days between start_date and end_date on the previous episode dplyr::mutate( - continuous_stay_sc = tidyr::replace_na( - .data[["ch_admission_date"]] <= dplyr::lag( - .data[["ch_discharge_date"]] - ) + lubridate::days(1L), - TRUE + days_to_next_rec = floor( + lubridate::time_length(lubridate::interval( + .data[["previous_discharge_date_sc"]], + .data[["ch_admission_date"]] + ), "days") ), - ch_sc_id_cis = cumsum(.data[["continuous_stay_sc"]]) + # if there is more than 1 day between (or the last ep for the individual) flag as new ep (Y) + # if there is < 1 day (i.e. a pause of up to 1 day or stays overlap flag as same ep (N)) + new_episode = dplyr::if_else(is.na(days_to_next_rec) | days_to_next_rec > 1, "Y", "N") ) %>% - dplyr::ungroup() + # create continuous marker using flag for new stay + dplyr::mutate( + ch_sc_id_cis = purrr::accumulate(new_episode[-1], + .init = 1, + ~ if (.y == "Y") { + .x + 1 + } else { + .x + } + ) + ) %>% + dplyr::ungroup() %>% + # remove variables no longer needed + dplyr::select( + -previous_discharge_date_chi, -previous_discharge_date_sc, -row_number, + -days_to_next_rec, -new_episode + ) + - # Do a recode on the old reason for admission - adm_reason_recoded <- ch_markers %>% + # Do a recode on the old reason for admission for respite stays. + adm_reason_recoded <- sc_ch_id_markers %>% dplyr::group_by( .data[["social_care_id"]], .data[["sending_location"]], @@ -277,44 +340,52 @@ process_sc_all_care_home <- function( ) %>% dplyr::mutate( ch_ep_start = min(.data[["ch_admission_date"]]), + # Creates a vector for the earliest date out of the end of period and discharge date. + # And will then select what ever is the latest date out of those ch_ep_end = max( pmin( - .data[["record_date"]], + .data[["period_end_date"]], .data[["ch_discharge_date"]], na.rm = TRUE ) ) ) %>% dplyr::ungroup() %>% + # Flag respite stays. dplyr::mutate( - stay_los = lubridate::time_length( - lubridate::interval(.data[["ch_ep_start"]], .data[["ch_ep_end"]]), - "weeks" - ), + stay_los = lubridate::time_length(lubridate::interval(.data[["ch_ep_start"]], .data[["ch_ep_end"]]), "weeks"), stay_respite = .data[["stay_los"]] < 6.0, - type_of_admission = dplyr::if_else( - is.na(.data[["type_of_admission"]]), - dplyr::case_when( - .data[["reason_for_admission"]] == 1L ~ 1L, + type_of_admission = dplyr::if_else(is.na(.data[["type_of_admission"]]), + dplyr::case_when(.data[["reason_for_admission"]] == 1L ~ 1L, .data[["reason_for_admission"]] == 2L ~ 2L, - stay_respite ~ 1L, + stay_respite ~ 1L, # (n = 40573) .default = 3L ), .data[["type_of_admission"]] ) - ) + ) %>% + dplyr::select(-ch_ep_start, -ch_ep_end, -stay_los, -stay_respite) + ch_data_final <- adm_reason_recoded %>% - create_person_id() %>% dplyr::rename( record_keydate1 = "ch_admission_date", record_keydate2 = "ch_discharge_date", ch_adm_reason = "type_of_admission", ch_nursing = "nursing_care_provision" ) %>% + # recode the care home provider description + dplyr::mutate(ch_provider_description = dplyr::case_when( # from social care syntax + ch_provider == 1 ~ "LOCAL AUTHORITY/HSCP/NHS BOARD", + ch_provider == 2 ~ "PRIVATE", + ch_provider == 3 ~ "OTHER LOCAL AUTHORITY", + ch_provider == 4 ~ "THIRD SECTOR", + ch_provider == 5 ~ "NHS BOARD", + ch_provider == 6 ~ "OTHER" + )) %>% dplyr::select( "chi", - "person_id", + # "person_id", "gender", "dob", "postcode", @@ -327,6 +398,7 @@ process_sc_all_care_home <- function( "ch_chi_cis", "ch_sc_id_cis", "ch_provider", + "ch_provider_description", "ch_nursing", "ch_adm_reason", "sc_latest_submission" diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R index 275001c64..352c4fff3 100644 --- a/R/process_sc_all_home_care.R +++ b/R/process_sc_all_home_care.R @@ -37,7 +37,7 @@ process_sc_all_home_care <- function( # Match on demographic data --------------------------------------- matched_hc_data <- replaced_dates %>% - dplyr::left_join( + dplyr::right_join( sc_demog_lookup, by = c("sending_location", "social_care_id") ) %>% @@ -194,8 +194,6 @@ process_sc_all_home_care <- function( TRUE ~ "HC-Unknown" ) ) %>% - # person_id - create_person_id(type = "SC") %>% # compute lca variable from sending_location dplyr::mutate( sc_send_lca = convert_sc_sending_location_to_lca(.data$sending_location) diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R index 5306c0956..c5b7d43eb 100644 --- a/R/process_sc_all_sds.R +++ b/R/process_sc_all_sds.R @@ -128,11 +128,11 @@ process_sc_all_sds <- function( "SDS", convert_sc_sending_location_to_lca(sending_location) )] - sds_full_clean_long$person_id <- paste0( - sds_full_clean_long$sending_location, - "-", - sds_full_clean_long$social_care_id - ) + # sds_full_clean_long$person_id <- paste0( + # sds_full_clean_long$sending_location, + # "-", + # sds_full_clean_long$social_care_id + # ) # Group, arrange and create flags for episodes sds_full_clean_long[, @@ -176,7 +176,7 @@ process_sc_all_sds <- function( dob = data.table::last(dob), postcode = data.table::last(postcode), recid = data.table::last(recid), - person_id = data.table::last(person_id), + # person_id = data.table::last(person_id), sc_send_lca = data.table::last(sc_send_lca) ), by = list(sending_location, social_care_id, smrtype, episode_counter)] rm(sds_full_clean_long) diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R index ccf8e495c..c45992938 100644 --- a/R/process_tests_episode_file.R +++ b/R/process_tests_episode_file.R @@ -31,7 +31,9 @@ process_tests_episode_file <- function(data, year) { recid = TRUE ) %>% dplyr::arrange(.data[["recid"]]) %>% - write_tests_xlsx(sheet_name = "ep_file", year, workbook_name = "ep_file") + write_tests_xlsx(sheet_name = stringr::str_glue({ + "ep_file_{year}" + }), workbook_name = "ep_file") return(comparison) } diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 9643a4f3f..900ce7f03 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -34,7 +34,9 @@ process_tests_individual_file <- function(data, year) { old_data = produce_individual_file_tests(old_data), new_data = produce_individual_file_tests(data) ) %>% - write_tests_xlsx(sheet_name = "indiv_file", year, workbook_name = "indiv_file") + write_tests_xlsx(sheet_name = stringr::str_glue({ + "indiv_file_{year}" + }), workbook_name = "indiv_file") return(comparison) } diff --git a/R/process_tests_sc_all_at_episodes.R b/R/process_tests_sc_all_at_episodes.R index c23a4f6ed..8b5580334 100644 --- a/R/process_tests_sc_all_at_episodes.R +++ b/R/process_tests_sc_all_at_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_at_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_sc_all_episodes_tests( read_file(get_sc_at_episodes_path(update = previous_update())) diff --git a/R/process_tests_sc_all_ch_episodes.R b/R/process_tests_sc_all_ch_episodes.R index d42eca2c7..7e9655c06 100644 --- a/R/process_tests_sc_all_ch_episodes.R +++ b/R/process_tests_sc_all_ch_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_ch_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_sc_all_episodes_tests( read_file(get_sc_ch_episodes_path(update = previous_update())) diff --git a/R/process_tests_sc_all_hc_episodes.R b/R/process_tests_sc_all_hc_episodes.R index d037e7908..7194790c0 100644 --- a/R/process_tests_sc_all_hc_episodes.R +++ b/R/process_tests_sc_all_hc_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_hc_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_sc_all_episodes_tests( read_file(get_sc_hc_episodes_path(update = previous_update())) diff --git a/R/process_tests_sc_all_sds_episodes.R b/R/process_tests_sc_all_sds_episodes.R index 91c32d450..cf87a671c 100644 --- a/R/process_tests_sc_all_sds_episodes.R +++ b/R/process_tests_sc_all_sds_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_sds_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_sc_all_episodes_tests( read_file(get_sc_sds_episodes_path(update = previous_update())) diff --git a/R/read_extract_acute.R b/R/read_extract_acute.R index 9f649a560..084363033 100644 --- a/R/read_extract_acute.R +++ b/R/read_extract_acute.R @@ -149,6 +149,7 @@ read_extract_acute <- function(year, file_path = get_boxi_extract_path(year = ye ipdc = "Inpatient Day Case Identifier Code", cij_ipdc = "CIJ Inpatient Day Case Identifier Code (01)", lineno = "Line Number (01)", + case_reference_number = "Case Reference Number [C]", GLS_record = "GLS Record" ) %>% # replace NA in cost_total_net by 0 diff --git a/R/read_file.R b/R/read_file.R index be0a6fc65..3d174606c 100644 --- a/R/read_file.R +++ b/R/read_file.R @@ -3,8 +3,6 @@ #' @description Read a file, the function chosen to read the file is dependant #' on the file path. #' * `.rds` uses [readr::read_rds()]. -#' * `.fst` uses [fst::read_fst()]. -#' * `.sav` and `.zsav` use [haven::read_spss()]. #' * `.csv` and `.gz` use [readr::read_csv()]. Note that this assumes any file #' ending with `.gz` is a zipped CSV which isn't necessarily true! #' * `.parquet` uses [arrow::read_parquet()]. @@ -19,9 +17,6 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) { valid_extensions <- c( "rds", "rds.gz", - "fst", - "sav", - "zsav", "csv", "csv.gz", "parquet" @@ -29,7 +24,7 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) { # Return an empty tibble if trying to read the dummy path if (path == get_dummy_boxi_extract_path()) { - return(tibble::tibble()) + return(tibble::tibble(anon_chi = NA_character_)) } ext <- fs::path_ext(path) @@ -60,9 +55,6 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) { data <- switch(ext, "rds" = readr::read_rds(file = path), "rds.gz" = readr::read_rds(file = path), - "fst" = tibble::as_tibble(fst::read_fst(path = path)), - "sav" = haven::read_spss(file = path, ...), - "zsav" = haven::read_spss(file = path, ...), "csv" = readr::read_csv(file = path, ..., show_col_types = FALSE), "csv.gz" = readr::read_csv(file = path, ..., show_col_types = FALSE), "parquet" = arrow::read_parquet( diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R index d0c72c859..6128a1be5 100644 --- a/R/read_lookup_sc_client.R +++ b/R/read_lookup_sc_client.R @@ -44,6 +44,7 @@ read_lookup_sc_client <- function(fyyear, "day_care" ) %>% dplyr::filter(.data$financial_year == year) %>% + dplyr::collect() %>% dplyr::mutate( dplyr::across( c( @@ -77,7 +78,9 @@ read_lookup_sc_client <- function(fyyear, .data$social_care_id, .data$financial_year, .data$financial_quarter - ) + ) %>% + dplyr::rename("mental_health_disorders" = "mental_health_problems") + if (!fs::file_exists(get_sandpit_extract_path(type = "client", year = fyyear))) { client_data %>% diff --git a/R/read_sc_all_care_home.R b/R/read_sc_all_care_home.R index 89ef7951b..b11879487 100644 --- a/R/read_sc_all_care_home.R +++ b/R/read_sc_all_care_home.R @@ -42,7 +42,7 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn = } ch_data <- ch_data %>% - # Correct FY 2017 + # Correct FY 2017 as data collection only started in 2017 Q4 dplyr::mutate(period = dplyr::if_else( .data$period == "2017", "2017Q4", diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index ec3cc5705..a1b53f971 100644 --- a/R/write_tests_xlsx.R +++ b/R/write_tests_xlsx.R @@ -24,17 +24,42 @@ write_tests_xlsx <- function(comparison_data, "cross_year" )) { # Set up the workbook ---- - tests_workbook_name <- dplyr::case_when( - is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"), - !is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_{year}_ep_file_tests"), - is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"), - !is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_{year}_indiv_file_tests"), - is.null(year) & workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"), - is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"), - is.null(year) & workbook_name == "cross_year" ~ stringr::str_glue(latest_update(), "_cross_year_tests"), - !is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"), - !is.null(year) & workbook_name == "extract" ~ stringr::str_glue(latest_update(), "_{year}_extract_tests") - ) + if (workbook_name == "ep_file") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_ep_file_tests") + } + } + if (workbook_name == "indiv_file") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_indiv_file_tests") + } + } + if (workbook_name == "lookup") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_lookups_tests") + } + } + if (workbook_name == "sandpit") { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_sandpit_extract_tests") + } + if (workbook_name == "cross_year") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_cross_year_tests") + } + } + if (workbook_name == "extract") { + if (is.null(year)) { + } else { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_{year}_extract_tests") + } + } + tests_workbook_path <- fs::path( get_slf_dir(), @@ -96,6 +121,14 @@ write_tests_xlsx <- function(comparison_data, date_today <- stringr::str_to_lower(date_today) + sheet_name_dated <- ifelse( + is.null(year), + stringr::str_glue("{sheet_name}_{date_today}"), + stringr::str_glue("{year}_{sheet_name}_{date_today}") + ) + + date_today <- stringr::str_to_lower(date_today) + if (is.null(year)) { sheet_name_dated <- stringr::str_glue("{sheet_name}_{date_today}") } else { diff --git a/Run_SLF_Files_manually/run_episode_file_1415.R b/Run_SLF_Files_manually/run_episode_file_1415.R new file mode 100644 index 000000000..b5a2eab38 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_1415.R @@ -0,0 +1,79 @@ +library(targets) +library(createslf) + +year <- "1415" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1415", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1415", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1415", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1415", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1415", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1415", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1415", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1415", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1415", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1415", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1415", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1415", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1415", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1415", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1415", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1415", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year) %>% + process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1516.R b/Run_SLF_Files_manually/run_episode_file_1516.R new file mode 100644 index 000000000..59c7ddc63 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_1516.R @@ -0,0 +1,79 @@ +library(targets) +library(createslf) + +year <- "1516" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1516", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1516", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1516", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1516", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1516", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1516", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1516", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1516", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1516", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1516", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1516", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1516", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1516", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1516", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1516", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1516", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year) ## %>% +# process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1617.R b/Run_SLF_Files_manually/run_episode_file_1617.R new file mode 100644 index 000000000..b10372be6 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_1617.R @@ -0,0 +1,79 @@ +library(targets) +library(createslf) + +year <- "1617" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1617", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1617", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1617", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1617", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1617", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1617", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1617", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1617", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1617", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1617", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1617", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1617", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1617", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1617", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1617", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1617", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year) ## %>% +# process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index ab75b94d7..b405b5b6e 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -3,10 +3,73 @@ library(createslf) year <- "1718" -processed_data_list <- targets::tar_read("processed_data_list_1718", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1718", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1718", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1718", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1718", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1718", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1718", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1718", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1718", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1718", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1718", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1718", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1718", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1718", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1718", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1718", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index cd5a7435f..fb3227512 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -3,10 +3,73 @@ library(createslf) year <- "1819" -processed_data_list <- targets::tar_read("processed_data_list_1819", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1819", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1819", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1819", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1819", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1819", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1819", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1819", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1819", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1819", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1819", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1819", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1819", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1819", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1819", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1819", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index a9dc591b1..e2e21bdac 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -3,10 +3,73 @@ library(createslf) year <- "1920" -processed_data_list <- targets::tar_read("processed_data_list_1920", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1920", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1920", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1920", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1920", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1920", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1920", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1920", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1920", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1920", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1920", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1920", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1920", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1920", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1920", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1920", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index 37708ee8b..cf98e80de 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -3,10 +3,73 @@ library(createslf) year <- "2021" -processed_data_list <- targets::tar_read("processed_data_list_2021", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2021", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2021", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2021", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2021", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2021", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2021", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2021", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2021", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2021", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2021", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2021", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2021", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2021", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2021", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2021", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 47400e2d1..3bcbf2466 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -3,10 +3,73 @@ library(createslf) year <- "2122" -processed_data_list <- targets::tar_read("processed_data_list_2122", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2122", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2122", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2122", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2122", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2122", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2122", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2122", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2122", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2122", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2122", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2122", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2122", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2122", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2122", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2122", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index e64a57f32..af0447eed 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -3,10 +3,73 @@ library(createslf) year <- "2223" -processed_data_list <- targets::tar_read("processed_data_list_2223", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2223", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2223", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2223", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2223", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2223", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2223", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2223", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2223", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2223", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2223", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2223", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2223", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2223", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2223", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2223", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index 4a7f0ad29..bdf16e0f8 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -3,10 +3,73 @@ library(createslf) year <- "2324" -processed_data_list <- targets::tar_read("processed_data_list_2324", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2324", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2324", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2324", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2324", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2324", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2324", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2324", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2324", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2324", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2324", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2324", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2324", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2324", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2324", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2324", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2425.R b/Run_SLF_Files_manually/run_episode_file_2425.R new file mode 100644 index 000000000..699c197b3 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_2425.R @@ -0,0 +1,75 @@ +library(targets) +library(createslf) + +year <- "2425" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2425", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2425", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2425", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2425", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2425", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2425", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2425", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2425", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2425", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2425", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2425", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2425", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2425", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2425", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2425", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year) %>% + process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_individual_file_1415.R b/Run_SLF_Files_manually/run_individual_file_1415.R new file mode 100644 index 000000000..70aa2bfca --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_1415.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "1415" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1516.R b/Run_SLF_Files_manually/run_individual_file_1516.R new file mode 100644 index 000000000..8e8dae906 --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_1516.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "1516" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1617.R b/Run_SLF_Files_manually/run_individual_file_1617.R new file mode 100644 index 000000000..255e4e674 --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_1617.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "1617" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_2425.R b/Run_SLF_Files_manually/run_individual_file_2425.R new file mode 100644 index 000000000..843eb505c --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_2425.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "2425" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_all_targets.R b/Run_SLF_Files_targets/run_all_targets.R new file mode 100644 index 000000000..fb5b94fab --- /dev/null +++ b/Run_SLF_Files_targets/run_all_targets.R @@ -0,0 +1,5 @@ +library(targets) + +# use tar_make_future() to run targets for all years +# This will run everything needed for creating the episode file. +tar_make_future() diff --git a/Run_SLF_Files_targets/run_targets_1415.R b/Run_SLF_Files_targets/run_targets_1415.R new file mode 100644 index 000000000..a37068c0d --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_1415.R @@ -0,0 +1,9 @@ +library(targets) + +year <- "1415" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1415")) +) diff --git a/Run_SLF_Files_targets/run_targets_1516.R b/Run_SLF_Files_targets/run_targets_1516.R new file mode 100644 index 000000000..7930d5bb5 --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_1516.R @@ -0,0 +1,9 @@ +library(targets) + +year <- "1516" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1516")) +) diff --git a/Run_SLF_Files_targets/run_targets_1617.R b/Run_SLF_Files_targets/run_targets_1617.R new file mode 100644 index 000000000..16361f71e --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_1617.R @@ -0,0 +1,9 @@ +library(targets) + +year <- "1617" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1617")) +) diff --git a/Run_SLF_Files_targets/run_targets_1718.R b/Run_SLF_Files_targets/run_targets_1718.R index ac03edd3f..e85a89ff8 100644 --- a/Run_SLF_Files_targets/run_targets_1718.R +++ b/Run_SLF_Files_targets/run_targets_1718.R @@ -7,12 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1718")) ) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_1819.R b/Run_SLF_Files_targets/run_targets_1819.R index b60728359..6957054c8 100644 --- a/Run_SLF_Files_targets/run_targets_1819.R +++ b/Run_SLF_Files_targets/run_targets_1819.R @@ -7,12 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1819")) ) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_1920.R b/Run_SLF_Files_targets/run_targets_1920.R index 897ee0b7a..ef6272fca 100644 --- a/Run_SLF_Files_targets/run_targets_1920.R +++ b/Run_SLF_Files_targets/run_targets_1920.R @@ -7,12 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1920")) ) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2021.R b/Run_SLF_Files_targets/run_targets_2021.R index 53333c014..237078e05 100644 --- a/Run_SLF_Files_targets/run_targets_2021.R +++ b/Run_SLF_Files_targets/run_targets_2021.R @@ -7,12 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2021")) ) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2122.R b/Run_SLF_Files_targets/run_targets_2122.R index 457fe33e7..dc64b625f 100644 --- a/Run_SLF_Files_targets/run_targets_2122.R +++ b/Run_SLF_Files_targets/run_targets_2122.R @@ -7,12 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2122")) ) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2223.R b/Run_SLF_Files_targets/run_targets_2223.R index fc851f3f7..7d8677a80 100644 --- a/Run_SLF_Files_targets/run_targets_2223.R +++ b/Run_SLF_Files_targets/run_targets_2223.R @@ -7,12 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2223")) ) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2324.R b/Run_SLF_Files_targets/run_targets_2324.R index 3b4c9b240..bcebe5fb8 100644 --- a/Run_SLF_Files_targets/run_targets_2324.R +++ b/Run_SLF_Files_targets/run_targets_2324.R @@ -7,12 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2324")) ) - -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2425.R b/Run_SLF_Files_targets/run_targets_2425.R new file mode 100644 index 000000000..fe849ede8 --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_2425.R @@ -0,0 +1,9 @@ +library(targets) + +year <- "2425" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2425")) +) diff --git a/_targets.R b/_targets.R index 4ea32c179..0377e487b 100644 --- a/_targets.R +++ b/_targets.R @@ -19,7 +19,7 @@ tar_option_set( memory = "persistent" # default option ) -years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324") +years_to_run <- createslf::years_to_run() list( tar_rds(write_to_disk, TRUE), @@ -167,12 +167,19 @@ list( age = as.difftime(28.0, units = "days") ) ), + tar_target( + refined_death_data, + process_refined_death( + it_chi_deaths = it_chi_deaths_data, + write_to_disk = write_to_disk + ) + ), tar_target( all_care_home, process_sc_all_care_home( all_care_home_extract, sc_demog_lookup = sc_demog_lookup %>% slfhelper::get_chi(), - it_chi_deaths_data = it_chi_deaths_data %>% slfhelper::get_chi(), + refined_death = refined_death_data %>% slfhelper::get_chi(), ch_name_lookup_path = slf_ch_name_lookup_path, spd_path = spd_path, write_to_disk = write_to_disk @@ -204,6 +211,8 @@ list( tests_sc_all_sds, process_tests_sc_all_sds_episodes(all_sds) ), + + # Phase II tar_map( list(year = years_to_run), tar_rds( @@ -251,11 +260,11 @@ list( get_boxi_extract_path(year, type = "mh"), read_extract_mental_health(year, !!.x) ), - tar_file_read( - nrs_deaths_data, - get_boxi_extract_path(year, type = "deaths"), - read_extract_nrs_deaths(year, !!.x) - ), + # tar_file_read( + # nrs_deaths_data, + # get_boxi_extract_path(year, type = "deaths"), + # read_extract_nrs_deaths(year, !!.x) + # ), tar_file_read( outpatients_data, get_boxi_extract_path(year, type = "outpatient"), @@ -281,6 +290,16 @@ list( get_boxi_extract_path(year = year, type = "gp_ooh-c"), format = "file" ), + tar_target( + acute_cup_path, + get_boxi_extract_path(year, type = "acute_cup"), + format = "file" + ), + tar_target( + gp_ooh_cup_path, + get_boxi_extract_path(year, type = "gp_ooh_cup"), + format = "file" + ), tar_qs( ooh_data, read_extract_gp_ooh( @@ -294,6 +313,7 @@ list( tar_target(source_acute_extract, process_extract_acute( acute_data, year, + acute_cup_path, write_to_disk = write_to_disk )), tar_target( @@ -392,11 +412,14 @@ list( year ) ), - tar_target(source_mental_health_extract, process_extract_mental_health( - mental_health_data, - year, - write_to_disk = write_to_disk - )), + tar_target( + source_mental_health_extract, + process_extract_mental_health( + mental_health_data, + year, + write_to_disk = write_to_disk + ) + ), tar_target( tests_source_mental_health_extract, process_tests_mental_health( @@ -404,11 +427,20 @@ list( year ) ), - tar_target(source_nrs_deaths_extract, process_extract_nrs_deaths( - nrs_deaths_data, - year, - write_to_disk = write_to_disk - )), + # tar_target(source_nrs_deaths_extract, process_extract_nrs_deaths( + # nrs_deaths_data, + # year, + # write_to_disk = write_to_disk + # )), + tar_target( + source_nrs_deaths_extract, + # use this anomymous function with redundant but necessary refined_death + # to make sure reading year-specific nrs deaths extracts after it is produced + (\(year, refined_death_datas) { + read_file(get_source_extract_path(year, "deaths")) %>% + as.data.frame() + })(year, refined_death_data) + ), tar_target( tests_source_nrs_deaths_extract, process_tests_nrs_deaths( @@ -419,6 +451,7 @@ list( tar_target(source_ooh_extract, process_extract_gp_ooh( year, ooh_data, + gp_ooh_cup_path, write_to_disk = write_to_disk )), tar_target( @@ -464,7 +497,7 @@ list( year = year, sc_demographics = sc_demog_lookup %>% slfhelper::get_chi() %>% - dplyr::select(c("sending_location", "social_care_id", "chi")), + dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")), write_to_disk = write_to_disk ) ), @@ -537,31 +570,10 @@ list( slf_deaths_lookup, process_slf_deaths_lookup( year = year, - nrs_deaths_data = source_nrs_deaths_extract %>% slfhelper::get_chi(), - chi_deaths_data = it_chi_deaths_data %>% slfhelper::get_chi(), + refined_death = refined_death_data, write_to_disk = write_to_disk ) ), - tar_qs( - processed_data_list, - list( - source_acute_extract, - source_ae_extract, - source_cmh_extract, - source_dn_extract, - source_homelessness_extract, - source_maternity_extract, - source_mental_health_extract, - source_nrs_deaths_extract, - source_ooh_extract, - source_outpatients_extract, - source_prescribing_extract, - source_sc_care_home, - source_sc_home_care, - source_sc_sds, - source_sc_alarms_tele - ) - ), tar_file_read(nsu_cohort, get_nsu_path(year), read_file(!!.x)), tar_target( homelessness_lookup, @@ -569,79 +581,107 @@ list( year, homelessness_data = source_homelessness_extract %>% slfhelper::get_chi() ) - ), - tar_target( - episode_file, - create_episode_file( - processed_data_list, - year, - homelessness_lookup = homelessness_lookup, - dd_data = source_dd_extract %>% slfhelper::get_chi(), - nsu_cohort = nsu_cohort %>% slfhelper::get_chi(), - ltc_data = source_ltc_lookup %>% slfhelper::get_chi(), - slf_pc_lookup = source_pc_lookup, - slf_gpprac_lookup = source_gp_lookup, - slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(), - sc_client = sc_client_lookup %>% slfhelper::get_chi(), - write_to_disk - ) - ), - tar_target( - episode_file_tests, - process_tests_episode_file( - data = episode_file, - year = year - ) - ) # , - # tar_target( - # cross_year_tests, - # process_tests_cross_year(year = year) - # ), # , - # tar_target( - # individual_file, - # create_individual_file( - # episode_file = episode_file, - # year = year, - # homelessness_lookup = homelessness_lookup, - # write_to_disk = write_to_disk - # ) - # ), - # tar_target( - # individual_file_tests, - # process_tests_individual_file( - # data = individual_file, - # year = year - # ) - # ) # , - # tar_target( - # episode_file_dataset, - # arrow::write_dataset( - # dataset = episode_file, - # path = fs::path( - # get_year_dir(year), - # stringr::str_glue("source-episode-file-{year}") - # ), - # format = "parquet", - # # Should correspond to the available slfhelper filters - # partitioning = c("recid", "hscp2018"), - # compression = "zstd", - # version = "latest" - # ) - # ), - # tar_target( - # individual_file_dataset, - # arrow::write_dataset( - # dataset = individual_file, - # path = fs::path( - # get_year_dir(year), - # stringr::str_glue("source-individual-file-{year}") - # ), - # format = "parquet", - # # Should correspond to the available slfhelper filters - # partitioning = c("hscp2018"), - # compression = "zstd", - # version = "latest" - # ) - # ) - ) + ) + ) # , + # tar_target( + # combined_deaths_lookup, + # process_combined_deaths_lookup() + # ) ) +## End of Targets pipeline ## + +################################################################################ +## Redundant code which may still be useful for including ep/indiv files. +# tar_qs( +# processed_data_list, +# list( +# source_acute_extract, +# source_ae_extract, +# source_cmh_extract, +# source_dn_extract, +# source_homelessness_extract, +# source_maternity_extract, +# source_mental_health_extract, +# source_nrs_deaths_extract, +# source_ooh_extract, +# source_outpatients_extract, +# source_prescribing_extract, +# source_sc_care_home, +# source_sc_home_care, +# source_sc_sds, +# source_sc_alarms_tele +# ) +# ), +# tar_target( +# episode_file, +# create_episode_file( +# processed_data_list, +# year, +# homelessness_lookup = homelessness_lookup, +# dd_data = source_dd_extract %>% slfhelper::get_chi(), +# nsu_cohort = nsu_cohort %>% slfhelper::get_chi(), +# ltc_data = source_ltc_lookup %>% slfhelper::get_chi(), +# slf_pc_lookup = source_pc_lookup, +# slf_gpprac_lookup = source_gp_lookup, +# slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(), +# sc_client = sc_client_lookup %>% slfhelper::get_chi(), +# write_to_disk +# ) +# ), +# tar_target( +# episode_file_tests, +# process_tests_episode_file( +# data = episode_file, +# year = year +# ) +# ) # , +# tar_target( +# cross_year_tests, +# process_tests_cross_year(year = year) +# ), # , +# tar_target( +# individual_file, +# create_individual_file( +# episode_file = episode_file, +# year = year, +# homelessness_lookup = homelessness_lookup, +# write_to_disk = write_to_disk +# ) +# ), +# tar_target( +# individual_file_tests, +# process_tests_individual_file( +# data = individual_file, +# year = year +# ) +# ) # , +# tar_target( +# episode_file_dataset, +# arrow::write_dataset( +# dataset = episode_file, +# path = fs::path( +# get_year_dir(year), +# stringr::str_glue("source-episode-file-{year}") +# ), +# format = "parquet", +# # Should correspond to the available slfhelper filters +# partitioning = c("recid", "hscp2018"), +# compression = "zstd", +# version = "latest" +# ) +# ), +# tar_target( +# individual_file_dataset, +# arrow::write_dataset( +# dataset = individual_file, +# path = fs::path( +# get_year_dir(year), +# stringr::str_glue("source-individual-file-{year}") +# ), +# format = "parquet", +# # Should correspond to the available slfhelper filters +# partitioning = c("hscp2018"), +# compression = "zstd", +# version = "latest" +# ) +# ) diff --git a/copy_to_hscdiip.R b/copy_to_hscdiip.R index 7fb969e8d..8a2dcdc58 100644 --- a/copy_to_hscdiip.R +++ b/copy_to_hscdiip.R @@ -1,9 +1,12 @@ +devtools::load_all() + dir_folder <- "/conf/sourcedev/Source_Linkage_File_Updates" target_folder <- "/conf/hscdiip/01-Source-linkage-files" if (!dir.exists(target_folder)) { dir.create(target_folder, mode = "770") } -folders <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324") + +folders <- years_to_run() year_n <- length(folders) resource_consumption <- data.frame( year = rep("0", year_n), @@ -11,22 +14,27 @@ resource_consumption <- data.frame( file_size_MB = rep(0, year_n) ) -for (i in 1:length(folders)) { +for (i in 1:year_n) { timer <- Sys.time() print(stringr::str_glue("{folders[i]} starts at {Sys.time()}")) folder_path <- file.path(dir_folder, folders[i]) - old_path <- list.files(folder_path, - pattern = "^source-.*parquet", - full.names = TRUE - ) - files_name <- basename(old_path) - new_path <- file.path(target_folder, files_name) - print(files_name) + + file_names <- paste0("source-", c("episode", "individual"), "-file-", folders[i], ".parquet") + file_names_im <- paste0("source-", c("episode", "individual"), "-file-", folders[i], "-new.parquet") + + old_path <- file.path(folder_path, file_names) + new_path_im <- file.path(target_folder, file_names_im) + new_path <- file.path(target_folder, file_names) + + print(file_names) fs::file_copy(old_path, - new_path, + new_path_im, overwrite = TRUE ) + fs::file_move(new_path_im, new_path) + fs::file_chmod(new_path, mode = "640") + resource_consumption$time_consumption[i] <- (Sys.time() - timer) file_size <- sum(file.size(old_path)) / 2^20 resource_consumption$file_size_MB[i] <- file_size diff --git a/extract_new_nsu_cohort/filter_nsu_duplicates.R b/extract_new_nsu_cohort/filter_nsu_duplicates.R new file mode 100644 index 000000000..bbe8265e8 --- /dev/null +++ b/extract_new_nsu_cohort/filter_nsu_duplicates.R @@ -0,0 +1,192 @@ +################################################################################ +# Name of file - filter_nsu_duplicates.R +# Original Authors - James McMahon, Jennifer Thom +# Original Date - August 2021 +# Update - June 2024 +# +# Written/run on - RStudio Server +# Version of R - 3.6.1 +# +# Description - Use this script to filter NSU duplicates when taking a new +# extract from the CHILI team. +# +# Steps for requesting a new NSU extract for SLFs: +# 1. Send an email to [phs.chi-recordlinkage@phs.scot] to request a new NSU +# extract after the JUNE update. +# 2. Prepare a service use extract. Run script `get_service_use_cohort.R` to +# extract a list of CHI's from the most recent 'full' file. +# 3. Once the chili team come back to us, send the service use extract to +# the analyst directly. Do not send the list of CHIs to the mailbox for +# Information Governance purposes. +# 4. CHILI team will then process the new NSU extract based on who is not in +# the service use extract. +# 5. Run the script `filter_nsu_duplicates.R` to collect the new NSU +# extract from the analysts SMRA space - see lines 46-47 and change +# username accordingly. Save the extract in: +# "/conf/hscdiip/SLF_Extracts/NSU" +################################################################################ + +library(dplyr) +library(purrr) +library(stringr) +library(PostcodesioR) +library(janitor) +library(fs) +library(glue) + + +## Setup------------------------------------------------------------------------ + +## Update line 41## +# The year of new NSU extract +year <- "2324" + +# Update lines 45-46 ## +# Analysts username and schema to collect the data. +analyst <- "ROBERM18" +schema <- "FINAL_2" + +# setup directory +nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU") + +# latest geography file +spd_path <- get_spd_path() + +# Set up connection to SMRA----------------------------------------------------- +db_connection <- odbc::dbConnect( + odbc::odbc(), + dsn = "SMRA", + uid = Sys.getenv("USER"), + pwd = rstudioapi::askForPassword("password") +) + + +# Read data--------------------------------------------------------------------- + +# Read NSU data with duplicates from analyst's SMRA space. +nsu_data <- + tbl(db_connection, dbplyr::in_schema(analyst, schema)) %>% + collect() %>% + clean_names() + + +# Data cleaning----------------------------------------------------------------- + +# Find the records with duplicates +nsu_pc_duplicates <- nsu_data %>% + group_by(upi_number) %>% + mutate(postcode_count = n_distinct(postcode)) %>% + ungroup() %>% + filter(postcode_count > 1) + +# Get the latest SPD +spd <- read_file(spd_path) %>% + select(pc7, date_of_introduction, date_of_deletion) + +# Load some regex to check if a postcode is valid +pc_regex <- + "([Gg][Ii][Rr] 0[Aa]{2})|((([A-Za-z][0-9]{1,2})|(([A-Za-z][A-Ha-hJ-Yj-y][0-9]{1,2})|(([A-Za-z][0-9][A-Za-z])|([A-Za-z][A-Ha-hJ-Yj-y][0-9][A-Za-z]?))))\\s?[0-9][A-Za-z]{2})" + +# Main code to check postcodes in various ways +nsu_pc_duplicates_checked <- nsu_pc_duplicates %>% + select( + upi_number, + start_date, + postcode, + date_address_changed, + gp_prac_no, + date_gp_acceptance + ) %>% + # First check against the regex + mutate(invalid_pc = str_detect(postcode, pc_regex, negate = TRUE)) %>% + # Now check against the SPD + left_join(spd, by = c("postcode" = "pc7")) %>% + # Now check against postcodes.io + left_join( + # Filter to only postcodes that need checking + group_by(., upi_number) %>% + # UPI has no postcode which matched the SPD + filter( + all(is.na( + date_of_introduction + )) + ) %>% + ungroup() %>% + # No need to check invalid postcodes + filter(!invalid_pc) %>% + # Pass the unique list of postcodes to + # postcodes.io + pull(postcode) %>% + unique() %>% + list(postcodes = .) %>% + # This function will fail if more than 100 pcs + PostcodesioR::bulk_postcode_lookup() %>% + # Parse the result, we only want the country + map_dfr(~ tibble( + postcode = .x$query, + # Create an order to make sorting nice later + country = ordered(.x$result$country, c("Scotland", "Wales", "England")) + )) + ) %>% + # Sort so that the 'best' postcode is top of the list + mutate(priority = case_when( + # If they matched SPD, + !is.na(date_of_introduction) & is.na(date_of_deletion) ~ 0, + # If the matched SPD (and had a d_o_d) + !is.na(date_of_introduction) ~ 1, + # If they matched the postcodes.io API request + !is.na(country) ~ 2, + # Invalid postcodes come last + invalid_pc ~ Inf, + TRUE ~ 99 + )) %>% + arrange( + upi_number, + priority, + # newest introduced come first + desc(date_of_introduction), + # latest deleted will be first + desc(date_of_deletion), + # Scotland will be preferred etc. + country + ) %>% + # Flag each row with the assigned priority + group_by(upi_number) %>% + mutate(keep_priority = row_number()) %>% + ungroup() + +# Check +nsu_pc_duplicates_checked %>% + count(priority, keep_priority) + +final_data <- nsu_data %>% + # Filter the main dataset to remove + # the duplicate postcodes we decided not to keep + anti_join(nsu_pc_duplicates_checked %>% + filter(keep_priority > 1)) %>% + # Filter any remaining duplicates (none on this test) + distinct(upi_number, .keep_all = TRUE) %>% + select( + chi = upi_number, + dob = date_of_birth, + postcode, + gpprac = gp_prac_no, + gender = sex + ) %>% + mutate( + year = year, .before = everything(), + dob = as.Date(dob), + across(c(gender, gpprac), as.integer) + ) %>% + arrange(chi) %>% + # Save as anon chi on disk + slfhelper::get_anon_chi() + +# Save data out to be used +final_data %>% + arrow::write_parquet(path(nsu_dir, glue::glue("anon-All_CHIs_20{year}.parquet")), + compression = "zstd" + ) + + +## End of Script ## diff --git a/extract_new_nsu_cohort/get_service_use_cohort.R b/extract_new_nsu_cohort/get_service_use_cohort.R new file mode 100644 index 000000000..c29063a32 --- /dev/null +++ b/extract_new_nsu_cohort/get_service_use_cohort.R @@ -0,0 +1,56 @@ +################################################################################ +# Name of file - get_service_use_cohort.R +# Original Authors - Jennifer Thom +# Original Date - August 2021 +# Update - June 2024 +# +# Written/run on - RStudio Server +# Version of R - 3.6.1 +# +# Description - Use this script to return a list of CHIs from the most recent +# SLF episode file (service users) in preparation for requesting +# a new NSU cohort for the latest 'full year' +# +# Steps for requesting a new NSU extract for SLFs: +# 1. Send an email to [phs.chi-recordlinkage@phs.scot] to request a new NSU +# extract after the JUNE update. +# 2. Prepare a service use extract. Run script `get_service_use_cohort.R` to +# extract a list of CHI's from the most recent 'full' file. +# 3. Once the chili team come back to us, send the service use extract to +# the analyst directly. Do not send the list of CHIs to the mailbox for +# Information Governance purposes. +# 4. CHILI team will then process the new NSU extract based on who is not in +# the service use extract. +# 5. Run the script `filter_nsu_duplicates.R` to collect the new NSU +# extract from the analysts SMRA space - see lines 46-47 and change +# username accordingly. Save the extract in: +# "/conf/hscdiip/SLF_Extracts/NSU" +# +################################################################################ + +# Setup------------------------------------------------------------------------- +library(fs) +library(tidyverse) + +## Update ## +# The year of the new NSU extract we want +year <- "2324" + +nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU/") + +# Read data--------------------------------------------------------------------- +episode_file <- slfhelper::read_slf_episode(year, col_select = "anon_chi") %>% + # Remove blank CHI + dplyr::filter(!is.na(anon_chi)) %>% + # Get CHI version for sending to the CHILI team. + # For saving this on disk we want the anon-chi version, save this after sending + # to the CHILI team. + slfhelper::get_chi() + +# Save a parquet file +episode_file %>% + arrow::write_parquet(path(nsu_dir, glue::glue("service_user_extract_{year}.parquet")), + compression = "zstd" + ) + +## End of Script ## diff --git a/man/add_activity_after_death_flag.Rd b/man/add_activity_after_death_flag.Rd index 36eafe7bd..76a4ca64a 100644 --- a/man/add_activity_after_death_flag.Rd +++ b/man/add_activity_after_death_flag.Rd @@ -7,7 +7,8 @@ add_activity_after_death_flag( data, year, - deaths_data = read_file(get_all_slf_deaths_lookup_path()) \%>\% slfhelper::get_chi() + deaths_data = read_file(get_combined_slf_deaths_lookup_path()) \%>\% + slfhelper::get_chi() ) } \arguments{ diff --git a/man/add_deceased_flag.Rd b/man/add_deceased_flag.Rd new file mode 100644 index 000000000..c84568522 --- /dev/null +++ b/man/add_deceased_flag.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_deceased_flag.R +\name{add_deceased_flag} +\alias{add_deceased_flag} +\title{Create the SLF Deaths lookup} +\usage{ +add_deceased_flag( + year, + refined_death = read_file(get_combined_slf_deaths_lookup_path()) \%>\% + slfhelper::get_chi(), + write_to_disk = TRUE +) +} +\arguments{ +\item{year}{The year to process, in FY format.} + +\item{write_to_disk}{(optional) Should the data be written to disk default is +\code{TRUE} i.e. write the data to disk.} + +\item{nrs_deaths_data}{NRS deaths data.} + +\item{chi_deaths_data}{IT CHI deaths data.} +} +\value{ +a \link[tibble:tibble-package]{tibble} containing the episode file +} +\description{ +Currently this just uses the NRS death dates 'as is', with no +corrections or modifications, it is expected that this will be expanded to +use the CHI deaths extract from IT as well as taking into account data in +the episode file to assess the validity of a death date. +} diff --git a/man/check_year_valid.Rd b/man/check_year_valid.Rd index 91c29861e..59960da30 100644 --- a/man/check_year_valid.Rd +++ b/man/check_year_valid.Rd @@ -6,9 +6,9 @@ \usage{ check_year_valid( year, - type = c("acute", "ae", "at", "ch", "client", "cmh", "dd", "deaths", "dn", "gpooh", - "hc", "homelessness", "hhg", "maternity", "mh", "nsu", "outpatients", "pis", "sds", - "sparra") + type = c("acute", "ae", "at", "ch", "client", "cmh", "cost_dna", "dd", "deaths", "dn", + "gpooh", "hc", "homelessness", "hhg", "maternity", "mh", "nsu", "outpatients", "pis", + "sds", "sparra") ) } \arguments{ diff --git a/man/create_homelessness_lookup.Rd b/man/create_homelessness_lookup.Rd index d6a2f2bc8..9826f4ced 100644 --- a/man/create_homelessness_lookup.Rd +++ b/man/create_homelessness_lookup.Rd @@ -50,6 +50,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/get_boxi_extract_path.Rd b/man/get_boxi_extract_path.Rd index c012ac3ef..5a318834a 100644 --- a/man/get_boxi_extract_path.Rd +++ b/man/get_boxi_extract_path.Rd @@ -6,8 +6,9 @@ \usage{ get_boxi_extract_path( year, - type = c("ae", "ae_cup", "acute", "cmh", "deaths", "dn", "gp_ooh-c", "gp_ooh-d", - "gp_ooh-o", "homelessness", "maternity", "mh", "outpatients") + type = c("ae", "ae_cup", "acute", "acute_cup", "cmh", "deaths", "dn", "gp_ooh-c", + "gp_ooh-d", "gp_ooh-o", "gp_ooh_cup", "homelessness", "maternity", "mh", + "outpatients") ) } \arguments{ diff --git a/man/get_all_slf_deaths_lookup_path.Rd b/man/get_combined_slf_deaths_lookup_path.Rd similarity index 66% rename from man/get_all_slf_deaths_lookup_path.Rd rename to man/get_combined_slf_deaths_lookup_path.Rd index 2f06b64d3..709773d01 100644 --- a/man/get_all_slf_deaths_lookup_path.Rd +++ b/man/get_combined_slf_deaths_lookup_path.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_slf_lookup_paths.R -\name{get_all_slf_deaths_lookup_path} -\alias{get_all_slf_deaths_lookup_path} +\name{get_combined_slf_deaths_lookup_path} +\alias{get_combined_slf_deaths_lookup_path} \title{SLF death dates File Path} \usage{ -get_all_slf_deaths_lookup_path(update = latest_update(), ...) +get_combined_slf_deaths_lookup_path(update = latest_update(), ...) } \arguments{ \item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})} @@ -13,6 +13,10 @@ get_all_slf_deaths_lookup_path(update = latest_update(), ...) } \description{ Get the full path to the BOXI NRS Deaths lookup file for all financial years +Note this name is very similar to the existing slf_deaths_lookup_path +which returns the path for the refined_death with deceased flag for each financial year. +This function will return the combined financial years lookup +i.e. all years put together. } \seealso{ \code{\link[=get_file_path]{get_file_path()}} for the generic function. diff --git a/man/get_dd_period.Rd b/man/get_dd_period.Rd index 29bd8baea..c478f401f 100644 --- a/man/get_dd_period.Rd +++ b/man/get_dd_period.Rd @@ -17,6 +17,7 @@ Get the period for Delayed Discharge Other initialisation: \code{\link{latest_cost_year}()}, \code{\link{latest_update}()}, -\code{\link{previous_update}()} +\code{\link{previous_update}()}, +\code{\link{years_to_run}()} } \concept{initialisation} diff --git a/man/get_slf_ch_name_lookup_path.Rd b/man/get_slf_ch_name_lookup_path.Rd index 2660bbeab..1b0c3f3bb 100644 --- a/man/get_slf_ch_name_lookup_path.Rd +++ b/man/get_slf_ch_name_lookup_path.Rd @@ -22,7 +22,7 @@ has official Care Home names and addresses provided by the Care Inspectorate. \code{\link[=get_file_path]{get_file_path()}} for the generic function. Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, \code{\link{get_slf_gpprac_path}()}, diff --git a/man/get_slf_chi_deaths_path.Rd b/man/get_slf_chi_deaths_path.Rd index 0db72d9d3..8ba115dfe 100644 --- a/man/get_slf_chi_deaths_path.Rd +++ b/man/get_slf_chi_deaths_path.Rd @@ -22,7 +22,7 @@ Get the full path to the CHI deaths file \code{\link[=get_file_path]{get_file_path()}} for the generic function. Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, \code{\link{get_slf_gpprac_path}()}, diff --git a/man/get_slf_deaths_lookup_path.Rd b/man/get_slf_deaths_lookup_path.Rd index 307c38ad3..ae64e2371 100644 --- a/man/get_slf_deaths_lookup_path.Rd +++ b/man/get_slf_deaths_lookup_path.Rd @@ -21,7 +21,7 @@ Get the full path to the SLF deaths lookup file \code{\link[=get_file_path]{get_file_path()}} for the generic function. Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_gpprac_path}()}, diff --git a/man/get_slf_gpprac_path.Rd b/man/get_slf_gpprac_path.Rd index 1fb23116f..1371f758e 100644 --- a/man/get_slf_gpprac_path.Rd +++ b/man/get_slf_gpprac_path.Rd @@ -21,7 +21,7 @@ Get the full path to the SLF GP practice lookup \code{\link[=get_file_path]{get_file_path()}} for the generic function. Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, diff --git a/man/get_slf_postcode_path.Rd b/man/get_slf_postcode_path.Rd index f37678695..ee9bc65a9 100644 --- a/man/get_slf_postcode_path.Rd +++ b/man/get_slf_postcode_path.Rd @@ -21,7 +21,7 @@ Get the full path to the SLF Postcode lookup \code{\link[=get_file_path]{get_file_path()}} for the generic function. Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, diff --git a/man/latest_cost_year.Rd b/man/latest_cost_year.Rd index 0240c6ad0..0045b4efb 100644 --- a/man/latest_cost_year.Rd +++ b/man/latest_cost_year.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/00-update_refs.R +% Please edit documentation in R/cost_uplift.R \name{latest_cost_year} \alias{latest_cost_year} \title{The latest financial year for Cost uplift setting} @@ -11,11 +11,17 @@ The financial year format } \description{ Get the latest year for cost uplift +latest_cost_year() is hard coded in cost_uplift(). +2223 is not changed automatically with time passes. +It is changed only when we get a new instruction from somewhere about cost uplift. +Do not change unless specific instructions. +Changing this means that we need to change cost_uplift(). } \seealso{ Other initialisation: \code{\link{get_dd_period}()}, \code{\link{latest_update}()}, -\code{\link{previous_update}()} +\code{\link{previous_update}()}, +\code{\link{years_to_run}()} } \concept{initialisation} diff --git a/man/latest_update.Rd b/man/latest_update.Rd index b3fbe765c..926e472e4 100644 --- a/man/latest_update.Rd +++ b/man/latest_update.Rd @@ -16,6 +16,7 @@ Get the date of the latest update, e.g 'Jun_2022' Other initialisation: \code{\link{get_dd_period}()}, \code{\link{latest_cost_year}()}, -\code{\link{previous_update}()} +\code{\link{previous_update}()}, +\code{\link{years_to_run}()} } \concept{initialisation} diff --git a/man/previous_update.Rd b/man/previous_update.Rd index f87b4656f..547138700 100644 --- a/man/previous_update.Rd +++ b/man/previous_update.Rd @@ -28,6 +28,7 @@ previous_update(override = "May_2023") # Specific Month Other initialisation: \code{\link{get_dd_period}()}, \code{\link{latest_cost_year}()}, -\code{\link{latest_update}()} +\code{\link{latest_update}()}, +\code{\link{years_to_run}()} } \concept{initialisation} diff --git a/man/process_deaths_lookup.Rd b/man/process_combined_deaths_lookup.Rd similarity index 81% rename from man/process_deaths_lookup.Rd rename to man/process_combined_deaths_lookup.Rd index e897e49a2..7d0a75fc7 100644 --- a/man/process_deaths_lookup.Rd +++ b/man/process_combined_deaths_lookup.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/add_activity_after_death_flag.R -\name{process_deaths_lookup} -\alias{process_deaths_lookup} +\name{process_combined_deaths_lookup} +\alias{process_combined_deaths_lookup} \title{Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts} \usage{ -process_deaths_lookup(update = latest_update(), write_to_disk = TRUE, ...) +process_combined_deaths_lookup( + update = latest_update(), + write_to_disk = TRUE, + ... +) } \arguments{ \item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})} diff --git a/man/process_extract_acute.Rd b/man/process_extract_acute.Rd index 77a99cef3..fae9c7bab 100644 --- a/man/process_extract_acute.Rd +++ b/man/process_extract_acute.Rd @@ -4,7 +4,12 @@ \alias{process_extract_acute} \title{Process the Acute extract} \usage{ -process_extract_acute(data, year, write_to_disk = TRUE) +process_extract_acute( + data, + year, + acute_cup_path = get_boxi_extract_path(year, "acute_cup"), + write_to_disk = TRUE +) } \arguments{ \item{data}{The extract to process} @@ -48,6 +53,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_ae.Rd b/man/process_extract_ae.Rd index 9eec39ba5..36d2bb4d3 100644 --- a/man/process_extract_ae.Rd +++ b/man/process_extract_ae.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_alarms_telecare.Rd b/man/process_extract_alarms_telecare.Rd index 76093be7d..016f5e2b6 100644 --- a/man/process_extract_alarms_telecare.Rd +++ b/man/process_extract_alarms_telecare.Rd @@ -49,6 +49,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_care_home.Rd b/man/process_extract_care_home.Rd index 269ae1e7d..a002d30ab 100644 --- a/man/process_extract_care_home.Rd +++ b/man/process_extract_care_home.Rd @@ -51,6 +51,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_cmh.Rd b/man/process_extract_cmh.Rd index 64e085dcf..799b6d717 100644 --- a/man/process_extract_cmh.Rd +++ b/man/process_extract_cmh.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_delayed_discharges.Rd b/man/process_extract_delayed_discharges.Rd index c6fd560a7..385bdff2a 100644 --- a/man/process_extract_delayed_discharges.Rd +++ b/man/process_extract_delayed_discharges.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_district_nursing.Rd b/man/process_extract_district_nursing.Rd index eb2814fbc..49284b70f 100644 --- a/man/process_extract_district_nursing.Rd +++ b/man/process_extract_district_nursing.Rd @@ -55,6 +55,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_gp_ooh.Rd b/man/process_extract_gp_ooh.Rd index ddec006fe..5c68c35dd 100644 --- a/man/process_extract_gp_ooh.Rd +++ b/man/process_extract_gp_ooh.Rd @@ -4,7 +4,12 @@ \alias{process_extract_gp_ooh} \title{Process the GP OoH extract} \usage{ -process_extract_gp_ooh(year, data_list, write_to_disk = TRUE) +process_extract_gp_ooh( + year, + data_list, + gp_ooh_cup_path = get_boxi_extract_path(year, "gp_ooh_cup"), + write_to_disk = TRUE +) } \arguments{ \item{year}{The year to process, in FY format.} @@ -48,6 +53,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_home_care.Rd b/man/process_extract_home_care.Rd index 4dd609770..98c45a8e2 100644 --- a/man/process_extract_home_care.Rd +++ b/man/process_extract_home_care.Rd @@ -49,6 +49,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_homelessness.Rd b/man/process_extract_homelessness.Rd index 405da34bb..59fe6f283 100644 --- a/man/process_extract_homelessness.Rd +++ b/man/process_extract_homelessness.Rd @@ -62,6 +62,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_maternity.Rd b/man/process_extract_maternity.Rd index 17dd1a64c..19142c4a8 100644 --- a/man/process_extract_maternity.Rd +++ b/man/process_extract_maternity.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_mental_health.Rd b/man/process_extract_mental_health.Rd index 5f1fc7330..bd91dc4ec 100644 --- a/man/process_extract_mental_health.Rd +++ b/man/process_extract_mental_health.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_nrs_deaths.Rd b/man/process_extract_nrs_deaths.Rd index 1938e15ec..71fab68e2 100644 --- a/man/process_extract_nrs_deaths.Rd +++ b/man/process_extract_nrs_deaths.Rd @@ -47,6 +47,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_ooh_consultations.Rd b/man/process_extract_ooh_consultations.Rd index e00155191..ae4265823 100644 --- a/man/process_extract_ooh_consultations.Rd +++ b/man/process_extract_ooh_consultations.Rd @@ -45,6 +45,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_ooh_diagnosis.Rd b/man/process_extract_ooh_diagnosis.Rd index 2dcbee647..78db15f0f 100644 --- a/man/process_extract_ooh_diagnosis.Rd +++ b/man/process_extract_ooh_diagnosis.Rd @@ -45,6 +45,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_ooh_outcomes.Rd b/man/process_extract_ooh_outcomes.Rd index 31ec64439..d59617e7b 100644 --- a/man/process_extract_ooh_outcomes.Rd +++ b/man/process_extract_ooh_outcomes.Rd @@ -45,6 +45,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_outpatients.Rd b/man/process_extract_outpatients.Rd index 3a46ad119..8af2c6ddf 100644 --- a/man/process_extract_outpatients.Rd +++ b/man/process_extract_outpatients.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_prescribing.Rd b/man/process_extract_prescribing.Rd index 195a60bfe..c959ce1e7 100644 --- a/man/process_extract_prescribing.Rd +++ b/man/process_extract_prescribing.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_sds.Rd b/man/process_extract_sds.Rd index 03ee60362..b0cc8788a 100644 --- a/man/process_extract_sds.Rd +++ b/man/process_extract_sds.Rd @@ -49,6 +49,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_it_chi_deaths.Rd b/man/process_it_chi_deaths.Rd index 1d8e085ab..757f06aa7 100644 --- a/man/process_it_chi_deaths.Rd +++ b/man/process_it_chi_deaths.Rd @@ -45,6 +45,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_lookup_gpprac.Rd b/man/process_lookup_gpprac.Rd index 107af24c0..bfda08282 100644 --- a/man/process_lookup_gpprac.Rd +++ b/man/process_lookup_gpprac.Rd @@ -54,6 +54,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_lookup_postcode.Rd b/man/process_lookup_postcode.Rd index e556efd51..b8b1ebd4f 100644 --- a/man/process_lookup_postcode.Rd +++ b/man/process_lookup_postcode.Rd @@ -55,6 +55,7 @@ Other process extracts: \code{\link{process_lookup_gpprac}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_lookup_sc_client.Rd b/man/process_lookup_sc_client.Rd index 74e402846..4b85a06b5 100644 --- a/man/process_lookup_sc_client.Rd +++ b/man/process_lookup_sc_client.Rd @@ -8,7 +8,7 @@ process_lookup_sc_client( data, year, sc_demographics = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi() - \%>\% dplyr::select(c("sending_location", "social_care_id", "chi")), + \%>\% dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")), write_to_disk = TRUE ) } @@ -56,6 +56,7 @@ Other process extracts: \code{\link{process_lookup_gpprac}()}, \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_lookup_sc_demographics.Rd b/man/process_lookup_sc_demographics.Rd index a89933425..29215f657 100644 --- a/man/process_lookup_sc_demographics.Rd +++ b/man/process_lookup_sc_demographics.Rd @@ -52,6 +52,7 @@ Other process extracts: \code{\link{process_lookup_gpprac}()}, \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_refined_death.Rd b/man/process_refined_death.Rd new file mode 100644 index 000000000..fd5392eb2 --- /dev/null +++ b/man/process_refined_death.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_refined_death.R +\name{process_refined_death} +\alias{process_refined_death} +\title{Process the refined death data} +\usage{ +process_refined_death( + it_chi_deaths = read_file(get_slf_chi_deaths_path()), + write_to_disk = TRUE +) +} +\arguments{ +\item{it_chi_deaths}{it chi death data} + +\item{write_to_disk}{write the result to disk or not.} +} +\value{ +refined_death The processed lookup of deaths combining NRS and IT_CHI. +} +\description{ +This will process +year-specific BOXI NRS death file (written to disk), and +combine them together to get all years NRS file (Not written to disk). +Then join all NRS deaths with IT CHI death data +to get an all-year refined death file (written to disk). +} +\seealso{ +Other process extracts: +\code{\link{create_homelessness_lookup}()}, +\code{\link{process_extract_acute}()}, +\code{\link{process_extract_ae}()}, +\code{\link{process_extract_alarms_telecare}()}, +\code{\link{process_extract_care_home}()}, +\code{\link{process_extract_cmh}()}, +\code{\link{process_extract_delayed_discharges}()}, +\code{\link{process_extract_district_nursing}()}, +\code{\link{process_extract_gp_ooh}()}, +\code{\link{process_extract_home_care}()}, +\code{\link{process_extract_homelessness}()}, +\code{\link{process_extract_maternity}()}, +\code{\link{process_extract_mental_health}()}, +\code{\link{process_extract_nrs_deaths}()}, +\code{\link{process_extract_ooh_consultations}()}, +\code{\link{process_extract_ooh_diagnosis}()}, +\code{\link{process_extract_ooh_outcomes}()}, +\code{\link{process_extract_outpatients}()}, +\code{\link{process_extract_prescribing}()}, +\code{\link{process_extract_sds}()}, +\code{\link{process_it_chi_deaths}()}, +\code{\link{process_lookup_gpprac}()}, +\code{\link{process_lookup_postcode}()}, +\code{\link{process_lookup_sc_client}()}, +\code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_sc_all_alarms_telecare}()}, +\code{\link{process_sc_all_care_home}()}, +\code{\link{process_sc_all_home_care}()}, +\code{\link{process_sc_all_sds}()}, +\code{\link{read_extract_gp_ooh}()}, +\code{\link{read_it_chi_deaths}()}, +\code{\link{read_lookup_sc_client}()} +} +\concept{process extracts} diff --git a/man/process_sc_all_alarms_telecare.Rd b/man/process_sc_all_alarms_telecare.Rd index a2e319cbf..1f3eb30e0 100644 --- a/man/process_sc_all_alarms_telecare.Rd +++ b/man/process_sc_all_alarms_telecare.Rd @@ -54,6 +54,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, \code{\link{process_sc_all_sds}()}, diff --git a/man/process_sc_all_care_home.Rd b/man/process_sc_all_care_home.Rd index 792d2200d..f689c19f4 100644 --- a/man/process_sc_all_care_home.Rd +++ b/man/process_sc_all_care_home.Rd @@ -7,9 +7,10 @@ process_sc_all_care_home( data, sc_demog_lookup = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi(), - it_chi_deaths_data = read_file(get_slf_chi_deaths_path()), - ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()), - spd_path = read_file(get_spd_path()), + refined_death = read_file(get_combined_slf_deaths_lookup_path()) \%>\% + slfhelper::get_chi(), + ch_name_lookup_path = get_slf_ch_name_lookup_path(), + spd_path = get_spd_path(), write_to_disk = TRUE ) } @@ -19,8 +20,8 @@ process_sc_all_care_home( \item{sc_demog_lookup}{The Social Care Demographics lookup produced by \code{\link[=process_lookup_sc_demographics]{process_lookup_sc_demographics()}}.} -\item{it_chi_deaths_data}{The processed lookup of deaths from IT produced -with \code{\link[=process_it_chi_deaths]{process_it_chi_deaths()}}.} +\item{refined_death}{The processed lookup of deaths from IT produced +with \code{\link[=process_refined_death]{process_refined_death()}}.} \item{ch_name_lookup_path}{Path to the Care Home name Lookup Excel workbook.} @@ -65,6 +66,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_home_care}()}, \code{\link{process_sc_all_sds}()}, diff --git a/man/process_sc_all_home_care.Rd b/man/process_sc_all_home_care.Rd index c6777889f..1f64cad95 100644 --- a/man/process_sc_all_home_care.Rd +++ b/man/process_sc_all_home_care.Rd @@ -54,6 +54,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_sds}()}, diff --git a/man/process_sc_all_sds.Rd b/man/process_sc_all_sds.Rd index f91c9dfb9..43d18b29d 100644 --- a/man/process_sc_all_sds.Rd +++ b/man/process_sc_all_sds.Rd @@ -54,6 +54,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_slf_deaths_lookup.Rd b/man/process_slf_deaths_lookup.Rd index 8ad103a2a..80e7559e0 100644 --- a/man/process_slf_deaths_lookup.Rd +++ b/man/process_slf_deaths_lookup.Rd @@ -6,28 +6,22 @@ \usage{ process_slf_deaths_lookup( year, - nrs_deaths_data = read_file(get_source_extract_path(year, "deaths"), col_select = - c("chi", "record_keydate1")), - chi_deaths_data = read_file(get_slf_chi_deaths_path()), + refined_death = read_file(get_combined_slf_deaths_lookup_path()), write_to_disk = TRUE ) } \arguments{ \item{year}{The year to process, in FY format.} -\item{nrs_deaths_data}{NRS deaths data.} - -\item{chi_deaths_data}{IT CHI deaths data.} +\item{refined_death}{refined death date combining nrs and it_chi.} \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. write the data to disk.} } \value{ -a \link[tibble:tibble-package]{tibble} containing the episode file +a \link[tibble:tibble-package]{tibble} add deceased flag to deaths } \description{ -Currently this just uses the NRS death dates 'as is', with no -corrections or modifications, it is expected that this will be expanded to -use the CHI deaths extract from IT as well as taking into account data in -the episode file to assess the validity of a death date. +Use all-year refined death data to produce year-specific +slf_deaths_lookup with deceased flag added. } diff --git a/man/read_extract_gp_ooh.Rd b/man/read_extract_gp_ooh.Rd index ba908127b..61eaf7d32 100644 --- a/man/read_extract_gp_ooh.Rd +++ b/man/read_extract_gp_ooh.Rd @@ -55,6 +55,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/read_file.Rd b/man/read_file.Rd index 1ef351342..d4d94e0df 100644 --- a/man/read_file.Rd +++ b/man/read_file.Rd @@ -27,8 +27,6 @@ Read a file, the function chosen to read the file is dependant on the file path. \itemize{ \item \code{.rds} uses \code{\link[readr:read_rds]{readr::read_rds()}}. -\item \code{.fst} uses \code{\link[fst:write_fst]{fst::read_fst()}}. -\item \code{.sav} and \code{.zsav} use \code{\link[haven:read_spss]{haven::read_spss()}}. \item \code{.csv} and \code{.gz} use \code{\link[readr:read_delim]{readr::read_csv()}}. Note that this assumes any file ending with \code{.gz} is a zipped CSV which isn't necessarily true! \item \code{.parquet} uses \code{\link[arrow:read_parquet]{arrow::read_parquet()}}. diff --git a/man/read_it_chi_deaths.Rd b/man/read_it_chi_deaths.Rd index d1bfe5cf7..fe548d84b 100644 --- a/man/read_it_chi_deaths.Rd +++ b/man/read_it_chi_deaths.Rd @@ -42,6 +42,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/read_lookup_sc_client.Rd b/man/read_lookup_sc_client.Rd index 283bc6a9a..4cef9df29 100644 --- a/man/read_lookup_sc_client.Rd +++ b/man/read_lookup_sc_client.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/years_to_run.Rd b/man/years_to_run.Rd new file mode 100644 index 000000000..188ea7f5f --- /dev/null +++ b/man/years_to_run.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/00-update_refs.R +\name{years_to_run} +\alias{years_to_run} +\title{The year list for slf to update} +\usage{ +years_to_run() +} +\value{ +The vector of financial years +} +\description{ +Get the vector of years to update slf +} +\seealso{ +Other initialisation: +\code{\link{get_dd_period}()}, +\code{\link{latest_cost_year}()}, +\code{\link{latest_update}()}, +\code{\link{previous_update}()} +} +\concept{initialisation} diff --git a/run_targets_1718.R b/run_targets_1718.R new file mode 100644 index 000000000..488918e1d --- /dev/null +++ b/run_targets_1718.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "1718" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1718")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_1819.R b/run_targets_1819.R new file mode 100644 index 000000000..7c63807e8 --- /dev/null +++ b/run_targets_1819.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "1819" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1819")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_1920.R b/run_targets_1920.R new file mode 100644 index 000000000..d3361a34c --- /dev/null +++ b/run_targets_1920.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "1920" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1920")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2021.R b/run_targets_2021.R new file mode 100644 index 000000000..efcfaed7a --- /dev/null +++ b/run_targets_2021.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "2021" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2021")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2122.R b/run_targets_2122.R new file mode 100644 index 000000000..e92d75c7d --- /dev/null +++ b/run_targets_2122.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "2122" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2122")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2223.R b/run_targets_2223.R new file mode 100644 index 000000000..f5c93ee2f --- /dev/null +++ b/run_targets_2223.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "2223" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2223")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2324.R b/run_targets_2324.R new file mode 100644 index 000000000..5e3885bc2 --- /dev/null +++ b/run_targets_2324.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "2324" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2324")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year)