Skip to content

Commit

Permalink
Merge branch 'december-2024' into sc-latest-quarter
Browse files Browse the repository at this point in the history
  • Loading branch information
Jennit07 authored Oct 16, 2024
2 parents abe4d8a + 74947ce commit 0e0e8f1
Show file tree
Hide file tree
Showing 35 changed files with 334 additions and 114 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ export(add_homelessness_flag)
export(add_hri_variables)
export(add_nsu_cohort)
export(check_year_format)
export(clean_temp_data)
export(clean_up_free_text)
export(compute_mid_year_age)
export(convert_ca_to_lca)
Expand Down Expand Up @@ -178,12 +179,14 @@ export(read_sc_all_alarms_telecare)
export(read_sc_all_care_home)
export(read_sc_all_home_care)
export(read_sc_all_sds)
export(read_temp_data)
export(rename_hscp)
export(setup_keyring)
export(start_fy)
export(start_fy_quarter)
export(start_next_fy_quarter)
export(write_file)
export(write_temp_data)
export(years_to_run)
importFrom(data.table,.N)
importFrom(data.table,.SD)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# September 2024 Update - Unreleased
# September 2024 Update - released 13-Sep-24
* New 24/25 files created
* New NSU cohort for 23/24 available
* New SPARRA scores calculated from April 24/25
Expand Down
4 changes: 2 additions & 2 deletions R/check_year_valid.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ check_year_valid <- function(
return(FALSE)
} else if (year >= "2324" && type %in% c("nsu", "hhg")) {
return(FALSE)
} else if (year >= "2425" && type %in% "sparra") {
} else if (year >= "2425" && type %in% "nsu") {
return(FALSE)
} else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at")) {
} else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at", "sparra")) {
return(FALSE)
}

Expand Down
11 changes: 9 additions & 2 deletions R/create_episode_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@ create_episode_file <- function(
slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) %>% slfhelper::get_chi(),
sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(),
write_to_disk = TRUE,
anon_chi_out = TRUE) {
anon_chi_out = TRUE,
write_temp_to_disk = FALSE) {
cli::cli_alert_info("Create episode file function started at {Sys.time()}")

processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble()))

episode_file <- dplyr::bind_rows(processed_data_list) %>%
slfhelper::get_chi() %>%
write_temp_data(year, file_name = "ep_temp1", write_temp_to_disk) %>%
create_cost_inc_dna() %>%
apply_cost_uplift() %>%
store_ep_file_vars(
Expand Down Expand Up @@ -122,15 +124,18 @@ create_episode_file <- function(
# PC8 format may still be used. Ensure here that all datasets are in PC7 format.
postcode = phsmethods::format_postcode(.data$postcode, "pc7")
) %>%
write_temp_data(year, file_name = "ep_temp2", write_temp_to_disk) %>%
correct_cij_vars() %>%
fill_missing_cij_markers() %>%
add_homelessness_flag(year, lookup = homelessness_lookup) %>%
add_homelessness_date_flags(year, lookup = homelessness_lookup) %>%
add_ppa_flag() %>%
write_temp_data(year, file_name = "ep_temp3", write_temp_to_disk) %>%
link_delayed_discharge_eps(year, dd_data) %>%
add_nsu_cohort(year, nsu_cohort) %>%
match_on_ltcs(year, ltc_data) %>%
correct_demographics(year) %>%
write_temp_data(year, file_name = "ep_temp4", write_temp_to_disk) %>%
create_cohort_lookups(year) %>%
join_cohort_lookups(year) %>%
join_sparra_hhg(year) %>%
Expand All @@ -142,11 +147,13 @@ create_episode_file <- function(
year,
slf_deaths_lookup
) %>%
write_temp_data(year, file_name = "ep_temp5", write_temp_to_disk) %>%
add_activity_after_death_flag(year,
deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>%
slfhelper::get_chi()
) %>%
load_ep_file_vars(year)
load_ep_file_vars(year) %>%
write_temp_data(year, file_name = "ep_temp6", write_temp_to_disk)

if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
episode_file <- episode_file %>%
Expand Down
15 changes: 11 additions & 4 deletions R/create_individual_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ create_individual_file <- function(
homelessness_lookup = create_homelessness_lookup(year),
write_to_disk = TRUE,
anon_chi_in = TRUE,
anon_chi_out = TRUE) {
anon_chi_out = TRUE,
write_temp_to_disk) {
cli::cli_alert_info("Create individual file function finished at {Sys.time()}")

if (anon_chi_in) {
Expand Down Expand Up @@ -76,30 +77,36 @@ create_individual_file <- function(
))) %>%
remove_blank_chi() %>%
add_cij_columns() %>%
add_all_columns(year = year)
add_all_columns(year = year) %>%
write_temp_data(year, file_name = "indiv_temp1", write_temp_to_disk)

if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
individual_file <- individual_file %>%
aggregate_by_chi(year = year, exclude_sc_var = TRUE)
aggregate_by_chi(year = year, exclude_sc_var = TRUE) %>%
write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk)
} else {
individual_file <- individual_file %>%
aggregate_ch_episodes() %>%
clean_up_ch(year) %>%
aggregate_by_chi(year = year, exclude_sc_var = FALSE)
aggregate_by_chi(year = year, exclude_sc_var = FALSE) %>%
write_temp_data(year, file_name = "indiv_temp2", write_temp_to_disk)
}

individual_file <- individual_file %>%
recode_gender() %>%
clean_individual_file(year) %>%
join_cohort_lookups(year) %>%
write_temp_data(year, file_name = "indiv_temp3", write_temp_to_disk) %>%
add_homelessness_flag(year, lookup = homelessness_lookup) %>%
match_on_ltcs(year) %>%
join_deaths_data(year) %>%
join_sparra_hhg(year) %>%
write_temp_data(year, file_name = "indiv_temp4", write_temp_to_disk) %>%
join_slf_lookup_vars() %>%
dplyr::mutate(year = year) %>%
add_hri_variables(chi_variable = "chi") %>%
add_keep_population_flag(year) %>%
write_temp_data(year, file_name = "indiv_temp5", write_temp_to_disk) %>%
join_sc_client(year, file_type = "individual")

if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
Expand Down
1 change: 0 additions & 1 deletion R/fill_geographies.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,6 @@ fill_gpprac_geographies <- function(
#'
#' @return data with matched HSCP and LCA codes
cascade_geographies <- function(data) {
# TODO rework this function into a series of smaller functions which operate on vectors
# e.g. cascade_hscp_lca <- function(hscp, lca) {...}
# Would take HSCP and populate any missing LCA using it
data <- data %>%
Expand Down
1 change: 0 additions & 1 deletion R/process_extract_alarms_telecare.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ process_extract_alarms_telecare <- function(
"smrtype",
"chi",
"dob",
# "person_id",
"gender",
"postcode",
"sc_send_lca",
Expand Down
1 change: 0 additions & 1 deletion R/process_extract_care_home.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ process_extract_care_home <- function(
"recid",
"smrtype",
"chi",
# "person_id",
"dob",
"gender",
"postcode",
Expand Down
1 change: 0 additions & 1 deletion R/process_extract_gp_ooh.R
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ process_extract_gp_ooh <- function(year,
)

# Keep the location descriptions as a lookup.
# TODO write the GP OoH lookup out using some functions
location_lookup <- ooh_clean %>%
dplyr::group_by(.data$location) %>%
dplyr::summarise(
Expand Down
3 changes: 1 addition & 2 deletions R/process_extract_home_care.R
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@ process_extract_home_care <- function(
tidyselect::starts_with("hc_cost_"),
"cost_total_net",
"hc_provider",
"hc_reablement",
# "person_id"
"hc_reablement"
) %>%
slfhelper::get_anon_chi()

Expand Down
1 change: 0 additions & 1 deletion R/process_extract_homelessness.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,6 @@ process_extract_homelessness <- function(
dplyr::rename(hl1_completeness = "pct_complete_all") %>%
dplyr::mutate(hl1_completeness = round(.data$hl1_completeness, 1))

# TODO - Include person_id (from client_id)
final_data <- hl1_data %>%
dplyr::select(
"year",
Expand Down
1 change: 0 additions & 1 deletion R/process_extract_sds.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@ process_extract_sds <- function(
"smrtype",
"chi",
"dob",
# "person_id",
"gender",
"postcode",
"sc_send_lca",
Expand Down
6 changes: 0 additions & 6 deletions R/process_sc_all_alarms_telecare.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,6 @@ process_sc_all_alarms_telecare <- function(
# Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function)
data <- replace_sc_id_with_latest(data)

# data$person_id <- paste0(
# data$sending_location,
# "-",
# data$social_care_id
# )

# Deal with episodes that have a package across quarters
data[, pkg_count := seq_len(.N), by = list(
Expand Down Expand Up @@ -125,7 +120,6 @@ process_sc_all_alarms_telecare <- function(
dob = data.table::last(dob),
postcode = data.table::last(postcode),
recid = data.table::last(recid),
# person_id = data.table::last(person_id),
sc_send_lca = data.table::last(sc_send_lca)
), by = list(
sending_location,
Expand Down
1 change: 0 additions & 1 deletion R/process_sc_all_care_home.R
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,6 @@ process_sc_all_care_home <- function(
)) %>%
dplyr::select(
"chi",
# "person_id",
"gender",
"dob",
"postcode",
Expand Down
6 changes: 0 additions & 6 deletions R/process_sc_all_sds.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,6 @@ process_sc_all_sds <- function(
"SDS",
convert_sc_sending_location_to_lca(sending_location)
)]
# sds_full_clean_long$person_id <- paste0(
# sds_full_clean_long$sending_location,
# "-",
# sds_full_clean_long$social_care_id
# )

# Group, arrange and create flags for episodes
sds_full_clean_long[,
Expand Down Expand Up @@ -176,7 +171,6 @@ process_sc_all_sds <- function(
dob = data.table::last(dob),
postcode = data.table::last(postcode),
recid = data.table::last(recid),
# person_id = data.table::last(person_id),
sc_send_lca = data.table::last(sc_send_lca)
), by = list(sending_location, social_care_id, smrtype, episode_counter)]
rm(sds_full_clean_long)
Expand Down
62 changes: 62 additions & 0 deletions R/write_temp_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#' Write temporary data to disk in parquet format for debugging purposes
#'
#' @description Optionally write an intermediate data set to
#' `{file_name}.parquet` inside the directory returned by
#' `get_year_dir(year)`, then hand the data on unchanged so the call can sit
#' in the middle of a pipeline.
#' @param data The data to be written
#' @param year year variable, passed to `get_year_dir()` to locate the output
#' directory
#' @param file_name The file name to be written, without the `.parquet`
#' extension
#' @param write_temp_to_disk Boolean type, write temp data to disk or not.
#' Defaults to `FALSE`, in which case nothing is written.
#'
#' @return `data`, unchanged, for the next step of the pipeline.
#' @export
write_temp_data <-
  function(data, year, file_name, write_temp_to_disk = FALSE) {
    if (write_temp_to_disk) {
      full_file_name <- stringr::str_glue("{file_name}.parquet")
      file_path <- file.path(
        get_year_dir(year),
        full_file_name
      )

      # Let cli do the interpolation itself: passing an already-glued string
      # would be interpolated a second time and fails if the file name
      # contains braces.
      cli::cli_alert_info(
        "Writing {full_file_name} to disk started at {Sys.time()}"
      )

      write_file(data,
        path = file_path
      )
    }

    data
  }


#' Read a temporary debugging file back from disk
#'
#' @description Load `{file_name}.parquet` from the directory returned by
#' `get_year_dir(year)` using `read_file()`.
#' @param year year variable, passed to `get_year_dir()` to locate the
#' directory
#' @param file_name The file name to be read, without the `.parquet`
#' extension
#'
#' @return the data read by `read_file()` for the next step (presumably a
#' [tibble][tibble::tibble-package]).
#' @export
read_temp_data <- function(year, file_name) {
  parquet_name <- stringr::str_glue("{file_name}.parquet")

  read_file(file.path(get_year_dir(year), parquet_name))
}

#' Clean temp data from disk
#'
#' @description Delete temporary debugging files from the directory returned
#' by `get_year_dir(year)` to save storage.
#' @param year year variable, passed to `get_year_dir()` to locate the
#' directory
#' @param file_type Which temp files to delete: `"ep"` (files whose names
#' start with `ep_temp`) or `"ind"` (files whose names start with
#' `indiv_temp` or `ind_temp`). Defaults to `"ep"`.
#'
#' @return Invisibly, the logical vector returned by [file.remove()]: one
#' element per matching file, `TRUE` where the file was deleted.
#' @export
clean_temp_data <- function(year, file_type = c("ep", "ind")) {
  file_type <- match.arg(file_type)

  # The individual file temp outputs are named "indiv_temp*", so "ind" has to
  # match that prefix as well as the literal "ind_temp".
  pattern <- switch(file_type,
    ep = "^ep_temp",
    ind = "^ind(iv)?_temp"
  )

  # full.names = TRUE so the files are removed from the year directory rather
  # than being looked up relative to the current working directory.
  temp_files <- list.files(
    path = get_year_dir(year),
    pattern = pattern,
    full.names = TRUE
  )

  invisible(file.remove(temp_files))
}
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1415.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1415"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1516.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1516"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1617.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1617"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1718.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1718"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1819.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1819"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_1920.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "1920"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2021.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2021"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2122.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2122"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
2 changes: 2 additions & 0 deletions Run_SLF_Files_manually/run_individual_file_2223.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@ library(createslf)

year <- "2223"

clean_temp_data(year, "ep")

episode_file <- arrow::read_parquet(get_slf_episode_path(year))

# Run individual file
Expand Down
Loading

0 comments on commit 0e0e8f1

Please sign in to comment.