Skip to content

Commit

Permalink
merge Sep2024 fix into sep24 branch (#1003)
Browse files Browse the repository at this point in the history
* update write_tests_xlsx

* update process_refined_death

* fix tests by removing get_chi

* add 2425

* Style code

* fix NA matches in refined_death

* move latest_cost_year() to cost_uplift()

* improve automation

* Update documentation

* fix `cij_ppa` in DD data

* fix bugs of dd and populate cij_delay back to episodes

* Style code

* keep all variable for delayed discharge episodes

* remove dummy variable names from dd_date

* Style code

* remove `deceased_boxi` variable - bug

* remove `create_person_id`. Its matched in client

* remove `create_person_id`

* Update `run_slf_manually` scripts

* further remove person_id

* fix duplicate row introduced by adding death

* remove duplicated chi when joining death data

* TODO: check distinct death data by chi while keeping chi==NA records

* add parameter for year

* fix duplicate in add_activity_after_death_flag

* Update `check_year_valid`

* Declare DN variables

* Style code

* remove redundant variables

---------

Co-authored-by: Zihao Li <[email protected]>
Co-authored-by: lizihao-anu <[email protected]>
Co-authored-by: Jennit07 <[email protected]>
  • Loading branch information
4 people authored Sep 16, 2024
1 parent b24399e commit 86424a5
Show file tree
Hide file tree
Showing 37 changed files with 247 additions and 130 deletions.
13 changes: 0 additions & 13 deletions R/00-update_refs.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,6 @@ get_dd_period <- function() {
"Jul16_Jun24"
}

#' The latest financial year for Cost uplift setting
#'
#' @description Get the latest year for cost uplift
#'
#' @return The financial year format
#'
#' @export
#'
#' @family initialisation
latest_cost_year <- function() {
"2324"
}

#' The year list for slf to update
#'
#' @description Get the vector of years to update slf
Expand Down
16 changes: 4 additions & 12 deletions R/add_activity_after_death_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ add_activity_after_death_flag <- function(
by = "chi",
suffix = c("", "_boxi")
) %>%
dplyr::filter(.data$deceased == TRUE | .data$deceased_boxi == TRUE) %>%
dplyr::filter(.data$deceased == TRUE) %>%
dplyr::distinct()


Expand Down Expand Up @@ -72,16 +72,6 @@ add_activity_after_death_flag <- function(
))


# Check and print error message for records which already are TRUE for the deceased variable in the episode file, but this doesn't match the
# BOXI deceased variable
check_deceased_match <- flag_data %>%
dplyr::filter(.data$deceased != .data$deceased_boxi)

if (nrow(check_deceased_match) != 0) {
warning("There were records in the episode file which have a deceased variable which does not match the BOXI NRS deceased variable")
}


# Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried
# over from previous financial years
flag_data <- flag_data %>%
Expand All @@ -94,13 +84,15 @@ add_activity_after_death_flag <- function(
final_data <- data %>%
dplyr::left_join(
flag_data,
# TODO: this join_by is not 100% accurate. Consider use ep_file_row_id to join
by = c("year", "chi", "record_keydate1", "record_keydate2"),
na_matches = "never"
) %>%
dplyr::mutate(death_date = lubridate::as_date(ifelse(is.na(death_date) & !(is.na(death_date_boxi)),
death_date_boxi, death_date
))) %>%
dplyr::select(-death_date_boxi)
dplyr::select(-death_date_boxi) %>%
dplyr::distinct()



Expand Down
2 changes: 1 addition & 1 deletion R/check_year_valid.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ check_year_valid <- function(
return(FALSE)
} else if (year >= "2425" && type %in% "sparra") {
return(FALSE)
} else if (year >= "2425" && type %in% c("ch", "hc", "sds", "at")) {
} else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at")) {
return(FALSE)
}

Expand Down
18 changes: 18 additions & 0 deletions R/cost_uplift.R
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,21 @@ lookup_uplift <- function(data) {

return(data)
}

#' The latest financial year for Cost uplift setting
#'
#' @description Get the latest year for cost uplift
#' latest_cost_year() is hard coded in cost_uplift().
#' 2223 is not changed automatically with time passes.
#' It is changed only when we get a new instruction from somewhere about cost uplift.
#' Do not change unless specific instructions.
#' Changing this means that we need to change cost_uplift().
#'
#' @return The financial year format
#'
#' @export
#'
#' @family initialisation
latest_cost_year <- function() {
"2223"
}
8 changes: 8 additions & 0 deletions R/create_episode_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,14 @@ create_episode_file <- function(
)
}

if (!check_year_valid(year, type = "dn")) {
episode_file <- episode_file %>%
dplyr::mutate(
ccm = NA,
total_no_dn_contacts = NA
)
}

if (anon_chi_out) {
episode_file <- slfhelper::get_anon_chi(episode_file)
}
Expand Down
2 changes: 1 addition & 1 deletion R/create_individual_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ create_individual_file <- function(

if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) {
individual_file <- individual_file %>%
aggregate_by_chi(exclude_sc_var = TRUE)
aggregate_by_chi(year = year, exclude_sc_var = TRUE)
} else {
individual_file <- individual_file %>%
aggregate_ch_episodes() %>%
Expand Down
3 changes: 2 additions & 1 deletion R/join_deaths_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@ join_deaths_data <- function(
return(
data %>%
dplyr::left_join(
slf_deaths_lookup,
slf_deaths_lookup %>%
dplyr::distinct(chi, .keep_all = TRUE),
by = "chi",
na_matches = "never",
relationship = "many-to-one"
Expand Down
73 changes: 49 additions & 24 deletions R/link_delayed_discharge_eps.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ link_delayed_discharge_eps <- function(
dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi()) {
cli::cli_alert_info("Link delayed discharge to episode file function started at {Sys.time()}")

names_ep <- names(episode_file)

episode_file <- episode_file %>%
dplyr::mutate(
# remember to revoke the cij_end_date with dummy_cij_end
Expand Down Expand Up @@ -286,6 +288,11 @@ link_delayed_discharge_eps <- function(
)) %>%
dplyr::group_by(.data$chi, .data$cij_marker) %>%
dplyr::mutate(cij_delay = max(.data$has_delay)) %>%
dplyr::mutate(cij_delay = dplyr::if_else(cij_delay == "0",
FALSE,
TRUE,
missing = NA
)) %>%
dplyr::ungroup() %>%
# add yearstay and monthly beddays
# count_last = TRUE because DD counts last day and not the first
Expand All @@ -299,37 +306,43 @@ link_delayed_discharge_eps <- function(
yearstay = rowSums(dplyr::pick(dplyr::ends_with("_beddays")))
) %>%
# tidy up and rename columns to match the format of episode files
# keep variables from ep files
dplyr::select(
-c(
"ep_file_row_id",
"year",
"recid",
"record_keydate1",
"record_keydate2",
"postcode",
"hbtreatcode",
"location",
"spec",
## following are dummy variables
"cij_start_date_lower",
"cij_end_date_upper",
"cij_end_month",
"is_dummy_cij_start",
"dummy_cij_start",
"is_dummy_cij_end",
"dummy_cij_end",
"datediff_start",
"datediff_end",
"has_delay",
"is_dummy_keydate2",
"dummy_keydate2",
"dummy_id"
)
) %>%
dplyr::rename(
"year" = "year_dd",
"recid" = "recid_dd",
"record_keydate1" = "record_keydate1_dd",
"record_keydate2" = "record_keydate2_dd",
"smrtype",
"chi",
"gender",
"dob",
"age",
"gpprac",
"postcode" = "postcode_dd",
"dd_responsible_lca",
"hbtreatcode" = "hbtreatcode_dd",
"delay_end_reason",
"primary_delay_reason",
"secondary_delay_reason",
"cij_marker",
"cij_start_date",
"cij_end_date",
"cij_pattype_code",
"cij_ipdc",
"cij_admtype",
"cij_adm_spec",
"cij_dis_spec",
"cij_delay",
"location",
"spec" = "spec_dd",
"dd_quality",
dplyr::ends_with("_beddays"),
"yearstay"
"location" = "location_dd"
) %>%
# Combine DD with episode data
dplyr::bind_rows(
Expand All @@ -345,7 +358,19 @@ link_delayed_discharge_eps <- function(
"dummy_cij_end"
)
)
)
) %>%
# populate cij_delay dd details back to ep
dplyr::group_by(chi, cij_marker) %>%
dplyr::mutate(
has_dd = any(recid == "DD"),
delay_dd = any(cij_delay)
) %>%
dplyr::ungroup() %>%
dplyr::mutate(cij_delay = dplyr::if_else(has_dd,
delay_dd,
cij_delay
)) %>%
dplyr::select(-c("has_dd", "delay_dd"))

return(linked_data)
}
2 changes: 1 addition & 1 deletion R/process_extract_alarms_telecare.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ process_extract_alarms_telecare <- function(
"smrtype",
"chi",
"dob",
"person_id",
# "person_id",
"gender",
"postcode",
"sc_send_lca",
Expand Down
2 changes: 1 addition & 1 deletion R/process_extract_care_home.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ process_extract_care_home <- function(
"recid",
"smrtype",
"chi",
"person_id",
# "person_id",
"dob",
"gender",
"postcode",
Expand Down
2 changes: 1 addition & 1 deletion R/process_extract_home_care.R
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ process_extract_home_care <- function(
"cost_total_net",
"hc_provider",
"hc_reablement",
"person_id"
# "person_id"
) %>%
slfhelper::get_anon_chi()

Expand Down
2 changes: 1 addition & 1 deletion R/process_extract_sds.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ process_extract_sds <- function(
"smrtype",
"chi",
"dob",
"person_id",
# "person_id",
"gender",
"postcode",
"sc_send_lca",
Expand Down
2 changes: 1 addition & 1 deletion R/process_lookup_sc_client.R
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ process_lookup_sc_client <-
) %>%
dplyr::arrange(.data$chi, .data$count_not_known) %>%
dplyr::distinct(.data$chi, .keep_all = TRUE) %>%
dplyr::select(-.data$sending_location) %>%
dplyr::select(-.data$sending_location, -.data$count_not_known) %>%
slfhelper::get_anon_chi()

if (write_to_disk) {
Expand Down
3 changes: 2 additions & 1 deletion R/process_refined_death.R
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,12 @@ process_refined_death <- function(
fy = phsmethods::extract_fin_year(death_date),
fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7)))
)
# TODO: check distinct death data by chi while keeping chi==NA records

if (write_to_disk) {
write_file(
refined_death,
get_combined_slf_deaths_lookup_path()
get_combined_slf_deaths_lookup_path(create = TRUE)
)
}

Expand Down
12 changes: 6 additions & 6 deletions R/process_sc_all_alarms_telecare.R
Original file line number Diff line number Diff line change
Expand Up @@ -87,11 +87,11 @@ process_sc_all_alarms_telecare <- function(
# Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function)
data <- replace_sc_id_with_latest(data)

data$person_id <- paste0(
data$sending_location,
"-",
data$social_care_id
)
# data$person_id <- paste0(
# data$sending_location,
# "-",
# data$social_care_id
# )

# Deal with episodes that have a package across quarters
data[, pkg_count := seq_len(.N), by = list(
Expand Down Expand Up @@ -125,7 +125,7 @@ process_sc_all_alarms_telecare <- function(
dob = data.table::last(dob),
postcode = data.table::last(postcode),
recid = data.table::last(recid),
person_id = data.table::last(person_id),
# person_id = data.table::last(person_id),
sc_send_lca = data.table::last(sc_send_lca)
), by = list(
sending_location,
Expand Down
6 changes: 3 additions & 3 deletions R/process_sc_all_care_home.R
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ process_sc_all_care_home <- function(
# match ch_episode data with deaths data
matched_deaths_data <- ch_episode %>%
dplyr::left_join(refined_death,
by = "chi"
by = "chi",
na_matches = "never"
) %>%
# compare discharge date with NRS and CHI death date
# if either of the dates are 5 or fewer days before discharge
Expand Down Expand Up @@ -367,7 +368,6 @@ process_sc_all_care_home <- function(


ch_data_final <- adm_reason_recoded %>%
create_person_id() %>%
dplyr::rename(
record_keydate1 = "ch_admission_date",
record_keydate2 = "ch_discharge_date",
Expand All @@ -385,7 +385,7 @@ process_sc_all_care_home <- function(
)) %>%
dplyr::select(
"chi",
"person_id",
# "person_id",
"gender",
"dob",
"postcode",
Expand Down
2 changes: 0 additions & 2 deletions R/process_sc_all_home_care.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,6 @@ process_sc_all_home_care <- function(
TRUE ~ "HC-Unknown"
)
) %>%
# person_id
create_person_id(type = "SC") %>%
# compute lca variable from sending_location
dplyr::mutate(
sc_send_lca = convert_sc_sending_location_to_lca(.data$sending_location)
Expand Down
12 changes: 6 additions & 6 deletions R/process_sc_all_sds.R
Original file line number Diff line number Diff line change
Expand Up @@ -128,11 +128,11 @@ process_sc_all_sds <- function(
"SDS",
convert_sc_sending_location_to_lca(sending_location)
)]
sds_full_clean_long$person_id <- paste0(
sds_full_clean_long$sending_location,
"-",
sds_full_clean_long$social_care_id
)
# sds_full_clean_long$person_id <- paste0(
# sds_full_clean_long$sending_location,
# "-",
# sds_full_clean_long$social_care_id
# )

# Group, arrange and create flags for episodes
sds_full_clean_long[,
Expand Down Expand Up @@ -176,7 +176,7 @@ process_sc_all_sds <- function(
dob = data.table::last(dob),
postcode = data.table::last(postcode),
recid = data.table::last(recid),
person_id = data.table::last(person_id),
# person_id = data.table::last(person_id),
sc_send_lca = data.table::last(sc_send_lca)
), by = list(sending_location, social_care_id, smrtype, episode_counter)]
rm(sds_full_clean_long)
Expand Down
4 changes: 3 additions & 1 deletion R/process_tests_episode_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ process_tests_episode_file <- function(data, year) {
recid = TRUE
) %>%
dplyr::arrange(.data[["recid"]]) %>%
write_tests_xlsx(sheet_name = "ep_file", year, workbook_name = "ep_file")
write_tests_xlsx(sheet_name = stringr::str_glue({
"ep_file_{year}"
}), workbook_name = "ep_file")

return(comparison)
}
Expand Down
Loading

0 comments on commit 86424a5

Please sign in to comment.