Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add condition for latest file where no social care data is available #827

Merged
merged 10 commits into from
Sep 12, 2023
2 changes: 1 addition & 1 deletion R/add_hri_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ add_hri_variables <- function(
hri_lookup <- data %>%
dplyr::select(
"year",
"chi",
chi_variable,
"postcode",
"gpprac",
"lca",
Expand Down
162 changes: 162 additions & 0 deletions R/aggregate_by_chi.R
Original file line number Diff line number Diff line change
Expand Up @@ -222,3 +222,165 @@ aggregate_ch_episodes <- function(episode_file) {

return(episode_file)
}


#' Aggregate by CHI with no social care variables
#'
#' @description Aggregate the episode file by CHI to convert it into an
#' individual file. Social care variables are excluded; this variant is used
#' for the creation of the newest file with Q1 data.
#'
#' @details The input is converted to a `data.table` by reference with
#' [data.table::setDT()], then keyed, has its column names lower-cased and
#' has `cij_marker`, `ooh_case_id` and `hh_in_fy` renamed to `cij_total`,
#' `ooh_cases` and `hl1_in_fy`. These changes are made in place on the
#' object that was passed in.
#'
#' @importFrom data.table .N
#' @importFrom data.table .SD
#'
#' @inheritParams create_individual_file
#'
#' @return A tibble with one row per `chi`, containing the mean `gender`,
#' the last `postcode`, `dob` and `gpprac`, distinct counts of the CIJ / OOH
#' / preventable admission columns, sums of the activity and cost columns,
#' maxima of `nsu` and the `hl1_in_fy` columns, and `preventable_beddays`.
aggregate_by_chi_no_sc <- function(episode_file) {
  cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")

  # Convert to data.table (by reference, no copy is taken)
  data.table::setDT(episode_file)

  # Ensure all variable names are lowercase
  data.table::setnames(episode_file, stringr::str_to_lower)

  # Sort the data. The "last row per chi" selection further down relies on
  # this ordering.
  data.table::setkeyv(
    episode_file,
    c(
      "chi",
      "record_keydate1",
      "keytime1",
      "record_keydate2",
      "keytime2"
    )
  )

  # Rename the identifier columns to the names used for their per-CHI
  # aggregates. No care home (ch_chi_cis) rename here: this variant has no
  # social care variables.
  data.table::setnames(
    episode_file,
    old = c("cij_marker", "ooh_case_id", "hh_in_fy"),
    new = c("cij_total", "ooh_cases", "hl1_in_fy")
  )

  # Column specification, grouped by chi ----
  # Columns where the last row per chi is kept
  last_value_cols <- c("postcode", "dob", "gpprac")

  # Columns where the number of distinct non-missing values is counted
  distinct_count_cols <- c(
    "cij_total",
    "cij_el",
    "cij_non_el",
    "cij_mat",
    "cij_delay",
    "ooh_cases",
    "preventable_admissions"
  )

  # Columns to sum up
  sum_cols <- c(
    vars_end_with(
      episode_file,
      c(
        "episodes",
        "beddays",
        "cost",
        "attendances",
        "attend",
        "contacts",
        "hours",
        "paid_items",
        "advice",
        "homev",
        "time",
        "assessment",
        "other",
        "dn",
        "nhs24",
        "pcc"
      )
    ),
    "health_net_cost_inc_dnas"
  )

  # Columns to select the maximum of. Missing values are replaced with 0
  # first, so every group has at least one non-missing value for max().
  max_cols <- c("nsu", vars_contain(episode_file, "hl1_in_fy"))
  data.table::setnafill(episode_file, fill = 0L, cols = max_cols)

  # Compute ----
  gender_by_chi <- episode_file[,
    .(gender = mean(gender)),
    by = "chi"
  ]

  # .SD[.N] is the last row of each chi group in key order
  last_by_chi <- episode_file[,
    .SD[.N],
    .SDcols = last_value_cols,
    by = "chi"
  ]

  distinct_by_chi <- episode_file[,
    lapply(.SD, function(x) {
      data.table::uniqueN(x, na.rm = TRUE)
    }),
    .SDcols = distinct_count_cols,
    by = "chi"
  ]

  sums_by_chi <- episode_file[,
    lapply(.SD, function(x) {
      sum(x, na.rm = TRUE)
    }),
    .SDcols = sum_cols,
    by = "chi"
  ]

  max_by_chi <- episode_file[,
    lapply(.SD, function(x) max(x, na.rm = TRUE)),
    .SDcols = max_cols,
    by = "chi"
  ]

  # Preventable beddays: first per CIJ (cij_marker has been renamed as
  # cij_total), then summed per chi. The per-group test is a single
  # TRUE/FALSE, so a plain if/else is used rather than ifelse().
  # NOTE(review): `year` is not an argument of this function, so it is
  # presumably a column of `episode_file` - confirm.
  preventable_by_cij <- episode_file[,
    .(
      preventable_beddays = if (any(cij_ppa, na.rm = TRUE)) {
        as.integer(
          min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))
        )
      } else {
        NA_integer_
      }
    ),
    by = c("chi", "cij_total")
  ]
  preventable_by_chi <- preventable_by_cij[,
    .(
      preventable_beddays = sum(preventable_beddays, na.rm = TRUE)
    ),
    by = "chi"
  ]

  # Every piece is grouped by chi on the same keyed table, so the rows line
  # up. Keep chi from the first piece only and drop it from the others.
  individual_file <- dplyr::bind_cols(
    gender_by_chi,
    last_by_chi[, chi := NULL],
    distinct_by_chi[, chi := NULL],
    sums_by_chi[, chi := NULL],
    max_by_chi[, chi := NULL],
    preventable_by_chi[, chi := NULL]
  )

  # Convert back to tibble
  dplyr::as_tibble(individual_file)
}
68 changes: 55 additions & 13 deletions R/create_individual_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,23 +72,59 @@ create_individual_file <- function(
))) %>%
remove_blank_chi() %>%
add_cij_columns() %>%
add_all_columns() %>%
aggregate_ch_episodes() %>%
clean_up_ch(year) %>%
add_all_columns()

if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
individual_file <- individual_file %>%
aggregate_by_chi_no_sc()
} else {
individual_file <- individual_file %>%
aggregate_ch_episodes() %>%
clean_up_ch(year) %>%
aggregate_by_chi() %>%
join_sc_client(year)
}

individual_file <- individual_file %>%
recode_gender() %>%
aggregate_by_chi() %>%
clean_individual_file(year) %>%
join_cohort_lookups(year) %>%
add_homelessness_flag(year, lookup = homelessness_lookup) %>%
match_on_ltcs(year) %>%
join_deaths_data(year) %>%
join_sparra_hhg(year) %>%
join_slf_lookup_vars() %>%
join_sc_client(year) %>%
dplyr::mutate(year = year, .before = dplyr::everything()) %>%
dplyr::mutate(year = year) %>%
add_hri_variables(chi_variable = "chi")


if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
individual_file <- individual_file %>%
dplyr::mutate(
ch_cis_episodes = NA,
ch_beddays = NA,
ch_cost = NA,
hc_episodes = NA,
hc_personal_episodes = NA,
hc_non_personal_episodes = NA,
hc_reablement_episodes = NA,
hc_total_cost = NA,
hc_total_hours = NA,
hc_personal_hours = NA,
hc_non_personal_hours = NA,
hc_reablement_hours = NA,
at_alarms = NA,
at_telecare = NA,
sds_option_1 = NA,
sds_option_2 = NA,
sds_option_3 = NA,
sds_option_4 = NA,
sc_living_alone = NA,
sc_support_from_unpaid_carer = NA,
sc_social_worker = NA,
sc_meals = NA,
sc_day_care = NA
)
}

if (anon_chi_out) {
individual_file <- individual_file %>%
Expand Down Expand Up @@ -173,7 +209,7 @@ add_cij_columns <- function(episode_file) {
add_all_columns <- function(episode_file) {
cli::cli_alert_info("Add all columns function started at {Sys.time()}")

episode_file %>%
episode_file <- episode_file %>%
add_acute_columns("Acute", (.data$smrtype == "Acute-DC" | .data$smrtype == "Acute-IP") & .data$cij_pattype != "Maternity") %>%
add_mat_columns("Mat", .data$recid == "02B" | .data$cij_pattype == "Maternity") %>%
add_mh_columns("MH", .data$recid == "04B" & .data$cij_pattype != "Maternity") %>%
Expand All @@ -187,11 +223,17 @@ add_all_columns <- function(episode_file) {
add_dd_columns("DD", .data$recid == "DD") %>%
add_nsu_columns("NSU", .data$recid == "NSU") %>%
add_nrs_columns("NRS", .data$recid == "NRS") %>%
add_hl1_columns("HL1", .data$recid == "HL1") %>%
add_ch_columns("CH", .data$recid == "CH") %>%
add_hc_columns("HC", .data$recid == "HC") %>%
add_at_columns("AT", .data$recid == "AT") %>%
add_sds_columns("SDS", .data$recid == "SDS") %>%
add_hl1_columns("HL1", .data$recid == "HL1")

if (check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
episode_file <- episode_file %>%
add_ch_columns("CH", .data$recid == "CH") %>%
add_hc_columns("HC", .data$recid == "HC") %>%
add_at_columns("AT", .data$recid == "AT") %>%
add_sds_columns("SDS", .data$recid == "SDS")
}

episode_file <- episode_file %>%
dplyr::mutate(
health_net_cost = rowSums(
dplyr::pick(
Expand Down
32 changes: 32 additions & 0 deletions R/run_episode_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,38 @@ run_episode_file <- function(
join_deaths_data(year) %>%
load_ep_file_vars(year)

if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
episode_file <- episode_file %>%
dplyr::mutate(
sc_send_lca = NA,
sc_living_alone = NA,
sc_support_from_unpaid_carer = NA,
sc_social_worker = NA,
sc_type_of_housing = NA,
sc_meals = NA,
sc_day_care = NA,
sc_latest_submission = NA,
ch_chi_cis = NA,
sc_id_cis = NA,
ch_name = NA,
ch_adm_reason = NA,
ch_provider = NA,
ch_nursing = NA,
hc_hours_annual = NA,
hc_hours_q1 = NA,
hc_hours_q2 = NA,
hc_hours_q3 = NA,
hc_hours_q4 = NA,
hc_cost_q1 = NA,
hc_cost_q2 = NA,
hc_cost_q3 = NA,
hc_cost_q4 = NA,
hc_provider = NA,
hc_reablement = NA,
sds_option_4 = NA,
)
}

if (anon_chi_out) {
episode_file <- slfhelper::get_anon_chi(episode_file)
}
Expand Down
16 changes: 16 additions & 0 deletions man/aggregate_by_chi_no_sc.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.