Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix aggregation_by_chi #829

Merged
merged 5 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 49 additions & 173 deletions R/aggregate_by_chi.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @importFrom data.table .SD
#'
#' @inheritParams create_individual_file
aggregate_by_chi <- function(episode_file) {
aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")

# Convert to data.table
Expand All @@ -28,17 +28,33 @@ aggregate_by_chi <- function(episode_file) {
)
)

data.table::setnames(
episode_file,
c(
"ch_chi_cis", "cij_marker", "ooh_case_id"
# ,"hh_in_fy"
),
c(
"ch_cis_episodes", "cij_total", "ooh_cases"
# ,"hl1_in_fy"
if (exclude_sc_var) {
data.table::setnames(
episode_file,
c(
"cij_marker",
"ooh_case_id"
),
c(
"cij_total",
"ooh_cases"
)
)
)
} else {
data.table::setnames(
episode_file,
c(
"ch_chi_cis",
"cij_marker",
"ooh_case_id"
),
c(
"ch_cis_episodes",
"cij_total",
"ooh_cases"
)
)
}

# column specification, grouped by chi
# columns to select last
Expand All @@ -48,6 +64,9 @@ aggregate_by_chi <- function(episode_file) {
"gpprac",
vars_start_with(episode_file, "sc_")
)
if (exclude_sc_var) {
cols2 <- cols2[!(cols2 %in% vars_start_with(episode_file, "sc_"))]
}
# columns to count unique rows
cols3 <- c(
"ch_cis_episodes",
Expand All @@ -59,6 +78,9 @@ aggregate_by_chi <- function(episode_file) {
"ooh_cases",
"preventable_admissions"
)
if (exclude_sc_var) {
cols3 <- cols3[!(cols3 %in% "ch_cis_episodes")]
}
# columns to sum up
cols4 <- c(
vars_end_with(
Expand Down Expand Up @@ -91,6 +113,22 @@ aggregate_by_chi <- function(episode_file) {
"health_net_cost_inc_dnas"
)
cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")]
if (exclude_sc_var) {
cols4 <-
cols4[!(cols4 %in% c(
vars_end_with(
episode_file,
c(
"alarms",
"telecare"
)
),
vars_start_with(
episode_file,
"sds_option"
)
))]
}
# columns to select maximum
cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy"))
data.table::setnafill(episode_file, fill = 0L, cols = cols5)
Expand Down Expand Up @@ -222,165 +260,3 @@ aggregate_ch_episodes <- function(episode_file) {

return(episode_file)
}


#' Aggregate by CHI with no social care variables
#'
#' @description Aggregate the episode file by CHI to convert it into the
#' individual file. Social care variables are excluded; this is used for
#' the creation of the newest file with Q1 data.
#'
#' @details `episode_file` is converted with [data.table::setDT()] and its
#' columns are renamed with [data.table::setnames()], both of which operate
#' by reference. Six per-CHI summaries are computed and bound column-wise:
#' mean `gender`, the last row of the demographic columns, distinct counts
#' of the CIJ / OOH markers, sums of the activity and cost columns, maxima
#' of the `nsu` / `hl1_in_fy` flags, and the total `preventable_beddays`.
#'
#' @importFrom data.table .N
#' @importFrom data.table .SD
#'
#' @inheritParams create_individual_file
#'
#' @return A tibble with one row per `chi`.
aggregate_by_chi_no_sc <- function(episode_file) {
  cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")

  # Convert to data.table
  data.table::setDT(episode_file)

  # Ensure all variable names are lowercase
  data.table::setnames(episode_file, stringr::str_to_lower)

  # Sort the data
  data.table::setkeyv(
    episode_file,
    c(
      "chi",
      "record_keydate1",
      "keytime1",
      "record_keydate2",
      "keytime2"
    )
  )

  # Rename the marker columns to the names used in the individual file.
  # "ch_chi_cis" is a social care variable, so it is deliberately not renamed.
  # NOTE(review): setnames() errors on a missing column by default, so this
  # assumes "hh_in_fy" is always present in the episode file - confirm.
  data.table::setnames(
    episode_file,
    c(
      "cij_marker",
      "ooh_case_id",
      "hh_in_fy"
    ),
    c(
      "cij_total",
      "ooh_cases",
      "hl1_in_fy"
    )
  )

  # column specification, grouped by chi
  # columns to select last (the "sc_" social care columns are left out)
  cols2 <- c(
    "postcode",
    "dob",
    "gpprac"
  )
  # columns to count unique rows ("ch_cis_episodes" is left out)
  cols3 <- c(
    "cij_total",
    "cij_el",
    "cij_non_el",
    "cij_mat",
    "cij_delay",
    "ooh_cases",
    "preventable_admissions"
  )
  # columns to sum up ("alarms", "telecare" and "sds_option" are left out)
  cols4 <- c(
    vars_end_with(
      episode_file,
      c(
        "episodes",
        "beddays",
        "cost",
        "attendances",
        "attend",
        "contacts",
        "hours",
        "paid_items",
        "advice",
        "homev",
        "time",
        "assessment",
        "other",
        "dn",
        "nhs24",
        "pcc"
      )
    ),
    "health_net_cost_inc_dnas"
  )
  # columns to select maximum
  cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy"))
  # Replace NA with 0 so that max() below never sees an all-NA group
  data.table::setnafill(episode_file, fill = 0L, cols = cols5)

  # compute
  individual_file_cols1 <- episode_file[,
    .(gender = mean(gender)),
    by = "chi"
  ]
  individual_file_cols2 <- episode_file[,
    .SD[.N],
    .SDcols = cols2,
    by = "chi"
  ]
  individual_file_cols3 <- episode_file[,
    lapply(.SD, function(x) {
      data.table::uniqueN(x, na.rm = TRUE)
    }),
    .SDcols = cols3,
    by = "chi"
  ]
  individual_file_cols4 <- episode_file[,
    lapply(.SD, function(x) {
      sum(x, na.rm = TRUE)
    }),
    .SDcols = cols4,
    by = "chi"
  ]
  individual_file_cols5 <- episode_file[,
    lapply(.SD, function(x) max(x, na.rm = TRUE)),
    .SDcols = cols5,
    by = "chi"
  ]
  # Preventable beddays are worked out per CIJ first: the stay is clipped to
  # the financial year returned by start_fy() / end_fy().
  individual_file_cols6 <- episode_file[,
    .(
      # The condition is a single value per group, so plain if/else is used
      preventable_beddays = if (any(cij_ppa, na.rm = TRUE)) {
        as.integer(
          min(cij_end_date, end_fy(year)) -
            max(cij_start_date, start_fy(year))
        )
      } else {
        NA_integer_
      }
    ),
    # cij_marker has been renamed as cij_total
    by = c("chi", "cij_total")
  ]
  # ... and then summed up to one value per chi
  individual_file_cols6 <- individual_file_cols6[,
    .(
      preventable_beddays = sum(preventable_beddays, na.rm = TRUE)
    ),
    by = "chi"
  ]

  # Every piece is grouped by chi, so the key column is kept from the first
  # piece only and dropped (by reference) from the others before binding.
  individual_file <- dplyr::bind_cols(
    individual_file_cols1,
    individual_file_cols2[, chi := NULL],
    individual_file_cols3[, chi := NULL],
    individual_file_cols4[, chi := NULL],
    individual_file_cols5[, chi := NULL],
    individual_file_cols6[, chi := NULL]
  )

  # convert back to tibble
  return(dplyr::as_tibble(individual_file))
}
4 changes: 2 additions & 2 deletions R/create_individual_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,12 @@ create_individual_file <- function(

if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) {
individual_file <- individual_file %>%
aggregate_by_chi_no_sc()
aggregate_by_chi(exclude_sc_var = TRUE)
} else {
individual_file <- individual_file %>%
aggregate_ch_episodes() %>%
clean_up_ch(year) %>%
aggregate_by_chi() %>%
aggregate_by_chi(exclude_sc_var = FALSE) %>%
join_sc_client(year)
}

Expand Down
2 changes: 1 addition & 1 deletion man/aggregate_by_chi.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

16 changes: 0 additions & 16 deletions man/aggregate_by_chi_no_sc.Rd

This file was deleted.