Skip to content

Commit

Permalink
Merge branch 'sept-update-23' into add-sc-client-tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Jennit07 authored Sep 25, 2023
2 parents f103e7a + 15dde68 commit 633d430
Show file tree
Hide file tree
Showing 54 changed files with 726 additions and 76 deletions.
6 changes: 6 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(add_homelessness_date_flags)
export(add_homelessness_flag)
export(add_hri_variables)

Check failure on line 6 in NAMESPACE

View workflow job for this annotation

GitHub Actions / Check Spelling

`hri` is not a recognized word. (unrecognized-spelling)
export(add_nsu_cohort)
export(check_year_format)
export(clean_up_free_text)
Expand All @@ -13,6 +16,7 @@ export(convert_hscp_to_hscpnames)
export(convert_numeric_to_date)
export(convert_sending_location_to_lca)
export(convert_year_to_fyyear)
export(create_homelessness_lookup)
export(create_individual_file)
export(create_service_use_cohorts)
export(end_fy)
Expand All @@ -29,6 +33,7 @@ export(get_demographic_cohorts_path)
export(get_dev_dir)
export(get_dn_costs_path)
export(get_dn_raw_costs_path)
export(get_existing_data_for_tests)
export(get_file_path)
export(get_gp_ooh_costs_path)
export(get_gp_ooh_raw_costs_path)
Expand Down Expand Up @@ -133,6 +138,7 @@ export(process_tests_sc_ch_episodes)
export(process_tests_sc_client_lookup)
export(process_tests_sc_demographics)
export(process_tests_sds)
export(produce_episode_file_tests)
export(produce_source_extract_tests)
export(produce_test_comparison)
export(read_extract_acute)
Expand Down
6 changes: 4 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
# September 2023 Update - Unreleased
* Update of 2017/18 onwards to include bug fixes within the files.
* New 2023/24 files.
* New NSU cohort for 2022/23 file.
* New 2023/24 files.
*No social care data available for new 2023/24 file.
* New NSU cohort for 2022/23 file.
* SPD and SIMD updated.
* Re addition of:
* HRIs in individual file.
* Homelessness Flags.
Expand Down
6 changes: 3 additions & 3 deletions R/00-update_refs.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#'
#' @family initialisation
latest_update <- function() {
"Jun_2023"
"Sep_2023"
}

#' Previous update
Expand Down Expand Up @@ -61,7 +61,7 @@ previous_update <- function(months_ago = 3L, override = NULL) {
#'
#' @family initialisation
get_dd_period <- function() {
"Jul16_Mar23"
"Jul16_Jun23"
}

#' The latest financial year for Cost uplift setting
Expand All @@ -74,5 +74,5 @@ get_dd_period <- function() {
#'
#' @family initialisation
latest_cost_year <- function() {
"2223"
"2324"
}
142 changes: 142 additions & 0 deletions R/add_hri_variables.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
#' Flag non-Scottish residents
#'
#' @details The variable keep flag can be in the range c(0:4) where
#' \itemize{
#' \item{keep_flag = 0 when resident is Scottish}
#' \item{keep_flag = 1 when resident is not Scottish}
#' \item{keep_flag = 2 when the postcode is missing or a dummy, and the gpprac is missing}
#' \item{keep_flag = 3 when the gpprac is not English and the postcode is missing}
#' \item{keep_flag = 4 when the gpprac is not English and the postcode is a dummy}
#' }
#' The intention is to only keep the records where keep_flag = 0
#'
#' @inheritParams add_hri_variables
#'
#' @return A data frame with the variable 'keep_flag'
flag_non_scottish_residents <- function(
data,
slf_pc_lookup) {
check_variables_exist(data, c("postcode", "gpprac"))

# Make a lookup of postcode areas, which consist of the first characters
# of the postcode
pc_areas <- slf_pc_lookup %>%
dplyr::mutate(
pc_area = stringr::str_match(postcode, "^[A-Z]{1,3}"),
scot_flag = TRUE
) %>%
dplyr::distinct(pc_area, scot_flag)

# Create a flag, 'keep_flag', to determine whether individuals are Scottish
# residents or not
return_data <- data %>%
dplyr::mutate(pc_area = stringr::str_match(postcode, "^[A-Z]{1,3}")) %>%
dplyr::left_join(pc_areas, by = "pc_area") %>%
dplyr::mutate(
dummy_postcode = .data$postcode %in% c("BF010AA", "NF1 1AB", "NK010AA") |
stringr::str_sub(.data$postcode, 1, 4) %in% c("ZZ01", "ZZ61"),
eng_prac = .data$gpprac %in% c(99942, 99957, 99961, 99976, 99981, 99995, 99999),
scottish_resident = dplyr::case_when(
.data$scot_flag ~ 0L,
(is_missing(.data$postcode) | .data$dummy_postcode) & is.na(.data$gpprac) ~ 2L,
!.data$eng_prac & is_missing(.data$postcode) ~ 3L,
!.data$eng_prac & .data$dummy_postcode ~ 4L,
.default = 1L
)
) %>%
dplyr::select(-"dummy_postcode", -"eng_prac")

return(return_data)
}

#' Add HRI variables to an SLF Individual File
#'
#' @details Filters the dataset to only include Scottish residents, then
#' creates a lookup where HRIs are calculated at Scotland, Health Board, and
#' LCA level. Then joins on this lookup by chi/anon_chi.
#'
#' @param data An SLF individual file.
#' @param slf_pc_lookup The Source postcode lookup, defaults
#' to [get_slf_postcode_path()] read using [read_file()].
#'
#' @return The individual file with HRI variables matched on
#' @export
add_hri_variables <- function(
data,
chi_variable = "chi",
slf_pc_lookup = read_file(
get_slf_postcode_path(),
col_select = "postcode"
)) {
hri_lookup <- data %>%
dplyr::select(
"year",
chi_variable,
"postcode",
"gpprac",
"lca",
"hbrescode",
"health_net_cost",
"acute_episodes",
"mat_episodes",
"mh_episodes",
"gls_episodes",
"op_newcons_attendances",
# op_newcons_dnas,
"ae_attendances",
"pis_paid_items",
"ooh_cases"
) %>%
flag_non_scottish_residents(slf_pc_lookup = slf_pc_lookup) %>%
dplyr::filter(scottish_resident == 0L) %>%
# Scotland cost and proportion
dplyr::mutate(
scotland_cost = sum(health_net_cost),
scotland_pct = (health_net_cost / scotland_cost) * 100
) %>%
dplyr::arrange(dplyr::desc(health_net_cost)) %>%
dplyr::mutate(hri_scotp = cumsum(scotland_pct)) %>%
# Health Board
dplyr::group_by(hbrescode) %>%
dplyr::mutate(
hb_cost = sum(health_net_cost),
hb_pct = (health_net_cost / hb_cost) * 100
) %>%
dplyr::arrange(dplyr::desc(health_net_cost), .by_group = TRUE) %>%
dplyr::mutate(hri_hbp = cumsum(hb_pct)) %>%
dplyr::ungroup() %>%
# LCA
dplyr::group_by(lca) %>%
dplyr::mutate(
lca_cost = sum(health_net_cost),
lca_pct = (health_net_cost / lca_cost) * 100
) %>%
dplyr::arrange(dplyr::desc(health_net_cost), .by_group = TRUE) %>%
dplyr::mutate(hri_lcap = cumsum(lca_pct)) %>%
dplyr::ungroup() %>%
# Add HRI flags
dplyr::mutate(
hri_scot = hri_scotp <= 50.0,
hri_hb = hri_hbp <= 50.0,
hri_lca = hri_lcap <= 50.0,
# Deal with potential missing variables
hri_hb = dplyr::if_else(is_missing(hbrescode), FALSE, hri_hb),
hri_hbp = dplyr::if_else(is_missing(hbrescode), NA, hri_hbp),
hri_lca = dplyr::if_else(is_missing(lca), FALSE, hri_lca),
hri_lcap = dplyr::if_else(is_missing(lca), NA, hri_lcap)
) %>%
# Select only required variables for the lookup
dplyr::select(
chi_variable,
"hri_scot",
"hri_scotp",
"hri_hb",
"hri_hbp",
"hri_lca",
"hri_lcap"
)

return_data <- dplyr::left_join(data, hri_lookup, by = chi_variable)

return(return_data)
}
60 changes: 49 additions & 11 deletions R/aggregate_by_chi.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#' @importFrom data.table .SD
#'
#' @inheritParams create_individual_file
aggregate_by_chi <- function(episode_file) {
aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) {
cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")

# Convert to data.table
Expand All @@ -28,17 +28,33 @@ aggregate_by_chi <- function(episode_file) {
)
)

data.table::setnames(
episode_file,
c(
"ch_chi_cis", "cij_marker", "ooh_case_id"
# ,"hh_in_fy"
),
c(
"ch_cis_episodes", "cij_total", "ooh_cases"
# ,"hl1_in_fy"
if (exclude_sc_var) {
data.table::setnames(
episode_file,
c(
"cij_marker",
"ooh_case_id"
),
c(
"cij_total",
"ooh_cases"
)
)
)
} else {
data.table::setnames(
episode_file,
c(
"ch_chi_cis",
"cij_marker",
"ooh_case_id"
),
c(
"ch_cis_episodes",
"cij_total",
"ooh_cases"
)
)
}

# column specification, grouped by chi
# columns to select last
Expand All @@ -48,6 +64,9 @@ aggregate_by_chi <- function(episode_file) {
"gpprac",
vars_start_with(episode_file, "sc_")
)
if (exclude_sc_var) {
cols2 <- cols2[!(cols2 %in% vars_start_with(episode_file, "sc_"))]
}
# columns to count unique rows
cols3 <- c(
"ch_cis_episodes",
Expand All @@ -59,6 +78,9 @@ aggregate_by_chi <- function(episode_file) {
"ooh_cases",
"preventable_admissions"
)
if (exclude_sc_var) {
cols3 <- cols3[!(cols3 %in% "ch_cis_episodes")]
}
# columns to sum up
cols4 <- c(
vars_end_with(
Expand Down Expand Up @@ -91,6 +113,22 @@ aggregate_by_chi <- function(episode_file) {
"health_net_cost_inc_dnas"
)
cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")]
if (exclude_sc_var) {
cols4 <-
cols4[!(cols4 %in% c(
vars_end_with(
episode_file,
c(
"alarms",
"telecare"
)
),
vars_start_with(
episode_file,
"sds_option"
)
))]
}
# columns to select maximum
cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy"))
data.table::setnafill(episode_file, fill = 0L, cols = cols5)
Expand Down
2 changes: 2 additions & 0 deletions R/check_year_valid.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ check_year_valid <- function(
return(FALSE)
} else if (year >= "2324" && type %in% c("SPARRA", "HHG")) {
return(FALSE)
} else if (year >= "2324" && type %in% c("CH", "HC", "SDS", "AT")) {
return(FALSE)
}

return(TRUE)
Expand Down
Loading

0 comments on commit 633d430

Please sign in to comment.