Skip to content

Commit

Permalink
Merge branch 'september-2024' into reduce_dependencies
Browse files Browse the repository at this point in the history
  • Loading branch information
Jennit07 authored Aug 20, 2024
2 parents 62219ab + 72a66f8 commit 3f1189d
Show file tree
Hide file tree
Showing 64 changed files with 917 additions and 265 deletions.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Generated by roxygen2: do not edit by hand

export("%>%")
export(add_deceased_flag)
export(add_homelessness_date_flags)
export(add_homelessness_flag)
export(add_hri_variables)
Expand Down Expand Up @@ -115,6 +116,7 @@ export(process_lookup_ltc)
export(process_lookup_postcode)
export(process_lookup_sc_client)
export(process_lookup_sc_demographics)
export(process_refined_death)
export(process_sc_all_alarms_telecare)
export(process_sc_all_care_home)
export(process_sc_all_home_care)
Expand Down Expand Up @@ -182,6 +184,7 @@ export(start_fy)
export(start_fy_quarter)
export(start_next_fy_quarter)
export(write_file)
export(years_to_run)
importFrom(data.table,.N)
importFrom(data.table,.SD)
importFrom(magrittr,"%>%")
Expand Down
12 changes: 11 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,14 @@
# June 2024 Update - Unreleased
# September 2024 Update - Unreleased
* New 24/25 files created
* New NSU cohort for 23/24 available
* New SPARRA scores calculated from April 24/25
* Death dates attached to activity after death flag
* Care home methodology updated
* New cup marker for Acute and GP OOH
* Bug fix:
* person id for SDS and client

# June 2024 Update - released 06-Jun-24
* Update of 2017/18 onwards to include bug fixes within the files.
* Removal of extra variable caused by the LTCs not matching properly.
* New NRS mid-2022 population estimates.
Expand Down
25 changes: 22 additions & 3 deletions R/00-update_refs.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
#'
#' @family initialisation
latest_update <- function() {
# A function returns the value of its last expression, so this yields
# "Sep_2024"; the "Jun_2024" literal before it is evaluated and discarded.
# NOTE(review): the two literals look like the removed and added lines of a
# diff shown together - confirm only "Sep_2024" is present in the real file.
"Jun_2024"
"Sep_2024"
}

#' Previous update
Expand Down Expand Up @@ -61,7 +61,7 @@ previous_update <- function(months_ago = 3L, override = NULL) {
#'
#' @family initialisation
get_dd_period <- function() {
# A function returns the value of its last expression, so this yields
# "Jul16_Jun24"; the "Jul16_Mar24" literal before it is evaluated and
# discarded.
# NOTE(review): the two literals look like the removed and added lines of a
# diff shown together - confirm only "Jul16_Jun24" is present in the real file.
"Jul16_Mar24"
"Jul16_Jun24"
}

#' The latest financial year for Cost uplift setting
Expand All @@ -74,5 +74,24 @@ get_dd_period <- function() {
#'
#' @family initialisation
latest_cost_year <- function() {
# A function returns the value of its last expression, so this yields "2324";
# the "2223" literal before it is evaluated and discarded.
# NOTE(review): the two literals look like the removed and added lines of a
# diff shown together - confirm only "2324" is present in the real file.
"2223"
"2324"
}

#' Financial years covered by an SLF update
#'
#' @description Build the character vector of financial years, in the
#' four-digit `"1718"` style, that the SLFs are updated for. Each element is
#' the two-digit start year followed by the two-digit end year.
#'
#' @return A character vector of financial years, from `"1718"` up to and
#' including `"2425"`.
#'
#' @export
#'
#' @family initialisation
years_to_run <- function() {
  # Two-digit calendar years in which the first and last financial years begin
  first_fy_start <- 17
  last_fy_start <- 24

  fy_starts <- seq(first_fy_start, last_fy_start)

  # A financial year ends in the calendar year after it starts
  paste0(fy_starts, fy_starts + 1)
}
5 changes: 2 additions & 3 deletions R/add_activity_after_death_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ add_activity_after_death_flag <- function(
#'
#'
# Read data------------------------------------------------

process_combined_deaths_lookup <- function(update = latest_update(),
write_to_disk = TRUE, ...) {
dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths"
Expand Down Expand Up @@ -183,9 +184,7 @@ process_combined_deaths_lookup <- function(update = latest_update(),
if (write_to_disk) {
write_file(
all_boxi_deaths,
fs::path(get_slf_dir(), "Deaths",
file_name = stringr::str_glue("anon-combined_slf_deaths_lookup_{update}.parquet")
)
get_combined_slf_deaths_lookup_path()
)
}

Expand Down
40 changes: 40 additions & 0 deletions R/add_deceased_flag.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#' Create the SLF Deaths lookup
#'
#' @description Take the refined (all-year) deaths data, derive a single
#' `death_date` and add a `deceased` flag. `death_date` is taken from
#' `record_keydate1` and falls back to `death_date_chi` when
#' `record_keydate1` is missing.
#'
#' @param year The year to process, in FY format. It is only used to build
#' the output path passed to `get_slf_deaths_lookup_path()`.
#' @param refined_death The refined deaths data; it must contain the columns
#' `record_keydate1` and `death_date_chi`. Defaults to reading the combined
#' SLF deaths lookup and converting the anonymised CHI back to CHI.
#' @param write_to_disk (optional) Should the data be written to disk default is
#' `TRUE` i.e. write the data to disk.
#'
#' @return a [tibble][tibble::tibble-package] containing the SLF deaths lookup
#' with `death_date` and `deceased` added.
#' @export
add_deceased_flag <- function(
    year,
    refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(),
    write_to_disk = TRUE) {
  # create slf deaths lookup
  # NOTE(review): rows are not filtered to `year` here - confirm this is
  # intended, or that `refined_death` is already restricted by the caller.
  slf_deaths_lookup <- refined_death %>%
    dplyr::mutate(
      # Prefer record_keydate1; only use the CHI death date when it is missing
      death_date = dplyr::if_else(is.na(.data$record_keydate1),
        .data$death_date_chi, .data$record_keydate1
      ),
      deceased = TRUE,
      # Drops the two source date columns once death_date has been derived
      .keep = "unused"
    ) %>%
    # save anon chi on disk
    slfhelper::get_anon_chi()

  if (write_to_disk) {
    write_file(
      slf_deaths_lookup,
      get_slf_deaths_lookup_path(year, check_mode = "write")
    )
  }

  return(slf_deaths_lookup)
}
10 changes: 1 addition & 9 deletions R/fill_ch_names.R
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ fill_ch_names <- function(ch_data,
"match_mean2",
# "open_interval",
"ch_admission_date",
"qtr_start",
"period_start_date",
"ch_date_registered",
"latest_close_date",
"ch_active",
Expand Down Expand Up @@ -305,7 +305,6 @@ fill_ch_names <- function(ch_data,
"unique_identifier",
"matching_quality_indicator",
"sending_location",
"latest_sc_id",
"chi",
"ch_name",
"ch_postcode",
Expand All @@ -320,9 +319,6 @@ fill_ch_names <- function(ch_data,
"ch_admission_date",
"ch_discharge_date",
"age",
"record_date",
"qtr_start",
"latest_flag",
"gender",
"dob",
"postcode",
Expand Down Expand Up @@ -763,7 +759,6 @@ fill_ch_names <- function(ch_data,
## produce output ----
col_output <- c(
"sending_location",
"latest_sc_id",
"chi",
"ch_name",
"ch_postcode",
Expand All @@ -778,9 +773,6 @@ fill_ch_names <- function(ch_data,
"ch_admission_date",
"ch_discharge_date",
"age",
"record_date",
"qtr_start",
"latest_flag",
"gender",
"dob",
"postcode",
Expand Down
4 changes: 2 additions & 2 deletions R/get_boxi_extract_path.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,13 +46,13 @@ get_boxi_extract_path <- function(
"ae" ~ "anon-A&E-episode-level-extract",
"ae_cup" ~ "anon-A&E-UCD-CUP-extract",
"acute" ~ "anon-Acute-episode-level-extract",
"acute_cup" ~ "anon-Acute-cup-extract",
"acute_cup" ~ "anon-Acute-CUP-extract",
"cmh" ~ "anon-Community-MH-contact-level-extract",
"dn" ~ "anon-District-Nursing-contact-level-extract",
"gp_ooh-c" ~ "anon-GP-OoH-consultations-extract",
"gp_ooh-d" ~ "anon-GP-OoH-diagnosis-extract",
"gp_ooh-o" ~ "anon-GP-OoH-outcomes-extract",
"gp_ooh_cup" ~ "anon-GP-OoH-cup-extract",
"gp_ooh_cup" ~ "anon-GP-OoH-CUP-extract",
"homelessness" ~ "anon-Homelessness-extract",
"maternity" ~ "anon-Maternity-episode-level-extract",
"mh" ~ "anon-Mental-Health-episode-level-extract",
Expand Down
8 changes: 6 additions & 2 deletions R/get_slf_lookup_paths.R
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,21 @@ get_slf_deaths_lookup_path <- function(year, ...) {
#' SLF death dates File Path
#'
#' @description Get the full path to the BOXI NRS Deaths lookup file for all
#' financial years.
#' Note that this name is very similar to the existing
#' `get_slf_deaths_lookup_path()`, which returns the path for the refined_death
#' with deceased flag for each financial year.
#' This function returns the path to the combined financial years lookup,
#' i.e. all years put together.
#'
#' @param ... additional arguments passed to [get_file_path()]
#' @param update the update month (defaults to use [latest_update()])
#'
#' @export
#' @family slf lookup file path
#' @seealso [get_file_path()] for the generic function.

get_combined_slf_deaths_lookup_path <- function(update = latest_update(), ...) {
# Note this name is very similar to the existing slf_deaths_lookup_path which returns the path for
# the processed BOXI extract for each financial year. This function will return the combined financial
# the refined_death with deceased flag for each financial year.
# This function will return the combined financial
# years lookup i.e. all years put together.
combined_slf_deaths_lookup_path <- get_file_path(
directory = fs::path(get_slf_dir(), "Deaths"),
Expand Down
2 changes: 1 addition & 1 deletion R/process_extract_alarms_telecare.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,12 @@ process_extract_alarms_telecare <- function(
"smrtype",
"chi",
"dob",
"person_id",
"gender",
"postcode",
"sc_send_lca",
"record_keydate1",
"record_keydate2",
"person_id",
"sc_latest_submission"
) %>%
slfhelper::get_anon_chi()
Expand Down
2 changes: 1 addition & 1 deletion R/process_extract_care_home.R
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,11 @@ process_extract_care_home <- function(
is_date_in_fyyear(year, .data$record_keydate1, .data$record_keydate2)
) %>%
# remove any episodes where the latest submission was before the current year
# this is what stops cases being in future files
dplyr::filter(
substr(.data$sc_latest_submission, 1L, 4L) >= convert_fyyear_to_year(year)
)


# Data Cleaning ---------------------------------------
source_ch_clean <- ch_data %>%
# create variables
Expand Down
4 changes: 3 additions & 1 deletion R/process_extract_sds.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,13 @@ process_extract_sds <- function(
"smrtype",
"chi",
"dob",
"person_id",
"gender",
"postcode",
"sc_send_lca",
"record_keydate1",
"record_keydate2",
"sc_send_lca"
"sc_latest_submission"
) %>%
slfhelper::get_anon_chi()

Expand Down
7 changes: 3 additions & 4 deletions R/process_it_chi_deaths.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ process_it_chi_deaths <- function(data, write_to_disk = TRUE) {
dplyr::desc(.data$death_date_chi)
) %>%
dplyr::distinct(.data$chi, .keep_all = TRUE) %>%
# Use the NRS death_date unless it isn't there
dplyr::mutate(
death_date = dplyr::coalesce(.data$death_date_nrs, .data$death_date_chi)
) %>%
# Remove death_date_nrs, as this is the NRS weekly unvalidated data and we should not use it.
# The BOXI NRS death date is more reliable, as it is provided monthly and is validated.
dplyr::select(.data$chi, .data$death_date_chi) %>%
slfhelper::get_anon_chi()

if (write_to_disk) {
Expand Down
33 changes: 13 additions & 20 deletions R/process_lookup_deaths.R
Original file line number Diff line number Diff line change
@@ -1,36 +1,29 @@
#' Create the SLF Deaths lookup
#'
#' @description Currently this just uses the NRS death dates 'as is', with no
#' corrections or modifications, it is expected that this will be expanded to
#' use the CHI deaths extract from IT as well as taking into account data in
#' the episode file to assess the validity of a death date.
#' @description Use all-year refined death data to produce year-specific
#' slf_deaths_lookup with deceased flag added.
#'
#' @param year The year to process, in FY format.
#' @param nrs_deaths_data NRS deaths data.
#' @param chi_deaths_data IT CHI deaths data.
#' @param refined_death refined death date combining nrs and it_chi.
#' @param write_to_disk (optional) Should the data be written to disk default is
#' `TRUE` i.e. write the data to disk.
#'
#' @return a [tibble][tibble::tibble-package] containing the episode file
#' @return a [tibble][tibble::tibble-package] add deceased flag to deaths
#' @export
process_slf_deaths_lookup <- function(
year,
nrs_deaths_data = read_file(
get_source_extract_path(year, "deaths"),
col_select = c("chi", "record_keydate1")
),
chi_deaths_data = read_file(get_slf_chi_deaths_path()),
refined_death = read_file(get_combined_slf_deaths_lookup_path()),
write_to_disk = TRUE) {
slf_deaths_lookup <- nrs_deaths_data %>%
# Only modification over 'raw' NRS is to keep the earliest death date
dplyr::select("chi", "record_keydate1") %>%
dplyr::arrange(.data$record_keydate1) %>%
dplyr::distinct(.data$chi, .keep_all = TRUE) %>%
# create slf deaths lookup
slf_deaths_lookup <- refined_death %>%
slfhelper::get_chi() %>%
# Filter the chi death dates to the FY as the lookup is by FY
dplyr::filter(fy == year) %>%
# use the BOXI NRS death date by default, but if it's missing, use the chi death date.
dplyr::mutate(
death_date = .data$record_keydate1,
deceased = TRUE,
.keep = "unused"
deceased = TRUE
) %>%
# save anon chi on disk
slfhelper::get_anon_chi()

if (write_to_disk) {
Expand Down
Loading

0 comments on commit 3f1189d

Please sign in to comment.