From b829eab03c0b7d4a2b315644304fe6ed7fa2385f Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 26 Mar 2024 15:20:43 +0000 Subject: [PATCH 01/96] update documentation --- man/calculate_stay.Rd | 4 ++-- man/compute_mid_year_age.Rd | 4 ++-- man/convert_date_to_numeric.Rd | 4 ++-- man/convert_numeric_to_date.Rd | 4 ++-- man/end_fy.Rd | 2 +- man/end_fy_quarter.Rd | 2 +- man/end_next_fy_quarter.Rd | 4 ++-- man/fy_interval.Rd | 4 ++-- man/is_date_in_fyyear.Rd | 4 ++-- man/last_date_month.Rd | 4 ++-- man/midpoint_fy.Rd | 4 ++-- man/next_fy.Rd | 4 ++-- man/read_lookup_sc_demographics.Rd | 6 ++++-- man/start_fy.Rd | 2 +- man/start_fy_quarter.Rd | 2 +- man/start_next_fy_quarter.Rd | 6 +++--- 16 files changed, 31 insertions(+), 29 deletions(-) diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd index 5e9266b10..43b7bd166 100644 --- a/man/calculate_stay.Rd +++ b/man/calculate_stay.Rd @@ -34,16 +34,16 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd index 5a50370e0..142fa4aab 100644 --- a/man/compute_mid_year_age.Rd +++ b/man/compute_mid_year_age.Rd @@ -31,16 +31,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, 
\code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd index b67eaa778..5511fec84 100644 --- a/man/convert_date_to_numeric.Rd +++ b/man/convert_date_to_numeric.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd index a09b7b9b9..f786e0319 100644 --- a/man/convert_numeric_to_date.Rd +++ b/man/convert_numeric_to_date.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_fy.Rd b/man/end_fy.Rd index 6220f5f32..2925ffe60 100644 --- a/man/end_fy.Rd +++ b/man/end_fy.Rd @@ -34,8 +34,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, 
\code{\link{next_fy}()}, -\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd index 26c439a04..0efe9624a 100644 --- a/man/end_fy_quarter.Rd +++ b/man/end_fy_quarter.Rd @@ -33,8 +33,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd index 702446e82..f9cc1720a 100644 --- a/man/end_next_fy_quarter.Rd +++ b/man/end_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd index 00b9ea52c..12d1d36bb 100644 --- a/man/fy_interval.Rd +++ b/man/fy_interval.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } 
\concept{date functions} diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd index e74bd5734..97a0f3639 100644 --- a/man/is_date_in_fyyear.Rd +++ b/man/is_date_in_fyyear.Rd @@ -41,15 +41,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd index 3d3b9544e..f52305356 100644 --- a/man/last_date_month.Rd +++ b/man/last_date_month.Rd @@ -25,15 +25,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd index 2363df773..7bac9b6b3 100644 --- a/man/midpoint_fy.Rd +++ b/man/midpoint_fy.Rd @@ -27,15 +27,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{next_fy}()}, 
-\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/next_fy.Rd b/man/next_fy.Rd index 7524c5f11..19e1193f4 100644 --- a/man/next_fy.Rd +++ b/man/next_fy.Rd @@ -27,15 +27,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, -\code{\link{start_fy}()}, \code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/read_lookup_sc_demographics.Rd b/man/read_lookup_sc_demographics.Rd index 3bda889fe..6c7dd049e 100644 --- a/man/read_lookup_sc_demographics.Rd +++ b/man/read_lookup_sc_demographics.Rd @@ -4,10 +4,12 @@ \alias{read_lookup_sc_demographics} \title{Read SC demographics} \usage{ -read_lookup_sc_demographics(sc_connection = phs_db_connection(dsn = "DVPROD")) +read_lookup_sc_demographics( + sc_dvprod_connection = phs_db_connection(dsn = "DVPROD") +) } \arguments{ -\item{sc_connection}{Connection to the sc platform} +\item{sc_dvprod_connection}{Connection to the sc platform} } \value{ a \link[tibble:tibble-package]{tibble} diff --git a/man/start_fy.Rd b/man/start_fy.Rd index 9951af2ec..4996bfb72 100644 --- a/man/start_fy.Rd +++ b/man/start_fy.Rd @@ -27,8 +27,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_fy_quarter.Rd 
b/man/start_fy_quarter.Rd index 9936736a8..f5729dcb0 100644 --- a/man/start_fy_quarter.Rd +++ b/man/start_fy_quarter.Rd @@ -26,8 +26,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd index fdac297a7..098f0bf73 100644 --- a/man/start_next_fy_quarter.Rd +++ b/man/start_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy}()}, \code{\link{end_fy_quarter}()}, +\code{\link{end_fy}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy}()}, -\code{\link{start_fy_quarter}()} +\code{\link{start_fy_quarter}()}, +\code{\link{start_fy}()} } \concept{date functions} From 99f9c2daf9df621f84ca405338b0fa320fa1a482 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 26 Mar 2024 15:21:17 +0000 Subject: [PATCH 02/96] Update sc connection name --- R/read_lookup_sc_demographics.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R index 020542baa..cb3cea3c2 100644 --- a/R/read_lookup_sc_demographics.R +++ b/R/read_lookup_sc_demographics.R @@ -1,13 +1,13 @@ #' Read SC demographics #' -#' @param sc_connection Connection to the sc platform +#' @param sc_dvprod_connection Connection to the sc platform #' #' @return a [tibble][tibble::tibble-package] #' @export #' -read_lookup_sc_demographics <- function(sc_connection = phs_db_connection(dsn = "DVPROD")) 
{ +read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPROD")) { sc_demog <- dplyr::tbl( - sc_connection, + sc_dvprod_connection, dbplyr::in_schema("social_care_2", "demographic_snapshot") ) %>% dplyr::select( From 298e61351441841a0edb0bdf220da34a2887e4b6 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Tue, 26 Mar 2024 15:23:34 +0000 Subject: [PATCH 03/96] Update documentation --- man/calculate_stay.Rd | 4 ++-- man/compute_mid_year_age.Rd | 4 ++-- man/convert_date_to_numeric.Rd | 4 ++-- man/convert_numeric_to_date.Rd | 4 ++-- man/end_fy.Rd | 2 +- man/end_fy_quarter.Rd | 2 +- man/end_next_fy_quarter.Rd | 4 ++-- man/fy_interval.Rd | 4 ++-- man/is_date_in_fyyear.Rd | 4 ++-- man/last_date_month.Rd | 4 ++-- man/midpoint_fy.Rd | 4 ++-- man/next_fy.Rd | 4 ++-- man/start_fy.Rd | 2 +- man/start_fy_quarter.Rd | 2 +- man/start_next_fy_quarter.Rd | 6 +++--- 15 files changed, 27 insertions(+), 27 deletions(-) diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd index 43b7bd166..5e9266b10 100644 --- a/man/calculate_stay.Rd +++ b/man/calculate_stay.Rd @@ -34,16 +34,16 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd index 142fa4aab..5a50370e0 100644 --- a/man/compute_mid_year_age.Rd +++ b/man/compute_mid_year_age.Rd @@ -31,16 +31,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{convert_date_to_numeric}()}, 
\code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd index 5511fec84..b67eaa778 100644 --- a/man/convert_date_to_numeric.Rd +++ b/man/convert_date_to_numeric.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd index f786e0319..a09b7b9b9 100644 --- a/man/convert_numeric_to_date.Rd +++ b/man/convert_numeric_to_date.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, 
\code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_fy.Rd b/man/end_fy.Rd index 2925ffe60..6220f5f32 100644 --- a/man/end_fy.Rd +++ b/man/end_fy.Rd @@ -34,8 +34,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd index 0efe9624a..26c439a04 100644 --- a/man/end_fy_quarter.Rd +++ b/man/end_fy_quarter.Rd @@ -33,8 +33,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd index f9cc1720a..702446e82 100644 --- a/man/end_next_fy_quarter.Rd +++ b/man/end_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd index 12d1d36bb..00b9ea52c 100644 --- a/man/fy_interval.Rd +++ b/man/fy_interval.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, 
+\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd index 97a0f3639..e74bd5734 100644 --- a/man/is_date_in_fyyear.Rd +++ b/man/is_date_in_fyyear.Rd @@ -41,15 +41,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd index f52305356..3d3b9544e 100644 --- a/man/last_date_month.Rd +++ b/man/last_date_month.Rd @@ -25,15 +25,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd index 7bac9b6b3..2363df773 100644 --- a/man/midpoint_fy.Rd +++ b/man/midpoint_fy.Rd @@ -27,15 +27,15 @@ Other date functions: 
\code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/next_fy.Rd b/man/next_fy.Rd index 19e1193f4..7524c5f11 100644 --- a/man/next_fy.Rd +++ b/man/next_fy.Rd @@ -27,15 +27,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/start_fy.Rd b/man/start_fy.Rd index 4996bfb72..9951af2ec 100644 --- a/man/start_fy.Rd +++ b/man/start_fy.Rd @@ -27,8 +27,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd index f5729dcb0..9936736a8 100644 --- a/man/start_fy_quarter.Rd +++ b/man/start_fy_quarter.Rd @@ -26,8 +26,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, 
\code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd index 098f0bf73..fdac297a7 100644 --- a/man/start_next_fy_quarter.Rd +++ b/man/start_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, -\code{\link{start_fy}()} +\code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()} } \concept{date functions} From fe189a94d7cccaa156fb6a96d5e3f7f7ad22cb90 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Mon, 8 Apr 2024 15:53:03 +0100 Subject: [PATCH 04/96] 936 - Update parameters with file paths (#939) Specify file paths in sc function parameters --- R/process_sc_all_alarms_telecare.R | 2 +- R/process_sc_all_care_home.R | 8 ++++---- R/process_sc_all_home_care.R | 2 +- R/process_sc_all_sds.R | 2 +- man/process_sc_all_alarms_telecare.Rd | 6 +++++- man/process_sc_all_care_home.Rd | 8 ++++---- man/process_sc_all_home_care.Rd | 6 +++++- man/process_sc_all_sds.Rd | 6 +++++- 8 files changed, 26 insertions(+), 14 deletions(-) diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R index 77877d584..55bc42d4e 100644 --- a/R/process_sc_all_alarms_telecare.R +++ b/R/process_sc_all_alarms_telecare.R @@ -13,7 +13,7 @@ #' process_sc_all_alarms_telecare <- function( data, - sc_demog_lookup, + sc_demog_lookup = 
read_file(get_sc_demog_lookup_path()), write_to_disk = TRUE) { # Data Cleaning----------------------------------------------------- diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index d287f2042..f0b6c3db4 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -22,10 +22,10 @@ #' process_sc_all_care_home <- function( data, - sc_demog_lookup, - it_chi_deaths_data, - ch_name_lookup_path = get_slf_ch_name_lookup_path(), - spd_path = get_spd_path(), + sc_demog_lookup = read_file(get_sc_demog_lookup_path()), + it_chi_deaths_data = read_file(get_slf_chi_deaths_path()), + ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()), + spd_path = read_file(get_spd_path()), write_to_disk = TRUE) { ## Data Cleaning----------------------------------------------------- ch_clean <- data %>% diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R index 3ada9a2da..331a682e1 100644 --- a/R/process_sc_all_home_care.R +++ b/R/process_sc_all_home_care.R @@ -13,7 +13,7 @@ #' process_sc_all_home_care <- function( data, - sc_demog_lookup, + sc_demog_lookup = read_file(get_sc_demog_lookup_path()), write_to_disk = TRUE) { replaced_dates <- data %>% dplyr::filter(.data$hc_start_date_after_period_end_date != 1) %>% diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R index a1a1db24a..3ebc064c0 100644 --- a/R/process_sc_all_sds.R +++ b/R/process_sc_all_sds.R @@ -12,7 +12,7 @@ #' process_sc_all_sds <- function( data, - sc_demog_lookup, + sc_demog_lookup = read_file(get_sc_demog_lookup_path()), write_to_disk = TRUE) { # Match on demographics data (chi, gender, dob and postcode) matched_sds_data <- data %>% diff --git a/man/process_sc_all_alarms_telecare.Rd b/man/process_sc_all_alarms_telecare.Rd index 1dded751d..031fd5028 100644 --- a/man/process_sc_all_alarms_telecare.Rd +++ b/man/process_sc_all_alarms_telecare.Rd @@ -4,7 +4,11 @@ \alias{process_sc_all_alarms_telecare} \title{Process the all Alarms Telecare extract} 
\usage{ -process_sc_all_alarms_telecare(data, sc_demog_lookup, write_to_disk = TRUE) +process_sc_all_alarms_telecare( + data, + sc_demog_lookup = read_file(get_sc_demog_lookup_path()), + write_to_disk = TRUE +) } \arguments{ \item{data}{The extract to process} diff --git a/man/process_sc_all_care_home.Rd b/man/process_sc_all_care_home.Rd index 37d6332ca..a137119b7 100644 --- a/man/process_sc_all_care_home.Rd +++ b/man/process_sc_all_care_home.Rd @@ -6,10 +6,10 @@ \usage{ process_sc_all_care_home( data, - sc_demog_lookup, - it_chi_deaths_data, - ch_name_lookup_path = get_slf_ch_name_lookup_path(), - spd_path = get_spd_path(), + sc_demog_lookup = read_file(get_sc_demog_lookup_path()), + it_chi_deaths_data = read_file(get_slf_chi_deaths_path()), + ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()), + spd_path = read_file(get_spd_path()), write_to_disk = TRUE ) } diff --git a/man/process_sc_all_home_care.Rd b/man/process_sc_all_home_care.Rd index 1e0afcafd..ff18aac6a 100644 --- a/man/process_sc_all_home_care.Rd +++ b/man/process_sc_all_home_care.Rd @@ -4,7 +4,11 @@ \alias{process_sc_all_home_care} \title{Process the all home care extract} \usage{ -process_sc_all_home_care(data, sc_demog_lookup, write_to_disk = TRUE) +process_sc_all_home_care( + data, + sc_demog_lookup = read_file(get_sc_demog_lookup_path()), + write_to_disk = TRUE +) } \arguments{ \item{data}{The extract to process} diff --git a/man/process_sc_all_sds.Rd b/man/process_sc_all_sds.Rd index 69d79fc9d..3454ef35b 100644 --- a/man/process_sc_all_sds.Rd +++ b/man/process_sc_all_sds.Rd @@ -4,7 +4,11 @@ \alias{process_sc_all_sds} \title{Process the all SDS extract} \usage{ -process_sc_all_sds(data, sc_demog_lookup, write_to_disk = TRUE) +process_sc_all_sds( + data, + sc_demog_lookup = read_file(get_sc_demog_lookup_path()), + write_to_disk = TRUE +) } \arguments{ \item{data}{The extract to process} From f03c5c60841f65f70ce69b8873ecc4597ef03dfa Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Fri, 29 Mar 
2024 10:42:47 +0000 Subject: [PATCH 05/96] remove and merge overlapping records in GP OoHs --- R/process_extract_consultations.R | 55 +++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/R/process_extract_consultations.R b/R/process_extract_consultations.R index 6dc175cb8..cab126bc5 100644 --- a/R/process_extract_consultations.R +++ b/R/process_extract_consultations.R @@ -81,25 +81,44 @@ process_extract_ooh_consultations <- function(data, year) { # Clean up some overlapping episodes # Only merge if they look like duplicates other than the time, # In which case take the earliest start and latest end. - consultations_clean <- consultations_covid + consultations_clean <- consultations_covid %>% + # Sort in reverse order so we can use coalesce which takes the first non-missing value + dplyr::arrange(chi, + ooh_case_id, + record_keydate1, + record_keydate2) %>% + data.table::as.data.table() - # TODO Remove / merge overlapping records in GP OoHs - # dtplyr::lazy_dt() %>% - # # Sort in reverse order so we can use coalesce which takes the first non-missing value - # dplyr::arrange(chi, ooh_case_id, dplyr::desc(record_keydate1), dplyr::desc(record_keydate2)) %>% - # # This seems to be enough to identify a unique episode - # dplyr::group_by(chi, ooh_case_id, consultation_type, location) %>% - # # Records will be merged if they don't look unique and there is overlap or no time between them - # dplyr::mutate(episode_counter = replace_na(record_keydate1 > lag(record_keydate2), TRUE) %>% - # cumsum()) %>% - # dplyr::group_by(chi, ooh_case_id, consultation_type, location, episode_counter) %>% - # dplyr::summarise( - # record_keydate1 = min(record_keydate1), - # record_keydate2 = max(record_keydate2), - # dplyr::across(c(dplyr::everything(), -"record_keydate1", -"record_keydate2"), dplyr::coalesce) - # ) %>% - # dplyr::ungroup() %>% - # dplyr::as_tibble() + consultations_clean[, distinct_check := ( + record_keydate1 > 
data.table::shift(record_keydate2, fill = NA, type = "lag") + ), + by = .(chi, ooh_case_id, consultation_type, location)] + consultations_clean[, distinct_check := tidyr::replace_na(distinct_check, TRUE)] + consultations_clean[, episode_counter := cumsum(distinct_check), + by = .(chi, ooh_case_id, consultation_type, location)] + consultations_clean[, + c("record_keydate1", + "record_keydate2") := list(min(record_keydate1), + max(record_keydate2)), + by = .(chi, + ooh_case_id, + consultation_type, + location, + episode_counter)] + + # replace NA with previous non-NA value in each column + col_sel = names(consultations_clean) + col_sel = col_sel[!(col_sel %in% c("record_keydate1", "record_keydate2"))] + consultations_clean[, + (col_sel) := lapply(.SD, zoo::na.locf, na.rm = FALSE), + .SDcols = col_sel] + + consultations_clean[, + c("distinct_check", + "episode_counter") := list(NULL, NULL)] + consultations_clean = unique(consultations_clean) %>% + dplyr::as_tibble() + # cleaning up overlapping episodes done return(consultations_clean) } From fda0c515e0b411868b9fed003258915c450a5793 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Fri, 29 Mar 2024 10:44:40 +0000 Subject: [PATCH 06/96] Style code --- R/process_extract_consultations.R | 60 +++++++++++++++++++------------ 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/R/process_extract_consultations.R b/R/process_extract_consultations.R index cab126bc5..2aafdbdca 100644 --- a/R/process_extract_consultations.R +++ b/R/process_extract_consultations.R @@ -83,40 +83,56 @@ process_extract_ooh_consultations <- function(data, year) { # In which case take the earliest start and latest end. 
consultations_clean <- consultations_covid %>% # Sort in reverse order so we can use coalesce which takes the first non-missing value - dplyr::arrange(chi, - ooh_case_id, - record_keydate1, - record_keydate2) %>% + dplyr::arrange( + chi, + ooh_case_id, + record_keydate1, + record_keydate2 + ) %>% data.table::as.data.table() consultations_clean[, distinct_check := ( record_keydate1 > data.table::shift(record_keydate2, fill = NA, type = "lag") ), - by = .(chi, ooh_case_id, consultation_type, location)] + by = .(chi, ooh_case_id, consultation_type, location) + ] consultations_clean[, distinct_check := tidyr::replace_na(distinct_check, TRUE)] consultations_clean[, episode_counter := cumsum(distinct_check), - by = .(chi, ooh_case_id, consultation_type, location)] + by = .(chi, ooh_case_id, consultation_type, location) + ] consultations_clean[, - c("record_keydate1", - "record_keydate2") := list(min(record_keydate1), - max(record_keydate2)), - by = .(chi, - ooh_case_id, - consultation_type, - location, - episode_counter)] + c( + "record_keydate1", + "record_keydate2" + ) := list( + min(record_keydate1), + max(record_keydate2) + ), + by = .( + chi, + ooh_case_id, + consultation_type, + location, + episode_counter + ) + ] # replace NA with previous non-NA value in each column - col_sel = names(consultations_clean) - col_sel = col_sel[!(col_sel %in% c("record_keydate1", "record_keydate2"))] + col_sel <- names(consultations_clean) + col_sel <- col_sel[!(col_sel %in% c("record_keydate1", "record_keydate2"))] consultations_clean[, - (col_sel) := lapply(.SD, zoo::na.locf, na.rm = FALSE), - .SDcols = col_sel] + (col_sel) := lapply(.SD, zoo::na.locf, na.rm = FALSE), + .SDcols = col_sel + ] - consultations_clean[, - c("distinct_check", - "episode_counter") := list(NULL, NULL)] - consultations_clean = unique(consultations_clean) %>% + consultations_clean[ + , + c( + "distinct_check", + "episode_counter" + ) := list(NULL, NULL) + ] + consultations_clean <- 
unique(consultations_clean) %>% dplyr::as_tibble() # cleaning up overlapping episodes done From 713e7a76ec6e40f395b90475c2a89f9c24f80c1f Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 15 Apr 2024 10:12:50 +0100 Subject: [PATCH 07/96] update spelling to lowercases --- .github/actions/spelling/expect.txt | 73 +++++++++++++++-------------- 1 file changed, 39 insertions(+), 34 deletions(-) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index d4124911f..af62783db 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -1,7 +1,8 @@ -Accom +acc +accom admloc admtype -ADPE +adpe adtf arrivalmode arth @@ -13,23 +14,23 @@ birthtime bodyloc boxi callr -Canx +canx carehome careinspectorate categorises cattend -CCYY +ccyy cdn cennum chp chpstart cij -Classificat +classificat cls cmh -CNWs +cnws codecov -Comhairle +comhairle commhosp congen costincdnas @@ -53,7 +54,7 @@ deathdiag demog devhints dfc -Diagramme +diagramme disch dischloc dischto @@ -71,7 +72,8 @@ fcase feb fifelse fileext -Finalise +finalise +fnc fst ftm fyear @@ -81,7 +83,7 @@ ggplot github gls gms -GPOo +gpoo gpprac gss hbnames @@ -91,7 +93,7 @@ hbrescode hbtreatcode hbtreatname hci -HCP +hcp hhg hjust hms @@ -102,12 +104,12 @@ hscdiip hscp hscpnames htmlwidgets -IDPC +idpc infyyear intzone ipdc issuenumber -itle +istle iwalk jaccard jan @@ -120,7 +122,7 @@ keytimex kis lazydt lcap -LCHO +lcho lgl lintr los @@ -128,16 +130,17 @@ ltc lubridate magrittr markdownguide -Matern -Mcbride +matern +mcbride mcmahon -MMMYY -MONTHFLAG +miu +mmmyy +monthflag mpat multiday multisession multistaff -NAs +na newcons nhs nhshosp @@ -148,31 +151,33 @@ oldtadm opendata openxl openxlsx -ORCID +orcid outfile pandoc patflow pattype -PCEC +pcec phs phsmethods phsopendata pkgdown placeinc plics -PMS +pms popluation postcodes -PPAs +ppas +prac +praccode prac praccode ptypes purrr quickstart rankdir -Rbuildignore +rbuildignore rcmdcheck -RDD +rdd rds reabl 
reablement @@ -187,28 +192,28 @@ renviron rlang rmarkdown roxygen -Rprofile -Rscript +rprofile +rscript rspm rstudio rstudioapi -Rtype +rtype scoial scotp -SDcols +sdcols seealso selfharm setkeyv setnafill setnames setorder -Siar +siar sigfac simd slf slfhelper smr -SMRA +smra smrtype sparra spd @@ -228,10 +233,10 @@ thom tibble tidyr tidyselect -TODOs +todo uid ungroup -Unicode +unicode updown upi vline @@ -240,7 +245,7 @@ xintercept xlsx yearstay yml -YYYYQX +yyyyqx zihao zsav zstd From a0ac03060842d8cebc602b77a24797c954e967d1 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 15 Apr 2024 10:28:19 +0100 Subject: [PATCH 08/96] update spelling --- .github/actions/spelling/expect.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index af62783db..d27b6e755 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -12,7 +12,7 @@ aut bedday birthtime bodyloc -boxi +BOXI callr canx carehome @@ -109,7 +109,7 @@ infyyear intzone ipdc issuenumber -istle +itle iwalk jaccard jan @@ -133,7 +133,7 @@ markdownguide matern mcbride mcmahon -miu +MIU mmmyy monthflag mpat @@ -200,7 +200,7 @@ rstudioapi rtype scoial scotp -sdcols +SDcols seealso selfharm setkeyv From 2cff80d9e8bf2602f8579ac31edd24d734a6cb28 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Wed, 17 Apr 2024 12:55:41 +0100 Subject: [PATCH 09/96] Create tests for social care sandpit extracts (#943) * Update `write_tests_xlsx` * Update documentation * Add in sandpit tests where the extract is saved * Setup tests for sandpit Further checks needed for writing to disk * Update documentation * Amend case_when statement * rename function to include 'sc' * Update documentation * Use `is.null` instead of `missing` * Update documentation * Add `year` as a parameter * Update documentation * Setup for writing sandpit tests to disk * Update parameters for 
sandpit tests * Update documentation * Use `process_tests_sc_sandpit` * Apply styling * Style code * update documentation Co-authored-by: Zihao Li * Rename variable sc_id Co-authored-by: Zihao Li * Rename variable Co-authored-by: Zihao Li * Rename variable Co-authored-by: Zihao Li * Update documentation * [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/8689503990/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/943#issuecomment-2056794120 Signed-off-by: check-spelling-bot on-behalf-of: @check-spelling * update spelling * update spelling expect variant --------- Signed-off-by: check-spelling-bot Co-authored-by: Jennit07 Co-authored-by: Zihao Li Co-authored-by: Zihao Li --- .github/actions/spelling/expect.txt | 8 +- NAMESPACE | 2 + R/process_tests_sc_sandpit.R | 144 ++++++++++++++++++++++++++++ R/read_lookup_sc_client.R | 3 + R/read_lookup_sc_demographics.R | 3 + R/read_sc_all_alarms_telecare.R | 3 + R/read_sc_all_care_home.R | 3 + R/read_sc_all_home_care.R | 3 + R/read_sc_all_sds.R | 3 + R/write_tests_xlsx.R | 25 +++-- man/process_tests_sc_sandpit.Rd | 20 ++++ man/produce_sc_sandpit_tests.Rd | 24 +++++ man/write_tests_xlsx.Rd | 2 +- 13 files changed, 228 insertions(+), 15 deletions(-) create mode 100644 R/process_tests_sc_sandpit.R create mode 100644 man/process_tests_sc_sandpit.Rd create mode 100644 man/produce_sc_sandpit_tests.Rd diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index d27b6e755..a4a34a58b 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -12,7 +12,7 @@ aut bedday birthtime bodyloc -BOXI +boxi callr canx carehome @@ -169,8 +169,6 @@ postcodes ppas prac praccode -prac -praccode ptypes purrr quickstart @@ -207,7 +205,7 @@ setkeyv setnafill setnames setorder -siar +Siar sigfac simd slf @@ -246,6 +244,6 @@ xlsx yearstay yml yyyyqx -zihao +Zihao zsav zstd diff 
--git a/NAMESPACE b/NAMESPACE index 91f6b66d9..4606cf3f2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -145,8 +145,10 @@ export(process_tests_sc_all_hc_episodes) export(process_tests_sc_all_sds_episodes) export(process_tests_sc_client_lookup) export(process_tests_sc_demographics) +export(process_tests_sc_sandpit) export(process_tests_sds) export(produce_episode_file_tests) +export(produce_sc_sandpit_tests) export(produce_source_extract_tests) export(produce_test_comparison) export(read_extract_acute) diff --git a/R/process_tests_sc_sandpit.R b/R/process_tests_sc_sandpit.R new file mode 100644 index 000000000..089f61aa1 --- /dev/null +++ b/R/process_tests_sc_sandpit.R @@ -0,0 +1,144 @@ +#' Process tests for the social care sandpit extracts +#' +#' @param type Name of sandpit extract. +#' +#' @return a [tibble][tibble::tibble-package] containing a test comparison. +#' @export +#' +process_tests_sc_sandpit <- function(type = c("at", "hc", "ch", "sds", "demographics", "client"), year = NULL) { + comparison <- produce_test_comparison( + old_data = produce_sc_sandpit_tests( + read_file(get_sandpit_extract_path(type = {{ type }}, year = year, update = previous_update())), + type = {{ type }} + ), + new_data = produce_sc_sandpit_tests( + read_file(get_sandpit_extract_path(type = {{ type }}, year = year, update = latest_update())), + type = {{ type }} + ) + ) + + comparison %>% + write_tests_xlsx(sheet_name = {{ type }}, year = year, workbook_name = "sandpit") + + return(comparison) +} + + +#' Produce tests for social care sandpit extracts. +#' +#' @param data new or old data for testing summary flags +#' (data is from [get_sandpit_extract_path()]) +#' @param type Name of sandpit extract. 
+#' +#' @return a dataframe with a count of each flag +#' from [calculate_measures()] +#' @export +#' +produce_sc_sandpit_tests <- function(data, type = c("demographics", "client", "at", "ch", "hc", "sds")) { + if (type == "demographics") { + missing_tests <- data %>% + dplyr::mutate( + n_missing_chi = is_missing(.data$chi_upi), + n_missing_sc_id = is_missing(.data$social_care_id), + n_missing_dob = is.na(.data$chi_date_of_birth), + n_missing_postcode = is_missing(.data$chi_postcode), + n_missing_gender = is_missing(.data$chi_gender_code) + ) %>% + dplyr::select(n_missing_chi:n_missing_gender) %>% + calculate_measures(measure = "sum") + + latest_flag_tests <- data %>% + dplyr::filter(!(is.na(.data$chi_upi))) %>% + dplyr::group_by(.data$chi_upi, .data$sending_location) %>% + dplyr::summarise(latest_count = sum(.data$latest_record_flag)) %>% + dplyr::ungroup() %>% + dplyr::mutate( + chi_latest_flag_0 = dplyr::if_else(.data$latest_count == 0, 1, 0), + chi_latest_flag_1 = dplyr::if_else(.data$latest_count == 1, 1, 0), + chi_latest_flag_2 = dplyr::if_else(.data$latest_count == 2, 1, 0), + chi_latest_flag_3 = dplyr::if_else(.data$latest_count == 3, 1, 0), + chi_latest_flag_4 = dplyr::if_else(.data$latest_count == 4, 1, 0), + chi_latest_flag_5 = dplyr::if_else(.data$latest_count == 5, 1, 0), + chi_latest_flag_6 = dplyr::if_else(.data$latest_count == 6, 1, 0), + chi_latest_flag_7 = dplyr::if_else(.data$latest_count == 7, 1, 0), + chi_latest_flag_8 = dplyr::if_else(.data$latest_count == 8, 1, 0), + chi_latest_flag_9 = dplyr::if_else(.data$latest_count == 9, 1, 0), + chi_latest_flag_10 = dplyr::if_else(.data$latest_count == 10, 1, 0) + ) %>% + dplyr::select(.data$chi_latest_flag_0:.data$chi_latest_flag_10) %>% + calculate_measures(measure = "sum") + + # add a flag for sc ids where there is multiple chi associated + sc_id_multi_chi <- data %>% + dplyr::distinct() %>% + dplyr::filter(!(is.na(.data$chi_upi))) %>% + dplyr::group_by(.data$social_care_id, .data$sending_location) 
%>% + dplyr::distinct(.data$chi_upi, .keep_all = TRUE) %>% + dplyr::mutate(distinct_chi_count = dplyr::n_distinct(.data$chi_upi)) %>% + dplyr::filter(distinct_chi_count > 1) %>% + dplyr::distinct(.data$social_care_id, .data$sending_location, .keep_all = TRUE) %>% + dplyr::mutate(sc_id_multi_chi = 1) %>% + create_sending_location_test_flags(.data$sending_location) %>% + dplyr::ungroup() %>% + dplyr::rename( + sc_id_multi_chi_Aberdeen_City = Aberdeen_City, + sc_id_multi_chi_Aberdeenshire = Aberdeenshire, + sc_id_multi_chi_Angus = Angus, + sc_id_multi_chi_Argyll_and_Bute = Argyll_and_Bute, + sc_id_multi_chi_City_of_Edinburgh = City_of_Edinburgh, + sc_id_multi_chi_Clackmannanshire = Clackmannanshire, + sc_id_multi_chi_Dumfries_and_Galloway = Dumfries_and_Galloway, + sc_id_multi_chi_Dundee_City = Dundee_City, + sc_id_multi_chi_East_Ayrshire = East_Ayrshire, + sc_id_multi_chi_East_Dunbartonshire = East_Dunbartonshire, + sc_id_multi_chi_East_Lothian = East_Lothian, + sc_id_multi_chi_East_Renfrewshire = East_Renfrewshire, + sc_id_multi_chi_Falkirk = Falkirk, + sc_id_multi_chi_Fife = Fife, + sc_id_multi_chi_Glasgow_City = Glasgow_City, + sc_id_multi_chi_Highland = Highland, + sc_id_multi_chi_Inverclyde = Inverclyde, + sc_id_multi_chi_Midlothian = Midlothian, + sc_id_multi_chi_Moray = Moray, + sc_id_multi_chi_Na_h_Eileanan_Siar = Na_h_Eileanan_Siar, + sc_id_multi_chi_North_Ayrshire = North_Ayrshire, + sc_id_multi_chi_North_Lanarkshire = North_Lanarkshire, + sc_id_multi_chi_Orkney_Islands = Orkney_Islands, + sc_id_multi_chi_Perth_and_Kinross = Perth_and_Kinross, + sc_id_multi_chi_Renfrewshire = Renfrewshire, + sc_id_multi_chi_Scottish_Borders = Scottish_Borders, + sc_id_multi_chi_Shetland_Islands = Shetland_Islands, + sc_id_multi_chi_South_Ayrshire = South_Ayrshire, + sc_id_multi_chi_South_Lanarkshire = South_Lanarkshire, + sc_id_multi_chi_Stirling = Stirling, + sc_id_multi_chi_West_Dunbartonshire = West_Dunbartonshire, + sc_id_multi_chi_West_Lothian = West_Lothian + ) %>% + 
dplyr::select(.data$sc_id_multi_chi, .data$sc_id_multi_chi_Aberdeen_City:.data$sc_id_multi_chi_West_Lothian) %>% + calculate_measures(measure = "sum") + + output <- list( + missing_tests, + latest_flag_tests, + sc_id_multi_chi + ) %>% + purrr::reduce(dplyr::full_join, by = c("measure", "value")) + + return(output) + } else if (type == "client" | type == "at" | type == "ch" | + type == "hc" | type == "sds") { + output <- data %>% + # create test flags + dplyr::mutate( + unique_sc_id = dplyr::lag(.data$social_care_id) != .data$social_care_id, + n_missing_sc_id = is_missing(.data$social_care_id) + ) %>% + create_sending_location_test_flags(.data$sending_location) %>% + # remove variables that won't be summed + dplyr::select(c("unique_sc_id":"West_Lothian")) %>% + # use function to sum new test flags + calculate_measures(measure = "sum") + + return(output) + } +} diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R index 370a15722..d2b549671 100644 --- a/R/read_lookup_sc_client.R +++ b/R/read_lookup_sc_client.R @@ -80,6 +80,9 @@ read_lookup_sc_client <- function(fyyear, if (!fs::file_exists(get_sandpit_extract_path(type = "client", year = fyyear))) { client_data %>% write_file(get_sandpit_extract_path(type = "client", year = fyyear)) + + client_data %>% + process_tests_sc_sandpit(type = "client", year = fyyear) } else { client_data <- client_data } diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R index cb3cea3c2..729f3a445 100644 --- a/R/read_lookup_sc_demographics.R +++ b/R/read_lookup_sc_demographics.R @@ -28,6 +28,9 @@ read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection if (!fs::file_exists(get_sandpit_extract_path(type = "demographics"))) { sc_demog %>% write_file(get_sandpit_extract_path(type = "demographics")) + + sc_demog %>% + process_tests_sc_sandpit(type = "demographics") } else { sc_demog <- sc_demog } diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R index 
5abd9bc7b..4af57d857 100644 --- a/R/read_sc_all_alarms_telecare.R +++ b/R/read_sc_all_alarms_telecare.R @@ -31,6 +31,9 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection if (!fs::file_exists(get_sandpit_extract_path(type = "at"))) { at_full_data %>% write_file(get_sandpit_extract_path(type = "at")) + + at_full_data %>% + process_tests_sandpit(type = "at") } else { at_full_data <- at_full_data } diff --git a/R/read_sc_all_care_home.R b/R/read_sc_all_care_home.R index 870a94ded..0e74d6623 100644 --- a/R/read_sc_all_care_home.R +++ b/R/read_sc_all_care_home.R @@ -33,6 +33,9 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn = if (!fs::file_exists(get_sandpit_extract_path(type = "ch"))) { ch_data %>% write_file(get_sandpit_extract_path(type = "ch")) + + ch_data %>% + process_tests_sandpit(type = "ch") } else { ch_data <- ch_data } diff --git a/R/read_sc_all_home_care.R b/R/read_sc_all_home_care.R index cca2d0a9b..3741785a7 100644 --- a/R/read_sc_all_home_care.R +++ b/R/read_sc_all_home_care.R @@ -50,6 +50,9 @@ read_sc_all_home_care <- function(sc_dvprod_connection = phs_db_connection(dsn = if (!fs::file_exists(get_sandpit_extract_path(type = "hc"))) { home_care_data %>% write_file(get_sandpit_extract_path(type = "hc")) + + home_care_date %>% + process_tests_sandpit(type = "hc") } else { home_care_data <- home_care_data } diff --git a/R/read_sc_all_sds.R b/R/read_sc_all_sds.R index d9d5b8b1d..e184ffaeb 100644 --- a/R/read_sc_all_sds.R +++ b/R/read_sc_all_sds.R @@ -31,6 +31,9 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR if (!fs::file_exists(get_sandpit_extract_path(type = "sds"))) { sds_full_data %>% write_file(get_sandpit_extract_path(type = "sds")) + + sds_full_data %>% + process_tests_sandpit(type = "sds") } else { sds_full_data <- sds_full_data } diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index ffe86f48f..d2e1490f2 100644 --- a/R/write_tests_xlsx.R +++ 
b/R/write_tests_xlsx.R @@ -19,15 +19,22 @@ write_tests_xlsx <- function(comparison_data, sheet_name, year = NULL, - workbook_name = c("ep_file", "indiv_file", "lookup", "extract")) { + workbook_name = c("ep_file", "indiv_file", "lookup", "extract", "sandpit")) { # Set up the workbook ---- - if (workbook_name == "lookup" | missing(year) & workbook_name == "lookup") { - tests_workbook_name <- stringr::str_glue(latest_update(), "_lookups_tests") - } else { + if (is.null(year)) { tests_workbook_name <- dplyr::case_when( workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"), workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"), + workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"), + workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests") + ) + } else if (workbook_name == "sandpit" & !is.null(year)) { + tests_workbook_name <- dplyr::case_when( + workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests") + ) + } else { + tests_workbook_name <- dplyr::case_when( workbook_name == "extract" ~ stringr::str_glue(latest_update(), "_{year}_extract_tests") ) } @@ -92,11 +99,11 @@ write_tests_xlsx <- function(comparison_data, date_today <- stringr::str_to_lower(date_today) - sheet_name_dated <- ifelse( - is.null(year), - stringr::str_glue("{sheet_name}_{date_today}"), - stringr::str_glue("{year}_{sheet_name}_{date_today}") - ) + if (is.null(year)) { + sheet_name_dated <- stringr::str_glue("{sheet_name}_{date_today}") + } else { + sheet_name_dated <- stringr::str_glue("{year}_{sheet_name}_{date_today}") + } # If there has already been a sheet created today, append the time if (sheet_name_dated %in% names(wb)) { diff --git a/man/process_tests_sc_sandpit.Rd b/man/process_tests_sc_sandpit.Rd new file mode 100644 index 000000000..d3c1f5984 --- /dev/null +++ b/man/process_tests_sc_sandpit.Rd @@ -0,0 +1,20 @@ +% Generated by 
roxygen2: do not edit by hand +% Please edit documentation in R/process_tests_sc_sandpit.R +\name{process_tests_sc_sandpit} +\alias{process_tests_sc_sandpit} +\title{Process tests for the social care sandpit extracts} +\usage{ +process_tests_sc_sandpit( + type = c("at", "hc", "ch", "sds", "demographics", "client"), + year = NULL +) +} +\arguments{ +\item{type}{Name of sandpit extract.} +} +\value{ +a \link[tibble:tibble-package]{tibble} containing a test comparison. +} +\description{ +Process tests for the social care sandpit extracts +} diff --git a/man/produce_sc_sandpit_tests.Rd b/man/produce_sc_sandpit_tests.Rd new file mode 100644 index 000000000..4f34d506b --- /dev/null +++ b/man/produce_sc_sandpit_tests.Rd @@ -0,0 +1,24 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_tests_sc_sandpit.R +\name{produce_sc_sandpit_tests} +\alias{produce_sc_sandpit_tests} +\title{Produce tests for social care sandpit extracts.} +\usage{ +produce_sc_sandpit_tests( + data, + type = c("demographics", "client", "at", "ch", "hc", "sds") +) +} +\arguments{ +\item{data}{new or old data for testing summary flags +(data is from \code{\link[=get_sandpit_extract_path]{get_sandpit_extract_path()}})} + +\item{type}{Name of sandpit extract.} +} +\value{ +a dataframe with a count of each flag +from \code{\link[=calculate_measures]{calculate_measures()}} +} +\description{ +Produce tests for social care sandpit extracts. 
+} diff --git a/man/write_tests_xlsx.Rd b/man/write_tests_xlsx.Rd index c510e2570..0788d0080 100644 --- a/man/write_tests_xlsx.Rd +++ b/man/write_tests_xlsx.Rd @@ -8,7 +8,7 @@ write_tests_xlsx( comparison_data, sheet_name, year = NULL, - workbook_name = c("ep_file", "indiv_file", "lookup", "extract") + workbook_name = c("ep_file", "indiv_file", "lookup", "extract", "sandpit") ) } \arguments{ From 6baad298865ed67955c9d35c0ff685f7ca8ba102 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 30 Apr 2024 16:43:30 +0100 Subject: [PATCH 10/96] only select columns we want in ltc raw data --- R/read_lookup_ltc.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/read_lookup_ltc.R b/R/read_lookup_ltc.R index 7eb83a434..68aa6e1ee 100644 --- a/R/read_lookup_ltc.R +++ b/R/read_lookup_ltc.R @@ -34,7 +34,7 @@ read_lookup_ltc <- function(file_path = get_it_ltc_path()) { ) ) %>% # Rename variables - dplyr::rename( + dplyr::select( chi = "PATIENT_UPI [C]", postcode = "PATIENT_POSTCODE [C]", arth_date = "ARTHRITIS_DIAG_DATE", From 211c8a765bc6bdf8ff9125fea6d1fa6e80bb0df9 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 30 Apr 2024 15:50:18 +0000 Subject: [PATCH 11/96] [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/8897746003/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/947#issuecomment-2085735144 Signed-off-by: check-spelling-bot on-behalf-of: @check-spelling --- .github/actions/spelling/expect.txt | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index a4a34a58b..906cdff91 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -7,10 +7,13 @@ adtf arrivalmode arth atlassian +atrialfib attendcat aut bedday +BFO birthtime +bloodbfo bodyloc boxi callr @@ -22,6 +25,8 @@ cattend ccyy cdn cennum +CEREBROVASC +chd chp chpstart cij 
@@ -33,6 +38,7 @@ codecov comhairle commhosp congen +copd costincdnas costmonthnum costsfy @@ -40,6 +46,7 @@ covr cph createslf customise +cvd dataframe datamart datazone @@ -67,6 +74,7 @@ dplyr dsn dtplyr dvprod +endomet envir fcase feb @@ -94,6 +102,7 @@ hbtreatcode hbtreatname hci hcp +hefailure hhg hjust hms @@ -154,6 +163,7 @@ openxlsx orcid outfile pandoc +parkinsons patflow pattype pcec @@ -184,6 +194,7 @@ readr readxl reasonwait recid +refailure reflectoring refsource renviron From 86efa00a144cff0947003d3273845050991d6e08 Mon Sep 17 00:00:00 2001 From: marjom02 Date: Fri, 3 May 2024 12:02:26 +0100 Subject: [PATCH 12/96] for some reason the latest scid code was overwritten after the march update?? anyway, now it is fixed. --- R/replace_sc_id_with_latest.R | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R index 8e815d46b..db1dc578c 100644 --- a/R/replace_sc_id_with_latest.R +++ b/R/replace_sc_id_with_latest.R @@ -7,31 +7,40 @@ replace_sc_id_with_latest <- function(data) { # Check for required variables check_variables_exist( data, - c("sending_location", "social_care_id", "chi", "latest_flag") + c("sending_location", "social_care_id", "chi", "period") ) # select variables we need filter_data <- data %>% dplyr::select( - "sending_location", "social_care_id", "chi", "latest_flag" + "sending_location", "social_care_id", "chi", "period" ) %>% - dplyr::filter(!(is.na(.data$chi))) %>% - dplyr::distinct() + dplyr::filter(!(is.na(.data$chi))) change_sc_id <- filter_data %>% - dplyr::filter(.data$latest_flag == 1) %>% + # Sort (by sending_location, chi and period) for unique chi/sending location + dplyr::arrange( + .data$sending_location, + .data$chi, + dplyr::desc(.data$period) + ) %>% + # Find the latest sc_id for each chi/sending location by keeping latest period + dplyr::distinct( + .data$sending_location, + .data$chi, + .keep_all = TRUE + ) %>% # Rename 
for latest sc id dplyr::rename(latest_sc_id = "social_care_id") %>% - # drop latest_flag for matching - dplyr::select(-"latest_flag") + # drop period for matching + dplyr::select(-"period") return_data <- change_sc_id %>% # Match back onto data dplyr::right_join(data, - by = c("sending_location", "chi"), - multiple = "all" + by = c("sending_location", "chi"), + multiple = "all" ) %>% - dplyr::filter(!(is.na(.data$period))) %>% # Overwrite sc id with the latest dplyr::mutate( social_care_id = dplyr::if_else( @@ -40,6 +49,5 @@ replace_sc_id_with_latest <- function(data) { .data$social_care_id ) ) - return(return_data) } From d571cb623e6d2e80864027532f11c66ef65c54a4 Mon Sep 17 00:00:00 2001 From: SwiftySalmon Date: Fri, 3 May 2024 11:15:15 +0000 Subject: [PATCH 13/96] Style code --- R/replace_sc_id_with_latest.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R index db1dc578c..9478ebefe 100644 --- a/R/replace_sc_id_with_latest.R +++ b/R/replace_sc_id_with_latest.R @@ -38,8 +38,8 @@ replace_sc_id_with_latest <- function(data) { return_data <- change_sc_id %>% # Match back onto data dplyr::right_join(data, - by = c("sending_location", "chi"), - multiple = "all" + by = c("sending_location", "chi"), + multiple = "all" ) %>% # Overwrite sc id with the latest dplyr::mutate( From a75374ed4494b36ccce1a8514228e310c159b528 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Tue, 7 May 2024 08:52:35 +0100 Subject: [PATCH 14/96] Merge May24 NI update into June update branch (#949) Collect data before manipulations --- R/read_lookup_sc_client.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R index d2b549671..173c971b6 100644 --- a/R/read_lookup_sc_client.R +++ b/R/read_lookup_sc_client.R @@ -41,6 +41,7 @@ read_lookup_sc_client <- function(fyyear, "day_care" ) %>% 
dplyr::filter(.data$financial_year == year) %>% + dplyr::collect() %>% dplyr::mutate( dplyr::across( c( @@ -74,8 +75,7 @@ read_lookup_sc_client <- function(fyyear, .data$social_care_id, .data$financial_year, .data$financial_quarter - ) %>% - dplyr::collect() + ) if (!fs::file_exists(get_sandpit_extract_path(type = "client", year = fyyear))) { client_data %>% From 08b9f8085d0a5d7e989736fd76e42fc838db3286 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Mon, 13 May 2024 12:32:56 +0100 Subject: [PATCH 15/96] Update NEWS.md --- NEWS.md | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index fdbb64c9c..a2e0aeeea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,55 @@ -# September 2023 Update - Unreleased +# June 2024 Update - Unreleased +* Update of 2017/18 onwards to include bug fixes within the files. +* Removal of extra variable caused by the LTCs not matching properly. +* Homelessness improvements: + * Removal of filtering the data in SLFs according to completeness levels. + * New variables: + * `hl1_completeness` - a data quality indicator by percentage compared to SG annual publication. + * `hl1_12_months_pre_app`- date variable + * `hl1_12_momths_post_app` - date variable +* Potential inclusions +* Activity after death flag? +* New care home methodology? - potentially this is on hold until September update. +* Additional Documentation? + +# March 2024 Update - Released 20-Mar-2024 +* Update of 2017/18 onwards to include bug fixes within the files. +* 2023/24 file now includes social care data. 
+* Geography files updated - SPD and SIMD +* Variable `property_type` in homelessness has been updated to include further description +* Bug fixes: + * Service use cohort wrongly assigning Non-Service Users (NSU) as `psychiatry` + * Not Applicable (NA) introduced for variable `high_cc` in Demographic cohort + * Issue with delayed discharges data not linking to admissions + * Person ID available in self-directed support (SDS) data + * Issue with Social Care ID - missing sc id were all being set to one sc id. + * Improvements to social care methodology + * Demographics + * person_id will now be consistent across social care cases for an individual. The social care ID for a CHI will also be consistent across all areas, not just the latest ID used in AT/SDS/CH/HC. + * Self-directed Support (SDS) and Alarms Telecare (AT) data + * Our tests show this is now in line with the social care team’s publications and therefore, the data may have changed slightly. + * New Social Care methodology + * The new methodology impacts how we match the demographics file and how we select the latest social care ID. + * Previously we used the `latest_flag` but this isn’t accurate as some IDs have none flagged, and some have more than one flagged. We now have one social care ID flagged for each CHI. This issue mostly affects Edinburgh, Falkirk, Western Isles, and Renfrewshire. + * Previously, in cases where a social care ID had multiple CHIs associated only one of the CHIs was chosen. + * The new methodology keeps all CHIs in as there is no way to tell which CHI the activity is for. The new methodology will show duplicate activity but for the different CHIs. The main areas this affects are Midlothian, Western Isles, and Renfrewshire. + + +# December 2023 Update - Released 20-Dec-2023 +* Update of 2017/18 onwards to include bug fixes within the files. +* 2023/24 file contain data from 1st April 2023 up to the end of September 2023. + * No social care data available. 
+* Re-addition of keep population flag. +* SPARRA update +* NA's introduced for variable `ch_provider` - now fixed. +* Future improvements + * Activity after death flag + * Review of social care methodology. +* SLFhelper updated to version 10.1.1. + * Includes a fix for speeding up function `get_chi()` + + +# September 2023 Update - Released 22-Sep-2023 * Update of 2017/18 onwards to include bug fixes within the files. * New 2023/24 files. *No social care data available for new 2023/24 file. From 7f569d3481f301689f0fe8a19142924cdabff9ea Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Fri, 17 May 2024 16:53:22 +0100 Subject: [PATCH 16/96] link GP-OoH with CUP markers --- R/get_boxi_extract_path.R | 1 + R/process_extract_gp_ooh.R | 37 +++++++++++++++++++++++++++++++++++-- 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index 3c2b4acdc..c92ffdc47 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -49,6 +49,7 @@ get_boxi_extract_path <- function( "gp_ooh-c" ~ "GP-OoH-consultations-extract", "gp_ooh-d" ~ "GP-OoH-diagnosis-extract", "gp_ooh-o" ~ "GP-OoH-outcomes-extract", + "gp_ooh_cup" ~ "GP-OoH-cup-extract", "homelessness" ~ "Homelessness-extract", "maternity" ~ "Maternity-episode-level-extract", "mh" ~ "Mental-Health-episode-level-extract", diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R index 37cfc8f3f..47c0ee7e5 100644 --- a/R/process_extract_gp_ooh.R +++ b/R/process_extract_gp_ooh.R @@ -93,10 +93,41 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) { ) %>% dplyr::ungroup() + ## Link CUP Marker ----- + gp_ooh_cup_file <- read_file( + # path = get_boxi_extract_path(year, "gp_ooh_cup"), + path = file_name, + col_type = readr::cols( + "GP OOH Consultation Start Date" = readr::col_date(format = "%Y/%m/%d %T"), + "GP OOH Consultation Start Time" = readr::col_time(""), + "GUID" = readr::col_character(), + "CUP Marker" = 
readr::col_integer(), + "CUP Pathway Name" = readr::col_character() + ) + ) %>% + dplyr::select( + record_keydate1 = "GP OOH Consultation Start Date", + keytime1 = "GP OOH Consultation Start Time", + ooh_case_id = "GUID", + cup_marker = "CUP Marker", + cup_pathway = "CUP Pathway Name" + ) %>% + dplyr::distinct( + .data$record_keydate1, + .data$keytime1, + .data$ooh_case_id, + .keep_all = TRUE + ) + + ooh_clean2 = ooh_clean %>% + dplyr::left_join(gp_ooh_cup_file, + by = dplyr::join_by("ooh_case_id", + "record_keydate1", + "keytime1")) ## Save Outfile ------------------------------------- - final_data <- ooh_clean %>% + final_data <- ooh_clean2 %>% dplyr::select( "year", "recid", @@ -122,7 +153,9 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) { tidyselect::starts_with("ooh_outcome"), "cost_total_net", tidyselect::ends_with("_cost"), - "ooh_case_id" + "ooh_case_id", + cup_marker, + cup_pathway ) if (write_to_disk) { From 20cdf4d0abf3083a8c8113b4178e17fb3559eb18 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Fri, 17 May 2024 15:57:19 +0000 Subject: [PATCH 17/96] Style code --- R/process_extract_gp_ooh.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R index 47c0ee7e5..a1f77822c 100644 --- a/R/process_extract_gp_ooh.R +++ b/R/process_extract_gp_ooh.R @@ -119,11 +119,14 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) { .keep_all = TRUE ) - ooh_clean2 = ooh_clean %>% + ooh_clean2 <- ooh_clean %>% dplyr::left_join(gp_ooh_cup_file, - by = dplyr::join_by("ooh_case_id", - "record_keydate1", - "keytime1")) + by = dplyr::join_by( + "ooh_case_id", + "record_keydate1", + "keytime1" + ) + ) ## Save Outfile ------------------------------------- From 3869b098a6e0bdbe7f7bb16fcad91066aa9ba23f Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Thu, 23 May 2024 11:28:34 +0100 Subject: [PATCH 18/96] update gp ooh cup --- R/process_extract_gp_ooh.R | 
13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R index a1f77822c..7fe4431b4 100644 --- a/R/process_extract_gp_ooh.R +++ b/R/process_extract_gp_ooh.R @@ -12,7 +12,11 @@ #' @return the final data as a [tibble][tibble::tibble-package]. #' @export #' @family process extracts -process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) { +process_extract_gp_ooh <- function(year, + data_list, + gp_ooh_cup_path = get_boxi_extract_path(year, "gp_ooh_cup"), + write_to_disk = TRUE) { + diagnosis_extract <- process_extract_ooh_diagnosis(data_list[["diagnosis"]], year) outcomes_extract <- process_extract_ooh_outcomes(data_list[["outcomes"]], year) consultations_extract <- process_extract_ooh_consultations(data_list[["consultations"]], year) @@ -95,8 +99,7 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) { ## Link CUP Marker ----- gp_ooh_cup_file <- read_file( - # path = get_boxi_extract_path(year, "gp_ooh_cup"), - path = file_name, + path = gp_ooh_cup_path, col_type = readr::cols( "GP OOH Consultation Start Date" = readr::col_date(format = "%Y/%m/%d %T"), "GP OOH Consultation Start Time" = readr::col_time(""), @@ -119,7 +122,7 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) { .keep_all = TRUE ) - ooh_clean2 <- ooh_clean %>% + ooh_clean <- ooh_clean %>% dplyr::left_join(gp_ooh_cup_file, by = dplyr::join_by( "ooh_case_id", @@ -130,7 +133,7 @@ process_extract_gp_ooh <- function(year, data_list, write_to_disk = TRUE) { ## Save Outfile ------------------------------------- - final_data <- ooh_clean2 %>% + final_data <- ooh_clean %>% dplyr::select( "year", "recid", From a967d5c6917535f6c2012b352a59fa665b586c29 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Thu, 23 May 2024 11:31:11 +0100 Subject: [PATCH 19/96] link cup to acute --- R/get_boxi_extract_path.R | 2 ++ R/process_extract_acute.R | 49 
+++++++++++++++++++++++++++++++++++---- R/read_extract_acute.R | 1 + _targets.R | 12 ++++++++++ 4 files changed, 60 insertions(+), 4 deletions(-) diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index c92ffdc47..3a4e80c59 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -16,6 +16,7 @@ get_boxi_extract_path <- function( "ae", "ae_cup", "acute", + "acute_cup", "cmh", "deaths", "dn", @@ -44,6 +45,7 @@ get_boxi_extract_path <- function( "ae" ~ "A&E-episode-level-extract", "ae_cup" ~ "A&E-UCD-CUP-extract", "acute" ~ "Acute-episode-level-extract", + "acute_cup" ~ "Actue-cup-extract", "cmh" ~ "Community-MH-contact-level-extract", "dn" ~ "District-Nursing-contact-level-extract", "gp_ooh-c" ~ "GP-OoH-consultations-extract", diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R index dcfdb47c0..da4306bcc 100644 --- a/R/process_extract_acute.R +++ b/R/process_extract_acute.R @@ -12,7 +12,11 @@ #' @return the final data as a [tibble][tibble::tibble-package]. 
#' @export #' @family process extracts -process_extract_acute <- function(data, year, write_to_disk = TRUE) { +process_extract_acute <- function(data, + year, + acute_cup_path = get_boxi_extract_path(year, "acute_cup"), + write_to_disk = TRUE) { + # Only run for a single year stopifnot(length(year) == 1L) @@ -58,8 +62,43 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) { ) %>% # Add oldtadm as a factor with labels dplyr::mutate(oldtadm = factor(.data$oldtadm, - levels = 0L:8L - )) + levels = 0L:8L + )) %>% + dplyr::mutate( + unique_row_num = dplyr::row_number() + ) + + acute_cup <- read_file( + path = acute_cup_path, + col_type = readr::cols( + "Acute Admission Date" = readr::col_date(format = "%Y/%m/%d %T"), + "Acute Discharge Date" = readr::col_date(format = "%Y/%m/%d %T"), + "Acute Admission Type Code" = readr::col_character(), + "Acute Discharge Type Code" = readr::col_character(), + "Case Reference Number [C]" = readr::col_character(), + "UPI Number [C]" = readr::col_character(), + "CUP Marker" = readr::col_integer(), + "CUP Pathway Name" = readr::col_character() + ) + ) %>% dplyr::select( + chi = "UPI Number [C]", + case_reference_number = "Case Reference Number [C]", + record_keydate1 = "Acute Admission Date", + record_keydate2 = "Acute Discharge Date", + tadm = "Acute Admission Type Code", + disch = "Acute Discharge Type Code", + cup_marker = "CUP Marker", + cup_pathway = "CUP Pathway Name" + ) %>% dplyr::distinct() + + acute_clean <- acute_clean %>% + dplyr::left_join(acute_cup, + by = c("record_keydate1", + "record_keydate2", + "case_reference_number", + "chi", + "tadm", + "disch")) acute_processed <- acute_clean %>% dplyr::select( @@ -106,7 +145,9 @@ process_extract_acute <- function(data, year, write_to_disk = TRUE) { "cost_total_net", tidyselect::ends_with("_beddays"), tidyselect::ends_with("_cost"), - "uri" + "uri", + "cup_marker", + "cup_pathway" ) %>% dplyr::arrange(.data$chi, .data$record_keydate1) diff --git 
a/R/read_extract_acute.R b/R/read_extract_acute.R index 7a227db73..6ed66b92c 100644 --- a/R/read_extract_acute.R +++ b/R/read_extract_acute.R @@ -149,6 +149,7 @@ read_extract_acute <- function(year, file_path = get_boxi_extract_path(year = ye ipdc = "Inpatient Day Case Identifier Code", cij_ipdc = "CIJ Inpatient Day Case Identifier Code (01)", lineno = "Line Number (01)", + case_reference_number = "Case Reference Number [C]", GLS_record = "GLS Record" ) %>% # replace NA in cost_total_net by 0 diff --git a/_targets.R b/_targets.R index 15d2584bb..ae0cc5f5f 100644 --- a/_targets.R +++ b/_targets.R @@ -281,6 +281,16 @@ list( get_boxi_extract_path(year = year, type = "gp_ooh-c"), format = "file" ), + tar_target( + acute_cup_path, + get_boxi_extract_path(year, type = "acute_cup"), + format = "file" + ), + tar_target( + gp_ooh_cup_path, + get_boxi_extract_path(year, type = "gp_ooh_cup"), + format = "file" + ), tar_qs( ooh_data, read_extract_gp_ooh( @@ -294,6 +304,7 @@ list( tar_target(source_acute_extract, process_extract_acute( acute_data, year, + acute_cup_path, write_to_disk = write_to_disk )), tar_target( @@ -419,6 +430,7 @@ list( tar_target(source_ooh_extract, process_extract_gp_ooh( year, ooh_data, + gp_ooh_cup_path, write_to_disk = write_to_disk )), tar_target( From a989b3f7d9c230b85ad21bfe4c250509576def2f Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Thu, 23 May 2024 10:32:53 +0000 Subject: [PATCH 20/96] Update documentation --- man/get_boxi_extract_path.Rd | 4 ++-- man/process_extract_acute.Rd | 7 ++++++- man/process_extract_gp_ooh.Rd | 7 ++++++- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/man/get_boxi_extract_path.Rd b/man/get_boxi_extract_path.Rd index c012ac3ef..69bb886c4 100644 --- a/man/get_boxi_extract_path.Rd +++ b/man/get_boxi_extract_path.Rd @@ -6,8 +6,8 @@ \usage{ get_boxi_extract_path( year, - type = c("ae", "ae_cup", "acute", "cmh", "deaths", "dn", "gp_ooh-c", "gp_ooh-d", - "gp_ooh-o", "homelessness", "maternity", "mh", 
"outpatients") + type = c("ae", "ae_cup", "acute", "acute_cup", "cmh", "deaths", "dn", "gp_ooh-c", + "gp_ooh-d", "gp_ooh-o", "homelessness", "maternity", "mh", "outpatients") ) } \arguments{ diff --git a/man/process_extract_acute.Rd b/man/process_extract_acute.Rd index 77a99cef3..22ff164c8 100644 --- a/man/process_extract_acute.Rd +++ b/man/process_extract_acute.Rd @@ -4,7 +4,12 @@ \alias{process_extract_acute} \title{Process the Acute extract} \usage{ -process_extract_acute(data, year, write_to_disk = TRUE) +process_extract_acute( + data, + year, + acute_cup_path = get_boxi_extract_path(year, "acute_cup"), + write_to_disk = TRUE +) } \arguments{ \item{data}{The extract to process} diff --git a/man/process_extract_gp_ooh.Rd b/man/process_extract_gp_ooh.Rd index ddec006fe..b137f581c 100644 --- a/man/process_extract_gp_ooh.Rd +++ b/man/process_extract_gp_ooh.Rd @@ -4,7 +4,12 @@ \alias{process_extract_gp_ooh} \title{Process the GP OoH extract} \usage{ -process_extract_gp_ooh(year, data_list, write_to_disk = TRUE) +process_extract_gp_ooh( + year, + data_list, + gp_ooh_cup_path = get_boxi_extract_path(year, "gp_ooh_cup"), + write_to_disk = TRUE +) } \arguments{ \item{year}{The year to process, in FY format.} From ce90a452f9db3f57560e0c777a98d01edad78af8 Mon Sep 17 00:00:00 2001 From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Date: Mon, 24 Jun 2024 14:25:35 +0100 Subject: [PATCH 21/96] adding the death dates to activity after death cases (#972) adding the death dates to the cases where there is activity after death Co-authored-by: marjom02 --- R/add_activity_after_death_flag.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index 0842b47ec..c6c003e3e 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -85,7 +85,7 @@ add_activity_after_death_flag <- function( flag_data <- flag_data %>% 
dplyr::filter(.data$activity_after_death == 1) %>% # Remove temporary flag variables used to create activity after death flag and fill in missing death_date - dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$activity_after_death) %>% + dplyr::select(.data$year, .data$chi, .data$record_keydate1, .data$record_keydate2, .data$activity_after_death, .data$death_date_boxi) %>% dplyr::distinct() # Match activity after death flag back to episode file @@ -94,7 +94,12 @@ add_activity_after_death_flag <- function( flag_data, by = c("year", "chi", "record_keydate1", "record_keydate2"), na_matches = "never" - ) + ) %>% + dplyr::mutate(death_date = lubridate::as_date(ifelse(is.na(death_date) & !(is.na(death_date_boxi)), + death_date_boxi, death_date + ))) %>% + dplyr::select(-death_date_boxi) + return(final_data) From 4e761ec56455b38b891e482067a678691f375e43 Mon Sep 17 00:00:00 2001 From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Date: Mon, 24 Jun 2024 15:10:31 +0100 Subject: [PATCH 22/96] Add sys time to functions (#971) * adding in syst_time alerts for all functions in create episode and create individual. 
So that when it runs manually I can see where it is and where it's getting stuck * Style code --------- Co-authored-by: marjom02 Co-authored-by: SwiftySalmon Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> --- R/add_activity_after_death_flag.R | 2 ++ R/add_nsu_cohort.R | 2 ++ R/add_ppa_flag.R | 2 ++ R/correct_demographics.R | 2 ++ R/cost_uplift.R | 2 ++ R/create_episode_file.R | 16 +++++++++++ R/create_individual_file.R | 46 +++++++++++++++++++++++++++++++ R/fill_geographies.R | 2 ++ R/join_deaths_data.R | 2 ++ R/join_sparra_hhg.R | 2 ++ R/link_delayed_discharge_eps.R | 2 ++ R/match_on_ltcs.R | 2 ++ R/process_lookup_homelessness.R | 6 ++++ 13 files changed, 88 insertions(+) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index c6c003e3e..78131d941 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -13,6 +13,8 @@ add_activity_after_death_flag <- function( year, deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Add activity after death flag function started at {Sys.time()}") + # to skip warnings no visible binding for global variable ‘.’ . 
<- NULL diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R index 15d5d4e01..bf6216e57 100644 --- a/R/add_nsu_cohort.R +++ b/R/add_nsu_cohort.R @@ -13,6 +13,8 @@ add_nsu_cohort <- function( data, year, nsu_cohort = read_file(get_nsu_path(year)) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Add NSU cohort function started at {Sys.time()}") + year_param <- year if (!check_year_valid(year, "nsu")) { diff --git a/R/add_ppa_flag.R b/R/add_ppa_flag.R index bb99f0543..1d5f9739d 100644 --- a/R/add_ppa_flag.R +++ b/R/add_ppa_flag.R @@ -8,6 +8,8 @@ #' @return A data frame to use as a lookup of PPAs #' @family episode_file add_ppa_flag <- function(data) { + cli::cli_alert_info("Add PPA flag function started at {Sys.time()}") + check_variables_exist( data, variables = c( diff --git a/R/correct_demographics.R b/R/correct_demographics.R index d7ef6f469..d221c25ab 100644 --- a/R/correct_demographics.R +++ b/R/correct_demographics.R @@ -7,6 +7,8 @@ #' #' @return episode files with updated date of birth and ages correct_demographics <- function(data, year) { + cli::cli_alert_info("Correct demographics function started at {Sys.time()}") + # keep episodes with missing chi data_no_chi <- data %>% dplyr::filter(is_missing(.data$chi)) diff --git a/R/cost_uplift.R b/R/cost_uplift.R index e554c2505..f14600da6 100644 --- a/R/cost_uplift.R +++ b/R/cost_uplift.R @@ -5,6 +5,8 @@ #' @return episode data with uplifted costs #' @family episode_file apply_cost_uplift <- function(data) { + cli::cli_alert_info("Apply cost uplift function started at {Sys.time()}") + data <- data %>% # attach a uplift scale as the last column lookup_uplift() %>% diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 0bb804c7d..34d2ddf9e 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -199,6 +199,8 @@ create_episode_file <- function( #' #' @return `data` with only the `vars_to_keep` kept store_ep_file_vars <- function(data, year, vars_to_keep) { + cli::cli_alert_info("Store 
episode file variables function started at {Sys.time()}") + tempfile_path <- get_file_path( directory = get_year_dir(year), file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"), @@ -236,6 +238,8 @@ store_ep_file_vars <- function(data, year, vars_to_keep) { #' #' @return The full SLF data. load_ep_file_vars <- function(data, year) { + cli::cli_alert_info("Load episode file variable function started at {Sys.time()}") + tempfile_path <- get_file_path( directory = get_year_dir(year), file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"), @@ -263,6 +267,8 @@ load_ep_file_vars <- function(data, year) { #' #' @return A data frame with CIJ markers filled in for those missing. fill_missing_cij_markers <- function(data) { + cli::cli_alert_info("Fill missing cij markers function started at {Sys.time()}") + fixable_data <- data %>% dplyr::filter( .data[["recid"]] %in% c("01B", "04B", "GLS", "02B", "DD") & !is.na(.data[["chi"]]) @@ -317,6 +323,8 @@ fill_missing_cij_markers <- function(data) { #' #' @return The data with CIJ variables corrected. correct_cij_vars <- function(data) { + cli::cli_alert_info("Correct cij variables function started at {Sys.time()}") + check_variables_exist( data, c("chi", "recid", "cij_admtype", "cij_pattype_code") @@ -358,6 +366,8 @@ correct_cij_vars <- function(data) { #' #' @return The data with cost including dna. 
create_cost_inc_dna <- function(data) { + cli::cli_alert_info("Create cost inc dna function started at {Sys.time()}") + check_variables_exist(data, c("cost_total_net", "attendance_status")) # Create cost including DNAs and modify costs @@ -382,6 +392,8 @@ create_cost_inc_dna <- function(data) { #' #' @return The data unchanged (the cohorts are written to disk) create_cohort_lookups <- function(data, year, update = latest_update()) { + cli::cli_alert_info("Create cohort lookups function started at {Sys.time()}") + create_demographic_cohorts( data, year, @@ -421,6 +433,8 @@ join_cohort_lookups <- function( col_select = c("anon_chi", "service_use_cohort") ) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Join cohort lookups function started at {Sys.time()}") + join_cohort_lookups <- data %>% dplyr::left_join( demographic_cohort, @@ -447,6 +461,8 @@ join_sc_client <- function(data, year, sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(), file_type = c("episode", "individual")) { + cli::cli_alert_info("Join social care client function started at {Sys.time()}") + if (file_type == "episode") { # Match on client variables by chi data_file <- data %>% diff --git a/R/create_individual_file.R b/R/create_individual_file.R index c98531310..dc15fcb0e 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -260,6 +260,8 @@ add_all_columns <- function(episode_file, year) { #' @param condition Condition to create new columns based on #' @family individual_file add_acute_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add acute columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% @@ -271,6 +273,8 @@ add_acute_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_mat_columns <- function(episode_file, prefix, condition) { + 
cli::cli_alert_info("Add maternity columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% @@ -282,6 +286,8 @@ add_mat_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_mh_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add mental health columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% @@ -293,6 +299,8 @@ add_mh_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_gls_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add geriatric long stay columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, episode = TRUE, cost = TRUE) %>% @@ -304,6 +312,8 @@ add_gls_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_op_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add outpatient columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file <- episode_file %>% add_standard_cols(prefix, condition) @@ -327,6 +337,8 @@ add_op_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_ae_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add A&E columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% @@ -338,6 +350,8 @@ add_ae_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_pis_columns <- 
function(episode_file, prefix, condition) { + cli::cli_alert_info("Add prescribing columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% @@ -349,6 +363,8 @@ add_pis_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_ooh_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add out of hours columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file <- episode_file %>% add_standard_cols(prefix, condition, cost = TRUE) %>% @@ -384,6 +400,8 @@ add_ooh_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_dn_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add district nursing columns function started at {Sys.time()}") + condition <- substitute(condition) if ("total_no_dn_contacts" %in% names(episode_file)) { episode_file %>% @@ -407,6 +425,8 @@ add_dn_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_cmh_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add communicty mental health columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -418,6 +438,8 @@ add_cmh_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_dd_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add delayed discharges columns function started at {Sys.time()}") + condition <- substitute(condition) condition_delay <- substitute(condition & primary_delay_reason != "9") episode_file <- episode_file %>% @@ -439,6 +461,8 @@ add_dd_columns <- function(episode_file, prefix, condition) { #' @inheritParams 
add_acute_columns #' @family individual_file add_nsu_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add non service users columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -450,6 +474,8 @@ add_nsu_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_nrs_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add nrs columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -461,6 +487,8 @@ add_nrs_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_hl1_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add homelessness columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) @@ -471,6 +499,8 @@ add_hl1_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_ch_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add care home columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -501,6 +531,8 @@ add_ch_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_hc_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add home care columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file <- episode_file %>% @@ -545,6 +577,8 @@ add_hc_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_at_columns <- function(episode_file, prefix, condition) { 
+ cli::cli_alert_info("Add alarms telecare columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -559,6 +593,8 @@ add_at_columns <- function(episode_file, prefix, condition) { #' @inheritParams add_acute_columns #' @family individual_file add_sds_columns <- function(episode_file, prefix, condition) { + cli::cli_alert_info("Add SDS columns function started at {Sys.time()}") + condition <- substitute(condition) episode_file %>% add_standard_cols(prefix, condition) %>% @@ -581,6 +617,8 @@ add_sds_columns <- function(episode_file, prefix, condition) { #' cij_pattype (lgl) #' @family individual_file add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) { + cli::cli_alert_info("Add ipdc columns function started at {Sys.time()}") + condition_i <- substitute(eval(condition) & ipdc == "I") episode_file <- episode_file %>% dplyr::mutate( @@ -625,6 +663,8 @@ add_ipdc_cols <- function(episode_file, prefix, condition, ipdc_d = TRUE, electi #' @param cost Whether to create prefix_cost col, e.g. 
"Acute_cost" #' @family individual_file add_standard_cols <- function(episode_file, prefix, condition, episode = FALSE, cost = FALSE) { + cli::cli_alert_info("Add standard columns function started at {Sys.time()}") + if (episode) { episode_file <- dplyr::mutate(episode_file, "{prefix}_episodes" := dplyr::if_else(eval(condition), 1L, NA_integer_)) } @@ -703,6 +743,8 @@ recode_gender <- function(episode_file) { #' "dementia" and "dementia_date" #' @family individual_file condition_cols <- function() { + cli::cli_alert_info("Return condition columns function started at {Sys.time()}") + conditions <- slfhelper::ltc_vars date_cols <- paste0(conditions, "_date") all_cols <- c(conditions, date_cols) @@ -759,6 +801,8 @@ clean_individual_file <- function(individual_file, year) { #' #' @inheritParams clean_individual_file clean_up_gender <- function(individual_file) { + cli::cli_alert_info("Clean up gender column function started at {Sys.time()}") + individual_file %>% dplyr::mutate( gender = dplyr::case_when( @@ -785,6 +829,8 @@ join_slf_lookup_vars <- function(individual_file, col_select = c("gpprac", "cluster", "hbpraccode") ), hbrescode_var = "hb2018") { + cli::cli_alert_info("Join slf lookup variables function started at {Sys.time()}") + individual_file <- individual_file %>% dplyr::left_join( slf_postcode_lookup, diff --git a/R/fill_geographies.R b/R/fill_geographies.R index c9aee6355..5638c8758 100644 --- a/R/fill_geographies.R +++ b/R/fill_geographies.R @@ -16,6 +16,8 @@ fill_geographies <- function( get_slf_gpprac_path(), col_select = c("gpprac", "cluster", "hbpraccode") )) { + cli::cli_alert_info("Fill geographies function started at {Sys.time()}") + check_variables_exist(data, c( "chi", "postcode", diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R index 7fbf203d0..d2fc51b91 100644 --- a/R/join_deaths_data.R +++ b/R/join_deaths_data.R @@ -10,6 +10,8 @@ join_deaths_data <- function( data, year, slf_deaths_lookup = read_file(get_slf_deaths_lookup_path(year)) 
%>% slfhelper::get_chi()) { + cli::cli_alert_info("Join deaths data function started at {Sys.time()}") + return( data %>% dplyr::left_join( diff --git a/R/join_sparra_hhg.R b/R/join_sparra_hhg.R index dafaca867..c22e1a9c3 100644 --- a/R/join_sparra_hhg.R +++ b/R/join_sparra_hhg.R @@ -5,6 +5,8 @@ #' @return The data including the SPARRA and HHG variables matched #' on to the episode file. join_sparra_hhg <- function(data, year) { + cli::cli_alert_info("Join SPARRA and HHG function started at {Sys.time()}") + if (check_year_valid(year, "sparra")) { data <- dplyr::left_join( data, diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index ef4aa4754..a28ee3b0f 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -12,6 +12,8 @@ link_delayed_discharge_eps <- function( episode_file, year, dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Link delayed discharge to episode file function started at {Sys.time()}") + episode_file <- episode_file %>% dplyr::mutate( # remember to revoke the cij_end_date with dummy_cij_end diff --git a/R/match_on_ltcs.R b/R/match_on_ltcs.R index 3ed052be8..f0522c00d 100644 --- a/R/match_on_ltcs.R +++ b/R/match_on_ltcs.R @@ -12,6 +12,8 @@ match_on_ltcs <- function( data, year, ltc_data = read_file(get_ltcs_path(year)) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Match on LTCs function started at {Sys.time()}") + # Match on LTC lookup matched <- dplyr::left_join( data, diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R index b0dc30d51..5341cf2b6 100644 --- a/R/process_lookup_homelessness.R +++ b/R/process_lookup_homelessness.R @@ -13,6 +13,8 @@ create_homelessness_lookup <- function( year, homelessness_data = read_file(get_source_extract_path(year, "homelessness")) %>% slfhelper::get_chi()) { + cli::cli_alert_info("Create homelessness lookup function started at {Sys.time()}") + homelessness_lookup <- 
homelessness_data %>% dplyr::distinct(.data$chi, .data$record_keydate1, .data$record_keydate2) %>% tidyr::drop_na(.data$chi) %>% @@ -35,6 +37,8 @@ create_homelessness_lookup <- function( #' @export add_homelessness_flag <- function(data, year, lookup = create_homelessness_lookup(year)) { + cli::cli_alert_info("Add homelessness flag function started at {Sys.time()}") + data <- data %>% dplyr::left_join( lookup %>% @@ -59,6 +63,8 @@ add_homelessness_flag <- function(data, year, #' @return the final data as a [tibble][tibble::tibble-package]. #' @export add_homelessness_date_flags <- function(data, year, lookup = create_homelessness_lookup(year)) { + cli::cli_alert_info("Add homelessness date flags function started at {Sys.time()}") + lookup <- lookup %>% dplyr::filter(!(is.na(.data$record_keydate2))) %>% dplyr::rename( From 5beee761d01576c6595a7f3d297c58b0df616ea0 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 25 Jun 2024 11:08:41 +0100 Subject: [PATCH 23/96] Update slf deaths lookup function name --- NAMESPACE | 2 +- R/add_activity_after_death_flag.R | 2 +- R/create_episode_file.R | 2 +- R/get_slf_lookup_paths.R | 10 +++++----- man/add_activity_after_death_flag.Rd | 3 ++- ..._path.Rd => get_combined_slf_deaths_lookup_path.Rd} | 6 +++--- man/get_slf_ch_name_lookup_path.Rd | 2 +- man/get_slf_chi_deaths_path.Rd | 2 +- man/get_slf_deaths_lookup_path.Rd | 2 +- man/get_slf_gpprac_path.Rd | 2 +- man/get_slf_postcode_path.Rd | 2 +- 11 files changed, 18 insertions(+), 17 deletions(-) rename man/{get_all_slf_deaths_lookup_path.Rd => get_combined_slf_deaths_lookup_path.Rd} (83%) diff --git a/NAMESPACE b/NAMESPACE index 0475603bf..c6bba9b38 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,9 +25,9 @@ export(end_fy_quarter) export(end_next_fy_quarter) export(find_latest_file) export(fy_interval) -export(get_all_slf_deaths_lookup_path) export(get_boxi_extract_path) export(get_ch_costs_path) +export(get_combined_slf_deaths_lookup_path) export(get_dd_path) 
export(get_dd_period) export(get_demographic_cohorts_path) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index 78131d941..b4e0bfd18 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -11,7 +11,7 @@ add_activity_after_death_flag <- function( data, year, - deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>% + deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi()) { cli::cli_alert_info("Add activity after death flag function started at {Sys.time()}") diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 34d2ddf9e..7909e2e7f 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -141,7 +141,7 @@ create_episode_file <- function( slf_deaths_lookup ) %>% add_activity_after_death_flag(year, - deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>% + deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi() ) %>% load_ep_file_vars(year) diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R index 390a27a5a..2455be768 100644 --- a/R/get_slf_lookup_paths.R +++ b/R/get_slf_lookup_paths.R @@ -81,16 +81,16 @@ get_slf_deaths_lookup_path <- function(year, ...) { #' @family slf lookup file path #' @seealso [get_file_path()] for the generic function. -get_all_slf_deaths_lookup_path <- function(update = latest_update(), ...) { - # Note this name is very similar to the existing slf_deaths_lookup_path which returnsthe path for +get_combined_slf_deaths_lookup_path <- function(update = latest_update(), ...) { + # Note this name is very similar to the existing slf_deaths_lookup_path which returns the path for # the processed BOXI extract for each financial year. This function will return the combined financial # years lookup i.e. all years put together. 
- all_slf_deaths_lookup_path <- get_file_path( + combined_slf_deaths_lookup_path <- get_file_path( directory = fs::path(get_slf_dir(), "Deaths"), - file_name = stringr::str_glue("anon-all_slf_deaths_lookup_{update}.parquet"), + file_name = stringr::str_glue("anon-combined_slf_deaths_lookup_{update}.parquet"), ... ) - return(all_slf_deaths_lookup_path) + return(combined_slf_deaths_lookup_path) } diff --git a/man/add_activity_after_death_flag.Rd b/man/add_activity_after_death_flag.Rd index 36eafe7bd..76a4ca64a 100644 --- a/man/add_activity_after_death_flag.Rd +++ b/man/add_activity_after_death_flag.Rd @@ -7,7 +7,8 @@ add_activity_after_death_flag( data, year, - deaths_data = read_file(get_all_slf_deaths_lookup_path()) \%>\% slfhelper::get_chi() + deaths_data = read_file(get_combined_slf_deaths_lookup_path()) \%>\% + slfhelper::get_chi() ) } \arguments{ diff --git a/man/get_all_slf_deaths_lookup_path.Rd b/man/get_combined_slf_deaths_lookup_path.Rd similarity index 83% rename from man/get_all_slf_deaths_lookup_path.Rd rename to man/get_combined_slf_deaths_lookup_path.Rd index 2f06b64d3..dd03a0541 100644 --- a/man/get_all_slf_deaths_lookup_path.Rd +++ b/man/get_combined_slf_deaths_lookup_path.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_slf_lookup_paths.R -\name{get_all_slf_deaths_lookup_path} -\alias{get_all_slf_deaths_lookup_path} +\name{get_combined_slf_deaths_lookup_path} +\alias{get_combined_slf_deaths_lookup_path} \title{SLF death dates File Path} \usage{ -get_all_slf_deaths_lookup_path(update = latest_update(), ...) +get_combined_slf_deaths_lookup_path(update = latest_update(), ...) 
} \arguments{ \item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})} diff --git a/man/get_slf_ch_name_lookup_path.Rd b/man/get_slf_ch_name_lookup_path.Rd index 2660bbeab..1b0c3f3bb 100644 --- a/man/get_slf_ch_name_lookup_path.Rd +++ b/man/get_slf_ch_name_lookup_path.Rd @@ -22,7 +22,7 @@ has official Care Home names and addresses provided by the Care Inspectorate. \code{\link[=get_file_path]{get_file_path()}} for the generic function. Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, \code{\link{get_slf_gpprac_path}()}, diff --git a/man/get_slf_chi_deaths_path.Rd b/man/get_slf_chi_deaths_path.Rd index 0db72d9d3..8ba115dfe 100644 --- a/man/get_slf_chi_deaths_path.Rd +++ b/man/get_slf_chi_deaths_path.Rd @@ -22,7 +22,7 @@ Get the full path to the CHI deaths file \code{\link[=get_file_path]{get_file_path()}} for the generic function. Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, \code{\link{get_slf_gpprac_path}()}, diff --git a/man/get_slf_deaths_lookup_path.Rd b/man/get_slf_deaths_lookup_path.Rd index 307c38ad3..ae64e2371 100644 --- a/man/get_slf_deaths_lookup_path.Rd +++ b/man/get_slf_deaths_lookup_path.Rd @@ -21,7 +21,7 @@ Get the full path to the SLF deaths lookup file \code{\link[=get_file_path]{get_file_path()}} for the generic function. 
Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_gpprac_path}()}, diff --git a/man/get_slf_gpprac_path.Rd b/man/get_slf_gpprac_path.Rd index 1fb23116f..1371f758e 100644 --- a/man/get_slf_gpprac_path.Rd +++ b/man/get_slf_gpprac_path.Rd @@ -21,7 +21,7 @@ Get the full path to the SLF GP practice lookup \code{\link[=get_file_path]{get_file_path()}} for the generic function. Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, diff --git a/man/get_slf_postcode_path.Rd b/man/get_slf_postcode_path.Rd index f37678695..ee9bc65a9 100644 --- a/man/get_slf_postcode_path.Rd +++ b/man/get_slf_postcode_path.Rd @@ -21,7 +21,7 @@ Get the full path to the SLF Postcode lookup \code{\link[=get_file_path]{get_file_path()}} for the generic function. 
Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, From 33278ef2dfdb4cf8bf6b936ae33e1ab1ffb071dc Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 26 Jun 2024 11:34:55 +0100 Subject: [PATCH 24/96] automate combined deaths lookup --- R/add_activity_after_death_flag.R | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index b4e0bfd18..5f973eb1e 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -127,17 +127,16 @@ add_activity_after_death_flag <- function( # Read data------------------------------------------------ process_deaths_lookup <- function(update = latest_update(), write_to_disk = TRUE, ...) { - all_boxi_deaths <- read_file(get_slf_deaths_lookup_path("1415")) %>% - rbind(read_file(get_slf_deaths_lookup_path("1516"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1617"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1718"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1819"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1920"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2021"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2122"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2223"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2324"))) %>% - # TODO: make this automated to pick up files starting with name "get_slf_deaths_lookup_path" + dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths" + file_names <- list.files(dir_folder, + pattern = "^anon-slf_deaths_lookup_.*parquet", + full.names = TRUE + ) + + # read all year specific deaths lookups and bind them together + all_boxi_deaths <- lapply(file_names, arrow::read_parquet) %>% + data.table::rbindlist() %>% + # 
convert to chi for processing slfhelper::get_chi() %>% # Remove rows with missing or blank CHI number - could also use na.omit? # na.omit(all_boxi_deaths) @@ -185,7 +184,7 @@ process_deaths_lookup <- function(update = latest_update(), write_file( all_boxi_deaths, fs::path(get_slf_dir(), "Deaths", - file_name = stringr::str_glue("anon-all_slf_deaths_lookup_{update}.parquet") + file_name = stringr::str_glue("anon-combined_slf_deaths_lookup_{update}.parquet") ) ) } From 16f5852cf89f1170dac0751e71c359b91e207c3e Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Tue, 2 Jul 2024 12:39:38 +0000 Subject: [PATCH 25/96] Update documentation --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3d731a0af..a25794864 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,5 +73,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 - +RoxygenNote: 7.3.2 From dbb118aa5e90ecd695df13249f95660645857261 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 2 Jul 2024 16:17:15 +0100 Subject: [PATCH 26/96] Update targets script --- _targets.R | 95 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 43 deletions(-) diff --git a/_targets.R b/_targets.R index 4ea32c179..3c788438e 100644 --- a/_targets.R +++ b/_targets.R @@ -19,7 +19,7 @@ tar_option_set( memory = "persistent" # default option ) -years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324") +years_to_run <- c("1718", "1819", "1920") # , "2021", "2122", "2223", "2324") list( tar_rds(write_to_disk, TRUE), @@ -542,26 +542,6 @@ list( write_to_disk = write_to_disk ) ), - tar_qs( - processed_data_list, - list( - source_acute_extract, - source_ae_extract, - source_cmh_extract, - source_dn_extract, - source_homelessness_extract, - source_maternity_extract, - source_mental_health_extract, - source_nrs_deaths_extract, - source_ooh_extract, - source_outpatients_extract, - 
source_prescribing_extract, - source_sc_care_home, - source_sc_home_care, - source_sc_sds, - source_sc_alarms_tele - ) - ), tar_file_read(nsu_cohort, get_nsu_path(year), read_file(!!.x)), tar_target( homelessness_lookup, @@ -571,28 +551,57 @@ list( ) ), tar_target( - episode_file, - create_episode_file( - processed_data_list, - year, - homelessness_lookup = homelessness_lookup, - dd_data = source_dd_extract %>% slfhelper::get_chi(), - nsu_cohort = nsu_cohort %>% slfhelper::get_chi(), - ltc_data = source_ltc_lookup %>% slfhelper::get_chi(), - slf_pc_lookup = source_pc_lookup, - slf_gpprac_lookup = source_gp_lookup, - slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(), - sc_client = sc_client_lookup %>% slfhelper::get_chi(), - write_to_disk - ) - ), - tar_target( - episode_file_tests, - process_tests_episode_file( - data = episode_file, - year = year - ) - ) # , + combined_deaths_lookup, + process_combined_deaths_lookup() + ) + + ## End of Targets pipeline ## + + ################################################################################ + ## Redundant code which may still be useful for including ep/indiv files. 
+ # tar_qs( + # processed_data_list, + # list( + # source_acute_extract, + # source_ae_extract, + # source_cmh_extract, + # source_dn_extract, + # source_homelessness_extract, + # source_maternity_extract, + # source_mental_health_extract, + # source_nrs_deaths_extract, + # source_ooh_extract, + # source_outpatients_extract, + # source_prescribing_extract, + # source_sc_care_home, + # source_sc_home_care, + # source_sc_sds, + # source_sc_alarms_tele + # ) + # ), + # tar_target( + # episode_file, + # create_episode_file( + # processed_data_list, + # year, + # homelessness_lookup = homelessness_lookup, + # dd_data = source_dd_extract %>% slfhelper::get_chi(), + # nsu_cohort = nsu_cohort %>% slfhelper::get_chi(), + # ltc_data = source_ltc_lookup %>% slfhelper::get_chi(), + # slf_pc_lookup = source_pc_lookup, + # slf_gpprac_lookup = source_gp_lookup, + # slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(), + # sc_client = sc_client_lookup %>% slfhelper::get_chi(), + # write_to_disk + # ) + # ), + # tar_target( + # episode_file_tests, + # process_tests_episode_file( + # data = episode_file, + # year = year + # ) + # ) # , # tar_target( # cross_year_tests, # process_tests_cross_year(year = year) From 623b2224a2e8c1fc321bf79c5a4e579d6e3a059a Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 2 Jul 2024 16:18:03 +0100 Subject: [PATCH 27/96] Update years --- _targets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_targets.R b/_targets.R index 3c788438e..7511ff03d 100644 --- a/_targets.R +++ b/_targets.R @@ -19,7 +19,7 @@ tar_option_set( memory = "persistent" # default option ) -years_to_run <- c("1718", "1819", "1920") # , "2021", "2122", "2223", "2324") +years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324") list( tar_rds(write_to_disk, TRUE), From d319c8543bf24440d8aa593147095638a7ced783 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 2 Jul 2024 16:41:52 +0100 Subject: [PATCH 28/96] Update running process 
manually --- .../run_episode_file_1718.R | 71 ++++++++++++++++++- .../run_episode_file_1819.R | 71 ++++++++++++++++++- .../run_episode_file_1920.R | 71 ++++++++++++++++++- .../run_episode_file_2021.R | 71 ++++++++++++++++++- .../run_episode_file_2122.R | 71 ++++++++++++++++++- .../run_episode_file_2223.R | 71 ++++++++++++++++++- .../run_episode_file_2324.R | 71 ++++++++++++++++++- 7 files changed, 483 insertions(+), 14 deletions(-) diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index ab75b94d7..f679ea669 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -3,10 +3,77 @@ library(createslf) year <- "1718" -processed_data_list <- targets::tar_read("processed_data_list_1718", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1718", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1718", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1718", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1718", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1718", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1718", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1718", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1718", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1718", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1718", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1718", + store = 
targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1718", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1718", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1718", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1718", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1718", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index cd5a7435f..d7a65690e 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -3,10 +3,77 @@ library(createslf) year <- "1819" -processed_data_list <- targets::tar_read("processed_data_list_1819", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1819", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1819", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1819", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1819", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1819", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1819", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1819", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1819", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1819", + store = 
targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1819", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1819", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1819", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1819", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1819", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1819", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1819", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index a9dc591b1..e3c2ebeb0 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -3,10 +3,77 @@ library(createslf) year <- "1920" -processed_data_list <- targets::tar_read("processed_data_list_1920", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1920", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1920", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1920", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1920", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1920", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1920", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1920", + store = targets_store + ), + 
maternity = targets::tar_read( + "source_maternity_extract_1920", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1920", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1920", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1920", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1920", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1920", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1920", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1920", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1920", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index 37708ee8b..c66f4572d 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -3,10 +3,77 @@ library(createslf) year <- "2021" -processed_data_list <- targets::tar_read("processed_data_list_2021", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2021", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2021", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2021", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2021", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2021", + store = targets_store + ), + deaths = 
targets::tar_read( + "source_nrs_deaths_extract_2021", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2021", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2021", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2021", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2021", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2021", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2021", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2021", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2021", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2021", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2021", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 47400e2d1..cde974be2 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -3,10 +3,77 @@ library(createslf) year <- "2122" -processed_data_list <- targets::tar_read("processed_data_list_2122", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2122", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2122", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2122", + store = targets_store + ), + 
cmh = targets::tar_read( + "source_cmh_extract_2122", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2122", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2122", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2122", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2122", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2122", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2122", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2122", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2122", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2122", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2122", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2122", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2122", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index e64a57f32..ee83082f1 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -3,10 +3,77 @@ library(createslf) year <- "2223" -processed_data_list <- targets::tar_read("processed_data_list_2223", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2223", + store = 
targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2223", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2223", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2223", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2223", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2223", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2223", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2223", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2223", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2223", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2223", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2223", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2223", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2223", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2223", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2223", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index 4a7f0ad29..508689f6d 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -3,10 +3,77 @@ library(createslf) year <- "2324" -processed_data_list <- targets::tar_read("processed_data_list_2324", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") 
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2324", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2324", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2324", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2324", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2324", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2324", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2324", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2324", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2324", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2324", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2324", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2324", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2324", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2324", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2324", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2324", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## From ca2d033dd156df0832d257202b31a82b102a62ac Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 3 Jul 2024 09:32:23 +0100 Subject: [PATCH 29/96] re-arrange brackets --- _targets.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/_targets.R 
b/_targets.R index 7511ff03d..4583f21fb 100644 --- a/_targets.R +++ b/_targets.R @@ -554,7 +554,8 @@ list( combined_deaths_lookup, process_combined_deaths_lookup() ) - + ) +) ## End of Targets pipeline ## ################################################################################ @@ -652,5 +653,4 @@ list( # version = "latest" # ) # ) - ) -) + From f15ca4e6c3f948ce610ac919a768b0518fefc55f Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 3 Jul 2024 09:33:50 +0100 Subject: [PATCH 30/96] Style code --- _targets.R | 201 ++++++++++++++++++++++++++--------------------------- 1 file changed, 100 insertions(+), 101 deletions(-) diff --git a/_targets.R b/_targets.R index 4583f21fb..80c56701e 100644 --- a/_targets.R +++ b/_targets.R @@ -549,108 +549,107 @@ list( year, homelessness_data = source_homelessness_extract %>% slfhelper::get_chi() ) - ), - tar_target( - combined_deaths_lookup, - process_combined_deaths_lookup() ) + ), + tar_target( + combined_deaths_lookup, + process_combined_deaths_lookup() ) ) - ## End of Targets pipeline ## - - ################################################################################ - ## Redundant code which may still be useful for including ep/indiv files. 
- # tar_qs( - # processed_data_list, - # list( - # source_acute_extract, - # source_ae_extract, - # source_cmh_extract, - # source_dn_extract, - # source_homelessness_extract, - # source_maternity_extract, - # source_mental_health_extract, - # source_nrs_deaths_extract, - # source_ooh_extract, - # source_outpatients_extract, - # source_prescribing_extract, - # source_sc_care_home, - # source_sc_home_care, - # source_sc_sds, - # source_sc_alarms_tele - # ) - # ), - # tar_target( - # episode_file, - # create_episode_file( - # processed_data_list, - # year, - # homelessness_lookup = homelessness_lookup, - # dd_data = source_dd_extract %>% slfhelper::get_chi(), - # nsu_cohort = nsu_cohort %>% slfhelper::get_chi(), - # ltc_data = source_ltc_lookup %>% slfhelper::get_chi(), - # slf_pc_lookup = source_pc_lookup, - # slf_gpprac_lookup = source_gp_lookup, - # slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(), - # sc_client = sc_client_lookup %>% slfhelper::get_chi(), - # write_to_disk - # ) - # ), - # tar_target( - # episode_file_tests, - # process_tests_episode_file( - # data = episode_file, - # year = year - # ) - # ) # , - # tar_target( - # cross_year_tests, - # process_tests_cross_year(year = year) - # ), # , - # tar_target( - # individual_file, - # create_individual_file( - # episode_file = episode_file, - # year = year, - # homelessness_lookup = homelessness_lookup, - # write_to_disk = write_to_disk - # ) - # ), - # tar_target( - # individual_file_tests, - # process_tests_individual_file( - # data = individual_file, - # year = year - # ) - # ) # , - # tar_target( - # episode_file_dataset, - # arrow::write_dataset( - # dataset = episode_file, - # path = fs::path( - # get_year_dir(year), - # stringr::str_glue("source-episode-file-{year}") - # ), - # format = "parquet", - # # Should correspond to the available slfhelper filters - # partitioning = c("recid", "hscp2018"), - # compression = "zstd", - # version = "latest" - # ) - # ), - # tar_target( - # 
individual_file_dataset, - # arrow::write_dataset( - # dataset = individual_file, - # path = fs::path( - # get_year_dir(year), - # stringr::str_glue("source-individual-file-{year}") - # ), - # format = "parquet", - # # Should correspond to the available slfhelper filters - # partitioning = c("hscp2018"), - # compression = "zstd", - # version = "latest" - # ) - # ) +## End of Targets pipeline ## +################################################################################ +## Redundant code which may still be useful for including ep/indiv files. +# tar_qs( +# processed_data_list, +# list( +# source_acute_extract, +# source_ae_extract, +# source_cmh_extract, +# source_dn_extract, +# source_homelessness_extract, +# source_maternity_extract, +# source_mental_health_extract, +# source_nrs_deaths_extract, +# source_ooh_extract, +# source_outpatients_extract, +# source_prescribing_extract, +# source_sc_care_home, +# source_sc_home_care, +# source_sc_sds, +# source_sc_alarms_tele +# ) +# ), +# tar_target( +# episode_file, +# create_episode_file( +# processed_data_list, +# year, +# homelessness_lookup = homelessness_lookup, +# dd_data = source_dd_extract %>% slfhelper::get_chi(), +# nsu_cohort = nsu_cohort %>% slfhelper::get_chi(), +# ltc_data = source_ltc_lookup %>% slfhelper::get_chi(), +# slf_pc_lookup = source_pc_lookup, +# slf_gpprac_lookup = source_gp_lookup, +# slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(), +# sc_client = sc_client_lookup %>% slfhelper::get_chi(), +# write_to_disk +# ) +# ), +# tar_target( +# episode_file_tests, +# process_tests_episode_file( +# data = episode_file, +# year = year +# ) +# ) # , +# tar_target( +# cross_year_tests, +# process_tests_cross_year(year = year) +# ), # , +# tar_target( +# individual_file, +# create_individual_file( +# episode_file = episode_file, +# year = year, +# homelessness_lookup = homelessness_lookup, +# write_to_disk = write_to_disk +# ) +# ), +# tar_target( +# individual_file_tests, +# 
process_tests_individual_file( +# data = individual_file, +# year = year +# ) +# ) # , +# tar_target( +# episode_file_dataset, +# arrow::write_dataset( +# dataset = episode_file, +# path = fs::path( +# get_year_dir(year), +# stringr::str_glue("source-episode-file-{year}") +# ), +# format = "parquet", +# # Should correspond to the available slfhelper filters +# partitioning = c("recid", "hscp2018"), +# compression = "zstd", +# version = "latest" +# ) +# ), +# tar_target( +# individual_file_dataset, +# arrow::write_dataset( +# dataset = individual_file, +# path = fs::path( +# get_year_dir(year), +# stringr::str_glue("source-individual-file-{year}") +# ), +# format = "parquet", +# # Should correspond to the available slfhelper filters +# partitioning = c("hscp2018"), +# compression = "zstd", +# version = "latest" +# ) +# ) From 61baf029f359b41529972b2755a44877d1b2e077 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 3 Jul 2024 10:27:54 +0100 Subject: [PATCH 31/96] Update run targets scripts --- Run_SLF_Files_targets/run_targets_1718.R | 8 -------- Run_SLF_Files_targets/run_targets_1819.R | 8 -------- Run_SLF_Files_targets/run_targets_1920.R | 8 -------- Run_SLF_Files_targets/run_targets_2021.R | 8 -------- Run_SLF_Files_targets/run_targets_2122.R | 8 -------- Run_SLF_Files_targets/run_targets_2223.R | 8 -------- Run_SLF_Files_targets/run_targets_2324.R | 8 -------- 7 files changed, 56 deletions(-) diff --git a/Run_SLF_Files_targets/run_targets_1718.R b/Run_SLF_Files_targets/run_targets_1718.R index ac03edd3f..a0dc17cf0 100644 --- a/Run_SLF_Files_targets/run_targets_1718.R +++ b/Run_SLF_Files_targets/run_targets_1718.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("1718")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff 
--git a/Run_SLF_Files_targets/run_targets_1819.R b/Run_SLF_Files_targets/run_targets_1819.R index b60728359..981b6ee33 100644 --- a/Run_SLF_Files_targets/run_targets_1819.R +++ b/Run_SLF_Files_targets/run_targets_1819.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("1819")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_1920.R b/Run_SLF_Files_targets/run_targets_1920.R index 897ee0b7a..79ee50644 100644 --- a/Run_SLF_Files_targets/run_targets_1920.R +++ b/Run_SLF_Files_targets/run_targets_1920.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("1920")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2021.R b/Run_SLF_Files_targets/run_targets_2021.R index 53333c014..c0433117d 100644 --- a/Run_SLF_Files_targets/run_targets_2021.R +++ b/Run_SLF_Files_targets/run_targets_2021.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("2021")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2122.R b/Run_SLF_Files_targets/run_targets_2122.R index 457fe33e7..3424daf08 100644 --- a/Run_SLF_Files_targets/run_targets_2122.R +++ b/Run_SLF_Files_targets/run_targets_2122.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("2122")) ) -# 
use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2223.R b/Run_SLF_Files_targets/run_targets_2223.R index fc851f3f7..052daed3e 100644 --- a/Run_SLF_Files_targets/run_targets_2223.R +++ b/Run_SLF_Files_targets/run_targets_2223.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("2223")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2324.R b/Run_SLF_Files_targets/run_targets_2324.R index 3b4c9b240..c34ae7061 100644 --- a/Run_SLF_Files_targets/run_targets_2324.R +++ b/Run_SLF_Files_targets/run_targets_2324.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("2324")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) From 379b13752660909c0daeb8e94840648486d3d5e9 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 3 Jul 2024 10:44:03 +0100 Subject: [PATCH 32/96] Set up all-targets script --- Run_SLF_Files_targets/run_all_targets.R | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 Run_SLF_Files_targets/run_all_targets.R diff --git a/Run_SLF_Files_targets/run_all_targets.R b/Run_SLF_Files_targets/run_all_targets.R new file mode 100644 index 000000000..fb5b94fab --- /dev/null +++ b/Run_SLF_Files_targets/run_all_targets.R @@ -0,0 +1,5 @@ +library(targets) + +# use tar_make_future() to run 
targets for all years +# This will run everything needed for creating the episode file. +tar_make_future() From 3fb27ae732ff72a2438efca850527f9d57359094 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 3 Jul 2024 09:45:29 +0000 Subject: [PATCH 33/96] Style code --- Run_SLF_Files_targets/run_targets_1718.R | 1 - Run_SLF_Files_targets/run_targets_1819.R | 1 - Run_SLF_Files_targets/run_targets_1920.R | 1 - Run_SLF_Files_targets/run_targets_2021.R | 1 - Run_SLF_Files_targets/run_targets_2122.R | 1 - Run_SLF_Files_targets/run_targets_2223.R | 1 - Run_SLF_Files_targets/run_targets_2324.R | 1 - 7 files changed, 7 deletions(-) diff --git a/Run_SLF_Files_targets/run_targets_1718.R b/Run_SLF_Files_targets/run_targets_1718.R index a0dc17cf0..e85a89ff8 100644 --- a/Run_SLF_Files_targets/run_targets_1718.R +++ b/Run_SLF_Files_targets/run_targets_1718.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1718")) ) - diff --git a/Run_SLF_Files_targets/run_targets_1819.R b/Run_SLF_Files_targets/run_targets_1819.R index 981b6ee33..6957054c8 100644 --- a/Run_SLF_Files_targets/run_targets_1819.R +++ b/Run_SLF_Files_targets/run_targets_1819.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1819")) ) - diff --git a/Run_SLF_Files_targets/run_targets_1920.R b/Run_SLF_Files_targets/run_targets_1920.R index 79ee50644..ef6272fca 100644 --- a/Run_SLF_Files_targets/run_targets_1920.R +++ b/Run_SLF_Files_targets/run_targets_1920.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1920")) ) - diff --git a/Run_SLF_Files_targets/run_targets_2021.R b/Run_SLF_Files_targets/run_targets_2021.R index c0433117d..237078e05 100644 --- a/Run_SLF_Files_targets/run_targets_2021.R +++ b/Run_SLF_Files_targets/run_targets_2021.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2021")) ) - diff --git 
a/Run_SLF_Files_targets/run_targets_2122.R b/Run_SLF_Files_targets/run_targets_2122.R index 3424daf08..dc64b625f 100644 --- a/Run_SLF_Files_targets/run_targets_2122.R +++ b/Run_SLF_Files_targets/run_targets_2122.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2122")) ) - diff --git a/Run_SLF_Files_targets/run_targets_2223.R b/Run_SLF_Files_targets/run_targets_2223.R index 052daed3e..7d8677a80 100644 --- a/Run_SLF_Files_targets/run_targets_2223.R +++ b/Run_SLF_Files_targets/run_targets_2223.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2223")) ) - diff --git a/Run_SLF_Files_targets/run_targets_2324.R b/Run_SLF_Files_targets/run_targets_2324.R index c34ae7061..bcebe5fb8 100644 --- a/Run_SLF_Files_targets/run_targets_2324.R +++ b/Run_SLF_Files_targets/run_targets_2324.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2324")) ) - From ceb526a1260972da21c4103929e4ff0b4772d74c Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 3 Jul 2024 10:19:45 +0000 Subject: [PATCH 34/96] Update documentation --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3d731a0af..a25794864 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,5 +73,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 - +RoxygenNote: 7.3.2 From a521206271345728e155d66b00789698ce106261 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 2 Jul 2024 16:17:15 +0100 Subject: [PATCH 35/96] Update targets script --- _targets.R | 95 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 43 deletions(-) diff --git a/_targets.R b/_targets.R index 4ea32c179..3c788438e 100644 --- a/_targets.R +++ b/_targets.R @@ -19,7 +19,7 @@ tar_option_set( memory = "persistent" # default option ) -years_to_run <- c("1718", 
"1819", "1920", "2021", "2122", "2223", "2324") +years_to_run <- c("1718", "1819", "1920") # , "2021", "2122", "2223", "2324") list( tar_rds(write_to_disk, TRUE), @@ -542,26 +542,6 @@ list( write_to_disk = write_to_disk ) ), - tar_qs( - processed_data_list, - list( - source_acute_extract, - source_ae_extract, - source_cmh_extract, - source_dn_extract, - source_homelessness_extract, - source_maternity_extract, - source_mental_health_extract, - source_nrs_deaths_extract, - source_ooh_extract, - source_outpatients_extract, - source_prescribing_extract, - source_sc_care_home, - source_sc_home_care, - source_sc_sds, - source_sc_alarms_tele - ) - ), tar_file_read(nsu_cohort, get_nsu_path(year), read_file(!!.x)), tar_target( homelessness_lookup, @@ -571,28 +551,57 @@ list( ) ), tar_target( - episode_file, - create_episode_file( - processed_data_list, - year, - homelessness_lookup = homelessness_lookup, - dd_data = source_dd_extract %>% slfhelper::get_chi(), - nsu_cohort = nsu_cohort %>% slfhelper::get_chi(), - ltc_data = source_ltc_lookup %>% slfhelper::get_chi(), - slf_pc_lookup = source_pc_lookup, - slf_gpprac_lookup = source_gp_lookup, - slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(), - sc_client = sc_client_lookup %>% slfhelper::get_chi(), - write_to_disk - ) - ), - tar_target( - episode_file_tests, - process_tests_episode_file( - data = episode_file, - year = year - ) - ) # , + combined_deaths_lookup, + process_combined_deaths_lookup() + ) + + ## End of Targets pipeline ## + + ################################################################################ + ## Redundant code which may still be useful for including ep/indiv files. 
+ # tar_qs( + # processed_data_list, + # list( + # source_acute_extract, + # source_ae_extract, + # source_cmh_extract, + # source_dn_extract, + # source_homelessness_extract, + # source_maternity_extract, + # source_mental_health_extract, + # source_nrs_deaths_extract, + # source_ooh_extract, + # source_outpatients_extract, + # source_prescribing_extract, + # source_sc_care_home, + # source_sc_home_care, + # source_sc_sds, + # source_sc_alarms_tele + # ) + # ), + # tar_target( + # episode_file, + # create_episode_file( + # processed_data_list, + # year, + # homelessness_lookup = homelessness_lookup, + # dd_data = source_dd_extract %>% slfhelper::get_chi(), + # nsu_cohort = nsu_cohort %>% slfhelper::get_chi(), + # ltc_data = source_ltc_lookup %>% slfhelper::get_chi(), + # slf_pc_lookup = source_pc_lookup, + # slf_gpprac_lookup = source_gp_lookup, + # slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(), + # sc_client = sc_client_lookup %>% slfhelper::get_chi(), + # write_to_disk + # ) + # ), + # tar_target( + # episode_file_tests, + # process_tests_episode_file( + # data = episode_file, + # year = year + # ) + # ) # , # tar_target( # cross_year_tests, # process_tests_cross_year(year = year) From 585db7f28da533e35657f93117d628aa361152a2 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 2 Jul 2024 16:18:03 +0100 Subject: [PATCH 36/96] Update years --- _targets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_targets.R b/_targets.R index 3c788438e..7511ff03d 100644 --- a/_targets.R +++ b/_targets.R @@ -19,7 +19,7 @@ tar_option_set( memory = "persistent" # default option ) -years_to_run <- c("1718", "1819", "1920") # , "2021", "2122", "2223", "2324") +years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324") list( tar_rds(write_to_disk, TRUE), From dada9cb5676d115af9ba8b6896349ffe693836eb Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 2 Jul 2024 16:41:52 +0100 Subject: [PATCH 37/96] Update running process 
manually --- .../run_episode_file_1718.R | 71 ++++++++++++++++++- .../run_episode_file_1819.R | 71 ++++++++++++++++++- .../run_episode_file_1920.R | 71 ++++++++++++++++++- .../run_episode_file_2021.R | 71 ++++++++++++++++++- .../run_episode_file_2122.R | 71 ++++++++++++++++++- .../run_episode_file_2223.R | 71 ++++++++++++++++++- .../run_episode_file_2324.R | 71 ++++++++++++++++++- 7 files changed, 483 insertions(+), 14 deletions(-) diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index ab75b94d7..f679ea669 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -3,10 +3,77 @@ library(createslf) year <- "1718" -processed_data_list <- targets::tar_read("processed_data_list_1718", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1718", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1718", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1718", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1718", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1718", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1718", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1718", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1718", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1718", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1718", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1718", + store = 
targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1718", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1718", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1718", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1718", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1718", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index cd5a7435f..d7a65690e 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -3,10 +3,77 @@ library(createslf) year <- "1819" -processed_data_list <- targets::tar_read("processed_data_list_1819", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1819", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1819", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1819", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1819", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1819", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1819", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1819", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1819", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1819", + store = 
targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1819", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1819", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1819", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1819", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1819", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1819", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1819", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index a9dc591b1..e3c2ebeb0 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -3,10 +3,77 @@ library(createslf) year <- "1920" -processed_data_list <- targets::tar_read("processed_data_list_1920", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1920", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1920", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1920", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1920", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1920", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1920", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1920", + store = targets_store + ), + 
maternity = targets::tar_read( + "source_maternity_extract_1920", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1920", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1920", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1920", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1920", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1920", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1920", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1920", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1920", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index 37708ee8b..c66f4572d 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -3,10 +3,77 @@ library(createslf) year <- "2021" -processed_data_list <- targets::tar_read("processed_data_list_2021", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2021", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2021", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2021", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2021", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2021", + store = targets_store + ), + deaths = 
targets::tar_read( + "source_nrs_deaths_extract_2021", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2021", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2021", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2021", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2021", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2021", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2021", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2021", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2021", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2021", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2021", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 47400e2d1..cde974be2 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -3,10 +3,77 @@ library(createslf) year <- "2122" -processed_data_list <- targets::tar_read("processed_data_list_2122", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2122", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2122", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2122", + store = targets_store + ), + 
cmh = targets::tar_read( + "source_cmh_extract_2122", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2122", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2122", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2122", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2122", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2122", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2122", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2122", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2122", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2122", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2122", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2122", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2122", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index e64a57f32..ee83082f1 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -3,10 +3,77 @@ library(createslf) year <- "2223" -processed_data_list <- targets::tar_read("processed_data_list_2223", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2223", + store = 
targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2223", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2223", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2223", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2223", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2223", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2223", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2223", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2223", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2223", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2223", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2223", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2223", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2223", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2223", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2223", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index 4a7f0ad29..508689f6d 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -3,10 +3,77 @@ library(createslf) year <- "2324" -processed_data_list <- targets::tar_read("processed_data_list_2324", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") 
+targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2324", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2324", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2324", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2324", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2324", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2324", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2324", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2324", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2324", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2324", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2324", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2324", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2324", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2324", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2324", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2324", + store = targets_store + ) ) # Run episode file create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) + +## End of Script ## From bf63d73616a6cda5e32c8a0862b0c7794f23c80a Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 3 Jul 2024 09:32:23 +0100 Subject: [PATCH 38/96] re-arrange brackets --- _targets.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/_targets.R 
b/_targets.R index 7511ff03d..4583f21fb 100644 --- a/_targets.R +++ b/_targets.R @@ -554,7 +554,8 @@ list( combined_deaths_lookup, process_combined_deaths_lookup() ) - + ) +) ## End of Targets pipeline ## ################################################################################ @@ -652,5 +653,4 @@ list( # version = "latest" # ) # ) - ) -) + From ba2be196ec1eb9fee627413af3ea28081fac8533 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 3 Jul 2024 09:33:50 +0100 Subject: [PATCH 39/96] Style code --- _targets.R | 201 ++++++++++++++++++++++++++--------------------------- 1 file changed, 100 insertions(+), 101 deletions(-) diff --git a/_targets.R b/_targets.R index 4583f21fb..80c56701e 100644 --- a/_targets.R +++ b/_targets.R @@ -549,108 +549,107 @@ list( year, homelessness_data = source_homelessness_extract %>% slfhelper::get_chi() ) - ), - tar_target( - combined_deaths_lookup, - process_combined_deaths_lookup() ) + ), + tar_target( + combined_deaths_lookup, + process_combined_deaths_lookup() ) ) - ## End of Targets pipeline ## - - ################################################################################ - ## Redundant code which may still be useful for including ep/indiv files. 
- # tar_qs( - # processed_data_list, - # list( - # source_acute_extract, - # source_ae_extract, - # source_cmh_extract, - # source_dn_extract, - # source_homelessness_extract, - # source_maternity_extract, - # source_mental_health_extract, - # source_nrs_deaths_extract, - # source_ooh_extract, - # source_outpatients_extract, - # source_prescribing_extract, - # source_sc_care_home, - # source_sc_home_care, - # source_sc_sds, - # source_sc_alarms_tele - # ) - # ), - # tar_target( - # episode_file, - # create_episode_file( - # processed_data_list, - # year, - # homelessness_lookup = homelessness_lookup, - # dd_data = source_dd_extract %>% slfhelper::get_chi(), - # nsu_cohort = nsu_cohort %>% slfhelper::get_chi(), - # ltc_data = source_ltc_lookup %>% slfhelper::get_chi(), - # slf_pc_lookup = source_pc_lookup, - # slf_gpprac_lookup = source_gp_lookup, - # slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(), - # sc_client = sc_client_lookup %>% slfhelper::get_chi(), - # write_to_disk - # ) - # ), - # tar_target( - # episode_file_tests, - # process_tests_episode_file( - # data = episode_file, - # year = year - # ) - # ) # , - # tar_target( - # cross_year_tests, - # process_tests_cross_year(year = year) - # ), # , - # tar_target( - # individual_file, - # create_individual_file( - # episode_file = episode_file, - # year = year, - # homelessness_lookup = homelessness_lookup, - # write_to_disk = write_to_disk - # ) - # ), - # tar_target( - # individual_file_tests, - # process_tests_individual_file( - # data = individual_file, - # year = year - # ) - # ) # , - # tar_target( - # episode_file_dataset, - # arrow::write_dataset( - # dataset = episode_file, - # path = fs::path( - # get_year_dir(year), - # stringr::str_glue("source-episode-file-{year}") - # ), - # format = "parquet", - # # Should correspond to the available slfhelper filters - # partitioning = c("recid", "hscp2018"), - # compression = "zstd", - # version = "latest" - # ) - # ), - # tar_target( - # 
individual_file_dataset, - # arrow::write_dataset( - # dataset = individual_file, - # path = fs::path( - # get_year_dir(year), - # stringr::str_glue("source-individual-file-{year}") - # ), - # format = "parquet", - # # Should correspond to the available slfhelper filters - # partitioning = c("hscp2018"), - # compression = "zstd", - # version = "latest" - # ) - # ) +## End of Targets pipeline ## +################################################################################ +## Redundant code which may still be useful for including ep/indiv files. +# tar_qs( +# processed_data_list, +# list( +# source_acute_extract, +# source_ae_extract, +# source_cmh_extract, +# source_dn_extract, +# source_homelessness_extract, +# source_maternity_extract, +# source_mental_health_extract, +# source_nrs_deaths_extract, +# source_ooh_extract, +# source_outpatients_extract, +# source_prescribing_extract, +# source_sc_care_home, +# source_sc_home_care, +# source_sc_sds, +# source_sc_alarms_tele +# ) +# ), +# tar_target( +# episode_file, +# create_episode_file( +# processed_data_list, +# year, +# homelessness_lookup = homelessness_lookup, +# dd_data = source_dd_extract %>% slfhelper::get_chi(), +# nsu_cohort = nsu_cohort %>% slfhelper::get_chi(), +# ltc_data = source_ltc_lookup %>% slfhelper::get_chi(), +# slf_pc_lookup = source_pc_lookup, +# slf_gpprac_lookup = source_gp_lookup, +# slf_deaths_lookup = slf_deaths_lookup %>% slfhelper::get_chi(), +# sc_client = sc_client_lookup %>% slfhelper::get_chi(), +# write_to_disk +# ) +# ), +# tar_target( +# episode_file_tests, +# process_tests_episode_file( +# data = episode_file, +# year = year +# ) +# ) # , +# tar_target( +# cross_year_tests, +# process_tests_cross_year(year = year) +# ), # , +# tar_target( +# individual_file, +# create_individual_file( +# episode_file = episode_file, +# year = year, +# homelessness_lookup = homelessness_lookup, +# write_to_disk = write_to_disk +# ) +# ), +# tar_target( +# individual_file_tests, +# 
process_tests_individual_file( +# data = individual_file, +# year = year +# ) +# ) # , +# tar_target( +# episode_file_dataset, +# arrow::write_dataset( +# dataset = episode_file, +# path = fs::path( +# get_year_dir(year), +# stringr::str_glue("source-episode-file-{year}") +# ), +# format = "parquet", +# # Should correspond to the available slfhelper filters +# partitioning = c("recid", "hscp2018"), +# compression = "zstd", +# version = "latest" +# ) +# ), +# tar_target( +# individual_file_dataset, +# arrow::write_dataset( +# dataset = individual_file, +# path = fs::path( +# get_year_dir(year), +# stringr::str_glue("source-individual-file-{year}") +# ), +# format = "parquet", +# # Should correspond to the available slfhelper filters +# partitioning = c("hscp2018"), +# compression = "zstd", +# version = "latest" +# ) +# ) From a4cf7b6523a7564ad4d34bc6eabf07541a220e0b Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 3 Jul 2024 10:27:54 +0100 Subject: [PATCH 40/96] Update run targets scripts --- Run_SLF_Files_targets/run_targets_1718.R | 8 -------- Run_SLF_Files_targets/run_targets_1819.R | 8 -------- Run_SLF_Files_targets/run_targets_1920.R | 8 -------- Run_SLF_Files_targets/run_targets_2021.R | 8 -------- Run_SLF_Files_targets/run_targets_2122.R | 8 -------- Run_SLF_Files_targets/run_targets_2223.R | 8 -------- Run_SLF_Files_targets/run_targets_2324.R | 8 -------- 7 files changed, 56 deletions(-) diff --git a/Run_SLF_Files_targets/run_targets_1718.R b/Run_SLF_Files_targets/run_targets_1718.R index ac03edd3f..a0dc17cf0 100644 --- a/Run_SLF_Files_targets/run_targets_1718.R +++ b/Run_SLF_Files_targets/run_targets_1718.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("1718")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff 
--git a/Run_SLF_Files_targets/run_targets_1819.R b/Run_SLF_Files_targets/run_targets_1819.R index b60728359..981b6ee33 100644 --- a/Run_SLF_Files_targets/run_targets_1819.R +++ b/Run_SLF_Files_targets/run_targets_1819.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("1819")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_1920.R b/Run_SLF_Files_targets/run_targets_1920.R index 897ee0b7a..79ee50644 100644 --- a/Run_SLF_Files_targets/run_targets_1920.R +++ b/Run_SLF_Files_targets/run_targets_1920.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("1920")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2021.R b/Run_SLF_Files_targets/run_targets_2021.R index 53333c014..c0433117d 100644 --- a/Run_SLF_Files_targets/run_targets_2021.R +++ b/Run_SLF_Files_targets/run_targets_2021.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("2021")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2122.R b/Run_SLF_Files_targets/run_targets_2122.R index 457fe33e7..3424daf08 100644 --- a/Run_SLF_Files_targets/run_targets_2122.R +++ b/Run_SLF_Files_targets/run_targets_2122.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("2122")) ) -# 
use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2223.R b/Run_SLF_Files_targets/run_targets_2223.R index fc851f3f7..052daed3e 100644 --- a/Run_SLF_Files_targets/run_targets_2223.R +++ b/Run_SLF_Files_targets/run_targets_2223.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("2223")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2324.R b/Run_SLF_Files_targets/run_targets_2324.R index 3b4c9b240..c34ae7061 100644 --- a/Run_SLF_Files_targets/run_targets_2324.R +++ b/Run_SLF_Files_targets/run_targets_2324.R @@ -8,11 +8,3 @@ tar_make_future( names = (targets::contains("2324")) ) -# use targets to create individual files due to RAM limit -library(createslf) - -episode_file <- arrow::read_parquet(get_slf_episode_path(year)) - -# Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) From f840c534709451e110e98f1f18f11fc90da31fe4 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 3 Jul 2024 10:44:03 +0100 Subject: [PATCH 41/96] Set up all-targets script --- Run_SLF_Files_targets/run_all_targets.R | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 Run_SLF_Files_targets/run_all_targets.R diff --git a/Run_SLF_Files_targets/run_all_targets.R b/Run_SLF_Files_targets/run_all_targets.R new file mode 100644 index 000000000..fb5b94fab --- /dev/null +++ b/Run_SLF_Files_targets/run_all_targets.R @@ -0,0 +1,5 @@ +library(targets) + +# use tar_make_future() to run 
targets for all years +# This will run everything needed for creating the episode file. +tar_make_future() From 6d20852f5859c7dc6d335920101452b121c0e9bd Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 3 Jul 2024 09:45:29 +0000 Subject: [PATCH 42/96] Style code --- Run_SLF_Files_targets/run_targets_1718.R | 1 - Run_SLF_Files_targets/run_targets_1819.R | 1 - Run_SLF_Files_targets/run_targets_1920.R | 1 - Run_SLF_Files_targets/run_targets_2021.R | 1 - Run_SLF_Files_targets/run_targets_2122.R | 1 - Run_SLF_Files_targets/run_targets_2223.R | 1 - Run_SLF_Files_targets/run_targets_2324.R | 1 - 7 files changed, 7 deletions(-) diff --git a/Run_SLF_Files_targets/run_targets_1718.R b/Run_SLF_Files_targets/run_targets_1718.R index a0dc17cf0..e85a89ff8 100644 --- a/Run_SLF_Files_targets/run_targets_1718.R +++ b/Run_SLF_Files_targets/run_targets_1718.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1718")) ) - diff --git a/Run_SLF_Files_targets/run_targets_1819.R b/Run_SLF_Files_targets/run_targets_1819.R index 981b6ee33..6957054c8 100644 --- a/Run_SLF_Files_targets/run_targets_1819.R +++ b/Run_SLF_Files_targets/run_targets_1819.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1819")) ) - diff --git a/Run_SLF_Files_targets/run_targets_1920.R b/Run_SLF_Files_targets/run_targets_1920.R index 79ee50644..ef6272fca 100644 --- a/Run_SLF_Files_targets/run_targets_1920.R +++ b/Run_SLF_Files_targets/run_targets_1920.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1920")) ) - diff --git a/Run_SLF_Files_targets/run_targets_2021.R b/Run_SLF_Files_targets/run_targets_2021.R index c0433117d..237078e05 100644 --- a/Run_SLF_Files_targets/run_targets_2021.R +++ b/Run_SLF_Files_targets/run_targets_2021.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2021")) ) - diff --git 
a/Run_SLF_Files_targets/run_targets_2122.R b/Run_SLF_Files_targets/run_targets_2122.R index 3424daf08..dc64b625f 100644 --- a/Run_SLF_Files_targets/run_targets_2122.R +++ b/Run_SLF_Files_targets/run_targets_2122.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2122")) ) - diff --git a/Run_SLF_Files_targets/run_targets_2223.R b/Run_SLF_Files_targets/run_targets_2223.R index 052daed3e..7d8677a80 100644 --- a/Run_SLF_Files_targets/run_targets_2223.R +++ b/Run_SLF_Files_targets/run_targets_2223.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2223")) ) - diff --git a/Run_SLF_Files_targets/run_targets_2324.R b/Run_SLF_Files_targets/run_targets_2324.R index c34ae7061..bcebe5fb8 100644 --- a/Run_SLF_Files_targets/run_targets_2324.R +++ b/Run_SLF_Files_targets/run_targets_2324.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("2324")) ) - From 5482a75599a76da435a2cd97a61c3361e85feb1f Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Thu, 4 Jul 2024 13:24:31 +0000 Subject: [PATCH 43/96] Style code --- R/process_extract_acute.R | 40 +++++++++++++++++++++----------------- R/process_extract_gp_ooh.R | 1 - 2 files changed, 22 insertions(+), 19 deletions(-) diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R index c68909c8a..82926bd92 100644 --- a/R/process_extract_acute.R +++ b/R/process_extract_acute.R @@ -16,7 +16,6 @@ process_extract_acute <- function(data, year, acute_cup_path = get_boxi_extract_path(year, "acute_cup"), write_to_disk = TRUE) { - # Only run for a single year stopifnot(length(year) == 1L) @@ -62,7 +61,7 @@ process_extract_acute <- function(data, ) %>% # Add oldtadm as a factor with labels dplyr::mutate(oldtadm = factor(.data$oldtadm, - levels = 0L:8L + levels = 0L:8L )) %>% dplyr::mutate( unique_row_num = dplyr::row_number() @@ -80,25 +79,30 @@ process_extract_acute <- function(data, 
"CUP Marker" = readr::col_integer(), "CUP Pathway Name" = readr::col_character() ) - ) %>% dplyr::select( - chi = "UPI Number [C]", - case_reference_number = "Case Reference Number [C]", - record_keydate1 = "Acute Admission Date", - record_keydate2 = "Acute Discharge Date", - tadm = "Acute Admission Type Code", - disch = "Acute Discharge Type Code", - cup_marker = "CUP Marker", - cup_pathway = "CUP Pathway Name" - ) %>% dplyr::distinct() + ) %>% + dplyr::select( + chi = "UPI Number [C]", + case_reference_number = "Case Reference Number [C]", + record_keydate1 = "Acute Admission Date", + record_keydate2 = "Acute Discharge Date", + tadm = "Acute Admission Type Code", + disch = "Acute Discharge Type Code", + cup_marker = "CUP Marker", + cup_pathway = "CUP Pathway Name" + ) %>% + dplyr::distinct() acute_clean <- acute_clean %>% dplyr::left_join(acute_cup, - by = c("record_keydate1", - "record_keydate2", - "case_reference_number", - "chi", - "tadm", - "disch")) + by = c( + "record_keydate1", + "record_keydate2", + "case_reference_number", + "chi", + "tadm", + "disch" + ) + ) acute_processed <- acute_clean %>% dplyr::select( diff --git a/R/process_extract_gp_ooh.R b/R/process_extract_gp_ooh.R index f663729d7..e8d07b9e8 100644 --- a/R/process_extract_gp_ooh.R +++ b/R/process_extract_gp_ooh.R @@ -16,7 +16,6 @@ process_extract_gp_ooh <- function(year, data_list, gp_ooh_cup_path = get_boxi_extract_path(year, "gp_ooh_cup"), write_to_disk = TRUE) { - diagnosis_extract <- process_extract_ooh_diagnosis(data_list[["diagnosis"]], year) outcomes_extract <- process_extract_ooh_outcomes(data_list[["outcomes"]], year) consultations_extract <- process_extract_ooh_consultations(data_list[["consultations"]], year) From 91e5946c7e5e9bc1e4c6796c25cb61ac63d4819d Mon Sep 17 00:00:00 2001 From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Date: Fri, 28 Jun 2024 11:11:34 +0100 Subject: [PATCH 44/96] Automate the combined slf deaths lookup (#973) Closes #957 and #968 --- 
NAMESPACE | 2 +- R/add_activity_after_death_flag.R | 25 +++++++++---------- R/create_episode_file.R | 2 +- R/get_slf_lookup_paths.R | 10 ++++---- man/add_activity_after_death_flag.Rd | 3 ++- ...=> get_combined_slf_deaths_lookup_path.Rd} | 6 ++--- man/get_slf_ch_name_lookup_path.Rd | 2 +- man/get_slf_chi_deaths_path.Rd | 2 +- man/get_slf_deaths_lookup_path.Rd | 2 +- man/get_slf_gpprac_path.Rd | 2 +- man/get_slf_postcode_path.Rd | 2 +- 11 files changed, 29 insertions(+), 29 deletions(-) rename man/{get_all_slf_deaths_lookup_path.Rd => get_combined_slf_deaths_lookup_path.Rd} (83%) diff --git a/NAMESPACE b/NAMESPACE index 0475603bf..c6bba9b38 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,9 +25,9 @@ export(end_fy_quarter) export(end_next_fy_quarter) export(find_latest_file) export(fy_interval) -export(get_all_slf_deaths_lookup_path) export(get_boxi_extract_path) export(get_ch_costs_path) +export(get_combined_slf_deaths_lookup_path) export(get_dd_path) export(get_dd_period) export(get_demographic_cohorts_path) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index 78131d941..5f973eb1e 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -11,7 +11,7 @@ add_activity_after_death_flag <- function( data, year, - deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>% + deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi()) { cli::cli_alert_info("Add activity after death flag function started at {Sys.time()}") @@ -127,17 +127,16 @@ add_activity_after_death_flag <- function( # Read data------------------------------------------------ process_deaths_lookup <- function(update = latest_update(), write_to_disk = TRUE, ...) 
{ - all_boxi_deaths <- read_file(get_slf_deaths_lookup_path("1415")) %>% - rbind(read_file(get_slf_deaths_lookup_path("1516"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1617"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1718"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1819"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("1920"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2021"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2122"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2223"))) %>% - rbind(read_file(get_slf_deaths_lookup_path("2324"))) %>% - # TODO: make this automated to pick up files starting with name "get_slf_deaths_lookup_path" + dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths" + file_names <- list.files(dir_folder, + pattern = "^anon-slf_deaths_lookup_.*parquet", + full.names = TRUE + ) + + # read all year specific deaths lookups and bind them together + all_boxi_deaths <- lapply(file_names, arrow::read_parquet) %>% + data.table::rbindlist() %>% + # convert to chi for processing slfhelper::get_chi() %>% # Remove rows with missing or blank CHI number - could also use na.omit? 
# na.omit(all_boxi_deaths) @@ -185,7 +184,7 @@ process_deaths_lookup <- function(update = latest_update(), write_file( all_boxi_deaths, fs::path(get_slf_dir(), "Deaths", - file_name = stringr::str_glue("anon-all_slf_deaths_lookup_{update}.parquet") + file_name = stringr::str_glue("anon-combined_slf_deaths_lookup_{update}.parquet") ) ) } diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 34d2ddf9e..7909e2e7f 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -141,7 +141,7 @@ create_episode_file <- function( slf_deaths_lookup ) %>% add_activity_after_death_flag(year, - deaths_data = read_file(get_all_slf_deaths_lookup_path()) %>% + deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi() ) %>% load_ep_file_vars(year) diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R index 390a27a5a..2455be768 100644 --- a/R/get_slf_lookup_paths.R +++ b/R/get_slf_lookup_paths.R @@ -81,16 +81,16 @@ get_slf_deaths_lookup_path <- function(year, ...) { #' @family slf lookup file path #' @seealso [get_file_path()] for the generic function. -get_all_slf_deaths_lookup_path <- function(update = latest_update(), ...) { - # Note this name is very similar to the existing slf_deaths_lookup_path which returnsthe path for +get_combined_slf_deaths_lookup_path <- function(update = latest_update(), ...) { + # Note this name is very similar to the existing slf_deaths_lookup_path which returns the path for # the processed BOXI extract for each financial year. This function will return the combined financial # years lookup i.e. all years put together. - all_slf_deaths_lookup_path <- get_file_path( + combined_slf_deaths_lookup_path <- get_file_path( directory = fs::path(get_slf_dir(), "Deaths"), - file_name = stringr::str_glue("anon-all_slf_deaths_lookup_{update}.parquet"), + file_name = stringr::str_glue("anon-combined_slf_deaths_lookup_{update}.parquet"), ... 
) - return(all_slf_deaths_lookup_path) + return(combined_slf_deaths_lookup_path) } diff --git a/man/add_activity_after_death_flag.Rd b/man/add_activity_after_death_flag.Rd index 36eafe7bd..76a4ca64a 100644 --- a/man/add_activity_after_death_flag.Rd +++ b/man/add_activity_after_death_flag.Rd @@ -7,7 +7,8 @@ add_activity_after_death_flag( data, year, - deaths_data = read_file(get_all_slf_deaths_lookup_path()) \%>\% slfhelper::get_chi() + deaths_data = read_file(get_combined_slf_deaths_lookup_path()) \%>\% + slfhelper::get_chi() ) } \arguments{ diff --git a/man/get_all_slf_deaths_lookup_path.Rd b/man/get_combined_slf_deaths_lookup_path.Rd similarity index 83% rename from man/get_all_slf_deaths_lookup_path.Rd rename to man/get_combined_slf_deaths_lookup_path.Rd index 2f06b64d3..dd03a0541 100644 --- a/man/get_all_slf_deaths_lookup_path.Rd +++ b/man/get_combined_slf_deaths_lookup_path.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_slf_lookup_paths.R -\name{get_all_slf_deaths_lookup_path} -\alias{get_all_slf_deaths_lookup_path} +\name{get_combined_slf_deaths_lookup_path} +\alias{get_combined_slf_deaths_lookup_path} \title{SLF death dates File Path} \usage{ -get_all_slf_deaths_lookup_path(update = latest_update(), ...) +get_combined_slf_deaths_lookup_path(update = latest_update(), ...) } \arguments{ \item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})} diff --git a/man/get_slf_ch_name_lookup_path.Rd b/man/get_slf_ch_name_lookup_path.Rd index 2660bbeab..1b0c3f3bb 100644 --- a/man/get_slf_ch_name_lookup_path.Rd +++ b/man/get_slf_ch_name_lookup_path.Rd @@ -22,7 +22,7 @@ has official Care Home names and addresses provided by the Care Inspectorate. \code{\link[=get_file_path]{get_file_path()}} for the generic function. 
Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, \code{\link{get_slf_gpprac_path}()}, diff --git a/man/get_slf_chi_deaths_path.Rd b/man/get_slf_chi_deaths_path.Rd index 0db72d9d3..8ba115dfe 100644 --- a/man/get_slf_chi_deaths_path.Rd +++ b/man/get_slf_chi_deaths_path.Rd @@ -22,7 +22,7 @@ Get the full path to the CHI deaths file \code{\link[=get_file_path]{get_file_path()}} for the generic function. Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, \code{\link{get_slf_gpprac_path}()}, diff --git a/man/get_slf_deaths_lookup_path.Rd b/man/get_slf_deaths_lookup_path.Rd index 307c38ad3..ae64e2371 100644 --- a/man/get_slf_deaths_lookup_path.Rd +++ b/man/get_slf_deaths_lookup_path.Rd @@ -21,7 +21,7 @@ Get the full path to the SLF deaths lookup file \code{\link[=get_file_path]{get_file_path()}} for the generic function. Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_gpprac_path}()}, diff --git a/man/get_slf_gpprac_path.Rd b/man/get_slf_gpprac_path.Rd index 1fb23116f..1371f758e 100644 --- a/man/get_slf_gpprac_path.Rd +++ b/man/get_slf_gpprac_path.Rd @@ -21,7 +21,7 @@ Get the full path to the SLF GP practice lookup \code{\link[=get_file_path]{get_file_path()}} for the generic function. 
Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, diff --git a/man/get_slf_postcode_path.Rd b/man/get_slf_postcode_path.Rd index f37678695..ee9bc65a9 100644 --- a/man/get_slf_postcode_path.Rd +++ b/man/get_slf_postcode_path.Rd @@ -21,7 +21,7 @@ Get the full path to the SLF Postcode lookup \code{\link[=get_file_path]{get_file_path()}} for the generic function. Other slf lookup file path: -\code{\link{get_all_slf_deaths_lookup_path}()}, +\code{\link{get_combined_slf_deaths_lookup_path}()}, \code{\link{get_slf_ch_name_lookup_path}()}, \code{\link{get_slf_chi_deaths_path}()}, \code{\link{get_slf_deaths_lookup_path}()}, From c3fb9d522000f2517b9aa0a7dacd579b0f4e85f5 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Fri, 5 Jul 2024 10:20:52 +0000 Subject: [PATCH 45/96] Update documentation --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3d731a0af..a25794864 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,5 +73,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.1 - +RoxygenNote: 7.3.2 From f30344cd996a43d13905ef9bc138648d2f8681f8 Mon Sep 17 00:00:00 2001 From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Date: Mon, 8 Jul 2024 11:08:16 +0100 Subject: [PATCH 46/96] Update Run_SLF_Files_targets/run_all_targets.R Co-authored-by: Zihao Li --- Run_SLF_Files_targets/run_all_targets.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Run_SLF_Files_targets/run_all_targets.R b/Run_SLF_Files_targets/run_all_targets.R index fb5b94fab..73399cb75 100644 --- a/Run_SLF_Files_targets/run_all_targets.R +++ b/Run_SLF_Files_targets/run_all_targets.R @@ -3,3 +3,7 @@ library(targets) # use tar_make_future() to run targets for all years # This will run 
everything needed for creating the episode file. tar_make_future() + +# Combine deaths lookup here rather than in targets to make sure that +# it is run after the death file for each year is produced. +combined_deaths_lookup = process_combined_deaths_lookup() From 06813ab42d4ed6e46eb57d98b282a4a8ef1656b0 Mon Sep 17 00:00:00 2001 From: SwiftySalmon Date: Mon, 8 Jul 2024 10:09:39 +0000 Subject: [PATCH 47/96] Style code --- Run_SLF_Files_targets/run_all_targets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Run_SLF_Files_targets/run_all_targets.R b/Run_SLF_Files_targets/run_all_targets.R index 73399cb75..2e93f1cca 100644 --- a/Run_SLF_Files_targets/run_all_targets.R +++ b/Run_SLF_Files_targets/run_all_targets.R @@ -6,4 +6,4 @@ tar_make_future() # Combine deaths lookup here rather than in targets to make sure that # it is run after the death file for each year is produced. -combined_deaths_lookup = process_combined_deaths_lookup() +combined_deaths_lookup <- process_combined_deaths_lookup() From 8b4706d7e62ad8e66c2e2a945593ce90efd786e5 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 8 Jul 2024 11:44:36 +0100 Subject: [PATCH 48/96] remove combined_deaths_lookup from targets --- _targets.R | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/_targets.R b/_targets.R index 80c56701e..0c1b223ff 100644 --- a/_targets.R +++ b/_targets.R @@ -550,11 +550,11 @@ list( homelessness_data = source_homelessness_extract %>% slfhelper::get_chi() ) ) - ), - tar_target( - combined_deaths_lookup, - process_combined_deaths_lookup() - ) + ) #, + # tar_target( + # combined_deaths_lookup, + # process_combined_deaths_lookup() + # ) ) ## End of Targets pipeline ## From 09076f495a5235ed72564ea81a3f1da2b6c3dad5 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Mon, 8 Jul 2024 10:45:59 +0000 Subject: [PATCH 49/96] Style code --- _targets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_targets.R b/_targets.R index 
0c1b223ff..e2dbacbe8 100644 --- a/_targets.R +++ b/_targets.R @@ -550,7 +550,7 @@ list( homelessness_data = source_homelessness_extract %>% slfhelper::get_chi() ) ) - ) #, + ) # , # tar_target( # combined_deaths_lookup, # process_combined_deaths_lookup() From 400841eb509a7190e45c52399d9756d84747db24 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 8 Jul 2024 15:51:55 +0100 Subject: [PATCH 50/96] fix acute_cup and gp_ooh_cup paths --- R/get_boxi_extract_path.R | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index 8ea1c80f2..4ec1de3a8 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -23,6 +23,7 @@ get_boxi_extract_path <- function( "gp_ooh-c", "gp_ooh-d", "gp_ooh-o", + "gp_ooh_cup", "homelessness", "maternity", "mh", @@ -45,13 +46,13 @@ get_boxi_extract_path <- function( "ae" ~ "anon-A&E-episode-level-extract", "ae_cup" ~ "anon-A&E-UCD-CUP-extract", "acute" ~ "anon-Acute-episode-level-extract", - "acute_cup" ~ "Actue-cup-extract", + "acute_cup" ~ "anon-Actue-cup-extract", "cmh" ~ "anon-Community-MH-contact-level-extract", "dn" ~ "anon-District-Nursing-contact-level-extract", "gp_ooh-c" ~ "anon-GP-OoH-consultations-extract", "gp_ooh-d" ~ "anon-GP-OoH-diagnosis-extract", "gp_ooh-o" ~ "anon-GP-OoH-outcomes-extract", - "gp_ooh_cup" ~ "GP-OoH-cup-extract", + "gp_ooh_cup" ~ "anon-GP-OoH-cup-extract", "homelessness" ~ "anon-Homelessness-extract", "maternity" ~ "anon-Maternity-episode-level-extract", "mh" ~ "anon-Mental-Health-episode-level-extract", From 5879fe4d9fa971374c6c732bd5c62bf6cb07917b Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Mon, 8 Jul 2024 14:53:53 +0000 Subject: [PATCH 51/96] Update documentation --- man/get_boxi_extract_path.Rd | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/man/get_boxi_extract_path.Rd b/man/get_boxi_extract_path.Rd index 69bb886c4..5a318834a 100644 --- a/man/get_boxi_extract_path.Rd +++ 
b/man/get_boxi_extract_path.Rd @@ -7,7 +7,8 @@ get_boxi_extract_path( year, type = c("ae", "ae_cup", "acute", "acute_cup", "cmh", "deaths", "dn", "gp_ooh-c", - "gp_ooh-d", "gp_ooh-o", "homelessness", "maternity", "mh", "outpatients") + "gp_ooh-d", "gp_ooh-o", "gp_ooh_cup", "homelessness", "maternity", "mh", + "outpatients") ) } \arguments{ From 2e4b1605a2b86cfbf445930810a8b784a04d156a Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 8 Jul 2024 16:24:25 +0100 Subject: [PATCH 52/96] fix typo --- R/get_boxi_extract_path.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index 4ec1de3a8..04488dacb 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -46,7 +46,7 @@ get_boxi_extract_path <- function( "ae" ~ "anon-A&E-episode-level-extract", "ae_cup" ~ "anon-A&E-UCD-CUP-extract", "acute" ~ "anon-Acute-episode-level-extract", - "acute_cup" ~ "anon-Actue-cup-extract", + "acute_cup" ~ "anon-Acute-cup-extract", "cmh" ~ "anon-Community-MH-contact-level-extract", "dn" ~ "anon-District-Nursing-contact-level-extract", "gp_ooh-c" ~ "anon-GP-OoH-consultations-extract", From b56ef2522f5d94affaf7f1910c73e8488f800d1e Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 9 Jul 2024 14:45:41 +0100 Subject: [PATCH 53/96] adapt acute_cup for anon_chi --- R/process_extract_acute.R | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R index 82926bd92..62969238f 100644 --- a/R/process_extract_acute.R +++ b/R/process_extract_acute.R @@ -67,30 +67,31 @@ process_extract_acute <- function(data, unique_row_num = dplyr::row_number() ) - acute_cup <- read_file( - path = acute_cup_path, + acute_cup = read_file( + # path = get_boxi_extract_path(year, "acute_cup"), + path = cup_file_name, col_type = readr::cols( + "anon_chi" = readr::col_character(), "Acute Admission Date" = readr::col_date(format = "%Y/%m/%d %T"), 
"Acute Discharge Date" = readr::col_date(format = "%Y/%m/%d %T"), "Acute Admission Type Code" = readr::col_character(), "Acute Discharge Type Code" = readr::col_character(), "Case Reference Number [C]" = readr::col_character(), - "UPI Number [C]" = readr::col_character(), "CUP Marker" = readr::col_integer(), "CUP Pathway Name" = readr::col_character() ) + ) %>% dplyr::select( + anon_chi = "anon_chi", + case_reference_number = "Case Reference Number [C]", + record_keydate1 = "Acute Admission Date", + record_keydate2 = "Acute Discharge Date", + tadm = "Acute Admission Type Code", + disch = "Acute Discharge Type Code", + cup_marker = "CUP Marker", + cup_pathway = "CUP Pathway Name" ) %>% - dplyr::select( - chi = "UPI Number [C]", - case_reference_number = "Case Reference Number [C]", - record_keydate1 = "Acute Admission Date", - record_keydate2 = "Acute Discharge Date", - tadm = "Acute Admission Type Code", - disch = "Acute Discharge Type Code", - cup_marker = "CUP Marker", - cup_pathway = "CUP Pathway Name" - ) %>% - dplyr::distinct() + dplyr::distinct() %>% + slfhelper::get_chi() acute_clean <- acute_clean %>% dplyr::left_join(acute_cup, From 8c3873c2f0685a5a80acf276d45902a494974d57 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 9 Jul 2024 13:47:52 +0000 Subject: [PATCH 54/96] Style code --- R/process_extract_acute.R | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R index 62969238f..1c7e03322 100644 --- a/R/process_extract_acute.R +++ b/R/process_extract_acute.R @@ -67,7 +67,7 @@ process_extract_acute <- function(data, unique_row_num = dplyr::row_number() ) - acute_cup = read_file( + acute_cup <- read_file( # path = get_boxi_extract_path(year, "acute_cup"), path = cup_file_name, col_type = readr::cols( @@ -80,16 +80,17 @@ process_extract_acute <- function(data, "CUP Marker" = readr::col_integer(), "CUP Pathway Name" = readr::col_character() ) - ) %>% dplyr::select( - 
anon_chi = "anon_chi", - case_reference_number = "Case Reference Number [C]", - record_keydate1 = "Acute Admission Date", - record_keydate2 = "Acute Discharge Date", - tadm = "Acute Admission Type Code", - disch = "Acute Discharge Type Code", - cup_marker = "CUP Marker", - cup_pathway = "CUP Pathway Name" ) %>% + dplyr::select( + anon_chi = "anon_chi", + case_reference_number = "Case Reference Number [C]", + record_keydate1 = "Acute Admission Date", + record_keydate2 = "Acute Discharge Date", + tadm = "Acute Admission Type Code", + disch = "Acute Discharge Type Code", + cup_marker = "CUP Marker", + cup_pathway = "CUP Pathway Name" + ) %>% dplyr::distinct() %>% slfhelper::get_chi() From 283e00457b7ff1fc636ee0ac2ce2dd6dd6671bee Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 10 Jul 2024 15:25:16 +0100 Subject: [PATCH 55/96] minor changes --- R/add_activity_after_death_flag.R | 2 +- R/process_extract_acute.R | 3 +-- Run_SLF_Files_targets/run_all_targets.R | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index 5f973eb1e..c81e1fc79 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -125,7 +125,7 @@ add_activity_after_death_flag <- function( #' #' # Read data------------------------------------------------ -process_deaths_lookup <- function(update = latest_update(), +process_combined_deaths_lookup <- function(update = latest_update(), write_to_disk = TRUE, ...) 
{ dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths" file_names <- list.files(dir_folder, diff --git a/R/process_extract_acute.R b/R/process_extract_acute.R index 1c7e03322..c46f175c7 100644 --- a/R/process_extract_acute.R +++ b/R/process_extract_acute.R @@ -68,8 +68,7 @@ process_extract_acute <- function(data, ) acute_cup <- read_file( - # path = get_boxi_extract_path(year, "acute_cup"), - path = cup_file_name, + path = acute_cup_path, col_type = readr::cols( "anon_chi" = readr::col_character(), "Acute Admission Date" = readr::col_date(format = "%Y/%m/%d %T"), diff --git a/Run_SLF_Files_targets/run_all_targets.R b/Run_SLF_Files_targets/run_all_targets.R index 2e93f1cca..9ea6e9e6f 100644 --- a/Run_SLF_Files_targets/run_all_targets.R +++ b/Run_SLF_Files_targets/run_all_targets.R @@ -6,4 +6,4 @@ tar_make_future() # Combine deaths lookup here rather than in targets to make sure that # it is run after the death file for each year is produced. -combined_deaths_lookup <- process_combined_deaths_lookup() +createslf::process_combined_deaths_lookup() From 70b18506f1b95333fbfadbc0398d31e033e02727 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 10 Jul 2024 14:27:10 +0000 Subject: [PATCH 56/96] Style code --- R/add_activity_after_death_flag.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index c81e1fc79..39c894681 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -126,7 +126,7 @@ add_activity_after_death_flag <- function( #' # Read data------------------------------------------------ process_combined_deaths_lookup <- function(update = latest_update(), - write_to_disk = TRUE, ...) { + write_to_disk = TRUE, ...) 
{ dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths" file_names <- list.files(dir_folder, pattern = "^anon-slf_deaths_lookup_.*parquet", From a7bac7391b834f74c22787913b0648604cea7623 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 10 Jul 2024 14:27:34 +0000 Subject: [PATCH 57/96] Update documentation --- NAMESPACE | 2 +- ...ths_lookup.Rd => process_combined_deaths_lookup.Rd} | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) rename man/{process_deaths_lookup.Rd => process_combined_deaths_lookup.Rd} (81%) diff --git a/NAMESPACE b/NAMESPACE index c6bba9b38..b4314febd 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -88,11 +88,11 @@ export(midpoint_fy) export(next_fy) export(phs_db_connection) export(previous_update) +export(process_combined_deaths_lookup) export(process_costs_ch_rmd) export(process_costs_dn_rmd) export(process_costs_gp_ooh_rmd) export(process_costs_hc_rmd) -export(process_deaths_lookup) export(process_extract_acute) export(process_extract_ae) export(process_extract_alarms_telecare) diff --git a/man/process_deaths_lookup.Rd b/man/process_combined_deaths_lookup.Rd similarity index 81% rename from man/process_deaths_lookup.Rd rename to man/process_combined_deaths_lookup.Rd index e897e49a2..7d0a75fc7 100644 --- a/man/process_deaths_lookup.Rd +++ b/man/process_combined_deaths_lookup.Rd @@ -1,10 +1,14 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/add_activity_after_death_flag.R -\name{process_deaths_lookup} -\alias{process_deaths_lookup} +\name{process_combined_deaths_lookup} +\alias{process_combined_deaths_lookup} \title{Create and read SLF Deaths lookup from processed BOXI NRS deaths extracts} \usage{ -process_deaths_lookup(update = latest_update(), write_to_disk = TRUE, ...) +process_combined_deaths_lookup( + update = latest_update(), + write_to_disk = TRUE, + ... 
+) } \arguments{ \item{update}{the update month (defaults to use \code{\link[=latest_update]{latest_update()}})} From 9dac223a1d593dc936a9f228933e0d7200c838a7 Mon Sep 17 00:00:00 2001 From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Date: Fri, 19 Jul 2024 07:33:47 +0100 Subject: [PATCH 58/96] Person id sds (#981) * added back in missing person_id for SDS. also added latest_flag back in to client lookup in targets * Update documentation * added back in missing person_id for SDS. also added latest_flag back in to client lookup in targets * Update documentation * change as suggested by Jen * Update documentation --------- Co-authored-by: marjom02 Co-authored-by: SwiftySalmon Co-authored-by: Jennifer Thom Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> --- R/process_extract_alarms_telecare.R | 2 +- R/process_extract_sds.R | 4 +++- R/process_lookup_sc_client.R | 2 +- _targets.R | 2 +- man/process_lookup_sc_client.Rd | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R index d7b063ace..6c481c3a4 100644 --- a/R/process_extract_alarms_telecare.R +++ b/R/process_extract_alarms_telecare.R @@ -41,12 +41,12 @@ process_extract_alarms_telecare <- function( "smrtype", "chi", "dob", + "person_id", "gender", "postcode", "sc_send_lca", "record_keydate1", "record_keydate2", - "person_id", "sc_latest_submission" ) %>% slfhelper::get_anon_chi() diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R index be53f35ff..ce317c8b9 100644 --- a/R/process_extract_sds.R +++ b/R/process_extract_sds.R @@ -41,11 +41,13 @@ process_extract_sds <- function( "smrtype", "chi", "dob", + "person_id", "gender", "postcode", + "sc_send_lca", "record_keydate1", "record_keydate2", - "sc_send_lca" + "sc_latest_submission" ) %>% slfhelper::get_anon_chi() diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index 69818def3..b9fda35b1 100644 --- 
a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -18,7 +18,7 @@ process_lookup_sc_client <- year, sc_demographics = read_file(get_sc_demog_lookup_path()) %>% slfhelper::get_chi() %>% - dplyr::select(c("sending_location", "social_care_id", "chi")), + dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")), write_to_disk = TRUE) { client_clean <- data %>% # Replace 'unknown' responses with NA diff --git a/_targets.R b/_targets.R index 7fe0c7a60..9ea0d9a38 100644 --- a/_targets.R +++ b/_targets.R @@ -476,7 +476,7 @@ list( year = year, sc_demographics = sc_demog_lookup %>% slfhelper::get_chi() %>% - dplyr::select(c("sending_location", "social_care_id", "chi")), + dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")), write_to_disk = write_to_disk ) ), diff --git a/man/process_lookup_sc_client.Rd b/man/process_lookup_sc_client.Rd index 74e402846..5ea50cea5 100644 --- a/man/process_lookup_sc_client.Rd +++ b/man/process_lookup_sc_client.Rd @@ -8,7 +8,7 @@ process_lookup_sc_client( data, year, sc_demographics = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi() - \%>\% dplyr::select(c("sending_location", "social_care_id", "chi")), + \%>\% dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")), write_to_disk = TRUE ) } From 419720bf3b6cc4bb31178b350ab61cb7d89bab4d Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Fri, 19 Jul 2024 09:50:10 +0100 Subject: [PATCH 59/96] Update NEWS.md --- NEWS.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index d5aea7364..88dcca1b3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,14 @@ -# June 2024 Update - Unreleased +# September 2024 Update - Unreleased +* New 24/25 files created +* New NSU cohort for 23/24 available +* New SPARRA scores calculated from April 24/25 +* Death dates attached to activity after death flag +* Care home methodology updated +* 
New cup marker for Acute and GP OOH +* Bug fix: + * person id for SDS and client + +# June 2024 Update - released 06-Jun-24 * Update of 2017/18 onwards to include bug fixes within the files. * Removal of extra variable caused by the LTCs not matching properly. * New NRS mid-2022 population estimates. From 7509d73483666481155d07305f6badea53bdfb9a Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 23 Jul 2024 12:37:13 +0100 Subject: [PATCH 60/96] unify file names for cup files --- R/get_boxi_extract_path.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index 04488dacb..a1c59b4f2 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -46,13 +46,13 @@ get_boxi_extract_path <- function( "ae" ~ "anon-A&E-episode-level-extract", "ae_cup" ~ "anon-A&E-UCD-CUP-extract", "acute" ~ "anon-Acute-episode-level-extract", - "acute_cup" ~ "anon-Acute-cup-extract", + "acute_cup" ~ "anon-Acute-CUP-extract", "cmh" ~ "anon-Community-MH-contact-level-extract", "dn" ~ "anon-District-Nursing-contact-level-extract", "gp_ooh-c" ~ "anon-GP-OoH-consultations-extract", "gp_ooh-d" ~ "anon-GP-OoH-diagnosis-extract", "gp_ooh-o" ~ "anon-GP-OoH-outcomes-extract", - "gp_ooh_cup" ~ "anon-GP-OoH-cup-extract", + "gp_ooh_cup" ~ "anon-GP-OoH-CUP-extract", "homelessness" ~ "anon-Homelessness-extract", "maternity" ~ "anon-Maternity-episode-level-extract", "mh" ~ "anon-Mental-Health-episode-level-extract", From 3e06476339aa7e3d6ebd3cba58cc66cd6b421098 Mon Sep 17 00:00:00 2001 From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Date: Wed, 24 Jul 2024 13:57:54 +0100 Subject: [PATCH 61/96] Add client flags (#979) * New methodology for social care client data. - removed code that wasn't needed. 
- updated housing codes - latest social care ID - changed "mental health problems" to "mental health disorders" in line with PHS style guide * Update documentation * Style code * add person ID to client so it carries through to match on to all cases * New methodology for social care client data. - removed code that wasn't needed. - updated housing codes - latest social care ID - changed "mental health problems" to "mental health disorders" in line with PHS style guide * Update documentation * Style code * add person ID to client so it carries through to match on to all cases --------- Co-authored-by: marjom02 Co-authored-by: SwiftySalmon Co-authored-by: Jennifer Thom Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> --- R/process_lookup_sc_client.R | 131 +++++++++++++++++++++++------------ R/read_lookup_sc_client.R | 4 +- 2 files changed, 88 insertions(+), 47 deletions(-) diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index b9fda35b1..f1e03ee95 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -20,28 +20,29 @@ process_lookup_sc_client <- slfhelper::get_chi() %>% dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")), write_to_disk = TRUE) { - client_clean <- data %>% - # Replace 'unknown' responses with NA - dplyr::mutate( - dplyr::across( - c( - "support_from_unpaid_carer", - "social_worker", - "meals", - "living_alone", - "day_care" - ), - dplyr::na_if, - 9L - ), - type_of_housing = dplyr::na_if(.data$type_of_housing, 6L) + # Match to demographics lookup to get CHI + sc_client_demographics <- data %>% + dplyr::right_join( + sc_demographics, + by = c("sending_location", "social_care_id") ) %>% - dplyr::group_by(.data$sending_location, .data$social_care_id) %>% + # need period for the replace sc id with latest function + dplyr::mutate(period = ifelse(!(is.na(.data$financial_quarter)), paste0(.data$financial_year, "Q", financial_quarter), + financial_year + )) %>% + 
replace_sc_id_with_latest() %>% + # remove cases with no data in client + dplyr::filter(!(is.na(.data$financial_year))) %>% + dplyr::select(-.data$latest_sc_id, -.data$latest_flag, -.data$period) + + + client_clean <- sc_client_demographics %>% + dplyr::group_by(.data$sending_location, .data$social_care_id, .data$chi) %>% # summarise to take last submission dplyr::summarise(dplyr::across( c( "dementia", - "mental_health_problems", + "mental_health_disorders", "learning_disability", "physical_and_sensory_disability", "drugs", @@ -70,19 +71,9 @@ process_lookup_sc_client <- "social_worker", "meals", "living_alone", - "day_care" - ), - tidyr::replace_na, - 9L - ), - type_of_housing = tidyr::replace_na(.data$type_of_housing, 6L) - ) %>% - # factor labels - dplyr::mutate( - dplyr::across( - c( + "day_care", "dementia", - "mental_health_problems", + "mental_health_disorders", "learning_disability", "physical_and_sensory_disability", "drugs", @@ -92,37 +83,64 @@ process_lookup_sc_client <- "elderly_frail", "neurological_condition", "autism", - "other_vulnerable_groups" + "other_vulnerable_groups", + "type_of_housing" ), - factor, - levels = c(0L, 1L), - labels = c("No", "Yes") - ), + tidyr::replace_na, 9L + ) + ) %>% + # factor labels + dplyr::mutate( dplyr::across( c( "living_alone", "support_from_unpaid_carer", "social_worker", "meals", - "day_care" + "day_care", + "dementia", + "mental_health_disorders", + "learning_disability", + "physical_and_sensory_disability", + "drugs", + "alcohol", + "palliative_care", + "carer", + "elderly_frail", + "neurological_condition", + "autism", + "other_vulnerable_groups" ), factor, levels = c(0L, 1L, 9L), labels = c("No", "Yes", "Not Known") ), type_of_housing = factor(.data$type_of_housing, - levels = 1L:6L + levels = 1L:9L, + labels = c( + "Mainstream", # 1 + "Supported", # 2 + "Long Stay Care Home", # 3 + "Hospital or other medical establishment", # 4 + "Homeless", # 5 + "Penal Institutions", # 6 + "Not Known", # 7 + "Other", # 8 + 
"Not Known" # 9 + ) ) ) %>% # rename variables dplyr::rename_with( - .cols = -c("sending_location", "social_care_id"), + .cols = -c("sending_location", "social_care_id", "chi"), .fn = ~ paste0("sc_", .x) ) + sc_client_lookup <- client_clean %>% # reorder dplyr::select( + "chi", "sending_location", "social_care_id", "sc_living_alone", @@ -130,15 +148,23 @@ process_lookup_sc_client <- "sc_social_worker", "sc_type_of_housing", "sc_meals", - "sc_day_care" - ) + "sc_day_care", + "sc_dementia", + "sc_learning_disability", + "sc_mental_health_disorders", + "sc_physical_and_sensory_disability", + "sc_drugs", + "sc_alcohol", + "sc_palliative_care", + "sc_carer", + "sc_elderly_frail", + "sc_neurological_condition", + "sc_autism", + "sc_other_vulnerable_groups" + ) %>% + create_person_id() + - # Match to demographics lookup to get CHI - sc_client_lookup <- sc_client_lookup %>% - dplyr::left_join( - sc_demographics, - by = c("sending_location", "social_care_id") - ) sc_client_lookup <- dplyr::mutate(sc_client_lookup, count_not_known = rowSums( @@ -147,8 +173,21 @@ process_lookup_sc_client <- "sc_living_alone", "sc_support_from_unpaid_carer", "sc_social_worker", + "sc_type_of_housing", "sc_meals", - "sc_day_care" + "sc_day_care", + "sc_dementia", + "sc_learning_disability", + "sc_mental_health_disorders", + "sc_physical_and_sensory_disability", + "sc_drugs", + "sc_alcohol", + "sc_palliative_care", + "sc_carer", + "sc_elderly_frail", + "sc_neurological_condition", + "sc_autism", + "sc_other_vulnerable_groups" ) )) == "Not Known", na.rm = TRUE diff --git a/R/read_lookup_sc_client.R b/R/read_lookup_sc_client.R index de4b6101f..6128a1be5 100644 --- a/R/read_lookup_sc_client.R +++ b/R/read_lookup_sc_client.R @@ -78,7 +78,9 @@ read_lookup_sc_client <- function(fyyear, .data$social_care_id, .data$financial_year, .data$financial_quarter - ) + ) %>% + dplyr::rename("mental_health_disorders" = "mental_health_problems") + if (!fs::file_exists(get_sandpit_extract_path(type = "client", year 
= fyyear))) { client_data %>% From bc0662227ee3a39d6e87c5a3c2b763a6a37230e7 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 26 Jul 2024 16:46:27 +0100 Subject: [PATCH 62/96] Update lookup to use anon-chi --- R/process_lookup_deaths.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index 9a5c21974..a4d8fd7ea 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -17,11 +17,12 @@ process_slf_deaths_lookup <- function( year, nrs_deaths_data = read_file( get_source_extract_path(year, "deaths"), - col_select = c("chi", "record_keydate1") + col_select = c("anon_chi", "record_keydate1") ), chi_deaths_data = read_file(get_slf_chi_deaths_path()), write_to_disk = TRUE) { slf_deaths_lookup <- nrs_deaths_data %>% + slfhelper::get_chi() %>% # Only modification over 'raw' NRS is to keep the earliest death date dplyr::select("chi", "record_keydate1") %>% dplyr::arrange(.data$record_keydate1) %>% From 60d6eb15b80f5d6da0f1c02f43367518d4816fad Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 26 Jul 2024 16:47:46 +0100 Subject: [PATCH 63/96] Remove redundant code This uses the NRS Weekly dates and if this is blank use the chi death date. This methodology is wrong. 
We want to use the monthly nrs boxi date by default and chi date if there is an issue --- R/process_it_chi_deaths.R | 4 ---- 1 file changed, 4 deletions(-) diff --git a/R/process_it_chi_deaths.R b/R/process_it_chi_deaths.R index 85354880b..192d72043 100644 --- a/R/process_it_chi_deaths.R +++ b/R/process_it_chi_deaths.R @@ -17,10 +17,6 @@ process_it_chi_deaths <- function(data, write_to_disk = TRUE) { dplyr::desc(.data$death_date_chi) ) %>% dplyr::distinct(.data$chi, .keep_all = TRUE) %>% - # Use the NRS death_date unless it isn't there - dplyr::mutate( - death_date = dplyr::coalesce(.data$death_date_nrs, .data$death_date_chi) - ) %>% slfhelper::get_anon_chi() if (write_to_disk) { From 4dd9b51daa842038d0ce675a22750d240f0ec00f Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Fri, 26 Jul 2024 15:50:38 +0000 Subject: [PATCH 64/96] Update documentation --- man/process_slf_deaths_lookup.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/process_slf_deaths_lookup.Rd b/man/process_slf_deaths_lookup.Rd index 8ad103a2a..bf69d3952 100644 --- a/man/process_slf_deaths_lookup.Rd +++ b/man/process_slf_deaths_lookup.Rd @@ -7,7 +7,7 @@ process_slf_deaths_lookup( year, nrs_deaths_data = read_file(get_source_extract_path(year, "deaths"), col_select = - c("chi", "record_keydate1")), + c("anon_chi", "record_keydate1")), chi_deaths_data = read_file(get_slf_chi_deaths_path()), write_to_disk = TRUE ) From 3e07838a181e6a6ab50d9d3c5888cc1256c780ad Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 29 Jul 2024 12:12:15 +0100 Subject: [PATCH 65/96] remove weekly nrs date variable --- R/process_it_chi_deaths.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/process_it_chi_deaths.R b/R/process_it_chi_deaths.R index 192d72043..cfca32e0f 100644 --- a/R/process_it_chi_deaths.R +++ b/R/process_it_chi_deaths.R @@ -17,6 +17,9 @@ process_it_chi_deaths <- function(data, write_to_disk = TRUE) { dplyr::desc(.data$death_date_chi) ) %>% dplyr::distinct(.data$chi, .keep_all = 
TRUE) %>% + # remove death_date_nrs as this is the nrs weekly unvalidated data and we should not use this. + # the boxi nrs death date is more reliable as this is provided monthly and is validated. + dplyr::select(.data$chi, .data$death_date_chi) %>% slfhelper::get_anon_chi() if (write_to_disk) { From 3e9c36268d0ea97a84b8ada94addae312ad96a06 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 29 Jul 2024 12:13:15 +0100 Subject: [PATCH 66/96] Use boxi nrs date or chi death date --- R/process_lookup_deaths.R | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index a4d8fd7ea..c33acf7d1 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -21,17 +21,27 @@ process_slf_deaths_lookup <- function( ), chi_deaths_data = read_file(get_slf_chi_deaths_path()), write_to_disk = TRUE) { - slf_deaths_lookup <- nrs_deaths_data %>% + boxi_nrs_data <- nrs_deaths_data %>% slfhelper::get_chi() %>% # Only modification over 'raw' NRS is to keep the earliest death date dplyr::select("chi", "record_keydate1") %>% dplyr::arrange(.data$record_keydate1) %>% - dplyr::distinct(.data$chi, .keep_all = TRUE) %>% + dplyr::distinct(.data$chi, .keep_all = TRUE) + + # create slf deaths lookup + slf_deaths_lookup <- chi_deaths %>% + # join boxi nrs data to chi deaths + dplyr::right_join(boxi_nrs_data, by = "chi") %>% + # If the BOXI NRS date does not match the chi death date, use the chi death date + # should now have one row per chi with deaths within the FY dplyr::mutate( - death_date = .data$record_keydate1, + death_date = dplyr::if_else(.data$record_keydate1 != .data$death_date_chi, + .data$death_date_chi, .data$record_keydate1 + ), deceased = TRUE, .keep = "unused" ) %>% + # save anon chi on disk slfhelper::get_anon_chi() if (write_to_disk) { From 76afd9736578fca205dd7a117070a37930bae538 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 29 Jul 2024 14:01:19 +0100 Subject: [PATCH 
67/96] Use `get_combined_slf_deaths_path` --- R/add_activity_after_death_flag.R | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index 39c894681..fdede9001 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -183,9 +183,7 @@ process_combined_deaths_lookup <- function(update = latest_update(), if (write_to_disk) { write_file( all_boxi_deaths, - fs::path(get_slf_dir(), "Deaths", - file_name = stringr::str_glue("anon-combined_slf_deaths_lookup_{update}.parquet") - ) + get_combined_slf_deaths_lookup_path() ) } From 7491dc87e1d8d1a5fb34b5839333d2137841a092 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 29 Jul 2024 15:22:42 +0100 Subject: [PATCH 68/96] add catch for NAs --- R/process_lookup_deaths.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index c33acf7d1..9f070ca9b 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -38,6 +38,7 @@ process_slf_deaths_lookup <- function( death_date = dplyr::if_else(.data$record_keydate1 != .data$death_date_chi, .data$death_date_chi, .data$record_keydate1 ), + death_date = dplyr::if_else(is.na(.data$death_date_chi), .data$record_keydate1, .data$death_date), deceased = TRUE, .keep = "unused" ) %>% From 519b0c3cda4439486fab05fc3b659a66cc8dc1c4 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 29 Jul 2024 15:31:31 +0100 Subject: [PATCH 69/96] add notes --- R/process_lookup_deaths.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index 9f070ca9b..d50d64ca9 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -38,6 +38,7 @@ process_slf_deaths_lookup <- function( death_date = dplyr::if_else(.data$record_keydate1 != .data$death_date_chi, .data$death_date_chi, .data$record_keydate1 ), + # check in case boxi and chi dates do not match due to a NA 
value death_date = dplyr::if_else(is.na(.data$death_date_chi), .data$record_keydate1, .data$death_date), deceased = TRUE, .keep = "unused" From f7536e8d731e43007a8d9ad5bd2c9dac33c6870b Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 30 Jul 2024 09:29:04 +0100 Subject: [PATCH 70/96] Fix typo --- R/process_lookup_deaths.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index d50d64ca9..53dd6c819 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -29,7 +29,7 @@ process_slf_deaths_lookup <- function( dplyr::distinct(.data$chi, .keep_all = TRUE) # create slf deaths lookup - slf_deaths_lookup <- chi_deaths %>% + slf_deaths_lookup <- chi_deaths_data %>% # join boxi nrs data to chi deaths dplyr::right_join(boxi_nrs_data, by = "chi") %>% # If the BOXI NRS date does not match the chi death date, use the chi death date From 1347c79dd30e11291b8a060c9953491efa7f437c Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 30 Jul 2024 13:40:49 +0100 Subject: [PATCH 71/96] remove redundant code --- R/process_lookup_deaths.R | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index 53dd6c819..b478a55e7 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -30,16 +30,19 @@ process_slf_deaths_lookup <- function( # create slf deaths lookup slf_deaths_lookup <- chi_deaths_data %>% + slfhelper::get_chi() %>% + dplyr::mutate(fy = phsmethods::extract_fin_year(death_date_chi), + fy = as.character(paste0(substr(fy, 3, 4), substr(fy,6,7))) + ) %>% + # Filter the chi death dates to the FY as the lookup is by FY + dplyr::filter(fy == year) %>% # join boxi nrs data to chi deaths - dplyr::right_join(boxi_nrs_data, by = "chi") %>% - # If the BOXI NRS date does not match the chi death date, use the chi death date - # should now have one row per chi with deaths within the FY + 
dplyr::full_join(boxi_nrs_data, by = "chi") %>% + # use the BOXI NRS death date by default, but if it's missing, use the chi death date. dplyr::mutate( - death_date = dplyr::if_else(.data$record_keydate1 != .data$death_date_chi, + death_date = dplyr::if_else(is.na(.data$record_keydate1), .data$death_date_chi, .data$record_keydate1 ), - # check in case boxi and chi dates do not match due to a NA value - death_date = dplyr::if_else(is.na(.data$death_date_chi), .data$record_keydate1, .data$death_date), deceased = TRUE, .keep = "unused" ) %>% From 214bc3b6da95bec27ade6a4125efe2eff9760f5f Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Tue, 30 Jul 2024 12:42:32 +0000 Subject: [PATCH 72/96] Style code --- R/process_lookup_deaths.R | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index b478a55e7..40f316a4a 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -31,9 +31,10 @@ process_slf_deaths_lookup <- function( # create slf deaths lookup slf_deaths_lookup <- chi_deaths_data %>% slfhelper::get_chi() %>% - dplyr::mutate(fy = phsmethods::extract_fin_year(death_date_chi), - fy = as.character(paste0(substr(fy, 3, 4), substr(fy,6,7))) - ) %>% + dplyr::mutate( + fy = phsmethods::extract_fin_year(death_date_chi), + fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7))) + ) %>% # Filter the chi death dates to the FY as the lookup is by FY dplyr::filter(fy == year) %>% # join boxi nrs data to chi deaths From c493a8039f8d4b533e72fd021b0133f14dea0f0e Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Fri, 2 Aug 2024 18:01:34 +0100 Subject: [PATCH 73/96] add a function of combine nrs and it_chi death --- R/00-update_refs.R | 17 +++++++ R/add_deceased_flag.R | 41 +++++++++++++++++ R/get_slf_lookup_paths.R | 8 +++- R/process_lookup_deaths.R | 32 ++----------- R/process_refined_death.R | 55 ++++++++++++++++++++++ R/process_sc_all_care_home.R | 8 ++-- R/process_slf_deaths_lookup.R | 61 
+++++++++++++++++++++++++ Run_SLF_Files_targets/run_all_targets.R | 4 -- _targets.R | 53 +++++++++++++-------- 9 files changed, 224 insertions(+), 55 deletions(-) create mode 100644 R/add_deceased_flag.R create mode 100644 R/process_refined_death.R create mode 100644 R/process_slf_deaths_lookup.R diff --git a/R/00-update_refs.R b/R/00-update_refs.R index 6106f17cf..c22585aaf 100644 --- a/R/00-update_refs.R +++ b/R/00-update_refs.R @@ -76,3 +76,20 @@ get_dd_period <- function() { latest_cost_year <- function() { "2223" } + +#' The year list for slf to update +#' +#' @description Get the vector of years to update slf +#' +#' @return The vector of financial years +#' +#' @export +#' +#' @family initialisation +years_to_run <- function() { + fy_start_2digit <- 17 + fy_end_2digit <- 23 + years_to_run = paste0(fy_start_2digit:fy_end_2digit, + (fy_start_2digit + 1):(fy_end_2digit + 1)) + return(years_to_run) +} diff --git a/R/add_deceased_flag.R b/R/add_deceased_flag.R new file mode 100644 index 000000000..25010cba6 --- /dev/null +++ b/R/add_deceased_flag.R @@ -0,0 +1,41 @@ +#' Create the SLF Deaths lookup +#' +#' @description Currently this just uses the NRS death dates 'as is', with no +#' corrections or modifications, it is expected that this will be expanded to +#' use the CHI deaths extract from IT as well as taking into account data in +#' the episode file to assess the validity of a death date. +#' +#' @param year The year to process, in FY format. +#' @param nrs_deaths_data NRS deaths data. +#' @param chi_deaths_data IT CHI deaths data. +#' @param write_to_disk (optional) Should the data be written to disk default is +#' `TRUE` i.e. write the data to disk. 
+#' +#' @return a [tibble][tibble::tibble-package] containing the episode file +#' @export +add_deceased_flag <- function( + year, + refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(), + write_to_disk = TRUE) { + + # create slf deaths lookup + + dplyr::mutate( + death_date = dplyr::if_else(is.na(.data$record_keydate1), + .data$death_date_chi, .data$record_keydate1 + ), + deceased = TRUE, + .keep = "unused" + ) %>% + # save anon chi on disk + slfhelper::get_anon_chi() + + if (write_to_disk) { + write_file( + slf_deaths_lookup, + get_slf_deaths_lookup_path(year, check_mode = "write") + ) + } + + return(slf_deaths_lookup) +} diff --git a/R/get_slf_lookup_paths.R b/R/get_slf_lookup_paths.R index 2455be768..e06627e54 100644 --- a/R/get_slf_lookup_paths.R +++ b/R/get_slf_lookup_paths.R @@ -73,6 +73,10 @@ get_slf_deaths_lookup_path <- function(year, ...) { #' SLF death dates File Path #' #' @description Get the full path to the BOXI NRS Deaths lookup file for all financial years +#' Note this name is very similar to the existing slf_deaths_lookup_path +#' which returns the path for the refined_death with deceased flag for each financial year. +#' This function will return the combined financial years lookup +#' i.e. all years put together. #' #' @param ... additional arguments passed to [get_file_path()] #' @param update the update month (defaults to use [latest_update()]) @@ -80,10 +84,10 @@ get_slf_deaths_lookup_path <- function(year, ...) { #' @export #' @family slf lookup file path #' @seealso [get_file_path()] for the generic function. - get_combined_slf_deaths_lookup_path <- function(update = latest_update(), ...) { # Note this name is very similar to the existing slf_deaths_lookup_path which returns the path for - # the processed BOXI extract for each financial year. This function will return the combined financial + # the refined_death with deceased flag for each financial year. 
+ # This function will return the combined financial # years lookup i.e. all years put together. combined_slf_deaths_lookup_path <- get_file_path( directory = fs::path(get_slf_dir(), "Deaths"), diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index 40f316a4a..ca5c6fc67 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -1,9 +1,7 @@ #' Create the SLF Deaths lookup #' -#' @description Currently this just uses the NRS death dates 'as is', with no -#' corrections or modifications, it is expected that this will be expanded to -#' use the CHI deaths extract from IT as well as taking into account data in -#' the episode file to assess the validity of a death date. +#' @description Use all-year refined death data to produce year-specific +#' slf_deaths_lookup with deceased flag added. #' #' @param year The year to process, in FY format. #' @param nrs_deaths_data NRS deaths data. @@ -15,37 +13,17 @@ #' @export process_slf_deaths_lookup <- function( year, - nrs_deaths_data = read_file( - get_source_extract_path(year, "deaths"), - col_select = c("anon_chi", "record_keydate1") - ), - chi_deaths_data = read_file(get_slf_chi_deaths_path()), + refined_death = read_file(get_combined_slf_deaths_lookup_path()), write_to_disk = TRUE) { - boxi_nrs_data <- nrs_deaths_data %>% - slfhelper::get_chi() %>% - # Only modification over 'raw' NRS is to keep the earliest death date - dplyr::select("chi", "record_keydate1") %>% - dplyr::arrange(.data$record_keydate1) %>% - dplyr::distinct(.data$chi, .keep_all = TRUE) # create slf deaths lookup - slf_deaths_lookup <- chi_deaths_data %>% + slf_deaths_lookup <- refined_death %>% slfhelper::get_chi() %>% - dplyr::mutate( - fy = phsmethods::extract_fin_year(death_date_chi), - fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7))) - ) %>% # Filter the chi death dates to the FY as the lookup is by FY dplyr::filter(fy == year) %>% - # join boxi nrs data to chi deaths - dplyr::full_join(boxi_nrs_data, by = 
"chi") %>% # use the BOXI NRS death date by default, but if it's missing, use the chi death date. dplyr::mutate( - death_date = dplyr::if_else(is.na(.data$record_keydate1), - .data$death_date_chi, .data$record_keydate1 - ), - deceased = TRUE, - .keep = "unused" + deceased = TRUE ) %>% # save anon chi on disk slfhelper::get_anon_chi() diff --git a/R/process_refined_death.R b/R/process_refined_death.R new file mode 100644 index 000000000..d7067e91e --- /dev/null +++ b/R/process_refined_death.R @@ -0,0 +1,55 @@ +#' Process the refined death data +#' +#' @description This will process +#' year-specific BOXI NRS death file (written to disk), and +#' combine them together to get all years NRS file (Not written to disk). +#' Then join all NRS deaths with IT CHI death data +#' to get an all-year refined death file (written to disk). +#' +#' @param it_chi_deaths it chi death data +#' @param write_to_disk write the result to disk or not. +#' +#' @return refined_death The processed lookup of deaths combining NRS and IT_CHI. +#' @export +#' @family process extracts +process_refined_death <- function( + it_chi_deaths = read_file(get_slf_chi_deaths_path()), + write_to_disk = TRUE) { + years_list = years_to_run() + + nrs_all_years <- lapply(years_list, (\(year) { + read_extract_nrs_deaths(year, + get_boxi_extract_path(year, type = "deaths")) %>% + process_extract_nrs_deaths(year, + write_to_disk = write_to_disk) + })) %>% + data.table::rbindlist() + + it_chi_deaths <- it_chi_deaths %>% + dplyr::select(c("anon_chi", + "death_date_chi")) %>% + dplyr::arrange(.data$anon_chi, .keep_all = TRUE) + + refined_death <- nrs_all_years %>% + dplyr::arrange(.data$anon_chi, .keep_all = TRUE) %>% + dplyr::full_join(it_chi_deaths, by = "anon_chi") %>% + # use the BOXI NRS death date by default, but if it's missing, use the chi death date. 
+ dplyr::mutate(death_date = dplyr::if_else( + is.na(.data$record_keydate1), + .data$death_date_chi, + .data$record_keydate1 + )) %>% + dplyr::select(anon_chi, death_date) %>% + # add fy when death happened + dplyr::mutate( + fy = phsmethods::extract_fin_year(death_date), + fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7))) + ) + + if (write_to_disk) { + write_file(refined_death, + get_combined_slf_deaths_lookup_path()) + } + + return(refined_death) +} diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index b7c29fbc7..7b87d68f0 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -7,8 +7,8 @@ #' @param data The extract to process #' @param sc_demog_lookup The Social Care Demographics lookup produced by #' [process_lookup_sc_demographics()]. -#' @param it_chi_deaths_data The processed lookup of deaths from IT produced -#' with [process_it_chi_deaths()]. +#' @param refined_death The processed lookup of deaths from IT produced +#' with [process_refined_death()]. #' @param ch_name_lookup_path Path to the Care Home name Lookup Excel workbook. #' @param spd_path (Optional) Path the Scottish Postcode Directory, default is #' to use [get_spd_path()]. 
@@ -23,7 +23,7 @@ process_sc_all_care_home <- function( data, sc_demog_lookup = read_file(get_sc_demog_lookup_path()) %>% slfhelper::get_chi(), - it_chi_deaths_data = read_file(get_slf_chi_deaths_path()), + refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(), ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()), spd_path = read_file(get_spd_path()), write_to_disk = TRUE) { @@ -207,7 +207,7 @@ process_sc_all_care_home <- function( # Compare to Deaths Data # match ch_episode data with deaths data matched_deaths_data <- ch_episode %>% - dplyr::left_join(it_chi_deaths_data, + dplyr::left_join(refined_death, by = "chi" ) %>% # compare discharge date with NRS and CHI death date diff --git a/R/process_slf_deaths_lookup.R b/R/process_slf_deaths_lookup.R new file mode 100644 index 000000000..40f316a4a --- /dev/null +++ b/R/process_slf_deaths_lookup.R @@ -0,0 +1,61 @@ +#' Create the SLF Deaths lookup +#' +#' @description Currently this just uses the NRS death dates 'as is', with no +#' corrections or modifications, it is expected that this will be expanded to +#' use the CHI deaths extract from IT as well as taking into account data in +#' the episode file to assess the validity of a death date. +#' +#' @param year The year to process, in FY format. +#' @param nrs_deaths_data NRS deaths data. +#' @param chi_deaths_data IT CHI deaths data. +#' @param write_to_disk (optional) Should the data be written to disk default is +#' `TRUE` i.e. write the data to disk. 
+#' +#' @return a [tibble][tibble::tibble-package] containing the episode file +#' @export +process_slf_deaths_lookup <- function( + year, + nrs_deaths_data = read_file( + get_source_extract_path(year, "deaths"), + col_select = c("anon_chi", "record_keydate1") + ), + chi_deaths_data = read_file(get_slf_chi_deaths_path()), + write_to_disk = TRUE) { + boxi_nrs_data <- nrs_deaths_data %>% + slfhelper::get_chi() %>% + # Only modification over 'raw' NRS is to keep the earliest death date + dplyr::select("chi", "record_keydate1") %>% + dplyr::arrange(.data$record_keydate1) %>% + dplyr::distinct(.data$chi, .keep_all = TRUE) + + # create slf deaths lookup + slf_deaths_lookup <- chi_deaths_data %>% + slfhelper::get_chi() %>% + dplyr::mutate( + fy = phsmethods::extract_fin_year(death_date_chi), + fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7))) + ) %>% + # Filter the chi death dates to the FY as the lookup is by FY + dplyr::filter(fy == year) %>% + # join boxi nrs data to chi deaths + dplyr::full_join(boxi_nrs_data, by = "chi") %>% + # use the BOXI NRS death date by default, but if it's missing, use the chi death date. + dplyr::mutate( + death_date = dplyr::if_else(is.na(.data$record_keydate1), + .data$death_date_chi, .data$record_keydate1 + ), + deceased = TRUE, + .keep = "unused" + ) %>% + # save anon chi on disk + slfhelper::get_anon_chi() + + if (write_to_disk) { + write_file( + slf_deaths_lookup, + get_slf_deaths_lookup_path(year, check_mode = "write") + ) + } + + return(slf_deaths_lookup) +} diff --git a/Run_SLF_Files_targets/run_all_targets.R b/Run_SLF_Files_targets/run_all_targets.R index 9ea6e9e6f..fb5b94fab 100644 --- a/Run_SLF_Files_targets/run_all_targets.R +++ b/Run_SLF_Files_targets/run_all_targets.R @@ -3,7 +3,3 @@ library(targets) # use tar_make_future() to run targets for all years # This will run everything needed for creating the episode file. 
tar_make_future() - -# Combine deaths lookup here rather than in targets to make sure that -# it is run after the death file for each year is produced. -createslf::process_combined_deaths_lookup() diff --git a/_targets.R b/_targets.R index 9ea0d9a38..66df85f63 100644 --- a/_targets.R +++ b/_targets.R @@ -172,7 +172,7 @@ list( process_sc_all_care_home( all_care_home_extract, sc_demog_lookup = sc_demog_lookup %>% slfhelper::get_chi(), - it_chi_deaths_data = it_chi_deaths_data %>% slfhelper::get_chi(), + refined_death = refined_death %>% slfhelper::get_chi(), ch_name_lookup_path = slf_ch_name_lookup_path, spd_path = spd_path, write_to_disk = write_to_disk @@ -204,6 +204,13 @@ list( tests_sc_all_sds, process_tests_sc_all_sds_episodes(all_sds) ), + tar_target( + refined_death, + process_refined_death(it_chi_deaths = it_chi_deaths_data, + write_to_disk = write_to_disk) + ), + + # Phase II tar_map( list(year = years_to_run), tar_rds( @@ -251,11 +258,11 @@ list( get_boxi_extract_path(year, type = "mh"), read_extract_mental_health(year, !!.x) ), - tar_file_read( - nrs_deaths_data, - get_boxi_extract_path(year, type = "deaths"), - read_extract_nrs_deaths(year, !!.x) - ), + # tar_file_read( + # nrs_deaths_data, + # get_boxi_extract_path(year, type = "deaths"), + # read_extract_nrs_deaths(year, !!.x) + # ), tar_file_read( outpatients_data, get_boxi_extract_path(year, type = "outpatient"), @@ -403,11 +410,13 @@ list( year ) ), - tar_target(source_mental_health_extract, process_extract_mental_health( - mental_health_data, - year, - write_to_disk = write_to_disk - )), + tar_target( + source_mental_health_extract, + process_extract_mental_health( + mental_health_data, + year, + write_to_disk = write_to_disk) + ), tar_target( tests_source_mental_health_extract, process_tests_mental_health( @@ -415,11 +424,20 @@ list( year ) ), - tar_target(source_nrs_deaths_extract, process_extract_nrs_deaths( - nrs_deaths_data, - year, - write_to_disk = write_to_disk - )), + # 
tar_target(source_nrs_deaths_extract, process_extract_nrs_deaths( + # nrs_deaths_data, + # year, + # write_to_disk = write_to_disk + # )), + tar_target( + source_nrs_deaths_extract, + # use this anomymous function with redundant but necessary refined_death + # to make sure reading year-specific nrs deaths extracts after it is produced + (\(year, refined_death) { + read_file(get_source_extract_path(year, "deaths")) %>% + as.data.frame() + })(year, refined_death) + ), tar_target( tests_source_nrs_deaths_extract, process_tests_nrs_deaths( @@ -549,8 +567,7 @@ list( slf_deaths_lookup, process_slf_deaths_lookup( year = year, - nrs_deaths_data = source_nrs_deaths_extract %>% slfhelper::get_chi(), - chi_deaths_data = it_chi_deaths_data %>% slfhelper::get_chi(), + refined_data = refined_data, write_to_disk = write_to_disk ) ), From 516cf9fea85eaa1f61cd7897dc0d9accd7dabbdb Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Fri, 2 Aug 2024 17:06:53 +0000 Subject: [PATCH 74/96] Update documentation --- NAMESPACE | 3 ++ man/add_deceased_flag.Rd | 32 +++++++++++ man/create_homelessness_lookup.Rd | 1 + man/get_combined_slf_deaths_lookup_path.Rd | 4 ++ man/get_dd_period.Rd | 3 +- man/latest_cost_year.Rd | 3 +- man/latest_update.Rd | 3 +- man/previous_update.Rd | 3 +- man/process_extract_acute.Rd | 1 + man/process_extract_ae.Rd | 1 + man/process_extract_alarms_telecare.Rd | 1 + man/process_extract_care_home.Rd | 1 + man/process_extract_cmh.Rd | 1 + man/process_extract_delayed_discharges.Rd | 1 + man/process_extract_district_nursing.Rd | 1 + man/process_extract_gp_ooh.Rd | 1 + man/process_extract_home_care.Rd | 1 + man/process_extract_homelessness.Rd | 1 + man/process_extract_maternity.Rd | 1 + man/process_extract_mental_health.Rd | 1 + man/process_extract_nrs_deaths.Rd | 1 + man/process_extract_ooh_consultations.Rd | 1 + man/process_extract_ooh_diagnosis.Rd | 1 + man/process_extract_ooh_outcomes.Rd | 1 + man/process_extract_outpatients.Rd | 1 + man/process_extract_prescribing.Rd | 1 
+ man/process_extract_sds.Rd | 1 + man/process_it_chi_deaths.Rd | 1 + man/process_lookup_gpprac.Rd | 1 + man/process_lookup_postcode.Rd | 1 + man/process_lookup_sc_client.Rd | 1 + man/process_lookup_sc_demographics.Rd | 1 + man/process_refined_death.Rd | 62 ++++++++++++++++++++++ man/process_sc_all_alarms_telecare.Rd | 1 + man/process_sc_all_care_home.Rd | 8 +-- man/process_sc_all_home_care.Rd | 1 + man/process_sc_all_sds.Rd | 1 + man/process_slf_deaths_lookup.Rd | 16 +++++- man/read_extract_gp_ooh.Rd | 1 + man/read_it_chi_deaths.Rd | 1 + man/read_lookup_sc_client.Rd | 1 + man/years_to_run.Rd | 22 ++++++++ 42 files changed, 182 insertions(+), 8 deletions(-) create mode 100644 man/add_deceased_flag.Rd create mode 100644 man/process_refined_death.Rd create mode 100644 man/years_to_run.Rd diff --git a/NAMESPACE b/NAMESPACE index b4314febd..6f1c88841 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,7 @@ # Generated by roxygen2: do not edit by hand export("%>%") +export(add_deceased_flag) export(add_homelessness_date_flags) export(add_homelessness_flag) export(add_hri_variables) @@ -115,6 +116,7 @@ export(process_lookup_ltc) export(process_lookup_postcode) export(process_lookup_sc_client) export(process_lookup_sc_demographics) +export(process_refined_death) export(process_sc_all_alarms_telecare) export(process_sc_all_care_home) export(process_sc_all_home_care) @@ -182,6 +184,7 @@ export(start_fy) export(start_fy_quarter) export(start_next_fy_quarter) export(write_file) +export(years_to_run) importFrom(data.table,.N) importFrom(data.table,.SD) importFrom(magrittr,"%>%") diff --git a/man/add_deceased_flag.Rd b/man/add_deceased_flag.Rd new file mode 100644 index 000000000..c84568522 --- /dev/null +++ b/man/add_deceased_flag.Rd @@ -0,0 +1,32 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/add_deceased_flag.R +\name{add_deceased_flag} +\alias{add_deceased_flag} +\title{Create the SLF Deaths lookup} +\usage{ +add_deceased_flag( + year, + 
refined_death = read_file(get_combined_slf_deaths_lookup_path()) \%>\% + slfhelper::get_chi(), + write_to_disk = TRUE +) +} +\arguments{ +\item{year}{The year to process, in FY format.} + +\item{write_to_disk}{(optional) Should the data be written to disk default is +\code{TRUE} i.e. write the data to disk.} + +\item{nrs_deaths_data}{NRS deaths data.} + +\item{chi_deaths_data}{IT CHI deaths data.} +} +\value{ +a \link[tibble:tibble-package]{tibble} containing the episode file +} +\description{ +Currently this just uses the NRS death dates 'as is', with no +corrections or modifications, it is expected that this will be expanded to +use the CHI deaths extract from IT as well as taking into account data in +the episode file to assess the validity of a death date. +} diff --git a/man/create_homelessness_lookup.Rd b/man/create_homelessness_lookup.Rd index d6a2f2bc8..9826f4ced 100644 --- a/man/create_homelessness_lookup.Rd +++ b/man/create_homelessness_lookup.Rd @@ -50,6 +50,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/get_combined_slf_deaths_lookup_path.Rd b/man/get_combined_slf_deaths_lookup_path.Rd index dd03a0541..709773d01 100644 --- a/man/get_combined_slf_deaths_lookup_path.Rd +++ b/man/get_combined_slf_deaths_lookup_path.Rd @@ -13,6 +13,10 @@ get_combined_slf_deaths_lookup_path(update = latest_update(), ...) } \description{ Get the full path to the BOXI NRS Deaths lookup file for all financial years +Note this name is very similar to the existing slf_deaths_lookup_path +which returns the path for the refined_death with deceased flag for each financial year. +This function will return the combined financial years lookup +i.e. all years put together. 
} \seealso{ \code{\link[=get_file_path]{get_file_path()}} for the generic function. diff --git a/man/get_dd_period.Rd b/man/get_dd_period.Rd index 29bd8baea..c478f401f 100644 --- a/man/get_dd_period.Rd +++ b/man/get_dd_period.Rd @@ -17,6 +17,7 @@ Get the period for Delayed Discharge Other initialisation: \code{\link{latest_cost_year}()}, \code{\link{latest_update}()}, -\code{\link{previous_update}()} +\code{\link{previous_update}()}, +\code{\link{years_to_run}()} } \concept{initialisation} diff --git a/man/latest_cost_year.Rd b/man/latest_cost_year.Rd index 0240c6ad0..0f50b3ac6 100644 --- a/man/latest_cost_year.Rd +++ b/man/latest_cost_year.Rd @@ -16,6 +16,7 @@ Get the latest year for cost uplift Other initialisation: \code{\link{get_dd_period}()}, \code{\link{latest_update}()}, -\code{\link{previous_update}()} +\code{\link{previous_update}()}, +\code{\link{years_to_run}()} } \concept{initialisation} diff --git a/man/latest_update.Rd b/man/latest_update.Rd index b3fbe765c..926e472e4 100644 --- a/man/latest_update.Rd +++ b/man/latest_update.Rd @@ -16,6 +16,7 @@ Get the date of the latest update, e.g 'Jun_2022' Other initialisation: \code{\link{get_dd_period}()}, \code{\link{latest_cost_year}()}, -\code{\link{previous_update}()} +\code{\link{previous_update}()}, +\code{\link{years_to_run}()} } \concept{initialisation} diff --git a/man/previous_update.Rd b/man/previous_update.Rd index f87b4656f..547138700 100644 --- a/man/previous_update.Rd +++ b/man/previous_update.Rd @@ -28,6 +28,7 @@ previous_update(override = "May_2023") # Specific Month Other initialisation: \code{\link{get_dd_period}()}, \code{\link{latest_cost_year}()}, -\code{\link{latest_update}()} +\code{\link{latest_update}()}, +\code{\link{years_to_run}()} } \concept{initialisation} diff --git a/man/process_extract_acute.Rd b/man/process_extract_acute.Rd index 22ff164c8..fae9c7bab 100644 --- a/man/process_extract_acute.Rd +++ b/man/process_extract_acute.Rd @@ -53,6 +53,7 @@ Other process extracts: 
\code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_ae.Rd b/man/process_extract_ae.Rd index 9eec39ba5..36d2bb4d3 100644 --- a/man/process_extract_ae.Rd +++ b/man/process_extract_ae.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_alarms_telecare.Rd b/man/process_extract_alarms_telecare.Rd index 76093be7d..016f5e2b6 100644 --- a/man/process_extract_alarms_telecare.Rd +++ b/man/process_extract_alarms_telecare.Rd @@ -49,6 +49,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_care_home.Rd b/man/process_extract_care_home.Rd index 269ae1e7d..a002d30ab 100644 --- a/man/process_extract_care_home.Rd +++ b/man/process_extract_care_home.Rd @@ -51,6 +51,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_cmh.Rd b/man/process_extract_cmh.Rd index 
64e085dcf..799b6d717 100644 --- a/man/process_extract_cmh.Rd +++ b/man/process_extract_cmh.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_delayed_discharges.Rd b/man/process_extract_delayed_discharges.Rd index c6fd560a7..385bdff2a 100644 --- a/man/process_extract_delayed_discharges.Rd +++ b/man/process_extract_delayed_discharges.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_district_nursing.Rd b/man/process_extract_district_nursing.Rd index eb2814fbc..49284b70f 100644 --- a/man/process_extract_district_nursing.Rd +++ b/man/process_extract_district_nursing.Rd @@ -55,6 +55,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_gp_ooh.Rd b/man/process_extract_gp_ooh.Rd index b137f581c..5c68c35dd 100644 --- a/man/process_extract_gp_ooh.Rd +++ b/man/process_extract_gp_ooh.Rd @@ -53,6 +53,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, 
\code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_home_care.Rd b/man/process_extract_home_care.Rd index 4dd609770..98c45a8e2 100644 --- a/man/process_extract_home_care.Rd +++ b/man/process_extract_home_care.Rd @@ -49,6 +49,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_homelessness.Rd b/man/process_extract_homelessness.Rd index 405da34bb..59fe6f283 100644 --- a/man/process_extract_homelessness.Rd +++ b/man/process_extract_homelessness.Rd @@ -62,6 +62,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_maternity.Rd b/man/process_extract_maternity.Rd index 17dd1a64c..19142c4a8 100644 --- a/man/process_extract_maternity.Rd +++ b/man/process_extract_maternity.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_mental_health.Rd b/man/process_extract_mental_health.Rd index 5f1fc7330..bd91dc4ec 100644 --- a/man/process_extract_mental_health.Rd +++ b/man/process_extract_mental_health.Rd @@ -48,6 +48,7 @@ Other process 
extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_nrs_deaths.Rd b/man/process_extract_nrs_deaths.Rd index 1938e15ec..71fab68e2 100644 --- a/man/process_extract_nrs_deaths.Rd +++ b/man/process_extract_nrs_deaths.Rd @@ -47,6 +47,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_ooh_consultations.Rd b/man/process_extract_ooh_consultations.Rd index e00155191..ae4265823 100644 --- a/man/process_extract_ooh_consultations.Rd +++ b/man/process_extract_ooh_consultations.Rd @@ -45,6 +45,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_ooh_diagnosis.Rd b/man/process_extract_ooh_diagnosis.Rd index 2dcbee647..78db15f0f 100644 --- a/man/process_extract_ooh_diagnosis.Rd +++ b/man/process_extract_ooh_diagnosis.Rd @@ -45,6 +45,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git 
a/man/process_extract_ooh_outcomes.Rd b/man/process_extract_ooh_outcomes.Rd index 31ec64439..d59617e7b 100644 --- a/man/process_extract_ooh_outcomes.Rd +++ b/man/process_extract_ooh_outcomes.Rd @@ -45,6 +45,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_outpatients.Rd b/man/process_extract_outpatients.Rd index 3a46ad119..8af2c6ddf 100644 --- a/man/process_extract_outpatients.Rd +++ b/man/process_extract_outpatients.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_prescribing.Rd b/man/process_extract_prescribing.Rd index 195a60bfe..c959ce1e7 100644 --- a/man/process_extract_prescribing.Rd +++ b/man/process_extract_prescribing.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_extract_sds.Rd b/man/process_extract_sds.Rd index 03ee60362..b0cc8788a 100644 --- a/man/process_extract_sds.Rd +++ b/man/process_extract_sds.Rd @@ -49,6 +49,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, 
+\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_it_chi_deaths.Rd b/man/process_it_chi_deaths.Rd index 1d8e085ab..757f06aa7 100644 --- a/man/process_it_chi_deaths.Rd +++ b/man/process_it_chi_deaths.Rd @@ -45,6 +45,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_lookup_gpprac.Rd b/man/process_lookup_gpprac.Rd index 107af24c0..bfda08282 100644 --- a/man/process_lookup_gpprac.Rd +++ b/man/process_lookup_gpprac.Rd @@ -54,6 +54,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_lookup_postcode.Rd b/man/process_lookup_postcode.Rd index e556efd51..b8b1ebd4f 100644 --- a/man/process_lookup_postcode.Rd +++ b/man/process_lookup_postcode.Rd @@ -55,6 +55,7 @@ Other process extracts: \code{\link{process_lookup_gpprac}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_lookup_sc_client.Rd b/man/process_lookup_sc_client.Rd index 5ea50cea5..4b85a06b5 100644 --- a/man/process_lookup_sc_client.Rd +++ b/man/process_lookup_sc_client.Rd @@ -56,6 +56,7 @@ Other process extracts: 
\code{\link{process_lookup_gpprac}()}, \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_lookup_sc_demographics.Rd b/man/process_lookup_sc_demographics.Rd index a89933425..29215f657 100644 --- a/man/process_lookup_sc_demographics.Rd +++ b/man/process_lookup_sc_demographics.Rd @@ -52,6 +52,7 @@ Other process extracts: \code{\link{process_lookup_gpprac}()}, \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_refined_death.Rd b/man/process_refined_death.Rd new file mode 100644 index 000000000..fd5392eb2 --- /dev/null +++ b/man/process_refined_death.Rd @@ -0,0 +1,62 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/process_refined_death.R +\name{process_refined_death} +\alias{process_refined_death} +\title{Process the refined death data} +\usage{ +process_refined_death( + it_chi_deaths = read_file(get_slf_chi_deaths_path()), + write_to_disk = TRUE +) +} +\arguments{ +\item{it_chi_deaths}{it chi death data} + +\item{write_to_disk}{write the result to disk or not.} +} +\value{ +refined_death The processed lookup of deaths combining NRS and IT_CHI. +} +\description{ +This will process +year-specific BOXI NRS death file (written to disk), and +combine them together to get all years NRS file (Not written to disk). +Then join all NRS deaths with IT CHI death data +to get an all-year refined death file (written to disk). 
+} +\seealso{ +Other process extracts: +\code{\link{create_homelessness_lookup}()}, +\code{\link{process_extract_acute}()}, +\code{\link{process_extract_ae}()}, +\code{\link{process_extract_alarms_telecare}()}, +\code{\link{process_extract_care_home}()}, +\code{\link{process_extract_cmh}()}, +\code{\link{process_extract_delayed_discharges}()}, +\code{\link{process_extract_district_nursing}()}, +\code{\link{process_extract_gp_ooh}()}, +\code{\link{process_extract_home_care}()}, +\code{\link{process_extract_homelessness}()}, +\code{\link{process_extract_maternity}()}, +\code{\link{process_extract_mental_health}()}, +\code{\link{process_extract_nrs_deaths}()}, +\code{\link{process_extract_ooh_consultations}()}, +\code{\link{process_extract_ooh_diagnosis}()}, +\code{\link{process_extract_ooh_outcomes}()}, +\code{\link{process_extract_outpatients}()}, +\code{\link{process_extract_prescribing}()}, +\code{\link{process_extract_sds}()}, +\code{\link{process_it_chi_deaths}()}, +\code{\link{process_lookup_gpprac}()}, +\code{\link{process_lookup_postcode}()}, +\code{\link{process_lookup_sc_client}()}, +\code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_sc_all_alarms_telecare}()}, +\code{\link{process_sc_all_care_home}()}, +\code{\link{process_sc_all_home_care}()}, +\code{\link{process_sc_all_sds}()}, +\code{\link{read_extract_gp_ooh}()}, +\code{\link{read_it_chi_deaths}()}, +\code{\link{read_lookup_sc_client}()} +} +\concept{process extracts} diff --git a/man/process_sc_all_alarms_telecare.Rd b/man/process_sc_all_alarms_telecare.Rd index a2e319cbf..1f3eb30e0 100644 --- a/man/process_sc_all_alarms_telecare.Rd +++ b/man/process_sc_all_alarms_telecare.Rd @@ -54,6 +54,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, 
\code{\link{process_sc_all_sds}()}, diff --git a/man/process_sc_all_care_home.Rd b/man/process_sc_all_care_home.Rd index 792d2200d..7d4234b25 100644 --- a/man/process_sc_all_care_home.Rd +++ b/man/process_sc_all_care_home.Rd @@ -7,7 +7,8 @@ process_sc_all_care_home( data, sc_demog_lookup = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi(), - it_chi_deaths_data = read_file(get_slf_chi_deaths_path()), + refined_death = read_file(get_combined_slf_deaths_lookup_path()) \%>\% + slfhelper::get_chi(), ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()), spd_path = read_file(get_spd_path()), write_to_disk = TRUE @@ -19,8 +20,8 @@ process_sc_all_care_home( \item{sc_demog_lookup}{The Social Care Demographics lookup produced by \code{\link[=process_lookup_sc_demographics]{process_lookup_sc_demographics()}}.} -\item{it_chi_deaths_data}{The processed lookup of deaths from IT produced -with \code{\link[=process_it_chi_deaths]{process_it_chi_deaths()}}.} +\item{refined_death}{The processed lookup of deaths from IT produced +with \code{\link[=process_refined_death]{process_refined_death()}}.} \item{ch_name_lookup_path}{Path to the Care Home name Lookup Excel workbook.} @@ -65,6 +66,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_home_care}()}, \code{\link{process_sc_all_sds}()}, diff --git a/man/process_sc_all_home_care.Rd b/man/process_sc_all_home_care.Rd index c6777889f..1f64cad95 100644 --- a/man/process_sc_all_home_care.Rd +++ b/man/process_sc_all_home_care.Rd @@ -54,6 +54,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, 
\code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_sds}()}, diff --git a/man/process_sc_all_sds.Rd b/man/process_sc_all_sds.Rd index f91c9dfb9..43d18b29d 100644 --- a/man/process_sc_all_sds.Rd +++ b/man/process_sc_all_sds.Rd @@ -54,6 +54,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/process_slf_deaths_lookup.Rd b/man/process_slf_deaths_lookup.Rd index bf69d3952..9361bedbd 100644 --- a/man/process_slf_deaths_lookup.Rd +++ b/man/process_slf_deaths_lookup.Rd @@ -1,9 +1,18 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/process_lookup_deaths.R +% Please edit documentation in R/process_lookup_deaths.R, +% R/process_slf_deaths_lookup.R \name{process_slf_deaths_lookup} \alias{process_slf_deaths_lookup} \title{Create the SLF Deaths lookup} \usage{ +process_slf_deaths_lookup( + year, + nrs_deaths_data = read_file(get_source_extract_path(year, "deaths"), col_select = + c("anon_chi", "record_keydate1")), + chi_deaths_data = read_file(get_slf_chi_deaths_path()), + write_to_disk = TRUE +) + process_slf_deaths_lookup( year, nrs_deaths_data = read_file(get_source_extract_path(year, "deaths"), col_select = @@ -23,9 +32,14 @@ process_slf_deaths_lookup( \code{TRUE} i.e. write the data to disk.} } \value{ +a \link[tibble:tibble-package]{tibble} containing the episode file + a \link[tibble:tibble-package]{tibble} containing the episode file } \description{ +Use all-year refined death data to produce year-specific +slf_deaths_lookup with deceased flag added. 
+ Currently this just uses the NRS death dates 'as is', with no corrections or modifications, it is expected that this will be expanded to use the CHI deaths extract from IT as well as taking into account data in diff --git a/man/read_extract_gp_ooh.Rd b/man/read_extract_gp_ooh.Rd index ba908127b..61eaf7d32 100644 --- a/man/read_extract_gp_ooh.Rd +++ b/man/read_extract_gp_ooh.Rd @@ -55,6 +55,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/read_it_chi_deaths.Rd b/man/read_it_chi_deaths.Rd index d1bfe5cf7..fe548d84b 100644 --- a/man/read_it_chi_deaths.Rd +++ b/man/read_it_chi_deaths.Rd @@ -42,6 +42,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/read_lookup_sc_client.Rd b/man/read_lookup_sc_client.Rd index 283bc6a9a..4cef9df29 100644 --- a/man/read_lookup_sc_client.Rd +++ b/man/read_lookup_sc_client.Rd @@ -48,6 +48,7 @@ Other process extracts: \code{\link{process_lookup_postcode}()}, \code{\link{process_lookup_sc_client}()}, \code{\link{process_lookup_sc_demographics}()}, +\code{\link{process_refined_death}()}, \code{\link{process_sc_all_alarms_telecare}()}, \code{\link{process_sc_all_care_home}()}, \code{\link{process_sc_all_home_care}()}, diff --git a/man/years_to_run.Rd b/man/years_to_run.Rd new file mode 100644 index 000000000..188ea7f5f --- /dev/null +++ b/man/years_to_run.Rd @@ -0,0 +1,22 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in 
R/00-update_refs.R +\name{years_to_run} +\alias{years_to_run} +\title{The year list for slf to update} +\usage{ +years_to_run() +} +\value{ +The vector of financial years +} +\description{ +Get the vector of years to update slf +} +\seealso{ +Other initialisation: +\code{\link{get_dd_period}()}, +\code{\link{latest_cost_year}()}, +\code{\link{latest_update}()}, +\code{\link{previous_update}()} +} +\concept{initialisation} From f0babdb941d01c06f19653ed3d800694c9cba365 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Fri, 2 Aug 2024 17:06:58 +0000 Subject: [PATCH 75/96] Style code --- R/00-update_refs.R | 6 ++++-- R/add_deceased_flag.R | 15 +++++++-------- R/process_lookup_deaths.R | 1 - R/process_refined_death.R | 23 +++++++++++++++-------- _targets.R | 9 ++++++--- 5 files changed, 32 insertions(+), 22 deletions(-) diff --git a/R/00-update_refs.R b/R/00-update_refs.R index c22585aaf..c45f10e9c 100644 --- a/R/00-update_refs.R +++ b/R/00-update_refs.R @@ -89,7 +89,9 @@ latest_cost_year <- function() { years_to_run <- function() { fy_start_2digit <- 17 fy_end_2digit <- 23 - years_to_run = paste0(fy_start_2digit:fy_end_2digit, - (fy_start_2digit + 1):(fy_end_2digit + 1)) + years_to_run <- paste0( + fy_start_2digit:fy_end_2digit, + (fy_start_2digit + 1):(fy_end_2digit + 1) + ) return(years_to_run) } diff --git a/R/add_deceased_flag.R b/R/add_deceased_flag.R index 25010cba6..f3be216cf 100644 --- a/R/add_deceased_flag.R +++ b/R/add_deceased_flag.R @@ -17,16 +17,15 @@ add_deceased_flag <- function( year, refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(), write_to_disk = TRUE) { - # create slf deaths lookup - dplyr::mutate( - death_date = dplyr::if_else(is.na(.data$record_keydate1), - .data$death_date_chi, .data$record_keydate1 - ), - deceased = TRUE, - .keep = "unused" - ) %>% + dplyr::mutate( + death_date = dplyr::if_else(is.na(.data$record_keydate1), + .data$death_date_chi, .data$record_keydate1 + ), + deceased = TRUE, + .keep = 
"unused" + ) %>% # save anon chi on disk slfhelper::get_anon_chi() diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index ca5c6fc67..e1ba9edf3 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -15,7 +15,6 @@ process_slf_deaths_lookup <- function( year, refined_death = read_file(get_combined_slf_deaths_lookup_path()), write_to_disk = TRUE) { - # create slf deaths lookup slf_deaths_lookup <- refined_death %>% slfhelper::get_chi() %>% diff --git a/R/process_refined_death.R b/R/process_refined_death.R index d7067e91e..48f14fd43 100644 --- a/R/process_refined_death.R +++ b/R/process_refined_death.R @@ -15,19 +15,24 @@ process_refined_death <- function( it_chi_deaths = read_file(get_slf_chi_deaths_path()), write_to_disk = TRUE) { - years_list = years_to_run() + years_list <- years_to_run() nrs_all_years <- lapply(years_list, (\(year) { - read_extract_nrs_deaths(year, - get_boxi_extract_path(year, type = "deaths")) %>% + read_extract_nrs_deaths( + year, + get_boxi_extract_path(year, type = "deaths") + ) %>% process_extract_nrs_deaths(year, - write_to_disk = write_to_disk) + write_to_disk = write_to_disk + ) })) %>% data.table::rbindlist() it_chi_deaths <- it_chi_deaths %>% - dplyr::select(c("anon_chi", - "death_date_chi")) %>% + dplyr::select(c( + "anon_chi", + "death_date_chi" + )) %>% dplyr::arrange(.data$anon_chi, .keep_all = TRUE) refined_death <- nrs_all_years %>% @@ -47,8 +52,10 @@ process_refined_death <- function( ) if (write_to_disk) { - write_file(refined_death, - get_combined_slf_deaths_lookup_path()) + write_file( + refined_death, + get_combined_slf_deaths_lookup_path() + ) } return(refined_death) diff --git a/_targets.R b/_targets.R index 66df85f63..7b613fc4a 100644 --- a/_targets.R +++ b/_targets.R @@ -206,8 +206,10 @@ list( ), tar_target( refined_death, - process_refined_death(it_chi_deaths = it_chi_deaths_data, - write_to_disk = write_to_disk) + process_refined_death( + it_chi_deaths = it_chi_deaths_data, + 
write_to_disk = write_to_disk + ) ), # Phase II @@ -415,7 +417,8 @@ list( process_extract_mental_health( mental_health_data, year, - write_to_disk = write_to_disk) + write_to_disk = write_to_disk + ) ), tar_target( tests_source_mental_health_extract, From b57c69f2b6e8655349de027622c9df9bdb2de3d1 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Fri, 2 Aug 2024 18:41:23 +0100 Subject: [PATCH 76/96] minor changes --- R/process_lookup_deaths.R | 3 +-- _targets.R | 22 +++++++++++----------- man/process_slf_deaths_lookup.Rd | 2 ++ 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index e1ba9edf3..d4e5d126c 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -4,8 +4,7 @@ #' slf_deaths_lookup with deceased flag added. #' #' @param year The year to process, in FY format. -#' @param nrs_deaths_data NRS deaths data. -#' @param chi_deaths_data IT CHI deaths data. +#' @param refined_death refined death date combining nrs and it_chi. #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. 
#' diff --git a/_targets.R b/_targets.R index 7b613fc4a..0fea087ca 100644 --- a/_targets.R +++ b/_targets.R @@ -167,12 +167,19 @@ list( age = as.difftime(28.0, units = "days") ) ), + tar_target( + refined_death_data, + process_refined_death( + it_chi_deaths = it_chi_deaths_data, + write_to_disk = write_to_disk + ) + ), tar_target( all_care_home, process_sc_all_care_home( all_care_home_extract, sc_demog_lookup = sc_demog_lookup %>% slfhelper::get_chi(), - refined_death = refined_death %>% slfhelper::get_chi(), + refined_death = refined_death_data %>% slfhelper::get_chi(), ch_name_lookup_path = slf_ch_name_lookup_path, spd_path = spd_path, write_to_disk = write_to_disk @@ -204,13 +211,6 @@ list( tests_sc_all_sds, process_tests_sc_all_sds_episodes(all_sds) ), - tar_target( - refined_death, - process_refined_death( - it_chi_deaths = it_chi_deaths_data, - write_to_disk = write_to_disk - ) - ), # Phase II tar_map( @@ -436,10 +436,10 @@ list( source_nrs_deaths_extract, # use this anomymous function with redundant but necessary refined_death # to make sure reading year-specific nrs deaths extracts after it is produced - (\(year, refined_death) { + (\(year, refined_death_datas) { read_file(get_source_extract_path(year, "deaths")) %>% as.data.frame() - })(year, refined_death) + })(year, refined_death_data) ), tar_target( tests_source_nrs_deaths_extract, @@ -570,7 +570,7 @@ list( slf_deaths_lookup, process_slf_deaths_lookup( year = year, - refined_data = refined_data, + refined_death = refined_death_data, write_to_disk = write_to_disk ) ), diff --git a/man/process_slf_deaths_lookup.Rd b/man/process_slf_deaths_lookup.Rd index 9361bedbd..424e52073 100644 --- a/man/process_slf_deaths_lookup.Rd +++ b/man/process_slf_deaths_lookup.Rd @@ -30,6 +30,8 @@ process_slf_deaths_lookup( \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. 
write the data to disk.} + +\item{refined_death}{refined death date combining nrs and it_chi.} } \value{ a \link[tibble:tibble-package]{tibble} containing the episode file From 5d78180112a563d5247bf16c035a1b64ece994e2 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 5 Aug 2024 10:06:30 +0100 Subject: [PATCH 77/96] remove process_slf_deaths_lookup --- R/process_lookup_deaths.R | 2 +- R/process_slf_deaths_lookup.R | 61 ----------------------------------- 2 files changed, 1 insertion(+), 62 deletions(-) delete mode 100644 R/process_slf_deaths_lookup.R diff --git a/R/process_lookup_deaths.R b/R/process_lookup_deaths.R index d4e5d126c..edc083cd2 100644 --- a/R/process_lookup_deaths.R +++ b/R/process_lookup_deaths.R @@ -8,7 +8,7 @@ #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. #' -#' @return a [tibble][tibble::tibble-package] containing the episode file +#' @return a [tibble][tibble::tibble-package] add deceased flag to deaths #' @export process_slf_deaths_lookup <- function( year, diff --git a/R/process_slf_deaths_lookup.R b/R/process_slf_deaths_lookup.R deleted file mode 100644 index 40f316a4a..000000000 --- a/R/process_slf_deaths_lookup.R +++ /dev/null @@ -1,61 +0,0 @@ -#' Create the SLF Deaths lookup -#' -#' @description Currently this just uses the NRS death dates 'as is', with no -#' corrections or modifications, it is expected that this will be expanded to -#' use the CHI deaths extract from IT as well as taking into account data in -#' the episode file to assess the validity of a death date. -#' -#' @param year The year to process, in FY format. -#' @param nrs_deaths_data NRS deaths data. -#' @param chi_deaths_data IT CHI deaths data. -#' @param write_to_disk (optional) Should the data be written to disk default is -#' `TRUE` i.e. write the data to disk. 
-#' -#' @return a [tibble][tibble::tibble-package] containing the episode file -#' @export -process_slf_deaths_lookup <- function( - year, - nrs_deaths_data = read_file( - get_source_extract_path(year, "deaths"), - col_select = c("anon_chi", "record_keydate1") - ), - chi_deaths_data = read_file(get_slf_chi_deaths_path()), - write_to_disk = TRUE) { - boxi_nrs_data <- nrs_deaths_data %>% - slfhelper::get_chi() %>% - # Only modification over 'raw' NRS is to keep the earliest death date - dplyr::select("chi", "record_keydate1") %>% - dplyr::arrange(.data$record_keydate1) %>% - dplyr::distinct(.data$chi, .keep_all = TRUE) - - # create slf deaths lookup - slf_deaths_lookup <- chi_deaths_data %>% - slfhelper::get_chi() %>% - dplyr::mutate( - fy = phsmethods::extract_fin_year(death_date_chi), - fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7))) - ) %>% - # Filter the chi death dates to the FY as the lookup is by FY - dplyr::filter(fy == year) %>% - # join boxi nrs data to chi deaths - dplyr::full_join(boxi_nrs_data, by = "chi") %>% - # use the BOXI NRS death date by default, but if it's missing, use the chi death date. 
- dplyr::mutate( - death_date = dplyr::if_else(is.na(.data$record_keydate1), - .data$death_date_chi, .data$record_keydate1 - ), - deceased = TRUE, - .keep = "unused" - ) %>% - # save anon chi on disk - slfhelper::get_anon_chi() - - if (write_to_disk) { - write_file( - slf_deaths_lookup, - get_slf_deaths_lookup_path(year, check_mode = "write") - ) - } - - return(slf_deaths_lookup) -} From 03b41c3c3fc625d1bffd630f98aa301a0a7daf26 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Mon, 5 Aug 2024 09:08:11 +0000 Subject: [PATCH 78/96] Update documentation --- man/process_slf_deaths_lookup.Rd | 30 ++++-------------------------- 1 file changed, 4 insertions(+), 26 deletions(-) diff --git a/man/process_slf_deaths_lookup.Rd b/man/process_slf_deaths_lookup.Rd index 424e52073..80e7559e0 100644 --- a/man/process_slf_deaths_lookup.Rd +++ b/man/process_slf_deaths_lookup.Rd @@ -1,49 +1,27 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/process_lookup_deaths.R, -% R/process_slf_deaths_lookup.R +% Please edit documentation in R/process_lookup_deaths.R \name{process_slf_deaths_lookup} \alias{process_slf_deaths_lookup} \title{Create the SLF Deaths lookup} \usage{ process_slf_deaths_lookup( year, - nrs_deaths_data = read_file(get_source_extract_path(year, "deaths"), col_select = - c("anon_chi", "record_keydate1")), - chi_deaths_data = read_file(get_slf_chi_deaths_path()), - write_to_disk = TRUE -) - -process_slf_deaths_lookup( - year, - nrs_deaths_data = read_file(get_source_extract_path(year, "deaths"), col_select = - c("anon_chi", "record_keydate1")), - chi_deaths_data = read_file(get_slf_chi_deaths_path()), + refined_death = read_file(get_combined_slf_deaths_lookup_path()), write_to_disk = TRUE ) } \arguments{ \item{year}{The year to process, in FY format.} -\item{nrs_deaths_data}{NRS deaths data.} - -\item{chi_deaths_data}{IT CHI deaths data.} +\item{refined_death}{refined death date combining nrs and it_chi.} \item{write_to_disk}{(optional) Should 
the data be written to disk default is \code{TRUE} i.e. write the data to disk.} - -\item{refined_death}{refined death date combining nrs and it_chi.} } \value{ -a \link[tibble:tibble-package]{tibble} containing the episode file - -a \link[tibble:tibble-package]{tibble} containing the episode file +a \link[tibble:tibble-package]{tibble} add deceased flag to deaths } \description{ Use all-year refined death data to produce year-specific slf_deaths_lookup with deceased flag added. - -Currently this just uses the NRS death dates 'as is', with no -corrections or modifications, it is expected that this will be expanded to -use the CHI deaths extract from IT as well as taking into account data in -the episode file to assess the validity of a death date. } From d186a3db1eb0ef818879d211891de8bafdaf4483 Mon Sep 17 00:00:00 2001 From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Date: Wed, 7 Aug 2024 12:19:47 +0100 Subject: [PATCH 79/96] Major update of Care Home script (#945) * # major changes to care home script see document on sharepoint for description also: - added in type of admission description - updated care home contact in fill_ch_name script * minor note updates * Style code * Update documentation * couple of note updates * Update R/process_sc_all_care_home.R Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> * Update R/process_sc_all_care_home.R Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> * Style code * Update R/process_sc_all_care_home.R * change to ch name lookup * Update documentation * remove fill ch provider fill line * update fill ch names so it works with new ch methodology * Style code * Update documentation * Style code * Update documentation * Remove redundant variable `latest_sc_id` * use slfhelper::get_chi * new section for sc_ch_id_markers * Style code * Update documentation * Remove extra text and white space * add rename to use death_date_chi * use `read_excel` function * Update 
documentation * Return the paths only for SPD and ch name * Update documentation * Remove rename - no longer needed * fix typo * remove variables that dont exist --------- Co-authored-by: marjom02 Co-authored-by: SwiftySalmon Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> Co-authored-by: Jennit07 Co-authored-by: Jennifer Thom --- R/add_activity_after_death_flag.R | 1 + R/fill_ch_names.R | 10 +- R/process_extract_care_home.R | 2 +- R/process_lookup_sc_demographics.R | 4 +- R/process_sc_all_care_home.R | 280 ++++++++++++++++------------- R/read_sc_all_care_home.R | 2 +- man/process_sc_all_care_home.Rd | 4 +- 7 files changed, 164 insertions(+), 139 deletions(-) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index fdede9001..a45e4296a 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -125,6 +125,7 @@ add_activity_after_death_flag <- function( #' #' # Read data------------------------------------------------ + process_combined_deaths_lookup <- function(update = latest_update(), write_to_disk = TRUE, ...) 
{ dir_folder <- "/conf/hscdiip/SLF_Extracts/Deaths" diff --git a/R/fill_ch_names.R b/R/fill_ch_names.R index cd8d18677..b6aa85bb7 100644 --- a/R/fill_ch_names.R +++ b/R/fill_ch_names.R @@ -213,7 +213,7 @@ fill_ch_names <- function(ch_data, "match_mean2", # "open_interval", "ch_admission_date", - "qtr_start", + "period_start_date", "ch_date_registered", "latest_close_date", "ch_active", @@ -305,7 +305,6 @@ fill_ch_names <- function(ch_data, "unique_identifier", "matching_quality_indicator", "sending_location", - "latest_sc_id", "chi", "ch_name", "ch_postcode", @@ -320,9 +319,6 @@ fill_ch_names <- function(ch_data, "ch_admission_date", "ch_discharge_date", "age", - "record_date", - "qtr_start", - "latest_flag", "gender", "dob", "postcode", @@ -763,7 +759,6 @@ fill_ch_names <- function(ch_data, ## produce output ---- col_output <- c( "sending_location", - "latest_sc_id", "chi", "ch_name", "ch_postcode", @@ -778,9 +773,6 @@ fill_ch_names <- function(ch_data, "ch_admission_date", "ch_discharge_date", "age", - "record_date", - "qtr_start", - "latest_flag", "gender", "dob", "postcode", diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R index 54789eac9..db7997061 100644 --- a/R/process_extract_care_home.R +++ b/R/process_extract_care_home.R @@ -39,11 +39,11 @@ process_extract_care_home <- function( is_date_in_fyyear(year, .data$record_keydate1, .data$record_keydate2) ) %>% # remove any episodes where the latest submission was before the current year + # this is what stops cases being in future files dplyr::filter( substr(.data$sc_latest_submission, 1L, 4L) >= convert_fyyear_to_year(year) ) - # Data Cleaning --------------------------------------- source_ch_clean <- ch_data %>% # create variables diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R index d6e24c87f..1b29c414c 100644 --- a/R/process_lookup_sc_demographics.R +++ b/R/process_lookup_sc_demographics.R @@ -143,8 +143,8 @@ process_lookup_sc_demographics <- 
function( dplyr::ungroup() # check to make sure all cases of chi are still there - dplyr::n_distinct(sc_demog_lookup$chi) # 524810 - dplyr::n_distinct(sc_demog_lookup$social_care_id) # 636404 + dplyr::n_distinct(sc_demog_lookup$chi) # 525,834 + dplyr::n_distinct(sc_demog_lookup$social_care_id) # 637,422 sc_demog_lookup <- sc_demog_lookup %>% slfhelper::get_anon_chi() diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 7b87d68f0..8492268f7 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -19,38 +19,38 @@ #' @family process extracts #' #' @export -#' process_sc_all_care_home <- function( data, sc_demog_lookup = read_file(get_sc_demog_lookup_path()) %>% slfhelper::get_chi(), refined_death = read_file(get_combined_slf_deaths_lookup_path()) %>% slfhelper::get_chi(), - ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()), - spd_path = read_file(get_spd_path()), + ch_name_lookup_path = get_slf_ch_name_lookup_path(), + spd_path = get_spd_path(), write_to_disk = TRUE) { ## Data Cleaning----------------------------------------------------- + ch_clean <- data %>% dplyr::mutate( - record_date = end_fy_quarter(.data[["period"]]), - qtr_start = start_fy_quarter(.data[["period"]]), - # Set missing admission date to start of the submitted quarter + # Set missing admission date to start of the submitted quarter (n = 2) ch_admission_date = dplyr::if_else( is.na(.data[["ch_admission_date"]]), - .data[["qtr_start"]], + .data[["period_start_date"]], .data[["ch_admission_date"]] ), - # TODO check if we should set the dis date to the end of the period? 
- # If the dis date is before admission, remove the dis date + # If the dis date is before admission, remove the dis date (n = 5) ch_discharge_date = dplyr::if_else( .data[["ch_admission_date"]] > .data[["ch_discharge_date"]], lubridate::NA_Date_, .data[["ch_discharge_date"]] ) ) %>% - dplyr::left_join(sc_demog_lookup, + dplyr::full_join(sc_demog_lookup, # this is the correct join. by = c("sending_location", "social_care_id") ) %>% - replace_sc_id_with_latest() + replace_sc_id_with_latest() %>% + dplyr::select(-latest_flag, -latest_sc_id) + + # cleaning and matching care home names name_postcode_clean <- fill_ch_names( ch_data = ch_clean, ch_name_lookup_path = ch_name_lookup_path, @@ -58,23 +58,27 @@ process_sc_all_care_home <- function( ) fixed_ch_provider <- name_postcode_clean %>% + dplyr::select(-ch_name_validated, -open_interval, -latest_close_date, -ch_name_old, -ch_postcode_old) %>% dplyr::mutate( - ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) + ch_provider = dplyr::if_else(is.na(.data[["ch_provider"]]), 6L, .data[["ch_provider"]]) # (n = 2) ) %>% # sort data dplyr::arrange( - "sending_location", - "social_care_id", - "ch_admission_date", - "period" + .data[["sending_location"]], + .data[["social_care_id"]], + .data[["period"]], + .data[["ch_admission_date"]] ) %>% dplyr::group_by( .data[["sending_location"]], .data[["social_care_id"]] ) %>% + # work out the min and max ch provider in an episode dplyr::mutate( min_ch_provider = min(.data[["ch_provider"]]), max_ch_provider = max(.data[["ch_provider"]]), + # if care home provider is different across cases, set to "6". 
+ # tidy up ch_provider using 6 when disagreeing values ch_provider = dplyr::if_else( .data[["min_ch_provider"]] != .data[["max_ch_provider"]], 6L, @@ -85,12 +89,9 @@ process_sc_all_care_home <- function( -"min_ch_provider", -"max_ch_provider" ) %>% - # tidy up ch_provider using 6 when disagreeing values - tidyr::fill(.data[["ch_provider"]], .direction = "downup") %>% dplyr::ungroup() - fixed_nursing_provision <- fixed_ch_provider %>% dplyr::group_by( .data[["sending_location"]], @@ -98,7 +99,7 @@ process_sc_all_care_home <- function( .data[["ch_admission_date"]] ) %>% # fill in nursing care provision when missing - # but present in the following entry + # but present in the following entry (n = 0) dplyr::mutate( nursing_care_provision = dplyr::na_if(.data[["nursing_care_provision"]], 9L) ) %>% @@ -106,26 +107,44 @@ process_sc_all_care_home <- function( ready_to_merge <- fixed_nursing_provision %>% - # remove any duplicate records before merging for speed and simplicity - dplyr::distinct() %>% + # remove any duplicate records before merging + dplyr::distinct() %>% # (n = 3) + # sort data + dplyr::arrange( + .data[["sending_location"]], + .data[["social_care_id"]], + .data[["ch_admission_date"]], + .data[["period"]] + ) %>% + dplyr::group_by( + .data[["sending_location"]], + .data[["social_care_id"]], + .data[["ch_admission_date"]] + ) %>% # counter for split episodes - dplyr::mutate( - split_episode = tidyr::replace_na( - .data[["nursing_care_provision"]] != dplyr::lag( - .data[["nursing_care_provision"]] - ), - TRUE - ), - split_episode_counter = cumsum(.data[["split_episode"]]) + # a split episode is an episode where the admission date is the same but the nursing provider has changed. 
+ # We want to keep the nursing provision changes when we merge cases that have the same admission date + dplyr::mutate(previous_nursing_care_provision = dplyr::lag(.data[["nursing_care_provision"]])) %>% + # create a T/F flag for if nursing provision was the same as previous record with same admission date + dplyr::mutate(split_episode = tidyr::replace_na(.data[["previous_nursing_care_provision"]] != nursing_care_provision, TRUE)) %>% + dplyr::group_by( + .data[["social_care_id"]], + .data[["sending_location"]], + .data[["split_episode"]] ) %>% - dplyr::ungroup() + # create a count of each time the nursing provision changes between records with the same admission date + dplyr::mutate(split_episode_counter = ifelse(split_episode == TRUE, dplyr::row_number(), NA)) %>% + dplyr::group_by( + .data[["social_care_id"]], + .data[["sending_location"]] + ) %>% + # fill split episode counter. This will create a new id number for each different nursing provision within an episode + tidyr::fill(split_episode_counter, .direction = c("down")) %>% + dplyr::select(-previous_nursing_care_provision, -split_episode) + - # Merge records to a single row per episode - # where admission is the same + # Merge records to a single row per episode where admission is the same ch_episode <- ready_to_merge %>% - # when nursing_care_provision is different on - # records within the episode, split the episode - # at this point. 
dplyr::group_by( .data[["chi"]], .data[["sending_location"]], @@ -138,8 +157,8 @@ process_sc_all_care_home <- function( dplyr::desc(.data[["period"]]), dplyr::desc(.data[["ch_discharge_date"]]), dplyr::desc(.data[["ch_provider"]]), - dplyr::desc(.data[["record_date"]]), - dplyr::desc(.data[["qtr_start"]]), + dplyr::desc(.data[["period_end_date"]]), + dplyr::desc(.data[["period_start_date"]]), dplyr::desc(.data[["ch_name"]]), dplyr::desc(.data[["ch_postcode"]]), dplyr::desc(.data[["reason_for_admission"]]), @@ -150,62 +169,39 @@ process_sc_all_care_home <- function( ) %>% dplyr::summarise( sc_latest_submission = dplyr::first(.data[["period"]]), - dplyr::across( - c( - "ch_discharge_date", - "ch_provider", - "record_date", - "qtr_start", - "ch_name", - "ch_postcode", - "reason_for_admission", - "type_of_admission" - ), - dplyr::first - ), + dplyr::across(c( + "ch_discharge_date", + "ch_provider", + "period_end_date", + "period_start_date", + "ch_name", + "ch_postcode", + "reason_for_admission", + "type_of_admission" + ), dplyr::first), dplyr::across(c("gender", "dob", "postcode"), dplyr::first) ) %>% - dplyr::ungroup() %>% - # Amend dates for split episodes - # Change the start and end date as appropriate when an episode is split, - # using the start / end date of the submission quarter - dplyr::group_by( - .data[["chi"]], - .data[["sending_location"]], - .data[["social_care_id"]], - .data[["ch_admission_date"]] - ) %>% - # counter for latest submission - # TODO check if this is the same as split_episode_counter? 
- dplyr::mutate( - latest_submission_counter = tidyr::replace_na( - .data[["sc_latest_submission"]] != dplyr::lag( - .data[["sc_latest_submission"]] - ), - TRUE - ), - sum_latest_submission = cumsum(.data[["latest_submission_counter"]]) - ) %>% + # If the admission date is missing use the period start date + # otherwise keep the recorded admission date dplyr::mutate( - # If it's the first episode(s) then keep the admission date(s), - # otherwise use the start of the quarter - ch_admission_date = dplyr::if_else( - .data[["sum_latest_submission"]] == min(.data[["sum_latest_submission"]]), - .data[["ch_admission_date"]], - .data[["qtr_start"]] + ch_admission_date = dplyr::if_else(is.na(.data[["ch_admission_date"]]), + .data[["period_start_date"]], + .data[["ch_admission_date"]] ), # If it's the last episode(s) then keep the discharge date(s), otherwise # use the end of the quarter - ch_discharge_date = dplyr::if_else( - .data[["sum_latest_submission"]] == max(.data[["sum_latest_submission"]]), - .data[["ch_discharge_date"]], - .data[["record_date"]] + ch_discharge_date = dplyr::if_else(is.na(.data[["ch_discharge_date"]]), + .data[["period_end_date"]], + .data[["ch_discharge_date"]] ) ) %>% - dplyr::ungroup() + dplyr::ungroup() %>% + dplyr::select(-period_start_date, -split_episode_counter) + # Compare to Deaths Data # match ch_episode data with deaths data + # TODO should this be boxi nrs death dates instead of IT extract deaths? matched_deaths_data <- ch_episode %>% dplyr::left_join(refined_death, by = "chi" @@ -228,7 +224,7 @@ process_sc_all_care_home <- function( dplyr::ungroup() %>% # remove any episodes where discharge is now before admission, # i.e.
death was before admission - dplyr::filter( + dplyr::filter( # (n = 67) !tidyr::replace_na( .data[["ch_discharge_date"]] < .data[["ch_admission_date"]], FALSE @@ -237,39 +233,65 @@ process_sc_all_care_home <- function( # Continuous Care Home Stays # Stay will be continuous as long as the admission date is the next day or - # earlier than the previous discharge date - - ch_markers <- matched_deaths_data %>% - # ch_chi_cis + # earlier than the previous discharge date. + # creates a CIS flag for CHI across all of scotland + # and a CIS for social care ID and sending location for just that LA + ch_chi_markers <- matched_deaths_data %>% + # uses the chi to flag continuous stays. Will flag cases even if in another LA dplyr::group_by(.data[["chi"]]) %>% - dplyr::mutate( - continuous_stay_chi = tidyr::replace_na( - .data[["ch_admission_date"]] <= dplyr::lag( - .data[["ch_discharge_date"]] - ) + lubridate::days(1L), - TRUE - ), - ch_chi_cis = cumsum(.data[["continuous_stay_chi"]]) + # create variable for previous discharge date + 1 day + dplyr::mutate(previous_discharge_date_chi = dplyr::lag(.data[["ch_discharge_date"]]) + lubridate::days(1L)) %>% + # TRUE/FALSE flag for if admission date is before or equal to previous discharge date + 1 day + dplyr::mutate(continuous_stay_flag_chi = tidyr::replace_na(.data[["ch_admission_date"]] <= previous_discharge_date_chi, FALSE)) %>% + # different to code in above sections. + # we want to uniquely identify all cases where the flag is FALSE. 
and only the first case where the flag is TRUE + # to do this create a variable of the flag in the previous row + dplyr::mutate(previous_continuous_stay_flag_chi = tidyr::replace_na(dplyr::lag(.data[["continuous_stay_flag_chi"]]), FALSE)) %>% + dplyr::mutate(continuous_stay_chi = ifelse(continuous_stay_flag_chi == FALSE | + (continuous_stay_flag_chi == TRUE & previous_continuous_stay_flag_chi == FALSE), FALSE, TRUE)) %>% + dplyr::group_by( + .data[["chi"]], + .data[["continuous_stay_chi"]] ) %>% - dplyr::ungroup() %>% - # ch_sc_id_cis - # uses the social care id and sending location so can be used for - # episodes that are not attached to a CHI number - # This will restrict continuous stays to each Local Authority - dplyr::group_by(.data[["social_care_id"]], .data[["sending_location"]]) %>% - dplyr::mutate( - continuous_stay_sc = tidyr::replace_na( - .data[["ch_admission_date"]] <= dplyr::lag( - .data[["ch_discharge_date"]] - ) + lubridate::days(1L), - TRUE - ), - ch_sc_id_cis = cumsum(.data[["continuous_stay_sc"]]) + # gives cases their unique CIS identifier + dplyr::mutate(ch_chi_cis = ifelse(continuous_stay_chi == FALSE, dplyr::row_number(), NA)) %>% + dplyr::group_by( + .data[["social_care_id"]], + .data[["sending_location"]] ) %>% - dplyr::ungroup() + # fills in CIS identifier for all cases + tidyr::fill(ch_chi_cis, .direction = c("down")) + + + # This is the same but uses the social care id and sending location so can be used for + # episodes that are not attached to a CHI number + # This will restrict continuous stays to each Local Authority + sc_ch_id_markers <- ch_chi_markers %>% + dplyr::group_by(.data[["social_care_id"]], .data[["sending_location"]]) %>% + # create variable for previous discharge date + 1 day + dplyr::mutate(previous_discharge_date_sc = dplyr::lag(.data[["ch_discharge_date"]]) + lubridate::days(1L)) %>% + # TRUE/FALSE flag for if admission date is before or equal to previous discharge date + 1 day + dplyr::mutate(continuous_stay_flag_sc 
= tidyr::replace_na(.data[["ch_admission_date"]] <= previous_discharge_date_sc, FALSE)) %>% + # we want to uniquely identify all cases where the flag is FALSE. and only the first case where the flag is TRUE + # to do this create a variable of the flag in the previous row + dplyr::mutate(previous_continuous_stay_flag_sc = tidyr::replace_na(dplyr::lag(.data[["continuous_stay_flag_sc"]]), FALSE)) %>% + dplyr::mutate(continuous_stay_sc = ifelse(continuous_stay_flag_sc == FALSE | + (continuous_stay_flag_sc == TRUE & previous_continuous_stay_flag_sc == FALSE), FALSE, TRUE)) %>% + dplyr::group_by(.data[["social_care_id"]], .data[["sending_location"]], .data[["continuous_stay_sc"]]) %>% + # gives cases their unique CIS identifier + dplyr::mutate(ch_sc_id_cis = ifelse(continuous_stay_sc == FALSE, dplyr::row_number(), NA)) %>% + dplyr::group_by(.data[["social_care_id"]], .data[["sending_location"]]) %>% + # fills in CIS identifier for all cases + tidyr::fill(ch_sc_id_cis, .direction = c("down")) %>% + dplyr::select( + -previous_discharge_date_chi, -continuous_stay_flag_chi, -previous_continuous_stay_flag_chi, -continuous_stay_chi, + -previous_discharge_date_sc, -continuous_stay_flag_sc, -previous_continuous_stay_flag_sc, -continuous_stay_sc, + -dis_after_death + ) + - # Do a recode on the old reason for admission - adm_reason_recoded <- ch_markers %>% + # Do a recode on the old reason for admission for respite stays. + adm_reason_recoded <- sc_ch_id_markers %>% dplyr::group_by( .data[["social_care_id"]], .data[["sending_location"]], @@ -277,32 +299,32 @@ process_sc_all_care_home <- function( ) %>% dplyr::mutate( ch_ep_start = min(.data[["ch_admission_date"]]), + # Creates a vector for the earliest date out of the end of period and discharge date. 
+ # And will then select what ever is the latest date out of those ch_ep_end = max( pmin( - .data[["record_date"]], + .data[["period_end_date"]], .data[["ch_discharge_date"]], na.rm = TRUE ) ) ) %>% dplyr::ungroup() %>% + # Flag respite stays. dplyr::mutate( - stay_los = lubridate::time_length( - lubridate::interval(.data[["ch_ep_start"]], .data[["ch_ep_end"]]), - "weeks" - ), + stay_los = lubridate::time_length(lubridate::interval(.data[["ch_ep_start"]], .data[["ch_ep_end"]]), "weeks"), stay_respite = .data[["stay_los"]] < 6.0, - type_of_admission = dplyr::if_else( - is.na(.data[["type_of_admission"]]), - dplyr::case_when( - .data[["reason_for_admission"]] == 1L ~ 1L, + type_of_admission = dplyr::if_else(is.na(.data[["type_of_admission"]]), + dplyr::case_when(.data[["reason_for_admission"]] == 1L ~ 1L, .data[["reason_for_admission"]] == 2L ~ 2L, - stay_respite ~ 1L, + stay_respite ~ 1L, # (n = 40573) .default = 3L ), .data[["type_of_admission"]] ) - ) + ) %>% + dplyr::select(-ch_ep_start, -ch_ep_end, -stay_los, -stay_respite) + ch_data_final <- adm_reason_recoded %>% create_person_id() %>% @@ -312,6 +334,15 @@ process_sc_all_care_home <- function( ch_adm_reason = "type_of_admission", ch_nursing = "nursing_care_provision" ) %>% + # recode the care home provider description + dplyr::mutate(ch_provider_description = dplyr::case_when( # from social care syntax + ch_provider == 1 ~ "LOCAL AUTHORITY/HSCP/NHS BOARD", + ch_provider == 2 ~ "PRIVATE", + ch_provider == 3 ~ "OTHER LOCAL AUTHORITY", + ch_provider == 4 ~ "THIRD SECTOR", + ch_provider == 5 ~ "NHS BOARD", + ch_provider == 6 ~ "OTHER" + )) %>% dplyr::select( "chi", "person_id", @@ -327,6 +358,7 @@ process_sc_all_care_home <- function( "ch_chi_cis", "ch_sc_id_cis", "ch_provider", + "ch_provider_description", "ch_nursing", "ch_adm_reason", "sc_latest_submission" diff --git a/R/read_sc_all_care_home.R b/R/read_sc_all_care_home.R index 89ef7951b..b11879487 100644 --- a/R/read_sc_all_care_home.R +++ 
b/R/read_sc_all_care_home.R @@ -42,7 +42,7 @@ read_sc_all_care_home <- function(sc_dvprod_connection = phs_db_connection(dsn = } ch_data <- ch_data %>% - # Correct FY 2017 + # Correct FY 2017 as data collection only started in 2017 Q4 dplyr::mutate(period = dplyr::if_else( .data$period == "2017", "2017Q4", diff --git a/man/process_sc_all_care_home.Rd b/man/process_sc_all_care_home.Rd index 7d4234b25..f689c19f4 100644 --- a/man/process_sc_all_care_home.Rd +++ b/man/process_sc_all_care_home.Rd @@ -9,8 +9,8 @@ process_sc_all_care_home( sc_demog_lookup = read_file(get_sc_demog_lookup_path()) \%>\% slfhelper::get_chi(), refined_death = read_file(get_combined_slf_deaths_lookup_path()) \%>\% slfhelper::get_chi(), - ch_name_lookup_path = read_file(get_slf_ch_name_lookup_path()), - spd_path = read_file(get_spd_path()), + ch_name_lookup_path = get_slf_ch_name_lookup_path(), + spd_path = get_spd_path(), write_to_disk = TRUE ) } From 8f4a06abf1ed05afeb5ee338a8fc63d023a066e1 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 7 Aug 2024 17:09:06 +0100 Subject: [PATCH 80/96] update `ch_chi_cis` methodology --- R/process_sc_all_care_home.R | 54 +++++++++++++++++++++++------------- 1 file changed, 34 insertions(+), 20 deletions(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 8492268f7..4891c6423 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -237,30 +237,44 @@ process_sc_all_care_home <- function( # creates a CIS flag for CHI across all of scotland # and a CIS for social care ID and sending location for just that LA ch_chi_markers <- matched_deaths_data %>% - # uses the chi to flag continuous stays. 
Will flag cases even if in another LA + # Group the data by chi dplyr::group_by(.data[["chi"]]) %>% # create variable for previous discharge date + 1 day - dplyr::mutate(previous_discharge_date_chi = dplyr::lag(.data[["ch_discharge_date"]]) + lubridate::days(1L)) %>% - # TRUE/FALSE flag for if admission date is before or equal to previous discharge date + 1 day - dplyr::mutate(continuous_stay_flag_chi = tidyr::replace_na(.data[["ch_admission_date"]] <= previous_discharge_date_chi, FALSE)) %>% - # different to code in above sections. - # we want to uniquely identify all cases where the flag is FALSE. and only the first case where the flag is TRUE - # to do this create a variable of the flag in the previous row - dplyr::mutate(previous_continuous_stay_flag_chi = tidyr::replace_na(dplyr::lag(.data[["continuous_stay_flag_chi"]]), FALSE)) %>% - dplyr::mutate(continuous_stay_chi = ifelse(continuous_stay_flag_chi == FALSE | - (continuous_stay_flag_chi == TRUE & previous_continuous_stay_flag_chi == FALSE), FALSE, TRUE)) %>% - dplyr::group_by( - .data[["chi"]], - .data[["continuous_stay_chi"]] + # The lag function will set the first row to NA. 
We want to flag the first row + dplyr::mutate( + row_number = dplyr::row_number(), + previous_discharge_date_chi = dplyr::lag(.data[["ch_discharge_date"]]) + + lubridate::days(1L), + # if the first row is NA, set this to the ch_discharge_date + previous_discharge_date_chi = dplyr::if_else(row_number == 1, .data[["ch_discharge_date"]], + .data[["previous_discharge_date_chi"]] + ) ) %>% - # gives cases their unique CIS identifier - dplyr::mutate(ch_chi_cis = ifelse(continuous_stay_chi == FALSE, dplyr::row_number(), NA)) %>% - dplyr::group_by( - .data[["social_care_id"]], - .data[["sending_location"]] + # flag continuous stays and create marker + # calculate number of days between start_date and end_date on the previous episode + dplyr::mutate( + days_to_next_rec = floor( + lubridate::time_length(lubridate::interval( + .data[["previous_discharge_date_chi"]], + .data[["ch_admission_date"]] + ), "days") + ), + # if there is more than 1 day between (or the last ep for the individual) flag as new ep (Y) + # if there is < 1 day (i.e. 
a pause of up to 1 day or stays overlap flag as same ep (N)) + new_episode = dplyr::if_else(is.na(days_to_next_rec) | days_to_next_rec > 1, "Y", "N") ) %>% - # fills in CIS identifier for all cases - tidyr::fill(ch_chi_cis, .direction = c("down")) + # create continuous marker using flag for new stay + dplyr::mutate( + ch_chi_cis = purrr::accumulate(new_episode[-1], + .init = 1, + ~ if (.y == "Y") { + .x + 1 + } else { + .x + } + ) + ) %>% + dplyr::ungroup() # This is the same but uses the social care id and sending location so can be used for From 4c8bbf362c290c446787dc35eabc9e8792acce83 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 7 Aug 2024 17:25:59 +0100 Subject: [PATCH 81/96] update `ch_sc_id_cis` methodology --- R/process_sc_all_care_home.R | 57 +++++++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 4891c6423..ee906ae3a 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -281,26 +281,49 @@ process_sc_all_care_home <- function( # episodes that are not attached to a CHI number # This will restrict continuous stays to each Local Authority sc_ch_id_markers <- ch_chi_markers %>% + # uses social_care_id and sending_location to flag continuous stays. + # Will flag cases even if in another LA dplyr::group_by(.data[["social_care_id"]], .data[["sending_location"]]) %>% # create variable for previous discharge date + 1 day - dplyr::mutate(previous_discharge_date_sc = dplyr::lag(.data[["ch_discharge_date"]]) + lubridate::days(1L)) %>% - # TRUE/FALSE flag for if admission date is before or equal to previous discharge date + 1 day - dplyr::mutate(continuous_stay_flag_sc = tidyr::replace_na(.data[["ch_admission_date"]] <= previous_discharge_date_sc, FALSE)) %>% - # we want to uniquely identify all cases where the flag is FALSE. 
and only the first case where the flag is TRUE - # to do this create a variable of the flag in the previous row - dplyr::mutate(previous_continuous_stay_flag_sc = tidyr::replace_na(dplyr::lag(.data[["continuous_stay_flag_sc"]]), FALSE)) %>% - dplyr::mutate(continuous_stay_sc = ifelse(continuous_stay_flag_sc == FALSE | - (continuous_stay_flag_sc == TRUE & previous_continuous_stay_flag_sc == FALSE), FALSE, TRUE)) %>% - dplyr::group_by(.data[["social_care_id"]], .data[["sending_location"]], .data[["continuous_stay_sc"]]) %>% - # gives cases their unique CIS identifier - dplyr::mutate(ch_sc_id_cis = ifelse(continuous_stay_sc == FALSE, dplyr::row_number(), NA)) %>% - dplyr::group_by(.data[["social_care_id"]], .data[["sending_location"]]) %>% - # fills in CIS identifier for all cases - tidyr::fill(ch_sc_id_cis, .direction = c("down")) %>% + # The lag function will set the first row to NA. We want to flag the first row + dplyr::mutate( + row_number = dplyr::row_number(), + previous_discharge_date_sc = dplyr::lag(.data[["ch_discharge_date"]]) + + lubridate::days(1L), + # if the first row is NA, set this to the ch_discharge_date + previous_discharge_date_sc = dplyr::if_else(row_number == 1, .data[["ch_discharge_date"]], + .data[["previous_discharge_date_sc"]] + ) + ) %>% + # flag continuous stays and create marker + # calculate number of days between start_date and end_date on the previous episode + dplyr::mutate( + days_to_next_rec = floor( + lubridate::time_length(lubridate::interval( + .data[["previous_discharge_date_sc"]], + .data[["ch_admission_date"]] + ), "days") + ), + # if there is more than 1 day between (or the last ep for the individual) flag as new ep (Y) + # if there is < 1 day (i.e. 
a pause of up to 1 day or stays overlap flag as same ep (N)) + new_episode = dplyr::if_else(is.na(days_to_next_rec) | days_to_next_rec > 1, "Y", "N") + ) %>% + # create continuous marker using flag for new stay + dplyr::mutate( + ch_sc_id_cis = purrr::accumulate(new_episode[-1], + .init = 1, + ~ if (.y == "Y") { + .x + 1 + } else { + .x + } + ) + ) %>% + dplyr::ungroup() %>% + # remove variables no longer needed dplyr::select( - -previous_discharge_date_chi, -continuous_stay_flag_chi, -previous_continuous_stay_flag_chi, -continuous_stay_chi, - -previous_discharge_date_sc, -continuous_stay_flag_sc, -previous_continuous_stay_flag_sc, -continuous_stay_sc, - -dis_after_death + -previous_discharge_date_chi, -previous_discharge_date_sc, -row_number, + -days_to_next_rec, -new_episode ) From b96feb191e763f5a61b63676fb3d91f1641238f4 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 8 Aug 2024 08:35:41 +0100 Subject: [PATCH 82/96] Update notes --- R/process_sc_all_care_home.R | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index ee906ae3a..149ac767a 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -201,7 +201,6 @@ process_sc_all_care_home <- function( # Compare to Deaths Data # match ch_episode data with deaths data - # TO DO should this be boxi nrs death dates instead of IT extract deaths? matched_deaths_data <- ch_episode %>% dplyr::left_join(refined_death, by = "chi" @@ -239,10 +238,12 @@ process_sc_all_care_home <- function( ch_chi_markers <- matched_deaths_data %>% # Group the data by chi dplyr::group_by(.data[["chi"]]) %>% - # create variable for previous discharge date + 1 day - # The lag function will set the first row to NA. We want to flag the first row + # Set up previous_discharge_date + # The lag function will set the first row to NA. 
dplyr::mutate( + # We want to flag the first episode per chi with row_number row_number = dplyr::row_number(), + # create variable for previous discharge date + 1 day previous_discharge_date_chi = dplyr::lag(.data[["ch_discharge_date"]]) + lubridate::days(1L), # if the first row is NA, set this to the ch_discharge_date @@ -284,10 +285,12 @@ process_sc_all_care_home <- function( # uses social_care_id and sending_location to flag continuous stays. # Will flag cases even if in another LA dplyr::group_by(.data[["social_care_id"]], .data[["sending_location"]]) %>% - # create variable for previous discharge date + 1 day - # The lag function will set the first row to NA. We want to flag the first row + # Set up previous_discharge_date + # The lag function will set the first row to NA. dplyr::mutate( + # We want to flag the first episode per sc id and sending_location with row_number row_number = dplyr::row_number(), + # create variable for previous discharge date + 1 day previous_discharge_date_sc = dplyr::lag(.data[["ch_discharge_date"]]) + lubridate::days(1L), # if the first row is NA, set this to the ch_discharge_date From 46a668c3962e0593344e208e704d03a5b19250fc Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 12 Aug 2024 10:19:46 +0100 Subject: [PATCH 83/96] Use `right_join` --- R/process_sc_all_care_home.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 149ac767a..524be1b2f 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -43,7 +43,7 @@ process_sc_all_care_home <- function( .data[["ch_discharge_date"]] ) ) %>% - dplyr::full_join(sc_demog_lookup, # this is the correct join. + dplyr::right_join(sc_demog_lookup, # this is the correct join. 
by = c("sending_location", "social_care_id") ) %>% replace_sc_id_with_latest() %>% From f7caaea39490d378796cb5701cf63d4d1e096413 Mon Sep 17 00:00:00 2001 From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Date: Mon, 12 Aug 2024 11:11:17 +0100 Subject: [PATCH 84/96] Update process_sc_all_care_home.R added in missing variable at the end --- R/process_sc_all_care_home.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 524be1b2f..4e4db583f 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -401,6 +401,7 @@ process_sc_all_care_home <- function( "ch_provider_description", "ch_nursing", "ch_adm_reason", + "type_of_admission", "sc_latest_submission" ) %>% slfhelper::get_anon_chi() From 439e430a2fff33eeb3be85193124a5fc61ae6aad Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Fri, 16 Aug 2024 08:20:42 +0100 Subject: [PATCH 85/96] Add new nsu (#991) * Add NSU code to github Includes extracting the service user cohort to send to the chili team and then NSU extraction. 
* Style code * Add compression and package library * Style code * pick up latest geography file, and save out with compression (#983) * pick up latest geography file, and save out with compression * Style code * use `get_spd_path` Co-authored-by: James McMahon --------- Co-authored-by: marjom02 Co-authored-by: SwiftySalmon Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> Co-authored-by: James McMahon --------- Co-authored-by: Jennit07 Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Co-authored-by: marjom02 Co-authored-by: SwiftySalmon Co-authored-by: James McMahon --- .../filter_nsu_duplicates.R | 192 ++++++++++++++++++ .../get_service_use_cohort.R | 56 +++++ 2 files changed, 248 insertions(+) create mode 100644 extract_new_nsu_cohort/filter_nsu_duplicates.R create mode 100644 extract_new_nsu_cohort/get_service_use_cohort.R diff --git a/extract_new_nsu_cohort/filter_nsu_duplicates.R b/extract_new_nsu_cohort/filter_nsu_duplicates.R new file mode 100644 index 000000000..bbe8265e8 --- /dev/null +++ b/extract_new_nsu_cohort/filter_nsu_duplicates.R @@ -0,0 +1,192 @@ +################################################################################ +# Name of file - filter_nsu_duplicates.R +# Original Authors - James McMahon, Jennifer Thom +# Original Date - August 2021 +# Update - June 2024 +# +# Written/run on - RStudio Server +# Version of R - 3.6.1 +# +# Description - Use this script to filter NSU duplicates when taking a new +# extract from the CHILI team. +# +# Steps for requesting a new NSU extract for SLFs: +# 1. Send an email to [phs.chi-recordlinkage@phs.scot] to request a new NSU +# extract after the JUNE update. +# 2. Prepare a service use extract. Run script `get_service_use_cohort.R` to +# extract a list of CHI's from the most recent 'full' file. +# 3. Once the chili team come back to us, send the service use extract to +# the analyst directly. 
Do not send the list of CHIs to the mailbox for +# Information Governance purposes. +# 4. CHILI team will then process the new NSU extract based on who is not in +# the service use extract. +# 5. Run the script `filter_nsu_duplicates.R` to collect the new NSU +# extract from the analyst's SMRA space - see lines 46-47 and change +# username accordingly. Save the extract in: +# "/conf/hscdiip/SLF_Extracts/NSU" +################################################################################ + +library(dplyr) +library(purrr) +library(stringr) +library(PostcodesioR) +library(janitor) +library(fs) +library(glue) + + +## Setup------------------------------------------------------------------------ + +## Update line 42 ## +# The year of new NSU extract +year <- "2324" + +# Update lines 46-47 ## +# Analyst's username and schema to collect the data. +analyst <- "ROBERM18" +schema <- "FINAL_2" + +# setup directory +nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU") + +# latest geography file +spd_path <- get_spd_path() + +# Set up connection to SMRA----------------------------------------------------- +db_connection <- odbc::dbConnect( + odbc::odbc(), + dsn = "SMRA", + uid = Sys.getenv("USER"), + pwd = rstudioapi::askForPassword("password") +) + + +# Read data--------------------------------------------------------------------- + +# Read NSU data with duplicates from analyst's SMRA space.
+nsu_data <- + tbl(db_connection, dbplyr::in_schema(analyst, schema)) %>% + collect() %>% + clean_names() + + +# Data cleaning----------------------------------------------------------------- + +# Find the records with duplicates +nsu_pc_duplicates <- nsu_data %>% + group_by(upi_number) %>% + mutate(postcode_count = n_distinct(postcode)) %>% + ungroup() %>% + filter(postcode_count > 1) + +# Get the latest SPD +spd <- read_file(spd_path) %>% + select(pc7, date_of_introduction, date_of_deletion) + +# Load some regex to check if a postcode is valid +pc_regex <- + "([Gg][Ii][Rr] 0[Aa]{2})|((([A-Za-z][0-9]{1,2})|(([A-Za-z][A-Ha-hJ-Yj-y][0-9]{1,2})|(([A-Za-z][0-9][A-Za-z])|([A-Za-z][A-Ha-hJ-Yj-y][0-9][A-Za-z]?))))\\s?[0-9][A-Za-z]{2})" + +# Main code to check postcodes in various ways +nsu_pc_duplicates_checked <- nsu_pc_duplicates %>% + select( + upi_number, + start_date, + postcode, + date_address_changed, + gp_prac_no, + date_gp_acceptance + ) %>% + # First check against the regex + mutate(invalid_pc = str_detect(postcode, pc_regex, negate = TRUE)) %>% + # Now check against the SPD + left_join(spd, by = c("postcode" = "pc7")) %>% + # Now check against postcodes.io + left_join( + # Filter to only postcodes that need checking + group_by(., upi_number) %>% + # UPI has no postcode which matched the SPD + filter( + all(is.na( + date_of_introduction + )) + ) %>% + ungroup() %>% + # No need to check invalid postcodes + filter(!invalid_pc) %>% + # Pass the unique list of postcodes to + # postcodes.io + pull(postcode) %>% + unique() %>% + list(postcodes = .) 
%>% + # This function will fail if more than 100 pcs + PostcodesioR::bulk_postcode_lookup() %>% + # Parse the result, we only want the country + map_dfr(~ tibble( + postcode = .x$query, + # Create an order to make sorting nice later + country = ordered(.x$result$country, c("Scotland", "Wales", "England")) + )) + ) %>% + # Sort so that the 'best' postcode is top of the list + mutate(priority = case_when( + # If they matched SPD and have no date of deletion + !is.na(date_of_introduction) & is.na(date_of_deletion) ~ 0, + # If they matched SPD (but have a date of deletion) + !is.na(date_of_introduction) ~ 1, + # If they matched the postcodes.io API request + !is.na(country) ~ 2, + # Invalid postcodes come last + invalid_pc ~ Inf, + TRUE ~ 99 + )) %>% + arrange( + upi_number, + priority, + # newest introduced come first + desc(date_of_introduction), + # latest deleted will be first + desc(date_of_deletion), + # Scotland will be preferred etc. + country + ) %>% + # Flag each row with the assigned priority + group_by(upi_number) %>% + mutate(keep_priority = row_number()) %>% + ungroup() + +# Check +nsu_pc_duplicates_checked %>% + count(priority, keep_priority) + +final_data <- nsu_data %>% + # Filter the main dataset to remove + # the duplicate postcodes we decided not to keep + anti_join(nsu_pc_duplicates_checked %>% + filter(keep_priority > 1)) %>% + # Filter any remaining duplicates (none on this test) + distinct(upi_number, .keep_all = TRUE) %>% + select( + chi = upi_number, + dob = date_of_birth, + postcode, + gpprac = gp_prac_no, + gender = sex + ) %>% + mutate( + year = year, .before = everything(), + dob = as.Date(dob), + across(c(gender, gpprac), as.integer) + ) %>% + arrange(chi) %>% + # Save as anon chi on disk + slfhelper::get_anon_chi() + +# Save data out to be used +final_data %>% + arrow::write_parquet(path(nsu_dir, glue::glue("anon-All_CHIs_20{year}.parquet")), + compression = "zstd" + ) + + +## End of Script ## diff --git a/extract_new_nsu_cohort/get_service_use_cohort.R
b/extract_new_nsu_cohort/get_service_use_cohort.R new file mode 100644 index 000000000..c29063a32 --- /dev/null +++ b/extract_new_nsu_cohort/get_service_use_cohort.R @@ -0,0 +1,56 @@ +################################################################################ +# Name of file - get_service_use_cohort.R +# Original Authors - Jennifer Thom +# Original Date - August 2021 +# Update - June 2024 +# +# Written/run on - RStudio Server +# Version of R - 3.6.1 +# +# Description - Use this script to return a list of CHIs from the most recent +# SLF episode file (service users) in preparation for requesting +# a new NSU cohort for the latest 'full year' +# +# Steps for requesting a new NSU extract for SLFs: +# 1. Send an email to [phs.chi-recordlinkage@phs.scot] to request a new NSU +# extract after the JUNE update. +# 2. Prepare a service use extract. Run script `get_service_use_cohort.R` to +# extract a list of CHI's from the most recent 'full' file. +# 3. Once the chili team come back to us, send the service use extract to +# the analyst directly. Do not send the list of CHIs to the mailbox for +# Information Governance purposes. +# 4. CHILI team will then process the new NSU extract based on who is not in +# the service use extract. +# 5. Run the script `filter_nsu_duplicates.R` to collect the new NSU +# extract from the analysts SMRA space - see lines 46-47 and change +# username accordingly. 
Save the extract in: +# "/conf/hscdiip/SLF_Extracts/NSU" +# +################################################################################ + +# Setup------------------------------------------------------------------------- +library(fs) +library(tidyverse) + +## Update ## +# The year of the new NSU extract we want +year <- "2324" + +nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU/") + +# Read data--------------------------------------------------------------------- +episode_file <- slfhelper::read_slf_episode(year, col_select = "anon_chi") %>% + # Remove blank CHI + dplyr::filter(!is.na(anon_chi)) %>% + # Get CHI version for sending to the CHILI team. + # For saving this on disk we want the anon-chi version, save this after sending + # to the CHILI team. + slfhelper::get_chi() + +# Save a parquet file +episode_file %>% + arrow::write_parquet(path(nsu_dir, glue::glue("service_user_extract_{year}.parquet")), + compression = "zstd" + ) + +## End of Script ## From 72a66f850bfedaf2e06fea4a8dca4c47d94cd709 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 19 Aug 2024 14:48:41 +0100 Subject: [PATCH 86/96] update reference --- R/00-update_refs.R | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/00-update_refs.R b/R/00-update_refs.R index c45f10e9c..a4a21ea73 100644 --- a/R/00-update_refs.R +++ b/R/00-update_refs.R @@ -7,7 +7,7 @@ #' #' @family initialisation latest_update <- function() { - "Jun_2024" + "Sep_2024" } #' Previous update @@ -61,7 +61,7 @@ previous_update <- function(months_ago = 3L, override = NULL) { #' #' @family initialisation get_dd_period <- function() { - "Jul16_Mar24" + "Jul16_Jun24" } #' The latest financial year for Cost uplift setting @@ -74,7 +74,7 @@ get_dd_period <- function() { #' #' @family initialisation latest_cost_year <- function() { - "2223" + "2324" } #' The year list for slf to update @@ -88,7 +88,7 @@ latest_cost_year <- function() { #' @family initialisation years_to_run <- function() { fy_start_2digit <- 17 
- fy_end_2digit <- 23 + fy_end_2digit <- 24 years_to_run <- paste0( fy_start_2digit:fy_end_2digit, (fy_start_2digit + 1):(fy_end_2digit + 1) From fce32496c1f29f6e56a03b3469a12811f9b2f7f4 Mon Sep 17 00:00:00 2001 From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Date: Tue, 20 Aug 2024 11:25:08 +0100 Subject: [PATCH 87/96] Reduce dependencies (#984) * removing packages that I don't think get used anywhere. and removing references to fst and spss files * Update documentation * Update authors in description * [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/10469297653/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/984#issuecomment-2298498525 Signed-off-by: check-spelling-bot on-behalf-of: @check-spelling * [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/10469690723/attempts/1https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/10469690723/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/984#issuecomment-2298506158 Signed-off-by: check-spelling-bot on-behalf-of: @check-spelling --------- Signed-off-by: check-spelling-bot Co-authored-by: marjom02 Co-authored-by: SwiftySalmon Co-authored-by: Jennifer Thom Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> Co-authored-by: Jennit07 --- .github/actions/spelling/expect.txt | 30 +++++++++++++++++------------ DESCRIPTION | 12 +----------- R/read_file.R | 8 -------- man/read_file.Rd | 2 -- 4 files changed, 19 insertions(+), 33 deletions(-) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 4eb473f0f..1abb216ea 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -3,9 +3,10 @@ Accom Admissio admloc admtype -adpe +ADPE adtf alstr +anomymous arrivalmode arth 
atlassian @@ -20,12 +21,12 @@ bodyloc boxi CAK callr -canx +Canx carehome careinspectorate categorises cattend -ccyy +CCYY cdn cennum CEREBROVASC @@ -36,10 +37,11 @@ chpstart cij Classificat cmh -cnws +CNWs codecov -comhairle +Comhairle commhosp +communicty congen copd costinc @@ -53,6 +55,7 @@ customise cvd dataframe datamart +datas datazone datediff dateformat @@ -158,7 +161,7 @@ keytime kis knitr lcap -lcho +LCHO lgl linetype lintr @@ -168,9 +171,9 @@ ltd lubridate magrittr markdownguide -matern -mcbride -mcmahon +Matern +mcnicol +megan microsoft MIU MMMYY @@ -203,6 +206,7 @@ parkinsons patflow pattype PCEC +pcs PERTH PHIBCS phs @@ -215,13 +219,15 @@ PLICS popluation Posix postcodes -ppas +Postcodesio +PPAs prac praccode ptypes purrr quickstart rankdir +rbindlist rcmdcheck rdd rdname @@ -232,13 +238,13 @@ readr readxl reasonwait recid +recordlinkage refailure reflectoring refsource reftype relaint renviron -returnsthe rlang rmarkdown Rnw @@ -268,7 +274,7 @@ simd slf slfhelper smr -smra +SMRA smrtype sourcedev sparra diff --git a/DESCRIPTION b/DESCRIPTION index a25794864..28a8303ff 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -3,12 +3,8 @@ Title: Create the Source Linkage Files Version: 0.0.0.9000 Authors@R: c( person("Public Health Scotland", , , "phs.source@phs.scot", role = c("cre", "cph")), + person("Megan", "McNicol", , "megan.mcnicol2@phs.scot", role = "aut"), person("Jennifer", "Thom", , "jennifer.thom@phs.scot", role = "aut"), - person("James", "McMahon", , "james.mcmahon@phs.scot", role = "aut", - comment = c(ORCID = "0000-0002-5380-2029")), - person("Catherine", "Holland", , "catherine.holland@phs.scot", role = "aut", - comment = c(ORCID = "0000-0002-3259-5745")), - person("Bateman", "McBride", , "Bateman.Mcbride@phs.scot", role = "aut"), person("Zihao", "Li", , "zihao.li@phs.scot", role = "aut", comment = c(ORCID = "0000-0002-5178-2124")) ) @@ -25,25 +21,19 @@ Imports: data.table (>= 1.14.6), dbplyr (>= 2.3.1), dplyr (>= 1.1.1), - dtplyr (>= 1.3.0), 
fs (>= 1.6.1), - fst (>= 0.9.8), future (>= 1.33.0), future.callr (>= 0.8.1), - glue (>= 1.6.2), - haven (>= 2.5.2), hms (>= 1.1.0), janitor (>= 2.2.0), keyring (>= 1.3.0), lubridate (>= 1.9.2), magrittr (>= 2.0.3), odbc (>= 1.3.1), - openssl (>= 2.0.5), openxlsx (>= 4.2.5), phsmethods (>= 0.2.2), phsopendata (>= 0.0.1.0), purrr (>= 1.0.1), - qs (>= 0.25.5), R.utils (>= 2.12.2), readr (>= 2.1.0), rlang (>= 1.1.0), diff --git a/R/read_file.R b/R/read_file.R index be0a6fc65..022f4cc26 100644 --- a/R/read_file.R +++ b/R/read_file.R @@ -3,8 +3,6 @@ #' @description Read a file, the function chosen to read the file is dependant #' on the file path. #' * `.rds` uses [readr::read_rds()]. -#' * `.fst` uses [fst::read_fst()]. -#' * `.sav` and `.zsav` use [haven::read_spss()]. #' * `.csv` and `.gz` use [readr::read_csv()]. Note that this assumes any file #' ending with `.gz` is a zipped CSV which isn't necessarily true! #' * `.parquet` uses [arrow::read_parquet()]. @@ -19,9 +17,6 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) { valid_extensions <- c( "rds", "rds.gz", - "fst", - "sav", - "zsav", "csv", "csv.gz", "parquet" @@ -60,9 +55,6 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) { data <- switch(ext, "rds" = readr::read_rds(file = path), "rds.gz" = readr::read_rds(file = path), - "fst" = tibble::as_tibble(fst::read_fst(path = path)), - "sav" = haven::read_spss(file = path, ...), - "zsav" = haven::read_spss(file = path, ...), "csv" = readr::read_csv(file = path, ..., show_col_types = FALSE), "csv.gz" = readr::read_csv(file = path, ..., show_col_types = FALSE), "parquet" = arrow::read_parquet( diff --git a/man/read_file.Rd b/man/read_file.Rd index 1ef351342..d4d94e0df 100644 --- a/man/read_file.Rd +++ b/man/read_file.Rd @@ -27,8 +27,6 @@ Read a file, the function chosen to read the file is dependant on the file path. \itemize{ \item \code{.rds} uses \code{\link[readr:read_rds]{readr::read_rds()}}. 
-\item \code{.fst} uses \code{\link[fst:write_fst]{fst::read_fst()}}. -\item \code{.sav} and \code{.zsav} use \code{\link[haven:read_spss]{haven::read_spss()}}. \item \code{.csv} and \code{.gz} use \code{\link[readr:read_delim]{readr::read_csv()}}. Note that this assumes any file ending with \code{.gz} is a zipped CSV which isn't necessarily true! \item \code{.parquet} uses \code{\link[arrow:read_parquet]{arrow::read_parquet()}}. From 7146a119d2870ee6027da2934584918c43d4709c Mon Sep 17 00:00:00 2001 From: marjom02 Date: Wed, 21 Aug 2024 13:29:40 +0100 Subject: [PATCH 88/96] minor changes to social care code --- R/process_sc_all_care_home.R | 2 +- R/process_sc_all_home_care.R | 2 +- R/replace_sc_id_with_latest.R | 5 ++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 4e4db583f..73232d5e6 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -401,7 +401,7 @@ process_sc_all_care_home <- function( "ch_provider_description", "ch_nursing", "ch_adm_reason", - "type_of_admission", + # "type_of_admission", "sc_latest_submission" ) %>% slfhelper::get_anon_chi() diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R index 275001c64..52cab0568 100644 --- a/R/process_sc_all_home_care.R +++ b/R/process_sc_all_home_care.R @@ -37,7 +37,7 @@ process_sc_all_home_care <- function( # Match on demographic data --------------------------------------- matched_hc_data <- replaced_dates %>% - dplyr::left_join( + dplyr::right_join( sc_demog_lookup, by = c("sending_location", "social_care_id") ) %>% diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R index 9478ebefe..215816753 100644 --- a/R/replace_sc_id_with_latest.R +++ b/R/replace_sc_id_with_latest.R @@ -48,6 +48,9 @@ replace_sc_id_with_latest <- function(data) { .data$latest_sc_id, .data$social_care_id ) - ) + ) %>% + dplyr::filter(!(is.na(period))) + + return(return_data) } From 
5e31183d96de789eddb109d24b3f0b56c2f61278 Mon Sep 17 00:00:00 2001 From: SwiftySalmon Date: Wed, 21 Aug 2024 12:33:27 +0000 Subject: [PATCH 89/96] Style code --- R/process_sc_all_care_home.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 73232d5e6..f090008d6 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -401,7 +401,7 @@ process_sc_all_care_home <- function( "ch_provider_description", "ch_nursing", "ch_adm_reason", - # "type_of_admission", + # "type_of_admission", "sc_latest_submission" ) %>% slfhelper::get_anon_chi() From 962f7a3599546b87e80bb633a38f330394f5896a Mon Sep 17 00:00:00 2001 From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:38:47 +0100 Subject: [PATCH 90/96] Update process_sc_all_care_home.R --- R/process_sc_all_care_home.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index f090008d6..524be1b2f 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -401,7 +401,6 @@ process_sc_all_care_home <- function( "ch_provider_description", "ch_nursing", "ch_adm_reason", - # "type_of_admission", "sc_latest_submission" ) %>% slfhelper::get_anon_chi() From b24399e1e7c6f2d18946185ec831b03cbea02855 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Tue, 27 Aug 2024 14:52:31 +0100 Subject: [PATCH 91/96] Update NEWS.md --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index 88dcca1b3..5a07b8266 100644 --- a/NEWS.md +++ b/NEWS.md @@ -7,6 +7,10 @@ * New cup marker for Acute and GP OOH * Bug fix: * person id for SDS and client + * DD data: + * The variable `cij_delay` is only attached to the `recid = DD` rows, not all the rows in the CIJ as we'd expect. 
+ * The `cij_delay` variable is showing as 1 against the Delay records (NA otherwise) - We expect this to be `TRUE/FALSE` to match the other flags. + * The `cij_ppa` variable isn't attaching to the `recid = DD` rows. # June 2024 Update - released 06-Jun-24 * Update of 2017/18 onwards to include bug fixes within the files. From 86424a57f3c806137b99907a0756bb78c7ec566b Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Mon, 16 Sep 2024 10:14:26 +0100 Subject: [PATCH 92/96] merge Sep2024 fix into sep24 branch (#1003) * update write_tests_xlsx * update process_refined_death * fix tests by removing get_chi * add 2425 * Style code * fix NA matches in refined_death * move latest_cost_year() to cost_uplift() * improve automation * Update documentation * fix `cij_ppa` in DD data * fix bugs of dd and populate cij_delay back to episodes * Style code * keep all variable for delayed discharge episodes * remove dummy variable names from dd_date * Style code * remove `deceased_boxi` variable - bug * remove `create_person_id`. 
Its matched in client * remove `create_person_id` * Update `run_slf_manually` scripts * further remove person_id * fix duplicate row introduced by adding death * remove duplicated chi when joining death data * TODO: check distinct death data by chi while keeping chi==NA records * add parameter for year * fix duplicate in add_activity_after_death_flag * Update `check_year_valid` * Declare DN variables * Style code * remove redundant variables --------- Co-authored-by: Zihao Li Co-authored-by: lizihao-anu Co-authored-by: Jennit07 --- R/00-update_refs.R | 13 ---- R/add_activity_after_death_flag.R | 16 +--- R/check_year_valid.R | 2 +- R/cost_uplift.R | 18 +++++ R/create_episode_file.R | 8 ++ R/create_individual_file.R | 2 +- R/join_deaths_data.R | 3 +- R/link_delayed_discharge_eps.R | 73 ++++++++++++------ R/process_extract_alarms_telecare.R | 2 +- R/process_extract_care_home.R | 2 +- R/process_extract_home_care.R | 2 +- R/process_extract_sds.R | 2 +- R/process_lookup_sc_client.R | 2 +- R/process_refined_death.R | 3 +- R/process_sc_all_alarms_telecare.R | 12 +-- R/process_sc_all_care_home.R | 6 +- R/process_sc_all_home_care.R | 2 - R/process_sc_all_sds.R | 12 +-- R/process_tests_episode_file.R | 4 +- R/process_tests_individual_file.R | 4 +- R/process_tests_sc_all_at_episodes.R | 3 - R/process_tests_sc_all_ch_episodes.R | 3 - R/process_tests_sc_all_hc_episodes.R | 3 - R/process_tests_sc_all_sds_episodes.R | 3 - R/write_tests_xlsx.R | 47 +++++++++--- .../run_episode_file_1718.R | 4 - .../run_episode_file_1819.R | 4 - .../run_episode_file_1920.R | 4 - .../run_episode_file_2021.R | 4 - .../run_episode_file_2122.R | 4 - .../run_episode_file_2223.R | 4 - .../run_episode_file_2324.R | 4 - .../run_episode_file_2425.R | 75 +++++++++++++++++++ .../run_individual_file_2425.R | 9 +++ Run_SLF_Files_targets/run_targets_2425.R | 9 +++ _targets.R | 2 +- man/latest_cost_year.Rd | 7 +- 37 files changed, 247 insertions(+), 130 deletions(-) create mode 100644 
Run_SLF_Files_manually/run_episode_file_2425.R create mode 100644 Run_SLF_Files_manually/run_individual_file_2425.R create mode 100644 Run_SLF_Files_targets/run_targets_2425.R diff --git a/R/00-update_refs.R b/R/00-update_refs.R index a4a21ea73..33022edf6 100644 --- a/R/00-update_refs.R +++ b/R/00-update_refs.R @@ -64,19 +64,6 @@ get_dd_period <- function() { "Jul16_Jun24" } -#' The latest financial year for Cost uplift setting -#' -#' @description Get the latest year for cost uplift -#' -#' @return The financial year format -#' -#' @export -#' -#' @family initialisation -latest_cost_year <- function() { - "2324" -} - #' The year list for slf to update #' #' @description Get the vector of years to update slf diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index a45e4296a..5e800c80b 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -26,7 +26,7 @@ add_activity_after_death_flag <- function( by = "chi", suffix = c("", "_boxi") ) %>% - dplyr::filter(.data$deceased == TRUE | .data$deceased_boxi == TRUE) %>% + dplyr::filter(.data$deceased == TRUE) %>% dplyr::distinct() @@ -72,16 +72,6 @@ add_activity_after_death_flag <- function( )) - # Check and print error message for records which already are TRUE for the deceased variable in the episode file, but this doesn't match the - # BOXI deceased variable - check_deceased_match <- flag_data %>% - dplyr::filter(.data$deceased != .data$deceased_boxi) - - if (nrow(check_deceased_match) != 0) { - warning("There were records in the episode file which have a deceased variable which does not match the BOXI NRS deceased variable") - } - - # Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried # over from previous financial years flag_data <- flag_data %>% @@ -94,13 +84,15 @@ add_activity_after_death_flag <- function( final_data <- data %>% dplyr::left_join( flag_data, + # TODO: this 
join_by is not 100% accurate. Consider use ep_file_row_id to join by = c("year", "chi", "record_keydate1", "record_keydate2"), na_matches = "never" ) %>% dplyr::mutate(death_date = lubridate::as_date(ifelse(is.na(death_date) & !(is.na(death_date_boxi)), death_date_boxi, death_date ))) %>% - dplyr::select(-death_date_boxi) + dplyr::select(-death_date_boxi) %>% + dplyr::distinct() diff --git a/R/check_year_valid.R b/R/check_year_valid.R index 2197d8c0e..217aa1c2b 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -46,7 +46,7 @@ check_year_valid <- function( return(FALSE) } else if (year >= "2425" && type %in% "sparra") { return(FALSE) - } else if (year >= "2425" && type %in% c("ch", "hc", "sds", "at")) { + } else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at")) { return(FALSE) } diff --git a/R/cost_uplift.R b/R/cost_uplift.R index f14600da6..abbbd9b5a 100644 --- a/R/cost_uplift.R +++ b/R/cost_uplift.R @@ -86,3 +86,21 @@ lookup_uplift <- function(data) { return(data) } + +#' The latest financial year for Cost uplift setting +#' +#' @description Get the latest year for cost uplift +#' latest_cost_year() is hard coded in cost_uplift(). +#' 2223 is not changed automatically with time passes. +#' It is changed only when we get a new instruction from somewhere about cost uplift. +#' Do not change unless specific instructions. +#' Changing this means that we need to change cost_uplift(). 
+#' +#' @return The financial year format +#' +#' @export +#' +#' @family initialisation +latest_cost_year <- function() { + "2223" +} diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 7909e2e7f..dd22dbc1d 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -179,6 +179,14 @@ create_episode_file <- function( ) } + if (!check_year_valid(year, type = "dn")) { + episode_file <- episode_file %>% + dplyr::mutate( + ccm = NA, + total_no_dn_contacts = NA + ) + } + if (anon_chi_out) { episode_file <- slfhelper::get_anon_chi(episode_file) } diff --git a/R/create_individual_file.R b/R/create_individual_file.R index dc15fcb0e..f826294d1 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -78,7 +78,7 @@ create_individual_file <- function( if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { individual_file <- individual_file %>% - aggregate_by_chi(exclude_sc_var = TRUE) + aggregate_by_chi(year = year, exclude_sc_var = TRUE) } else { individual_file <- individual_file %>% aggregate_ch_episodes() %>% diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R index d2fc51b91..5e61a2082 100644 --- a/R/join_deaths_data.R +++ b/R/join_deaths_data.R @@ -15,7 +15,8 @@ join_deaths_data <- function( return( data %>% dplyr::left_join( - slf_deaths_lookup, + slf_deaths_lookup %>% + dplyr::distinct(chi, .keep_all = TRUE), by = "chi", na_matches = "never", relationship = "many-to-one" diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index a28ee3b0f..b80b35807 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -14,6 +14,8 @@ link_delayed_discharge_eps <- function( dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi()) { cli::cli_alert_info("Link delayed discharge to episode file function started at {Sys.time()}") + names_ep <- names(episode_file) + episode_file <- episode_file %>% dplyr::mutate( # remember to revoke the cij_end_date 
with dummy_cij_end @@ -286,6 +288,11 @@ link_delayed_discharge_eps <- function( )) %>% dplyr::group_by(.data$chi, .data$cij_marker) %>% dplyr::mutate(cij_delay = max(.data$has_delay)) %>% + dplyr::mutate(cij_delay = dplyr::if_else(cij_delay == "0", + FALSE, + TRUE, + missing = NA + )) %>% dplyr::ungroup() %>% # add yearstay and monthly beddays # count_last = TRUE because DD counts last day and not the first @@ -299,37 +306,43 @@ link_delayed_discharge_eps <- function( yearstay = rowSums(dplyr::pick(dplyr::ends_with("_beddays"))) ) %>% # tidy up and rename columns to match the format of episode files + # keep variables from ep files dplyr::select( + -c( + "ep_file_row_id", + "year", + "recid", + "record_keydate1", + "record_keydate2", + "postcode", + "hbtreatcode", + "location", + "spec", + ## following are dummy variables + "cij_start_date_lower", + "cij_end_date_upper", + "cij_end_month", + "is_dummy_cij_start", + "dummy_cij_start", + "is_dummy_cij_end", + "dummy_cij_end", + "datediff_start", + "datediff_end", + "has_delay", + "is_dummy_keydate2", + "dummy_keydate2", + "dummy_id" + ) + ) %>% + dplyr::rename( "year" = "year_dd", "recid" = "recid_dd", "record_keydate1" = "record_keydate1_dd", "record_keydate2" = "record_keydate2_dd", - "smrtype", - "chi", - "gender", - "dob", - "age", - "gpprac", "postcode" = "postcode_dd", - "dd_responsible_lca", "hbtreatcode" = "hbtreatcode_dd", - "delay_end_reason", - "primary_delay_reason", - "secondary_delay_reason", - "cij_marker", - "cij_start_date", - "cij_end_date", - "cij_pattype_code", - "cij_ipdc", - "cij_admtype", - "cij_adm_spec", - "cij_dis_spec", - "cij_delay", - "location", "spec" = "spec_dd", - "dd_quality", - dplyr::ends_with("_beddays"), - "yearstay" + "location" = "location_dd" ) %>% # Combine DD with episode data dplyr::bind_rows( @@ -345,7 +358,19 @@ link_delayed_discharge_eps <- function( "dummy_cij_end" ) ) - ) + ) %>% + # populate cij_delay dd details back to ep + dplyr::group_by(chi, cij_marker) %>% + 
dplyr::mutate( + has_dd = any(recid == "DD"), + delay_dd = any(cij_delay) + ) %>% + dplyr::ungroup() %>% + dplyr::mutate(cij_delay = dplyr::if_else(has_dd, + delay_dd, + cij_delay + )) %>% + dplyr::select(-c("has_dd", "delay_dd")) return(linked_data) } diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R index 6c481c3a4..9d47dd5f0 100644 --- a/R/process_extract_alarms_telecare.R +++ b/R/process_extract_alarms_telecare.R @@ -41,7 +41,7 @@ process_extract_alarms_telecare <- function( "smrtype", "chi", "dob", - "person_id", + # "person_id", "gender", "postcode", "sc_send_lca", diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R index db7997061..dbf817af4 100644 --- a/R/process_extract_care_home.R +++ b/R/process_extract_care_home.R @@ -115,7 +115,7 @@ process_extract_care_home <- function( "recid", "smrtype", "chi", - "person_id", + # "person_id", "dob", "gender", "postcode", diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R index 831496bd2..651be172d 100644 --- a/R/process_extract_home_care.R +++ b/R/process_extract_home_care.R @@ -96,7 +96,7 @@ process_extract_home_care <- function( "cost_total_net", "hc_provider", "hc_reablement", - "person_id" + # "person_id" ) %>% slfhelper::get_anon_chi() diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R index ce317c8b9..f8e5f8579 100644 --- a/R/process_extract_sds.R +++ b/R/process_extract_sds.R @@ -41,7 +41,7 @@ process_extract_sds <- function( "smrtype", "chi", "dob", - "person_id", + # "person_id", "gender", "postcode", "sc_send_lca", diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index f1e03ee95..b8fc2fb6d 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -195,7 +195,7 @@ process_lookup_sc_client <- ) %>% dplyr::arrange(.data$chi, .data$count_not_known) %>% dplyr::distinct(.data$chi, .keep_all = TRUE) %>% - dplyr::select(-.data$sending_location) %>% + 
dplyr::select(-.data$sending_location, -.data$count_not_known) %>% slfhelper::get_anon_chi() if (write_to_disk) { diff --git a/R/process_refined_death.R b/R/process_refined_death.R index 48f14fd43..dc7663221 100644 --- a/R/process_refined_death.R +++ b/R/process_refined_death.R @@ -50,11 +50,12 @@ process_refined_death <- function( fy = phsmethods::extract_fin_year(death_date), fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7))) ) + # TODO: check distinct death data by chi while keeping chi==NA records if (write_to_disk) { write_file( refined_death, - get_combined_slf_deaths_lookup_path() + get_combined_slf_deaths_lookup_path(create = TRUE) ) } diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R index c583fa8a7..aafc3d727 100644 --- a/R/process_sc_all_alarms_telecare.R +++ b/R/process_sc_all_alarms_telecare.R @@ -87,11 +87,11 @@ process_sc_all_alarms_telecare <- function( # Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function) data <- replace_sc_id_with_latest(data) - data$person_id <- paste0( - data$sending_location, - "-", - data$social_care_id - ) + # data$person_id <- paste0( + # data$sending_location, + # "-", + # data$social_care_id + # ) # Deal with episodes that have a package across quarters data[, pkg_count := seq_len(.N), by = list( @@ -125,7 +125,7 @@ process_sc_all_alarms_telecare <- function( dob = data.table::last(dob), postcode = data.table::last(postcode), recid = data.table::last(recid), - person_id = data.table::last(person_id), + # person_id = data.table::last(person_id), sc_send_lca = data.table::last(sc_send_lca) ), by = list( sending_location, diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 524be1b2f..5478d50cc 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -203,7 +203,8 @@ process_sc_all_care_home <- function( # match ch_episode data with deaths data matched_deaths_data <- ch_episode %>% 
dplyr::left_join(refined_death, - by = "chi" + by = "chi", + na_matches = "never" ) %>% # compare discharge date with NRS and CHI death date # if either of the dates are 5 or fewer days before discharge @@ -367,7 +368,6 @@ process_sc_all_care_home <- function( ch_data_final <- adm_reason_recoded %>% - create_person_id() %>% dplyr::rename( record_keydate1 = "ch_admission_date", record_keydate2 = "ch_discharge_date", @@ -385,7 +385,7 @@ process_sc_all_care_home <- function( )) %>% dplyr::select( "chi", - "person_id", + # "person_id", "gender", "dob", "postcode", diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R index 52cab0568..352c4fff3 100644 --- a/R/process_sc_all_home_care.R +++ b/R/process_sc_all_home_care.R @@ -194,8 +194,6 @@ process_sc_all_home_care <- function( TRUE ~ "HC-Unknown" ) ) %>% - # person_id - create_person_id(type = "SC") %>% # compute lca variable from sending_location dplyr::mutate( sc_send_lca = convert_sc_sending_location_to_lca(.data$sending_location) diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R index 5306c0956..c5b7d43eb 100644 --- a/R/process_sc_all_sds.R +++ b/R/process_sc_all_sds.R @@ -128,11 +128,11 @@ process_sc_all_sds <- function( "SDS", convert_sc_sending_location_to_lca(sending_location) )] - sds_full_clean_long$person_id <- paste0( - sds_full_clean_long$sending_location, - "-", - sds_full_clean_long$social_care_id - ) + # sds_full_clean_long$person_id <- paste0( + # sds_full_clean_long$sending_location, + # "-", + # sds_full_clean_long$social_care_id + # ) # Group, arrange and create flags for episodes sds_full_clean_long[, @@ -176,7 +176,7 @@ process_sc_all_sds <- function( dob = data.table::last(dob), postcode = data.table::last(postcode), recid = data.table::last(recid), - person_id = data.table::last(person_id), + # person_id = data.table::last(person_id), sc_send_lca = data.table::last(sc_send_lca) ), by = list(sending_location, social_care_id, smrtype, episode_counter)] 
rm(sds_full_clean_long) diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R index ccf8e495c..c45992938 100644 --- a/R/process_tests_episode_file.R +++ b/R/process_tests_episode_file.R @@ -31,7 +31,9 @@ process_tests_episode_file <- function(data, year) { recid = TRUE ) %>% dplyr::arrange(.data[["recid"]]) %>% - write_tests_xlsx(sheet_name = "ep_file", year, workbook_name = "ep_file") + write_tests_xlsx(sheet_name = stringr::str_glue({ + "ep_file_{year}" + }), workbook_name = "ep_file") return(comparison) } diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 9643a4f3f..900ce7f03 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -34,7 +34,9 @@ process_tests_individual_file <- function(data, year) { old_data = produce_individual_file_tests(old_data), new_data = produce_individual_file_tests(data) ) %>% - write_tests_xlsx(sheet_name = "indiv_file", year, workbook_name = "indiv_file") + write_tests_xlsx(sheet_name = stringr::str_glue({ + "indiv_file_{year}" + }), workbook_name = "indiv_file") return(comparison) } diff --git a/R/process_tests_sc_all_at_episodes.R b/R/process_tests_sc_all_at_episodes.R index c23a4f6ed..8b5580334 100644 --- a/R/process_tests_sc_all_at_episodes.R +++ b/R/process_tests_sc_all_at_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_at_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_sc_all_episodes_tests( read_file(get_sc_at_episodes_path(update = previous_update())) diff --git a/R/process_tests_sc_all_ch_episodes.R b/R/process_tests_sc_all_ch_episodes.R index d42eca2c7..7e9655c06 100644 --- a/R/process_tests_sc_all_ch_episodes.R +++ b/R/process_tests_sc_all_ch_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_ch_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = 
produce_sc_all_episodes_tests( read_file(get_sc_ch_episodes_path(update = previous_update())) diff --git a/R/process_tests_sc_all_hc_episodes.R b/R/process_tests_sc_all_hc_episodes.R index d037e7908..7194790c0 100644 --- a/R/process_tests_sc_all_hc_episodes.R +++ b/R/process_tests_sc_all_hc_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_hc_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_sc_all_episodes_tests( read_file(get_sc_hc_episodes_path(update = previous_update())) diff --git a/R/process_tests_sc_all_sds_episodes.R b/R/process_tests_sc_all_sds_episodes.R index 91c32d450..cf87a671c 100644 --- a/R/process_tests_sc_all_sds_episodes.R +++ b/R/process_tests_sc_all_sds_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_sds_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_sc_all_episodes_tests( read_file(get_sc_sds_episodes_path(update = previous_update())) diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index ec3cc5705..6847cc977 100644 --- a/R/write_tests_xlsx.R +++ b/R/write_tests_xlsx.R @@ -24,17 +24,42 @@ write_tests_xlsx <- function(comparison_data, "cross_year" )) { # Set up the workbook ---- - tests_workbook_name <- dplyr::case_when( - is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"), - !is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_{year}_ep_file_tests"), - is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"), - !is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_{year}_indiv_file_tests"), - is.null(year) & workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"), - is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), 
"_sandpit_extract_tests"), - is.null(year) & workbook_name == "cross_year" ~ stringr::str_glue(latest_update(), "_cross_year_tests"), - !is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"), - !is.null(year) & workbook_name == "extract" ~ stringr::str_glue(latest_update(), "_{year}_extract_tests") - ) + if (workbook_name == "ep_file") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_ep_file_tests") + } + } + if (workbook_name == "indiv_file") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_indiv_file_tests") + } + } + if (workbook_name == "lookup") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_lookups_tests") + } + } + if (workbook_name == "sandpit") { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_sandpit_extract_tests") + } + if (workbook_name == "cross_year") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_cross_year_tests") + } + } + if (workbook_name == "extract") { + if (is.null(year)) { + } else { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_{year}_extract_tests") + } + } + tests_workbook_path <- fs::path( get_slf_dir(), diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index f679ea669..b405b5b6e 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_1718", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_1718", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_1718", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index d7a65690e..fb3227512 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R 
+++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_1819", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_1819", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_1819", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index e3c2ebeb0..e2e21bdac 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_1920", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_1920", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_1920", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index c66f4572d..cf98e80de 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_2021", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_2021", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_2021", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index cde974be2..3bcbf2466 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_2122", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_2122", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_2122", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index ee83082f1..af0447eed 100644 --- 
a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_2223", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_2223", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_2223", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index 508689f6d..bdf16e0f8 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_2324", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_2324", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_2324", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_2425.R b/Run_SLF_Files_manually/run_episode_file_2425.R new file mode 100644 index 000000000..699c197b3 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_2425.R @@ -0,0 +1,75 @@ +library(targets) +library(createslf) + +year <- "2425" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2425", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2425", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2425", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2425", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2425", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2425", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2425", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2425", + 
store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2425", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2425", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2425", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2425", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2425", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2425", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2425", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year) %>% + process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_individual_file_2425.R b/Run_SLF_Files_manually/run_individual_file_2425.R new file mode 100644 index 000000000..843eb505c --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_2425.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "2425" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_2425.R b/Run_SLF_Files_targets/run_targets_2425.R new file mode 100644 index 000000000..fe849ede8 --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_2425.R @@ -0,0 +1,9 @@ +library(targets) + +year <- "2425" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2425")) +) diff --git a/_targets.R b/_targets.R index 0fea087ca..0377e487b 100644 --- a/_targets.R +++ b/_targets.R @@ -19,7 +19,7 @@ tar_option_set( memory = "persistent" # default option ) -years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324") +years_to_run <- 
createslf::years_to_run() list( tar_rds(write_to_disk, TRUE), diff --git a/man/latest_cost_year.Rd b/man/latest_cost_year.Rd index 0f50b3ac6..0045b4efb 100644 --- a/man/latest_cost_year.Rd +++ b/man/latest_cost_year.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/00-update_refs.R +% Please edit documentation in R/cost_uplift.R \name{latest_cost_year} \alias{latest_cost_year} \title{The latest financial year for Cost uplift setting} @@ -11,6 +11,11 @@ The financial year format } \description{ Get the latest year for cost uplift +latest_cost_year() is hard coded in cost_uplift(). +2223 is not changed automatically with time passes. +It is changed only when we get a new instruction from somewhere about cost uplift. +Do not change unless specific instructions. +Changing this means that we need to change cost_uplift(). } \seealso{ Other initialisation: From 69ce1738ccb6d6f6ba7bb5d867a4c6fb59e9c0ff Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 17 Sep 2024 10:36:22 +0100 Subject: [PATCH 93/96] update copy_to_hscdiip.R --- copy_to_hscdiip.R | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/copy_to_hscdiip.R b/copy_to_hscdiip.R index 7fb969e8d..8a2dcdc58 100644 --- a/copy_to_hscdiip.R +++ b/copy_to_hscdiip.R @@ -1,9 +1,12 @@ +devtools::load_all() + dir_folder <- "/conf/sourcedev/Source_Linkage_File_Updates" target_folder <- "/conf/hscdiip/01-Source-linkage-files" if (!dir.exists(target_folder)) { dir.create(target_folder, mode = "770") } -folders <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324") + +folders <- years_to_run() year_n <- length(folders) resource_consumption <- data.frame( year = rep("0", year_n), @@ -11,22 +14,27 @@ resource_consumption <- data.frame( file_size_MB = rep(0, year_n) ) -for (i in 1:length(folders)) { +for (i in 1:year_n) { timer <- Sys.time() print(stringr::str_glue("{folders[i]} starts at {Sys.time()}")) folder_path <- 
file.path(dir_folder, folders[i]) - old_path <- list.files(folder_path, - pattern = "^source-.*parquet", - full.names = TRUE - ) - files_name <- basename(old_path) - new_path <- file.path(target_folder, files_name) - print(files_name) + + file_names <- paste0("source-", c("episode", "individual"), "-file-", folders[i], ".parquet") + file_names_im <- paste0("source-", c("episode", "individual"), "-file-", folders[i], "-new.parquet") + + old_path <- file.path(folder_path, file_names) + new_path_im <- file.path(target_folder, file_names_im) + new_path <- file.path(target_folder, file_names) + + print(file_names) fs::file_copy(old_path, - new_path, + new_path_im, overwrite = TRUE ) + fs::file_move(new_path_im, new_path) + fs::file_chmod(new_path, mode = "640") + resource_consumption$time_consumption[i] <- (Sys.time() - timer) file_size <- sum(file.size(old_path)) / 2^20 resource_consumption$file_size_MB[i] <- file_size From 1ab7e7feb4c52b8bdfec6b2ec128aa792b4e604f Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Tue, 17 Sep 2024 11:20:07 +0100 Subject: [PATCH 94/96] Update older years to bring the data in line with our newest processes. (#988) * fix sc_client_lookup sc_send_lca * fix an issue of get_pop_path * Style code * fix the rest of get_pop_path from get_datazone_pop_path * Update documentation * fix sc_send_lca * add missing year column * Remove redundant code * Update documentation * Style code * explicitly specify the argument year to avoid corruption of targets * Update documentation * Reorder when we match on client variables This was causing NSUs to show a social care id. This now resolves this. 
* Update documentation * Style code * Add chi parameter to `create_demog_test_flags` * Style code * Use CHI parameter for ep/indiv tests * Use CHI parameter for extract tests (chi) * Change test sheet names to lowercase * Change date to lowercase * Update documentation * new data pipeline with targets remove create_individual_files from targets and append it to run_targets script * minor changes * Style code * Update documentation * Update documentation * Style code * undo sc_send_lca bit * Add code for running years available * Update `_targets.R` script for running old years * Style code * Update `check_year_valid` for running old years * Use `check_year_valid` where no data for old yrs * Style code * Fix pick variables This was not taking the correct variables, leading to NSUs being assigned psychiatry * SC Demographics and SDS (#900) * Style code * # read in sc demographics different variables - removed extract date as not accurate, using chi over upi after discussion with social care data management. Added in date of death just for fun. * social care demographics first draft removed a lot of the submitted variables and instead using chi variables from chi seeding. Other changes: - Fill in missing values, - create flag for latest social care id (one from database is not accurate), this makes sure that each chi only has ONE sc id as the latest to stop it creating duplicates - change postcode to choose chi over submitted * Style code * had a github error? Not sure what happened but commiting first draft of sc demographics * Style code * first draft sds. 
No major changes - only how demographics is matched on and how latest social care id is selected * Update documentation * demographics - add sending location to group by * Style code * Update documentation * Added ungroup() * Remove comments * Remove comments * Style code --------- Co-authored-by: SwiftySalmon Co-authored-by: marjom02 Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> Co-authored-by: Jennit07 Co-authored-by: Zihao Li * Sc all at speedup (#904) * speed up process_sc_all_alarms_telecare function with data.table package * Update documentation --------- Co-authored-by: lizihao-anu Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> * Add case_when statement for `high_cc` cohort * Bug - `high_cc` in demographic cohort showing `NAs` instead of `TRUE/FALSE` (#911) Add case_when statement for `high_cc` cohort * added a casewhen to update property type description for homelessness * Update documentation * Style code * Bug - deal with missing variables (#914) * Add missing sc variables for no sc data * Fix code for including `_inc_dna` variables * Remove commented line * Bug - Fix get pop path failing and preventing the indiv file from running. 
(#913) Fix bug - pop file paths breaking indiv file * correct file hscp file path * Declare missing variables for older years * setup targets scripts for old years * Style code * Include `check_year_valid` for sc client path * Add check year valid to join sc client * Add if else statement * WIP - TO DO - fix dummy path for `get_chi()` * Style code * update dummy data file to read empty tibble * Update `check_year_valid` * Update declared `NA` variables * Update documentation * declare `count_not_known` as NA * supply year as default in `aggregate_by_chi` * Decalre unused variables * Style code * Update sc client with sept update new code * Specify code for running older years * Style code * Add Running SLF files manually scripts * Style code * update write_tests_xlsx * update process_refined_death * fix tests by removing get_chi * add 2425 * Style code * fix NA matches in refined_death * move latest_cost_year() to cost_uplift() * improve automation * Update documentation * fix `cij_ppa` in DD data * fix bugs of dd and populate cij_delay back to episodes * Style code * keep all variable for delayed discharge episodes * remove dummy variable names from dd_date * Style code * remove `deceased_boxi` variable - bug * remove `create_person_id`. 
Its matched in client * remove `create_person_id` * Update `run_slf_manually` scripts * further remove person_id * fix duplicate row introduced by adding death * remove duplicated chi when joining death data * TODO: check distinct death data by chi while keeping chi==NA records * add parameter for year * fix duplicate in add_activity_after_death_flag * Update `check_year_valid` * Declare DN variables * Style code * Declare client variables * remove extra dd variables * remove redundant variables * remove fy variable * Remove redundant variable `count_not_known` * Remove duplicate code * revert commit - remove fy * update manual run * declare missing sc variables indiv file * Style code --------- Co-authored-by: Zihao Li Co-authored-by: lizihao-anu Co-authored-by: Jennit07 Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Co-authored-by: SwiftySalmon Co-authored-by: Zihao Li Co-authored-by: marjom02 --- R/check_year_valid.R | 5 +- R/create_episode_file.R | 65 ++++++++++++++- R/create_individual_file.R | 28 ++++++- R/get_boxi_extract_path.R | 4 +- R/get_sc_lookup_paths.R | 21 +++-- R/link_delayed_discharge_eps.R | 8 +- R/process_lookup_homelessness.R | 14 ++++ R/process_lookup_sc_client.R | 5 ++ R/read_file.R | 2 +- R/replace_sc_id_with_latest.R | 25 ++---- R/write_tests_xlsx.R | 8 ++ .../run_episode_file_1415.R | 79 +++++++++++++++++++ .../run_episode_file_1516.R | 79 +++++++++++++++++++ .../run_episode_file_1617.R | 79 +++++++++++++++++++ .../run_individual_file_1415.R | 9 +++ .../run_individual_file_1516.R | 9 +++ .../run_individual_file_1617.R | 9 +++ Run_SLF_Files_targets/run_targets_1415.R | 9 +++ Run_SLF_Files_targets/run_targets_1516.R | 9 +++ Run_SLF_Files_targets/run_targets_1617.R | 9 +++ man/check_year_valid.Rd | 6 +- run_targets_1718.R | 20 +++++ run_targets_1819.R | 20 +++++ run_targets_1920.R | 20 +++++ run_targets_2021.R | 20 +++++ run_targets_2122.R | 20 +++++ run_targets_2223.R | 20 +++++ run_targets_2324.R | 20 +++++ 28 
files changed, 587 insertions(+), 35 deletions(-) create mode 100644 Run_SLF_Files_manually/run_episode_file_1415.R create mode 100644 Run_SLF_Files_manually/run_episode_file_1516.R create mode 100644 Run_SLF_Files_manually/run_episode_file_1617.R create mode 100644 Run_SLF_Files_manually/run_individual_file_1415.R create mode 100644 Run_SLF_Files_manually/run_individual_file_1516.R create mode 100644 Run_SLF_Files_manually/run_individual_file_1617.R create mode 100644 Run_SLF_Files_targets/run_targets_1415.R create mode 100644 Run_SLF_Files_targets/run_targets_1516.R create mode 100644 Run_SLF_Files_targets/run_targets_1617.R create mode 100644 run_targets_1718.R create mode 100644 run_targets_1819.R create mode 100644 run_targets_1920.R create mode 100644 run_targets_2021.R create mode 100644 run_targets_2122.R create mode 100644 run_targets_2223.R create mode 100644 run_targets_2324.R diff --git a/R/check_year_valid.R b/R/check_year_valid.R index 217aa1c2b..da257ff4c 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -17,6 +17,7 @@ check_year_valid <- function( "ch", "client", "cmh", + "cost_dna", "dd", "deaths", "dn", @@ -34,9 +35,9 @@ check_year_valid <- function( )) { if (year <= "1415" && type %in% c("dn", "sparra")) { return(FALSE) - } else if (year <= "1516" && type %in% c("cmh", "homelessness")) { + } else if (year <= "1516" && type %in% c("cmh", "homelessness", "dd")) { return(FALSE) - } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at")) { + } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at", "client", "cost_dna")) { return(FALSE) } else if (year <= "1718" && type %in% "hhg") { return(FALSE) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index dd22dbc1d..ecb6fc126 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -175,7 +175,65 @@ create_episode_file <- function( sc_social_worker = NA, sc_type_of_housing = NA, sc_meals = NA, - sc_day_care = NA + sc_day_care = NA, + 
social_care_id = NA, + sc_dementia = NA, + sc_learning_disability = NA, + sc_mental_health_disorders = NA, + sc_physical_and_sensory_disability = NA, + sc_drugs = NA, + sc_alcohol = NA, + sc_palliative_care = NA, + sc_carer = NA, + sc_elderly_frail = NA, + sc_neurological_condition = NA, + sc_autism = NA, + sc_other_vulnerable_groups = NA, + ch_provider_description = NA + ) + } + + if (!check_year_valid(year, type = "homelessness")) { + episode_file <- episode_file %>% + dplyr::mutate( + hl1_12_months_post_app = NA, + hl1_12_months_pre_app = NA, + hl1_6after_ep = NA, + hl1_6before_ep = NA, + hl1_application_ref = NA, + hl1_completeness = NA, + hl1_during_ep = NA, + hl1_in_fy = NA, + hl1_property_type = NA, + hl1_reason_ftm = NA, + hl1_sending_lca = NA + ) + } + + if (!check_year_valid(year, type = "dd")) { + episode_file <- episode_file %>% + dplyr::mutate( + cij_delay = NA, + dd_quality = NA, + dd_responsible_lca = NA, + delay_end_reason = NA, + primary_delay_reason = NA, + secondary_delay_reason = NA, + ) + } + + if (!check_year_valid(year, type = "dn")) { + episode_file <- episode_file %>% + dplyr::mutate( + ccm = NA, + total_no_dn_contacts = NA + ) + } + + if (!check_year_valid(year, type = "cost_dna")) { + episode_file <- episode_file %>% + dplyr::mutate( + cost_total_net_inc_dnas = NA ) } @@ -471,6 +529,11 @@ join_sc_client <- function(data, file_type = c("episode", "individual")) { cli::cli_alert_info("Join social care client function started at {Sys.time()}") + if (!check_year_valid(year, type = "client")) { + data_file <- data + return(data_file) + } + if (file_type == "episode") { # Match on client variables by chi data_file <- data %>% diff --git a/R/create_individual_file.R b/R/create_individual_file.R index f826294d1..273761efc 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -115,6 +115,9 @@ create_individual_file <- function( hc_personal_hours = NA, hc_non_personal_hours = NA, hc_reablement_hours = NA, + 
hc_non_personal_hours_cost = NA, + hc_personal_hours_cost = NA, + hc_reablement_hours_cost = NA, at_alarms = NA, at_telecare = NA, sds_option_1 = NA, @@ -125,10 +128,33 @@ create_individual_file <- function( sc_support_from_unpaid_carer = NA, sc_social_worker = NA, sc_meals = NA, - sc_day_care = NA + sc_day_care = NA, + sc_type_of_housing = NA, + count_not_known = NA, + sc_latest_submission = NA, + social_care_id = NA, + person_id = NA, + sc_alcohol = NA, + sc_autism = NA, + sc_carer = NA, + sc_dementia = NA, + sc_drugs = NA, + sc_elderly_frail = NA, + sc_learning_disability = NA, + sc_mental_health_disorders = NA, + sc_neurological_condition = NA, + sc_other_vulnerable_groups = NA, + sc_palliative_care = NA, + sc_physical_and_sensory_disability = NA ) } + if (!check_year_valid(year, type = "homelessness")) { + individual_file <- individual_file %>% + dplyr::mutate(hl1_in_fy = NA) + } + + if (anon_chi_out) { individual_file <- individual_file %>% tidyr::replace_na(list(chi = "")) %>% diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index a1c59b4f2..9c21cabe9 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -86,9 +86,11 @@ get_boxi_extract_path <- function( #' #' @return an [fs::path()] to a dummy file which can be used with targets. get_dummy_boxi_extract_path <- function() { - get_file_path( + dummy_path <- get_file_path( directory = get_dev_dir(), file_name = ".dummy", create = TRUE ) + + return(dummy_path) } diff --git a/R/get_sc_lookup_paths.R b/R/get_sc_lookup_paths.R index d201f416f..90a08e7e1 100644 --- a/R/get_sc_lookup_paths.R +++ b/R/get_sc_lookup_paths.R @@ -38,11 +38,18 @@ get_sc_demog_lookup_path <- function(update = latest_update(), ...) { #' @family social care lookup file paths #' @seealso [get_file_path()] for the generic function. get_sc_client_lookup_path <- function(year, update = latest_update(), ...) 
{ - sc_client_lookup_path <- get_file_path( - directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_client_lookup"), - file_name = stringr::str_glue("anon-sc_client_lookup_{year}_{update}.parquet"), - ... - ) - - return(sc_client_lookup_path) + if (!check_year_valid(year, type = "client")) { + return(get_dummy_boxi_extract_path()) + } else { + sc_client_lookup_path <- get_file_path( + directory = fs::path( + get_slf_dir(), + "Social_care", + "processed_sc_client_lookup" + ), + file_name = stringr::str_glue("anon-sc_client_lookup_{year}_{update}.parquet"), + ... + ) + return(sc_client_lookup_path) + } } diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index b80b35807..d4162b619 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -14,8 +14,12 @@ link_delayed_discharge_eps <- function( dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi()) { cli::cli_alert_info("Link delayed discharge to episode file function started at {Sys.time()}") - names_ep <- names(episode_file) + if (!check_year_valid(year, type = "dd")) { + episode_file <- episode_file + return(episode_file) + } + names_ep <- names(episode_file) episode_file <- episode_file %>% dplyr::mutate( # remember to revoke the cij_end_date with dummy_cij_end @@ -370,7 +374,7 @@ link_delayed_discharge_eps <- function( delay_dd, cij_delay )) %>% - dplyr::select(-c("has_dd", "delay_dd")) + dplyr::select(-c("has_dd", "delay_dd", "original_admission_date", "amended_dates")) return(linked_data) } diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R index 5341cf2b6..30772383e 100644 --- a/R/process_lookup_homelessness.R +++ b/R/process_lookup_homelessness.R @@ -15,6 +15,10 @@ create_homelessness_lookup <- function( homelessness_data = read_file(get_source_extract_path(year, "homelessness")) %>% slfhelper::get_chi()) { cli::cli_alert_info("Create homelessness lookup function started at {Sys.time()}") + # 
Specify years available for running + if (year < "1617") { + return(NULL) + } homelessness_lookup <- homelessness_data %>% dplyr::distinct(.data$chi, .data$record_keydate1, .data$record_keydate2) %>% tidyr::drop_na(.data$chi) %>% @@ -39,6 +43,11 @@ add_homelessness_flag <- function(data, year, lookup = create_homelessness_lookup(year)) { cli::cli_alert_info("Add homelessness flag function started at {Sys.time()}") + if (!check_year_valid(year, type = "homelessness")) { + data <- data + return(data) + } + data <- data %>% dplyr::left_join( lookup %>% @@ -65,6 +74,11 @@ add_homelessness_flag <- function(data, year, add_homelessness_date_flags <- function(data, year, lookup = create_homelessness_lookup(year)) { cli::cli_alert_info("Add homelessness date flags function started at {Sys.time()}") + if (!check_year_valid(year, type = "homelessness")) { + data <- data + return(data) + } + lookup <- lookup %>% dplyr::filter(!(is.na(.data$record_keydate2))) %>% dplyr::rename( diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index b8fc2fb6d..91c08632d 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -20,6 +20,11 @@ process_lookup_sc_client <- slfhelper::get_chi() %>% dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")), write_to_disk = TRUE) { + # Specify years available for running + if (year < "1718") { + return(NULL) + } + # Match to demographics lookup to get CHI sc_client_demographics <- data %>% dplyr::right_join( diff --git a/R/read_file.R b/R/read_file.R index 022f4cc26..3d174606c 100644 --- a/R/read_file.R +++ b/R/read_file.R @@ -24,7 +24,7 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) 
{ # Return an empty tibble if trying to read the dummy path if (path == get_dummy_boxi_extract_path()) { - return(tibble::tibble()) + return(tibble::tibble(anon_chi = NA_character_)) } ext <- fs::path_ext(path) diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R index 215816753..f15808f1e 100644 --- a/R/replace_sc_id_with_latest.R +++ b/R/replace_sc_id_with_latest.R @@ -7,33 +7,23 @@ replace_sc_id_with_latest <- function(data) { # Check for required variables check_variables_exist( data, - c("sending_location", "social_care_id", "chi", "period") + c("sending_location", "social_care_id", "chi", "latest_flag") ) # select variables we need filter_data <- data %>% dplyr::select( - "sending_location", "social_care_id", "chi", "period" + "sending_location", "social_care_id", "chi", "latest_flag" ) %>% - dplyr::filter(!(is.na(.data$chi))) + dplyr::filter(!(is.na(.data$chi))) %>% + dplyr::distinct() change_sc_id <- filter_data %>% - # Sort (by sending_location, chi and period) for unique chi/sending location - dplyr::arrange( - .data$sending_location, - .data$chi, - dplyr::desc(.data$period) - ) %>% - # Find the latest sc_id for each chi/sending location by keeping latest period - dplyr::distinct( - .data$sending_location, - .data$chi, - .keep_all = TRUE - ) %>% + dplyr::filter(latest_flag == 1) %>% # Rename for latest sc id dplyr::rename(latest_sc_id = "social_care_id") %>% - # drop period for matching - dplyr::select(-"period") + # drop latest_flag for matching + dplyr::select(-"latest_flag") return_data <- change_sc_id %>% # Match back onto data @@ -41,6 +31,7 @@ replace_sc_id_with_latest <- function(data) { by = c("sending_location", "chi"), multiple = "all" ) %>% + dplyr::filter(!(is.na(period))) %>% # Overwrite sc id with the latest dplyr::mutate( social_care_id = dplyr::if_else( diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index 6847cc977..a1b53f971 100644 --- a/R/write_tests_xlsx.R +++ b/R/write_tests_xlsx.R @@ -121,6 +121,14 @@ 
write_tests_xlsx <- function(comparison_data, date_today <- stringr::str_to_lower(date_today) + sheet_name_dated <- ifelse( + is.null(year), + stringr::str_glue("{sheet_name}_{date_today}"), + stringr::str_glue("{year}_{sheet_name}_{date_today}") + ) + + date_today <- stringr::str_to_lower(date_today) + if (is.null(year)) { sheet_name_dated <- stringr::str_glue("{sheet_name}_{date_today}") } else { diff --git a/Run_SLF_Files_manually/run_episode_file_1415.R b/Run_SLF_Files_manually/run_episode_file_1415.R new file mode 100644 index 000000000..b5a2eab38 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_1415.R @@ -0,0 +1,79 @@ +library(targets) +library(createslf) + +year <- "1415" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1415", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1415", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1415", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1415", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1415", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1415", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1415", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1415", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1415", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1415", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1415", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1415", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1415", + 
store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1415", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1415", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1415", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year) %>% + process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1516.R b/Run_SLF_Files_manually/run_episode_file_1516.R new file mode 100644 index 000000000..59c7ddc63 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_1516.R @@ -0,0 +1,79 @@ +library(targets) +library(createslf) + +year <- "1516" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1516", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1516", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1516", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1516", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1516", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1516", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1516", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1516", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1516", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1516", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1516", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1516", + store = targets_store + ), + care_home = targets::tar_read( 
+ "source_sc_care_home_1516", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1516", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1516", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1516", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year) ## %>% +# process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1617.R b/Run_SLF_Files_manually/run_episode_file_1617.R new file mode 100644 index 000000000..b10372be6 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_1617.R @@ -0,0 +1,79 @@ +library(targets) +library(createslf) + +year <- "1617" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1617", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1617", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1617", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1617", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1617", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1617", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1617", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1617", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1617", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1617", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1617", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1617", + store = targets_store + 
), + care_home = targets::tar_read( + "source_sc_care_home_1617", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1617", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1617", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1617", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year) ## %>% +# process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_individual_file_1415.R b/Run_SLF_Files_manually/run_individual_file_1415.R new file mode 100644 index 000000000..70aa2bfca --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_1415.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "1415" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1516.R b/Run_SLF_Files_manually/run_individual_file_1516.R new file mode 100644 index 000000000..8e8dae906 --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_1516.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "1516" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1617.R b/Run_SLF_Files_manually/run_individual_file_1617.R new file mode 100644 index 000000000..255e4e674 --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_1617.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "1617" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_targets/run_targets_1415.R 
b/Run_SLF_Files_targets/run_targets_1415.R new file mode 100644 index 000000000..a37068c0d --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_1415.R @@ -0,0 +1,9 @@ +library(targets) + +year <- "1415" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1415")) +) diff --git a/Run_SLF_Files_targets/run_targets_1516.R b/Run_SLF_Files_targets/run_targets_1516.R new file mode 100644 index 000000000..7930d5bb5 --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_1516.R @@ -0,0 +1,9 @@ +library(targets) + +year <- "1516" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1516")) +) diff --git a/Run_SLF_Files_targets/run_targets_1617.R b/Run_SLF_Files_targets/run_targets_1617.R new file mode 100644 index 000000000..16361f71e --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_1617.R @@ -0,0 +1,9 @@ +library(targets) + +year <- "1617" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1617")) +) diff --git a/man/check_year_valid.Rd b/man/check_year_valid.Rd index 91c29861e..59960da30 100644 --- a/man/check_year_valid.Rd +++ b/man/check_year_valid.Rd @@ -6,9 +6,9 @@ \usage{ check_year_valid( year, - type = c("acute", "ae", "at", "ch", "client", "cmh", "dd", "deaths", "dn", "gpooh", - "hc", "homelessness", "hhg", "maternity", "mh", "nsu", "outpatients", "pis", "sds", - "sparra") + type = c("acute", "ae", "at", "ch", "client", "cmh", "cost_dna", "dd", "deaths", "dn", + "gpooh", "hc", "homelessness", "hhg", "maternity", "mh", "nsu", "outpatients", "pis", + "sds", "sparra") ) } \arguments{ diff --git a/run_targets_1718.R b/run_targets_1718.R new file mode 100644 index 000000000..488918e1d --- /dev/null +++ b/run_targets_1718.R @@ -0,0 +1,20 @@ +library(targets) + 
+Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "1718" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1718")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_1819.R b/run_targets_1819.R new file mode 100644 index 000000000..7c63807e8 --- /dev/null +++ b/run_targets_1819.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "1819" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1819")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_1920.R b/run_targets_1920.R new file mode 100644 index 000000000..d3361a34c --- /dev/null +++ b/run_targets_1920.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "1920" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1920")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2021.R b/run_targets_2021.R new file mode 100644 index 000000000..efcfaed7a --- /dev/null +++ 
b/run_targets_2021.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "2021" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2021")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2122.R b/run_targets_2122.R new file mode 100644 index 000000000..e92d75c7d --- /dev/null +++ b/run_targets_2122.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "2122" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2122")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2223.R b/run_targets_2223.R new file mode 100644 index 000000000..f5c93ee2f --- /dev/null +++ b/run_targets_2223.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "2223" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2223")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2324.R b/run_targets_2324.R new file mode 
100644 index 000000000..5e3885bc2 --- /dev/null +++ b/run_targets_2324.R @@ -0,0 +1,20 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year <- "2324" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2324")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) From 4e3e185dfd3b4b5f715ea6ad73bdb977dcc91627 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 17 Sep 2024 11:56:35 +0100 Subject: [PATCH 95/96] Update `replace_sc_id_with_latest` function --- R/replace_sc_id_with_latest.R | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R index f15808f1e..db1dc578c 100644 --- a/R/replace_sc_id_with_latest.R +++ b/R/replace_sc_id_with_latest.R @@ -7,31 +7,40 @@ replace_sc_id_with_latest <- function(data) { # Check for required variables check_variables_exist( data, - c("sending_location", "social_care_id", "chi", "latest_flag") + c("sending_location", "social_care_id", "chi", "period") ) # select variables we need filter_data <- data %>% dplyr::select( - "sending_location", "social_care_id", "chi", "latest_flag" + "sending_location", "social_care_id", "chi", "period" ) %>% - dplyr::filter(!(is.na(.data$chi))) %>% - dplyr::distinct() + dplyr::filter(!(is.na(.data$chi))) change_sc_id <- filter_data %>% - dplyr::filter(latest_flag == 1) %>% + # Sort (by sending_location, chi and period) for unique chi/sending location + dplyr::arrange( + .data$sending_location, + .data$chi, + dplyr::desc(.data$period) + ) %>% + # Find the latest sc_id for each chi/sending location by keeping latest period + dplyr::distinct( + 
.data$sending_location, + .data$chi, + .keep_all = TRUE + ) %>% # Rename for latest sc id dplyr::rename(latest_sc_id = "social_care_id") %>% - # drop latest_flag for matching - dplyr::select(-"latest_flag") + # drop period for matching + dplyr::select(-"period") return_data <- change_sc_id %>% # Match back onto data dplyr::right_join(data, - by = c("sending_location", "chi"), - multiple = "all" + by = c("sending_location", "chi"), + multiple = "all" ) %>% - dplyr::filter(!(is.na(period))) %>% # Overwrite sc id with the latest dplyr::mutate( social_care_id = dplyr::if_else( @@ -39,9 +48,6 @@ replace_sc_id_with_latest <- function(data) { .data$latest_sc_id, .data$social_care_id ) - ) %>% - dplyr::filter(!(is.na(period))) - - + ) return(return_data) } From 27ac757d4f713bd5b53008345f6c7b16c7203c3c Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Tue, 17 Sep 2024 10:58:43 +0000 Subject: [PATCH 96/96] Style code --- R/replace_sc_id_with_latest.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R index db1dc578c..9478ebefe 100644 --- a/R/replace_sc_id_with_latest.R +++ b/R/replace_sc_id_with_latest.R @@ -38,8 +38,8 @@ replace_sc_id_with_latest <- function(data) { return_data <- change_sc_id %>% # Match back onto data dplyr::right_join(data, - by = c("sending_location", "chi"), - multiple = "all" + by = c("sending_location", "chi"), + multiple = "all" ) %>% # Overwrite sc id with the latest dplyr::mutate(