From 3820c19861e01309af8ce5c6067bd6be148bd3c2 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 14 Aug 2023 10:16:38 +0100 Subject: [PATCH 01/19] Fix locality (#802) Tiny error and a simple fix. Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> --- R/process_lookup_postcode.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/process_lookup_postcode.R b/R/process_lookup_postcode.R index 878c51f37..69cc13bd8 100644 --- a/R/process_lookup_postcode.R +++ b/R/process_lookup_postcode.R @@ -53,7 +53,7 @@ process_lookup_postcode <- function(spd_path = get_spd_path(), tidyselect::matches("datazone\\d{4}$") ) %>% dplyr::mutate( - locality = tidyr::replace_na("locality", "No Locality Information") + locality = tidyr::replace_na(.data$locality, "No Locality Information") ) From 8ea15c0f742994f4863d3fe49a50cff14469dbbe Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 14 Aug 2023 15:01:35 +0100 Subject: [PATCH 02/19] Add simple scripts for running targets as a workbench job (#767) --- .Rbuildignore | 1 + run_targets_1718.R | 4 ++++ run_targets_1819.R | 4 ++++ run_targets_1920.R | 4 ++++ run_targets_2021.R | 4 ++++ run_targets_2122.R | 4 ++++ run_targets_2223.R | 4 ++++ 7 files changed, 25 insertions(+) create mode 100644 run_targets_1718.R create mode 100644 run_targets_1819.R create mode 100644 run_targets_1920.R create mode 100644 run_targets_2021.R create mode 100644 run_targets_2122.R create mode 100644 run_targets_2223.R diff --git a/.Rbuildignore b/.Rbuildignore index 168a3e006..2cab1bda6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -22,3 +22,4 @@ ^_targets\.R$ ^_targets\.yaml$ ^_SPSS_archived$ +^run_targets_ diff --git a/run_targets_1718.R b/run_targets_1718.R new file mode 100644 index 000000000..ebc58895f --- /dev/null +++ b/run_targets_1718.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("1718")) +) diff --git a/run_targets_1819.R b/run_targets_1819.R new file mode 100644 index 
000000000..83bbcedef --- /dev/null +++ b/run_targets_1819.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("1819")) +) diff --git a/run_targets_1920.R b/run_targets_1920.R new file mode 100644 index 000000000..1640d1900 --- /dev/null +++ b/run_targets_1920.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("1920")) +) diff --git a/run_targets_2021.R b/run_targets_2021.R new file mode 100644 index 000000000..80749e81a --- /dev/null +++ b/run_targets_2021.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2021")) +) diff --git a/run_targets_2122.R b/run_targets_2122.R new file mode 100644 index 000000000..aa95d7b24 --- /dev/null +++ b/run_targets_2122.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2122")) +) diff --git a/run_targets_2223.R b/run_targets_2223.R new file mode 100644 index 000000000..2ded7d5fd --- /dev/null +++ b/run_targets_2223.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2223")) +) From 80799a2838b8b2bad133e310091dd6b3434cf477 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 14 Aug 2023 15:14:14 +0100 Subject: [PATCH 03/19] Fix CHI duplicates of chi in individual file (#791) * fix duplicated matches in chi in sc data. 
* Update R/create_individual_file.R * update on join_sc_client * Create a test checking if individual files have duplicated chi * add duplicated chi number to the tests in process_tests_individual_file --------- Co-authored-by: lizihao-anu Co-authored-by: James McMahon --- R/create_individual_file.R | 18 +++++++++++++++--- R/process_tests_individual_file.R | 16 ++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 84dbd28ee..f0e6bcdfc 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -61,7 +61,7 @@ create_individual_file <- function( remove_blank_chi() %>% add_cij_columns() %>% add_all_columns() %>% - aggregate_ch_episodes_zihao() %>% + aggregate_ch_episodes() %>% clean_up_ch(year) %>% recode_gender() %>% aggregate_by_chi() %>% @@ -741,13 +741,25 @@ join_sc_client <- function( sc_demographics %>% dplyr::select("sending_location", "social_care_id", "chi"), by = c("sending_location", "social_care_id") - ) + ) %>% + dplyr::mutate(count_not_known = rowSums(dplyr::select(., all_of( + c( + "sc_living_alone", + "sc_support_from_unpaid_carer", + "sc_social_worker", + "sc_meals", + "sc_day_care" + ) + )) == "Not Known")) %>% + dplyr::arrange(chi, count_not_known) %>% + dplyr::distinct(chi, .keep_all = TRUE) # Match on client variables by chi individual_file <- individual_file %>% dplyr::left_join( join_client_demog, - by = "chi" + by = "chi", + relationship = "one-to-one" ) %>% dplyr::select(!c("sending_location", "social_care_id", "sc_latest_submission")) diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 2c93f243e..a9d193465 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -64,9 +64,8 @@ produce_individual_file_tests <- function(data) { create_demog_test_flags() %>% create_hb_test_flags(.data$hbrescode) %>% create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>% - 
create_hscp_test_flags(.data$hscp2018) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select(c("valid_chi":dplyr::last_col())) %>% # use function to sum new test flags calculate_measures(measure = "sum") @@ -86,7 +85,9 @@ produce_individual_file_tests <- function(data) { min_max_measures <- data %>% calculate_measures( - vars = "health_net_cost", + vars = c( + "health_net_cost" + ), measure = "min-max" ) @@ -99,11 +100,18 @@ produce_individual_file_tests <- function(data) { measure = "sum" ) + dup_chi <- data.frame( + measure = "duplicated chi number", + value = duplicated(data$chi) %>% + sum() %>% as.integer() + ) + join_output <- list( test_flags, all_measures, min_max_measures, - sum_measures + sum_measures, + dup_chi ) %>% purrr::reduce(dplyr::full_join, by = c("measure", "value")) From 19779e3fd6c4e9265661f617103e7f8dda044444 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Mon, 14 Aug 2023 15:17:35 +0100 Subject: [PATCH 04/19] Update NSU code for new 22/23 cohort (#784) Update `check_year_valid` for NSUs --- R/check_year_valid.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/check_year_valid.R b/R/check_year_valid.R index d170cd5b5..1361eb47e 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -42,7 +42,7 @@ check_year_valid <- function( return(FALSE) } else if (year >= "2122" && type %in% c("CMH", "DN")) { return(FALSE) - } else if (year >= "2223" && type %in% "NSU") { + } else if (year >= "2324" && type %in% "NSU") { return(FALSE) } else if (year >= "2324" && type %in% c("SPARRA", "HHG")) { return(FALSE) From 7e3215da42b2c92f395de458500deb23b1952d54 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Mon, 14 Aug 2023 15:21:05 +0100 Subject: [PATCH 05/19] Amend `get_boxi_extract_path` function for archiving DN and CMH data (#785) * Update `get_boxi_extract_path` for DN/CMH data * Remove extra 
function * [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/5856792420/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/785#issuecomment-1677400900 Signed-off-by: check-spelling-bot --------- Signed-off-by: check-spelling-bot Co-authored-by: Jennit07 Co-authored-by: James McMahon --- .github/actions/spelling/expect.txt | 1 + R/get_boxi_extract_path.R | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 87300a6a1..51c0a6c6b 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -91,6 +91,7 @@ hjust hms homecare homev +hscdiip hscp hscpnames IDPC diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index 60dd7857a..6096525e5 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -29,7 +29,11 @@ get_boxi_extract_path <- function( )) { type <- match.arg(type) - year_dir <- get_year_dir(year, extracts_dir = TRUE) + if (type %in% c("DN", "CMH")) { + dir <- fs::path(get_slf_dir(), "Archived_data") + } else { + dir <- get_year_dir(year, extracts_dir = TRUE) + } if (!check_year_valid(year, type)) { return(get_dummy_boxi_extract_path()) @@ -53,11 +57,12 @@ get_boxi_extract_path <- function( ) boxi_extract_path_csv_gz <- fs::path( - year_dir, + dir, stringr::str_glue("{file_name}-20{year}.csv.gz") ) + boxi_extract_path_csv <- fs::path( - year_dir, + dir, stringr::str_glue("{file_name}-20{year}.csv") ) From 612e0698cc2401faa040a9607062f97cb5d9207b Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 14 Aug 2023 15:21:58 +0100 Subject: [PATCH 06/19] Fix increase in total preventable beddays (#779) * further obsolete code change * fix the preventable_beddays Co-authored-by: James McMahon --------- Co-authored-by: James McMahon Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> 
--- R/aggregate_by_chi.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R index 99da03ba8..5e7ebc7c0 100644 --- a/R/aggregate_by_chi.R +++ b/R/aggregate_by_chi.R @@ -126,9 +126,9 @@ aggregate_by_chi <- function(episode_file) { individual_file_cols6 <- episode_file[, .( preventable_beddays = ifelse( - max(cij_ppa, na.rm = TRUE), - max(cij_end_date) - min(cij_start_date), - NA_real_ + any(cij_ppa, na.rm = TRUE), + as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))), + NA_integer_ ) ), # cij_marker has been renamed as cij_total From 51a0b0590a554613c6e56001eb326cf6600977c3 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 15 Aug 2023 15:17:33 +0100 Subject: [PATCH 07/19] fix warning on `:=` (#797) * fix warning on `:=` * Update R/aggregate_by_chi.R Co-authored-by: James McMahon * Style code --------- Co-authored-by: James McMahon Co-authored-by: lizihao-anu --- R/aggregate_by_chi.R | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R index 5e7ebc7c0..db12f7a9e 100644 --- a/R/aggregate_by_chi.R +++ b/R/aggregate_by_chi.R @@ -203,12 +203,19 @@ aggregate_ch_episodes <- function(episode_file) { data.table::setDT(episode_file) # Perform grouping and aggregation - episode_file <- episode_file[, `:=`( - ch_no_cost = max(ch_no_cost), - ch_ep_start = min(record_keydate1), - ch_ep_end = max(ch_ep_end), - ch_cost_per_day = mean(ch_cost_per_day) - ), by = c("chi", "ch_chi_cis")] + episode_file[, c( + "ch_no_cost", + "ch_ep_start", + "ch_ep_end", + "ch_cost_per_day" + ) := list( + max(ch_no_cost), + min(record_keydate1), + max(ch_ep_end), + mean(ch_cost_per_day) + ), + by = c("chi", "ch_chi_cis") + ] # Convert back to tibble if needed episode_file <- tibble::as_tibble(episode_file) From 0f25195e234fac4fe33d677e240d798dc3e7a76c Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 14 Aug 2023 15:28:00 +0100 
Subject: [PATCH 08/19] Add 2324 targets/workbench job file --- run_targets_2324.R | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 run_targets_2324.R diff --git a/run_targets_2324.R b/run_targets_2324.R new file mode 100644 index 000000000..b875984f4 --- /dev/null +++ b/run_targets_2324.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2324")) +) From c4a54f84a95e2a691085c98e366b225d330bac18 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 14 Aug 2023 15:39:00 +0100 Subject: [PATCH 09/19] Use `get_source_extract_path` in homelessness (#796) This was already set up, just not used for some reason. Note that this will switch from using a `.rds` to `.parquet` (unless you do `get_source_extract_path(year, "Homelessness", ext = "rds")`). Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> --- R/get_source_extract_path.R | 49 ++++++++++++++++++-------------- R/process_extract_homelessness.R | 13 +++++---- 2 files changed, 35 insertions(+), 27 deletions(-) diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R index 1816ceb25..4cb5eef44 100644 --- a/R/get_source_extract_path.R +++ b/R/get_source_extract_path.R @@ -10,27 +10,34 @@ #' @export #' #' @family extract file paths -get_source_extract_path <- function(year, - type = c( - "Acute", - "AE", - "AT", - "CH", - "Client", - "CMH", - "DD", - "Deaths", - "DN", - "GPOoH", - "HC", - "Homelessness", - "Maternity", - "MH", - "Outpatients", - "PIS", - "SDS" - ), - ...) { +get_source_extract_path <- function( + year, + type = c( + "Acute", + "AE", + "AT", + "CH", + "Client", + "CMH", + "DD", + "Deaths", + "DN", + "GPOoH", + "HC", + "Homelessness", + "Maternity", + "MH", + "Outpatients", + "PIS", + "SDS" + ), + ...) 
{ + if (year %in% type) { + cli::cli_abort("{.val {year}} was supplied to the {.arg year} argument.") + } + + year <- check_year_format(year) + type <- match.arg(type) if (!check_year_valid(year, type)) { diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R index a16c9a57b..a900cff9a 100644 --- a/R/process_extract_homelessness.R +++ b/R/process_extract_homelessness.R @@ -146,13 +146,14 @@ process_extract_homelessness <- function( ) if (write_to_disk) { - final_data %>% - write_file(get_file_path( - get_year_dir(year), - stringr::str_glue("homelessness_for_source-20{year}"), - ext = "rds", + write_file( + final_data, + get_source_extract_path( + year = year, + type = "Homelessness", check_mode = "write" - )) + ) + ) } return(final_data) From e36c97c17a711510605c15a8b7a21cd045bb8fdc Mon Sep 17 00:00:00 2001 From: James McMahon Date: Mon, 14 Aug 2023 15:41:56 +0100 Subject: [PATCH 10/19] Correct tests for NSU --- tests/testthat/test-check_year_valid.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-check_year_valid.R b/tests/testthat/test-check_year_valid.R index ca0738c89..eda74dbdf 100644 --- a/tests/testthat/test-check_year_valid.R +++ b/tests/testthat/test-check_year_valid.R @@ -49,7 +49,8 @@ test_that("Check year valid works for specific datasets ", { expect_true(check_year_valid("1920", "NSU")) expect_true(check_year_valid("2021", "NSU")) expect_true(check_year_valid("2122", "NSU")) - expect_false(check_year_valid("2223", "NSU")) + expect_true(check_year_valid("2223", "NSU")) + expect_false(check_year_valid("2324", "NSU")) # SPARRA expect_false(check_year_valid("1415", "SPARRA")) From 62a41740b01c6bc266b3842684dc5b77608aa6a5 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 15 Aug 2023 15:41:40 +0100 Subject: [PATCH 11/19] Update script for extracting NSU from SMRA space --- .../All_years/02-Lookups/99_extract_NSU_data.R | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) 
diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R index d33dfbe49..54cc316e8 100644 --- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R +++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R @@ -6,7 +6,7 @@ library(glue) nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU") # Change the year -fin_year <- "1516" +fin_year <- "2324" db_connection <- odbc::dbConnect( odbc::odbc(), @@ -16,7 +16,7 @@ db_connection <- odbc::dbConnect( ) # Check the table name and change if required. -table <- dbplyr::in_schema("ROBERM18", "FINAL_2") +table <- dbplyr::in_schema("ROBERM18", "FINAL_1") # Read NSU data nsu_data <- @@ -35,9 +35,11 @@ nsu_data <- collect() # Write out the data -file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.zsav")) +file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.parquet")) # This will archive the existing file for later comparison if (file_exists(file_path)) { - file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.zsav"))) + file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.parquet"))) } -write_sav(nsu_data, file_path, compress = TRUE) + +nsu_data %>% +arrow::write_parquet(file_path, compression = "zstd", compression_level = 10) From d310dfd3bc1f586001b4d09bcec358d5b458ca09 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 15 Aug 2023 15:47:29 +0100 Subject: [PATCH 12/19] Update year in 99_NSU extract script --- _SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R index 54cc316e8..8bbd0513c 100644 --- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R +++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R @@ -6,7 +6,7 @@ library(glue) nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU") # Change the year 
-fin_year <- "2324" +fin_year <- "2223" db_connection <- odbc::dbConnect( odbc::odbc(), From 51c4a637d7701c1061af67964adda0d5519cfa47 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Tue, 15 Aug 2023 16:04:23 +0100 Subject: [PATCH 13/19] Update news for September 23 update (#811) * Update News for March and June updates * Update release date * WIP - update news for Sep update * Update NEWS.md Fix some typos / grammar --------- Co-authored-by: James McMahon --- NEWS.md | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index cbcb62079..2a3453eea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,48 @@ -# March 2023 Update - Unreleased +# September 2023 Update - Unreleased +* Update of 2017/18 onwards to include bug fixes within the files. +* New 2023/24 files. +* New NSU cohort for 2022/23 file. +* Re-addition of: + * HRIs in individual file. + * Homelessness Flags. +* Bug fixes: + * Blank `datazone` in A&E. This has been fixed and was due to PC8 postcode format matching onto SLF pc lookup. + * Large increase in preventable beddays. This was caused by an SPSS vs R logic difference. The calculation now uses the SPSS logic, which + brings the difference down to `3.3%`. + * Issue with `locality` which showed the literal text `locality` in each row instead of its true `locality`. This has now been fixed. + * Duplicated CHI in the individual file. The issue was identified when trying to include HRIs. This has now been corrected. +* Internal changes to SLF development: + * `DN` and `CMH` data are now archived in an HSCDIIP folder as the BOXI datamart is now closed down for these. Function `get_boxi_extract_path` has been updated to reflect this. + * Tests updated to include `HSCP` count. + * Tests created for `Delayed Discharges` extract and `Social care Client lookup`. 
+# June 2023 Update - Released 24-Jul-2023 +* 2011/12 -> 2013/14 – These files have not been altered, other than to make them available in a new file type (parquet). +* 2017/18 – These files have been recreated using our new R pipeline, but the data has not changed. We did this so that we would have a good comparator file. +* 2018/19 -> 2022/23 – These files have been recreated using the R pipeline and are also using updated data (as in a ‘normal’ update). +* Files changed into parquet format. +* SLFhelper updated. +* Removal of `keydate1_dateformat` and `keydate2_dateformat`. +* `dd_responsible_lca` – This variable now uses CA2019 codes instead of the 2-digit ‘old’ LCA code. +* Preventable beddays - not able to calculate these correctly. * Death fixes not included. +* Variables not ordered in R like they used to be in SPSS. +* End of HHG. +* New variable `ch_postcode`. +* Renamed variables `cost_total_net_incdnas`, `ooh_outcome.1`, `ooh_outcome.2`, `ooh_outcome.3`, `ooh_outcome.4`, `totalnodncontacts`. +* HRIs not included. +* Homelessness flags not included. +* Keep_population flag not included. + + +# March 2023 Update - Released 10-Mar-2023 +* 2021/22 episode and individual files refreshed with updated activity. +* 2022/23 file updated and contains data up to the end of Q3. +* Social care data is available for 2022/23. +* Typo in the variable name `ooh_covid_assessment` +* Next update in May as a test run in R but won't be released. +* Next release in June. + # December 2022 Update - Released 07-Dec-2022 * Now using the 2022v2 Scottish Postcode Directory. * Now using the 2020 Urban Rural classifications (instead of the older 2016 ones), this means variables such as `URx_2016` will now be called `URx_2020`. 
From ee3943ffcc7ec9cb5932de6f4bb8f9ca5ba7423c Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 16 Aug 2023 09:01:09 +0100 Subject: [PATCH 14/19] Apply styling --- _SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R index 8bbd0513c..ea6f81bfc 100644 --- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R +++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R @@ -42,4 +42,4 @@ if (file_exists(file_path)) { } nsu_data %>% -arrow::write_parquet(file_path, compression = "zstd", compression_level = 10) + arrow::write_parquet(file_path, compression = "zstd", compression_level = 10) From 61283cc69f072a17cdad8f09d053626b2f2ad31f Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 16 Aug 2023 09:16:29 +0100 Subject: [PATCH 15/19] Fix issue with `case_match` types (#810) * Fix issue with `case_match` types It seems that `case_match()` is stricter about types than `case_when()`. See the below code: ```r library(dplyr) # Breaks mutate(starwars, new_height = case_when( height == "172" ~ "170"), new_height2 = case_match( height, "172" ~ "170" ), .after = "height" ) # Works mutate(starwars, new_height = case_when( height == "172" ~ "170"), new_height2 = case_match( height, 172L ~ "170" ), .after = "height" ) ``` Since `sending_location` is an integer, the LHS of `case_match` must be numeric. It was slightly incorrect previously but `case_when` let us get away with it! I also updated and added to the tests. 
* Style code * Style code --------- Co-authored-by: Moohan Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> Co-authored-by: Jennit07 --- R/convert_sending_location_to_lca.R | 66 ++++++++-------- .../_snaps/convert_sending_location_to_lca.md | 8 +- .../test-convert_sending_location_to_lca.R | 79 +++++++++++-------- 3 files changed, 82 insertions(+), 71 deletions(-) diff --git a/R/convert_sending_location_to_lca.R b/R/convert_sending_location_to_lca.R index 6e9c577c0..d0d79dd39 100644 --- a/R/convert_sending_location_to_lca.R +++ b/R/convert_sending_location_to_lca.R @@ -18,38 +18,40 @@ convert_sending_location_to_lca <- function(sending_location) { lca <- dplyr::case_match( sending_location, - "100" ~ "01", # Aberdeen City - "110" ~ "02", # Aberdeenshire - "120" ~ "03", # Angus - "130" ~ "04", # Argyll and Bute - "355" ~ "05", # Scottish Borders - "150" ~ "06", # Clackmannanshire - "395" ~ "07", # West Dumbartonshire - "170" ~ "08", # Dumfries and Galloway - "180" ~ "09", # Dundee City - "190" ~ "10", # East Ayrshire - "200" ~ "11", # East Dunbartonshire - "210" ~ "12", # East Lothian - "220" ~ "13", # East Renfrewshire - "230" ~ "14", # City of Edinburgh - "240" ~ "15", # Falkirk - "250" ~ "16", # Fife - "260" ~ "17", # Glasgow City - "270" ~ "18", # Highland - "280" ~ "19", # Inverclyde - "290" ~ "20", # Midlothian - "300" ~ "21", # Moray - "310" ~ "22", # North Ayrshire - "320" ~ "23", # North Lanarkshire - "330" ~ "24", # Orkney Islands - "340" ~ "25", # Perth and Kinross - "350" ~ "26", # Renfrewshire - "360" ~ "27", # Shetland Islands - "370" ~ "28", # South Ayrshire - "380" ~ "29", # South Lanarkshire - "390" ~ "30", # Stirling - "400" ~ "31", # West Lothian - "235" ~ "32" # Na_h_Eileanan_Siar + 100L ~ "01", # Aberdeen City + 110L ~ "02", # Aberdeenshire + 120L ~ "03", # Angus + 130L ~ "04", # Argyll and Bute + 355L ~ "05", # Scottish Borders + 150L ~ "06", # Clackmannanshire + 395L ~ "07", # West Dunbartonshire + 170L ~ "08", # Dumfries 
and Galloway + 180L ~ "09", # Dundee City + 190L ~ "10", # East Ayrshire + 200L ~ "11", # East Dunbartonshire + 210L ~ "12", # East Lothian + 220L ~ "13", # East Renfrewshire + 230L ~ "14", # City of Edinburgh + 240L ~ "15", # Falkirk + 250L ~ "16", # Fife + 260L ~ "17", # Glasgow City + 270L ~ "18", # Highland + 280L ~ "19", # Inverclyde + 290L ~ "20", # Midlothian + 300L ~ "21", # Moray + 310L ~ "22", # North Ayrshire + 320L ~ "23", # North Lanarkshire + 330L ~ "24", # Orkney Islands + 340L ~ "25", # Perth and Kinross + 350L ~ "26", # Renfrewshire + 360L ~ "27", # Shetland Islands + 370L ~ "28", # South Ayrshire + 380L ~ "29", # South Lanarkshire + 390L ~ "30", # Stirling + 400L ~ "31", # West Lothian + 235L ~ "32", # Na_h_Eileanan_Siar + .default = NA_character_ ) + return(lca) } diff --git a/tests/testthat/_snaps/convert_sending_location_to_lca.md b/tests/testthat/_snaps/convert_sending_location_to_lca.md index 464ff2d37..1fa02dc14 100644 --- a/tests/testthat/_snaps/convert_sending_location_to_lca.md +++ b/tests/testthat/_snaps/convert_sending_location_to_lca.md @@ -1,10 +1,10 @@ # Can convert a SC sending location to lca code Code - convert_sending_location_to_lca(c("100", "110", "120", "130", "355", "150", - "395", "170", "180", "190", "200", "210", "220", "230", "240", "250", "260", - "270", "280", "290", "300", "310", "320", "330", "340", "350", "360", "370", - "380", "390", "400", "235", "999", "0", NA)) + convert_sending_location_to_lca(c(100L, 110L, 120L, 130L, 355L, 150L, 395L, + 170L, 180L, 190L, 200L, 210L, 220L, 230L, 240L, 250L, 260L, 270L, 280L, 290L, + 300L, 310L, 320L, 330L, 340L, 350L, 360L, 370L, 380L, 390L, 400L, 235L, 999L, + 0L, NA_integer_)) Output [1] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30" diff --git a/tests/testthat/test-convert_sending_location_to_lca.R b/tests/testthat/test-convert_sending_location_to_lca.R index 
0bc67668e..eb66802a6 100644 --- a/tests/testthat/test-convert_sending_location_to_lca.R +++ b/tests/testthat/test-convert_sending_location_to_lca.R @@ -2,42 +2,51 @@ test_that("Can convert a SC sending location to lca code", { expect_snapshot( convert_sending_location_to_lca( c( - "100", - "110", - "120", - "130", - "355", - "150", - "395", - "170", - "180", - "190", - "200", - "210", - "220", - "230", - "240", - "250", - "260", - "270", - "280", - "290", - "300", - "310", - "320", - "330", - "340", - "350", - "360", - "370", - "380", - "390", - "400", - "235", - "999", - "0", - NA + 100L, + 110L, + 120L, + 130L, + 355L, + 150L, + 395L, + 170L, + 180L, + 190L, + 200L, + 210L, + 220L, + 230L, + 240L, + 250L, + 260L, + 270L, + 280L, + 290L, + 300L, + 310L, + 320L, + 330L, + 340L, + 350L, + 360L, + 370L, + 380L, + 390L, + 400L, + 235L, + 999L, + 0L, + NA_integer_ ) ) ) }) + +test_that("Errors on unexpected input", { + expect_error( + convert_sending_location_to_lca("100") + ) + expect_error( + convert_sending_location_to_lca(c("100", 99L)) + ) +}) From c88562cad41b619a7cc071523d5038f53ff6c57d Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Wed, 16 Aug 2023 10:32:52 +0100 Subject: [PATCH 16/19] Bug - Outpatients tests failing due to missing HSCP (#816) * Update `produce_source_extract_tests` * Update outpatients tests with hscp_var = FALSE * Revert "Style code" This reverts commit 8e73d4abc042986a76754c2acc1d197292a1c245. 
* Style code * simplify code * Update documentation * Rename `hscp_var` to `add_hscp_count` * Update documentation --------- Co-authored-by: Jennit07 Co-authored-by: James McMahon Co-authored-by: Moohan --- R/process_tests_outpatients.R | 6 ++++-- R/produce_source_extract_tests.R | 13 ++++++++++--- man/produce_source_extract_tests.Rd | 5 ++++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R index f8a7a6a2e..5ab3e82db 100644 --- a/R/process_tests_outpatients.R +++ b/R/process_tests_outpatients.R @@ -12,11 +12,13 @@ process_tests_outpatients <- function(data, year) { comparison <- produce_test_comparison( old_data = produce_source_extract_tests(old_data, sum_mean_vars = "cost", - max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net") + max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"), + add_hscp_count = FALSE ), new_data = produce_source_extract_tests(data, sum_mean_vars = "cost", - max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net") + max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"), + add_hscp_count = FALSE ) ) %>% write_tests_xlsx(sheet_name = "00B", year) diff --git a/R/produce_source_extract_tests.R b/R/produce_source_extract_tests.R index 10f842fc6..7f8feda92 100644 --- a/R/produce_source_extract_tests.R +++ b/R/produce_source_extract_tests.R @@ -13,6 +13,7 @@ #' (data is from [get_source_extract_path()]) #' @param sum_mean_vars variables used when selecting 'all' measures from [calculate_measures()] #' @param max_min_vars variables used when selecting 'min-max' from [calculate_measures()] +#' @param add_hscp_count Default set to TRUE. For use where `hscp variable` is not available, specify FALSE. 
#' #' @return a dataframe with a count of each flag #' from [calculate_measures()] @@ -28,13 +29,19 @@ produce_source_extract_tests <- function(data, max_min_vars = c( "record_keydate1", "record_keydate2", "cost_total_net", "yearstay" - )) { + ), + add_hscp_count = TRUE) { test_flags <- data %>% # use functions to create HB and partnership flags create_demog_test_flags() %>% create_hb_test_flags(.data$hbtreatcode) %>% - create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>% - create_hscp_test_flags(.data$hscp) %>% + create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) + + if (add_hscp_count) { + test_flags <- create_hscp_test_flags(test_flags, .data$hscp) + } + + test_flags <- test_flags %>% # keep variables for comparison dplyr::select("valid_chi":dplyr::last_col()) %>% # use function to sum new test flags diff --git a/man/produce_source_extract_tests.Rd b/man/produce_source_extract_tests.Rd index 679132127..97984103a 100644 --- a/man/produce_source_extract_tests.Rd +++ b/man/produce_source_extract_tests.Rd @@ -7,7 +7,8 @@ produce_source_extract_tests( data, sum_mean_vars = c("beddays", "cost", "yearstay"), - max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay") + max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay"), + add_hscp_count = TRUE ) } \arguments{ @@ -17,6 +18,8 @@ produce_source_extract_tests( \item{sum_mean_vars}{variables used when selecting 'all' measures from \code{\link[=calculate_measures]{calculate_measures()}}} \item{max_min_vars}{variables used when selecting 'min-max' from \code{\link[=calculate_measures]{calculate_measures()}}} + +\item{add_hscp_count}{Default set to TRUE. 
For use where \verb{hscp variable} is not available, specify FALSE.} } \value{ a dataframe with a count of each flag From 1e06921a5a0c484fb84f416a28b5a1b9c37cee66 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 16 Aug 2023 10:53:57 +0100 Subject: [PATCH 17/19] fix read_sc_all_alarms_telecare with incorrect format in period (#814) * fix read_sc_all_alarms_telecare with the incorrect format in period --------- Co-authored-by: lizihao-anu Co-authored-by: James McMahon --- .github/actions/spelling/expect.txt | 1 + R/read_sc_all_alarms_telecare.R | 16 +++++++--------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 51c0a6c6b..464adca0e 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -108,6 +108,7 @@ keyring keytime keytimex kis +lazydt lgl los ltc diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R index ac3ac206d..2c7bd03db 100644 --- a/R/read_sc_all_alarms_telecare.R +++ b/R/read_sc_all_alarms_telecare.R @@ -22,21 +22,19 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection "service_start_date", "service_end_date" ) %>% - # fix bad period (2017, 2020 & 2021) + dplyr::collect() %>% + # fix bad period (2017, 2020, 2021, and so on) dplyr::mutate( - period = dplyr::case_match( - .data$period, - "2017" ~ "2017Q4", - "2020" ~ "2020Q4", - "2021" ~ "2021Q4", - .default = .data$period + period = dplyr::if_else( + grepl("\\d{4}$", .data$period), + paste0(.data$period, "Q4"), + .data$period ) ) %>% dplyr::mutate( dplyr::across(c("sending_location", "service_type"), ~ as.integer(.x)) ) %>% - dplyr::arrange(.data$sending_location, .data$social_care_id) %>% - dplyr::collect() + dplyr::arrange(.data$sending_location, .data$social_care_id) return(at_full_data) } From 2c6853c255b3584830660cd37d0d23f617349d17 Mon Sep 17 00:00:00 2001 From: James McMahon Date: Wed, 16 Aug 2023 11:14:25 
+0100 Subject: [PATCH 18/19] Fix `convert_sending_location_to_lca` example --- R/convert_sending_location_to_lca.R | 2 +- man/convert_sending_location_to_lca.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/convert_sending_location_to_lca.R b/R/convert_sending_location_to_lca.R index d0d79dd39..ff7e51db1 100644 --- a/R/convert_sending_location_to_lca.R +++ b/R/convert_sending_location_to_lca.R @@ -9,7 +9,7 @@ #' @export #' #' @examples -#' sending_location <- c("100", "120") +#' sending_location <- c(100, 120) #' convert_sending_location_to_lca(sending_location) #' #' @family code functions diff --git a/man/convert_sending_location_to_lca.Rd b/man/convert_sending_location_to_lca.Rd index 8c7a29088..78bf475ba 100644 --- a/man/convert_sending_location_to_lca.Rd +++ b/man/convert_sending_location_to_lca.Rd @@ -17,7 +17,7 @@ Convert Social Care Sending Location Codes into the Local Council Authority Codes. } \examples{ -sending_location <- c("100", "120") +sending_location <- c(100, 120) convert_sending_location_to_lca(sending_location) } From ff4d35f48c8c2076a98d748912492573e88caad2 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 1 Sep 2023 12:07:43 +0100 Subject: [PATCH 19/19] Use `col_select` instead of `columns` in tests --- R/get_existing_data_for_tests.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/get_existing_data_for_tests.R b/R/get_existing_data_for_tests.R index 91fa2293e..1dd0b350e 100644 --- a/R/get_existing_data_for_tests.R +++ b/R/get_existing_data_for_tests.R @@ -46,7 +46,7 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") { slf_data <- suppressMessages(slfhelper::read_slf_episode( year = year, recids = recids, - columns = variable_names + col_select = variable_names )) if ("hscp2018" %in% variable_names) { slf_data <- dplyr::rename(slf_data, "hscp" = "hscp2018") @@ -54,7 +54,7 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") { } else { 
slf_data <- suppressMessages(slfhelper::read_slf_individual( year = year, - columns = variable_names + col_select = variable_names )) }