From 58585763e45ceb6c43d5e51383d924d491c5a5e5 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Thu, 11 Jan 2024 11:32:14 +0000 Subject: [PATCH 001/105] fix sc_client_lookup sc_send_lca --- R/create_episode_file.R | 6 ++++++ R/process_lookup_sc_client.R | 3 +++ 2 files changed, 9 insertions(+) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 3de9223dd..be6e6df44 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -31,6 +31,9 @@ create_episode_file <- function( sc_client = read_file(get_sc_client_lookup_path(year)), write_to_disk = TRUE, anon_chi_out = TRUE) { + sc_client = sc_client %>% + dplyr::select(-sc_send_lca) + processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble())) episode_file <- dplyr::bind_rows(processed_data_list) %>% @@ -434,6 +437,9 @@ join_sc_client <- function(data, year, sc_client = read_file(get_sc_client_lookup_path(year)), file_type = c("episode", "individual")) { + sc_client = sc_client %>% + dplyr::select(-sc_send_lca) + if (file_type == "episode") { # Match on client variables by chi data_file <- data %>% diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index e64d4b6ba..58b72ebab 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -150,6 +150,9 @@ process_lookup_sc_client <- )) == "Not Known")) %>% dplyr::arrange(chi, count_not_known) %>% dplyr::distinct(chi, .keep_all = TRUE) %>% + dplyr::mutate( + sc_send_lca = convert_sc_sending_location_to_lca(sending_location) + ) %>% dplyr::select(-sending_location) if (write_to_disk) { From 41d6c58db74abf01c79a7461f361a4a10db556a5 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Thu, 11 Jan 2024 13:51:30 +0000 Subject: [PATCH 002/105] fix an issue of get_pop_path --- R/add_keep_population_flag.R | 2 +- R/get_lookup_paths.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R/add_keep_population_flag.R b/R/add_keep_population_flag.R index 6050b278f..d418ac18c 100644 --- a/R/add_keep_population_flag.R +++ b/R/add_keep_population_flag.R @@ -15,7 +15,7 @@ add_keep_population_flag <- function(individual_file, year) { } else { ## Obtain the population estimates for Locality AgeGroup and Gender. pop_estimates <- - readr::read_rds(get_datazone_pop_path("DataZone2011_pop_est_2011_2021.rds")) %>% + readr::read_rds(get_pop_path(type = "datazone")) %>% dplyr::select(year, datazone2011, sex, age0:age90plus) # Step 1: Obtain the population estimates for Locality, AgeGroup, and Gender diff --git a/R/get_lookup_paths.R b/R/get_lookup_paths.R index fe35a7d2f..7df5c52e2 100644 --- a/R/get_lookup_paths.R +++ b/R/get_lookup_paths.R @@ -126,7 +126,7 @@ get_pop_path <- function(file_name = NULL, "intzone" ~ stringr::str_glue("IntZone_pop_est_2011_\\d+?\\.{ext}") ) - datazone_pop_path <- get_file_path( + pop_path <- get_file_path( directory = pop_dir, file_name = file_name, ext = ext, From 859957460e14713ad5c76d1cde4c8519111853e9 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Thu, 11 Jan 2024 13:54:36 +0000 Subject: [PATCH 003/105] Style code --- R/create_episode_file.R | 4 ++-- R/create_individual_file.R | 3 ++- R/get_fy_quarter_dates.R | 8 ++++---- Run_SLF_Files_manually/run_episode_file_1718.R | 3 ++- Run_SLF_Files_manually/run_episode_file_1819.R | 3 ++- Run_SLF_Files_manually/run_episode_file_1920.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2021.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2122.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2223.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2324.R | 3 ++- 10 files changed, 22 insertions(+), 14 deletions(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index be6e6df44..5efdc50ae 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -31,7 +31,7 @@ create_episode_file <- function( sc_client = read_file(get_sc_client_lookup_path(year)), write_to_disk = TRUE, anon_chi_out = TRUE) { - sc_client = sc_client %>% + sc_client <- sc_client %>% dplyr::select(-sc_send_lca) processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble())) @@ -437,7 +437,7 @@ join_sc_client <- function(data, year, sc_client = read_file(get_sc_client_lookup_path(year)), file_type = c("episode", "individual")) { - sc_client = sc_client %>% + sc_client <- sc_client %>% dplyr::select(-sc_send_lca) if (file_type == "episode") { diff --git a/R/create_individual_file.R b/R/create_individual_file.R index d9316b41b..4ca2f96d7 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -483,7 +483,8 @@ add_ch_columns <- function(episode_file, prefix, condition) { ch_ep_end = dplyr::if_else( eval(condition), .data$record_keydate2, - lubridate::NA_Date_ ), + lubridate::NA_Date_ + ), # If end date is missing use the first day of next FY quarter ch_ep_end = dplyr::if_else( eval(condition) & is.na(.data$ch_ep_end), diff --git a/R/get_fy_quarter_dates.R b/R/get_fy_quarter_dates.R index cd4c3492c..a772099b8 100644 --- a/R/get_fy_quarter_dates.R +++ b/R/get_fy_quarter_dates.R @@ -15,7 +15,7 @@ start_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -47,7 +47,7 @@ start_fy_quarter <- function(quarter) { end_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -80,7 +80,7 @@ end_fy_quarter <- function(quarter) { start_next_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -112,7 +112,7 @@ start_next_fy_quarter <- function(quarter) { end_next_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index 9be2eb9c6..ab75b94d7 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -4,7 +4,8 @@ library(createslf) year <- "1718" processed_data_list <- targets::tar_read("processed_data_list_1718", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index 7dec9e5c1..cd5a7435f 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -4,7 +4,8 @@ library(createslf) year <- "1819" processed_data_list <- targets::tar_read("processed_data_list_1819", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index 066bd27b7..a9dc591b1 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -4,7 +4,8 @@ library(createslf) year <- "1920" processed_data_list <- targets::tar_read("processed_data_list_1920", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index 8354f49ae..37708ee8b 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -4,7 +4,8 @@ library(createslf) year <- "2021" processed_data_list <- targets::tar_read("processed_data_list_2021", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 4057770d1..47400e2d1 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -4,7 +4,8 @@ library(createslf) year <- "2122" processed_data_list <- targets::tar_read("processed_data_list_2122", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index 5df7b5db6..e64a57f32 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -4,7 +4,8 @@ library(createslf) year <- "2223" processed_data_list <- targets::tar_read("processed_data_list_2223", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index af9a3efe5..4a7f0ad29 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -4,7 +4,8 @@ library(createslf) year <- "2324" processed_data_list <- targets::tar_read("processed_data_list_2324", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% From 8c76e2cbf7e801dd5871effe50f9f8cbe236b112 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Thu, 11 Jan 2024 15:13:46 +0000 Subject: [PATCH 004/105] fix the rest of get_pop_path from get_datazone_pop_path --- Rmarkdown/costs_district_nursing.Rmd | 2 +- man/read_file.Rd | 2 +- tests/testthat/test-get_lookup_paths.R | 8 ++++---- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Rmarkdown/costs_district_nursing.Rmd b/Rmarkdown/costs_district_nursing.Rmd index e3c9bba13..59b8353f8 100644 --- a/Rmarkdown/costs_district_nursing.Rmd +++ b/Rmarkdown/costs_district_nursing.Rmd @@ -75,7 +75,7 @@ dn_raw_costs_contacts <- left_join(dn_raw_contacts, # Of the two HSCPs, Argyll and Bute provides the # District Nursing data which is 27% of the population. -population_lookup <- read_file(get_datazone_pop_path("HSCP2019_pop_est_1981_2021.rds")) %>% +population_lookup <- read_file(get_pop_path(type = "hscp")) %>% # Select only the HSCPs for NHS Highland & years since 2015 filter( hscp2019 %in% c("S37000004", "S37000016"), diff --git a/man/read_file.Rd b/man/read_file.Rd index 1ef351342..b8231218f 100644 --- a/man/read_file.Rd +++ b/man/read_file.Rd @@ -14,7 +14,7 @@ read_file(path, col_select = NULL, as_data_frame = TRUE, ...) \link[tidyselect:eval_select]{tidy selection specification} of columns, as used in \code{dplyr::select()}.} -\item{as_data_frame}{Should the function return a \code{tibble} (default) or +\item{as_data_frame}{Should the function return a \code{data.frame} (default) or an Arrow \link[arrow]{Table}?} \item{...}{Addition arguments passed to the relevant function.} diff --git a/tests/testthat/test-get_lookup_paths.R b/tests/testthat/test-get_lookup_paths.R index c56752b03..d5f416ed8 100644 --- a/tests/testthat/test-get_lookup_paths.R +++ b/tests/testthat/test-get_lookup_paths.R @@ -48,13 +48,13 @@ test_that("SIMD file path returns as expected", { test_that("population estimates file path returns as expected", { suppressMessages({ - expect_s3_class(get_datazone_pop_path(), "fs_path") + expect_s3_class(get_pop_path(type = "datazone"), "fs_path") - expect_equal(fs::path_ext(get_datazone_pop_path()), "rds") + expect_equal(fs::path_ext(get_pop_path(type = "datazone")), "rds") - expect_match(get_datazone_pop_path(), "DataZone2011_pop_est_2001_\\d+?") + expect_match(get_pop_path(type = "datazone"), "DataZone2011_pop_est_2011_\\d+?") - expect_true(fs::file_exists(get_datazone_pop_path())) + expect_true(fs::file_exists(get_pop_path(type = "datazone"))) }) }) From 0b7edbe890768424ba02c333fa3d2ab3f3b6e445 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Fri, 12 Jan 2024 09:55:35 +0000 Subject: [PATCH 005/105] Update documentation --- man/read_file.Rd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/man/read_file.Rd b/man/read_file.Rd index b8231218f..1ef351342 100644 --- a/man/read_file.Rd +++ b/man/read_file.Rd @@ -14,7 +14,7 @@ read_file(path, col_select = NULL, as_data_frame = TRUE, ...) \link[tidyselect:eval_select]{tidy selection specification} of columns, as used in \code{dplyr::select()}.} -\item{as_data_frame}{Should the function return a \code{data.frame} (default) or +\item{as_data_frame}{Should the function return a \code{tibble} (default) or an Arrow \link[arrow]{Table}?} \item{...}{Addition arguments passed to the relevant function.} From d4b0660878b1c42bc1c152a4c8e725698a33a1b1 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Fri, 12 Jan 2024 10:54:44 +0000 Subject: [PATCH 006/105] fix sc_send_lca --- R/create_episode_file.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 5efdc50ae..021737d89 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -437,9 +437,6 @@ join_sc_client <- function(data, year, sc_client = read_file(get_sc_client_lookup_path(year)), file_type = c("episode", "individual")) { - sc_client <- sc_client %>% - dplyr::select(-sc_send_lca) - if (file_type == "episode") { # Match on client variables by chi data_file <- data %>% From 77e6c0986427d6136fa2b3cb30fcecc4a23c4b97 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Fri, 12 Jan 2024 15:00:58 +0000 Subject: [PATCH 007/105] add missing year column --- R/aggregate_by_chi.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R index 8d9dff96d..2136f53a3 100644 --- a/R/aggregate_by_chi.R +++ b/R/aggregate_by_chi.R @@ -187,6 +187,7 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { individual_file_cols5[, chi := NULL], individual_file_cols6[, chi := NULL] ) + individual_file <- individual_file[, year := year] # convert back to tibble return(dplyr::as_tibble(individual_file)) From 7d05ce9c8f5f678abd147c8325cb9c64b221b03c Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 16 Jan 2024 15:39:38 +0000 Subject: [PATCH 008/105] Remove redundant code --- R/process_tests_district_nursing.R | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/R/process_tests_district_nursing.R b/R/process_tests_district_nursing.R index d3d55a15a..df3c876f0 100644 --- a/R/process_tests_district_nursing.R +++ b/R/process_tests_district_nursing.R @@ -13,14 +13,7 @@ process_tests_district_nursing <- function(data, year) { return(data) } - old_data <- get_existing_data_for_tests(data) %>% - # TODO: remove this bit after SPSS stopped - # replace NA by 0 in monthly costs - dplyr::mutate(dplyr::across( - dplyr::ends_with("_cost"), - ~ tidyr::replace_na(.x, 0.0) - )) - + old_data <- get_existing_data_for_tests(data) data <- rename_hscp(data) comparison <- produce_test_comparison( From d1718f0a2a630b854cd9fd2add7912cd984e5514 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Tue, 16 Jan 2024 15:41:28 +0000 Subject: [PATCH 009/105] Update documentation --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5123289dd..4bb0c6f18 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,4 +73,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 From 6aec7b1ce6ca0be3d1902240fa6ae371ef82bb3b Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Tue, 16 Jan 2024 15:44:11 +0000 Subject: [PATCH 010/105] Style code --- R/create_individual_file.R | 3 ++- R/get_fy_quarter_dates.R | 8 ++++---- Run_SLF_Files_manually/run_episode_file_1718.R | 3 ++- Run_SLF_Files_manually/run_episode_file_1819.R | 3 ++- Run_SLF_Files_manually/run_episode_file_1920.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2021.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2122.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2223.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2324.R | 3 ++- 9 files changed, 20 insertions(+), 12 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index d9316b41b..4ca2f96d7 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -483,7 +483,8 @@ add_ch_columns <- function(episode_file, prefix, condition) { ch_ep_end = dplyr::if_else( eval(condition), .data$record_keydate2, - lubridate::NA_Date_ ), + lubridate::NA_Date_ + ), # If end date is missing use the first day of next FY quarter ch_ep_end = dplyr::if_else( eval(condition) & is.na(.data$ch_ep_end), diff --git a/R/get_fy_quarter_dates.R b/R/get_fy_quarter_dates.R index cd4c3492c..a772099b8 100644 --- a/R/get_fy_quarter_dates.R +++ b/R/get_fy_quarter_dates.R @@ -15,7 +15,7 @@ start_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -47,7 +47,7 @@ start_fy_quarter <- function(quarter) { end_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -80,7 +80,7 @@ end_fy_quarter <- function(quarter) { start_next_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -112,7 +112,7 @@ start_next_fy_quarter <- function(quarter) { end_next_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index 9be2eb9c6..ab75b94d7 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -4,7 +4,8 @@ library(createslf) year <- "1718" processed_data_list <- targets::tar_read("processed_data_list_1718", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index 7dec9e5c1..cd5a7435f 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -4,7 +4,8 @@ library(createslf) year <- "1819" processed_data_list <- targets::tar_read("processed_data_list_1819", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index 066bd27b7..a9dc591b1 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -4,7 +4,8 @@ library(createslf) year <- "1920" processed_data_list <- targets::tar_read("processed_data_list_1920", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index 8354f49ae..37708ee8b 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -4,7 +4,8 @@ library(createslf) year <- "2021" processed_data_list <- targets::tar_read("processed_data_list_2021", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 4057770d1..47400e2d1 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -4,7 +4,8 @@ library(createslf) year <- "2122" processed_data_list <- targets::tar_read("processed_data_list_2122", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index 5df7b5db6..e64a57f32 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -4,7 +4,8 @@ library(createslf) year <- "2223" processed_data_list <- targets::tar_read("processed_data_list_2223", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index af9a3efe5..4a7f0ad29 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -4,7 +4,8 @@ library(createslf) year <- "2324" processed_data_list <- targets::tar_read("processed_data_list_2324", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% From fe5ceb1571a5fb6d662f8ca80c356d132522b0e7 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Thu, 18 Jan 2024 09:51:59 +0000 Subject: [PATCH 011/105] explicitly specify the argument year to avoid corruption of targets --- R/aggregate_by_chi.R | 2 +- R/create_individual_file.R | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R index 2136f53a3..f0d000110 100644 --- a/R/aggregate_by_chi.R +++ b/R/aggregate_by_chi.R @@ -7,7 +7,7 @@ #' @importFrom data.table .SD #' #' @inheritParams create_individual_file -aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { +aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") # Convert to data.table diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 4ca2f96d7..7266026a3 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -73,7 +73,7 @@ create_individual_file <- function( ))) %>% remove_blank_chi() %>% add_cij_columns() %>% - add_all_columns() + add_all_columns(year = year) if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { individual_file <- individual_file %>% @@ -82,7 +82,7 @@ create_individual_file <- function( individual_file <- individual_file %>% aggregate_ch_episodes() %>% clean_up_ch(year) %>% - aggregate_by_chi(exclude_sc_var = FALSE) + aggregate_by_chi(year = year, exclude_sc_var = FALSE) } individual_file <- individual_file %>% @@ -202,7 +202,7 @@ add_cij_columns <- function(episode_file) { #' of prefixed column names created based on some condition. #' @family individual_file #' @inheritParams create_individual_file -add_all_columns <- function(episode_file) { +add_all_columns <- function(episode_file, year) { cli::cli_alert_info("Add all columns function started at {Sys.time()}") episode_file <- episode_file %>% From ca6f25f8257fbad0f43067a77e589acf964b45f9 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Thu, 18 Jan 2024 09:54:17 +0000 Subject: [PATCH 012/105] Update documentation --- DESCRIPTION | 2 +- man/add_all_columns.Rd | 4 +++- man/aggregate_by_chi.Rd | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5123289dd..4bb0c6f18 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,4 +73,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 diff --git a/man/add_all_columns.Rd b/man/add_all_columns.Rd index 345a59e01..deb1594b3 100644 --- a/man/add_all_columns.Rd +++ b/man/add_all_columns.Rd @@ -4,10 +4,12 @@ \alias{add_all_columns} \title{Add all columns} \usage{ -add_all_columns(episode_file) +add_all_columns(episode_file, year) } \arguments{ \item{episode_file}{Tibble containing episodic data.} + +\item{year}{The year to process, in FY format.} } \description{ Add new columns based on SMRType and recid which follow a pattern diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd index 84c9c0ad3..16bf7d792 100644 --- a/man/aggregate_by_chi.Rd +++ b/man/aggregate_by_chi.Rd @@ -4,10 +4,12 @@ \alias{aggregate_by_chi} \title{Aggregate by CHI} \usage{ -aggregate_by_chi(episode_file, exclude_sc_var = FALSE) +aggregate_by_chi(episode_file, year, exclude_sc_var = FALSE) } \arguments{ \item{episode_file}{Tibble containing episodic data.} + +\item{year}{The year to process, in FY format.} } \description{ Aggregate episode file by CHI to convert into From 65e8caa56008ec4eccac7f828e329064de9219e0 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 22 Jan 2024 10:14:00 +0000 Subject: [PATCH 013/105] Reorder when we match on client variables This was causing NSUs to show a social care id. This now resolves this. --- R/create_episode_file.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 3de9223dd..493d71bd3 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -103,6 +103,8 @@ create_episode_file <- function( "mar_beddays" ) ) %>% + # match on sc client variables + join_sc_client(year, sc_client = sc_client, file_type = "episode") %>% # Check chi is valid using phsmethods function # If the CHI is invalid for whatever reason, set the CHI to NA dplyr::mutate( @@ -135,7 +137,6 @@ create_episode_file <- function( year, slf_deaths_lookup ) %>% - join_sc_client(year, sc_client = sc_client, file_type = "episode") %>% load_ep_file_vars(year) if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { From 35bcddcbfc18a3d034dc7ae1ba1cd2ecdfdec437 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Mon, 22 Jan 2024 10:16:22 +0000 Subject: [PATCH 014/105] Update documentation --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5123289dd..4bb0c6f18 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,4 +73,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 From 800083a72d212e82d08898effc8f602a290922e2 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Mon, 22 Jan 2024 10:23:44 +0000 Subject: [PATCH 015/105] Style code --- R/create_individual_file.R | 3 ++- R/get_fy_quarter_dates.R | 8 ++++---- Run_SLF_Files_manually/run_episode_file_1718.R | 3 ++- Run_SLF_Files_manually/run_episode_file_1819.R | 3 ++- Run_SLF_Files_manually/run_episode_file_1920.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2021.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2122.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2223.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2324.R | 3 ++- 9 files changed, 20 insertions(+), 12 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index d9316b41b..4ca2f96d7 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -483,7 +483,8 @@ add_ch_columns <- function(episode_file, prefix, condition) { ch_ep_end = dplyr::if_else( eval(condition), .data$record_keydate2, - lubridate::NA_Date_ ), + lubridate::NA_Date_ + ), # If end date is missing use the first day of next FY quarter ch_ep_end = dplyr::if_else( eval(condition) & is.na(.data$ch_ep_end), diff --git a/R/get_fy_quarter_dates.R b/R/get_fy_quarter_dates.R index cd4c3492c..a772099b8 100644 --- a/R/get_fy_quarter_dates.R +++ b/R/get_fy_quarter_dates.R @@ -15,7 +15,7 @@ start_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -47,7 +47,7 @@ start_fy_quarter <- function(quarter) { end_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -80,7 +80,7 @@ end_fy_quarter <- function(quarter) { start_next_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -112,7 +112,7 @@ start_next_fy_quarter <- function(quarter) { end_next_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index 9be2eb9c6..ab75b94d7 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -4,7 +4,8 @@ library(createslf) year <- "1718" processed_data_list <- targets::tar_read("processed_data_list_1718", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index 7dec9e5c1..cd5a7435f 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -4,7 +4,8 @@ library(createslf) year <- "1819" processed_data_list <- targets::tar_read("processed_data_list_1819", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index 066bd27b7..a9dc591b1 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -4,7 +4,8 @@ library(createslf) year <- "1920" processed_data_list <- targets::tar_read("processed_data_list_1920", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index 8354f49ae..37708ee8b 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -4,7 +4,8 @@ library(createslf) year <- "2021" processed_data_list <- targets::tar_read("processed_data_list_2021", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 4057770d1..47400e2d1 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -4,7 +4,8 @@ library(createslf) year <- "2122" processed_data_list <- targets::tar_read("processed_data_list_2122", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index 5df7b5db6..e64a57f32 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -4,7 +4,8 @@ library(createslf) year <- "2223" processed_data_list <- targets::tar_read("processed_data_list_2223", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index af9a3efe5..4a7f0ad29 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -4,7 +4,8 @@ library(createslf) year <- "2324" processed_data_list <- targets::tar_read("processed_data_list_2324", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% From e4d91284ecfbc82700c4b36d5d668b6d82ebb15f Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 22 Jan 2024 12:59:08 +0000 Subject: [PATCH 016/105] Add chi parameter to `create_demog_test_flags` --- DESCRIPTION | 2 +- R/create_demog_test_flags.R | 10 +++++----- man/create_demog_test_flags.Rd | 4 +++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5123289dd..4bb0c6f18 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,4 +73,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 diff --git a/R/create_demog_test_flags.R b/R/create_demog_test_flags.R index 3023292ce..b909679d9 100644 --- a/R/create_demog_test_flags.R +++ b/R/create_demog_test_flags.R @@ -3,19 +3,19 @@ #' @description Create the demographic flags for testing #' #' @param data a dataframe containing demographic variables e.g. chi +#' @param chi Specify chi or anon_chi. #' #' @return a dataframe with flag (1 or 0) for each demographic variable. #' Missing value flag from [is_missing()] #' #' @family flag functions -create_demog_test_flags <- function(data) { +create_demog_test_flags <- function(data, chi = c(chi, anon_chi)) { data %>% - dplyr::arrange(.data$chi) %>% + dplyr::arrange({{ chi }}) %>% # create test flags dplyr::mutate( - valid_chi = phsmethods::chi_check(.data$chi) == "Valid CHI", - unique_chi = dplyr::lag(.data$chi) != .data$chi, - n_missing_chi = is_missing(.data$chi), + unique_chi = dplyr::lag({{ chi }}) != {{ chi }}, + n_missing_chi = is_missing({{ chi }}), n_males = .data$gender == 1L, n_females = .data$gender == 2L, n_postcode = !is.na(.data$postcode) | !.data$postcode == "", diff --git a/man/create_demog_test_flags.Rd b/man/create_demog_test_flags.Rd index 589877738..fbc0fadcc 100644 --- a/man/create_demog_test_flags.Rd +++ b/man/create_demog_test_flags.Rd @@ -4,10 +4,12 @@ \alias{create_demog_test_flags} \title{Create demographic test flags} \usage{ -create_demog_test_flags(data) +create_demog_test_flags(data, chi = c(chi, anon_chi)) } \arguments{ \item{data}{a dataframe containing demographic variables e.g. chi} + +\item{chi}{Specify chi or anon_chi.} } \value{ a dataframe with flag (1 or 0) for each demographic variable. From daa9ee7a87ba8e5daa4c42fca7c9256a32f84246 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Mon, 22 Jan 2024 13:02:10 +0000 Subject: [PATCH 017/105] Style code --- R/create_individual_file.R | 3 ++- R/get_fy_quarter_dates.R | 8 ++++---- Run_SLF_Files_manually/run_episode_file_1718.R | 3 ++- Run_SLF_Files_manually/run_episode_file_1819.R | 3 ++- Run_SLF_Files_manually/run_episode_file_1920.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2021.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2122.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2223.R | 3 ++- Run_SLF_Files_manually/run_episode_file_2324.R | 3 ++- 9 files changed, 20 insertions(+), 12 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index d9316b41b..4ca2f96d7 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -483,7 +483,8 @@ add_ch_columns <- function(episode_file, prefix, condition) { ch_ep_end = dplyr::if_else( eval(condition), .data$record_keydate2, - lubridate::NA_Date_ ), + lubridate::NA_Date_ + ), # If end date is missing use the first day of next FY quarter ch_ep_end = dplyr::if_else( eval(condition) & is.na(.data$ch_ep_end), diff --git a/R/get_fy_quarter_dates.R b/R/get_fy_quarter_dates.R index cd4c3492c..a772099b8 100644 --- a/R/get_fy_quarter_dates.R +++ b/R/get_fy_quarter_dates.R @@ -15,7 +15,7 @@ start_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -47,7 +47,7 @@ start_fy_quarter <- function(quarter) { end_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -80,7 +80,7 @@ end_fy_quarter <- function(quarter) { start_next_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) @@ -112,7 +112,7 @@ start_next_fy_quarter <- function(quarter) { end_next_fy_quarter <- function(quarter) { quarter_unique <- unique(quarter) - #check_quarter_format(quarter) + # check_quarter_format(quarter) cal_quarter_date_unique <- lubridate::yq(quarter_unique) diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index 9be2eb9c6..ab75b94d7 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -4,7 +4,8 @@ library(createslf) year <- "1718" processed_data_list <- targets::tar_read("processed_data_list_1718", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index 7dec9e5c1..cd5a7435f 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -4,7 +4,8 @@ library(createslf) year <- "1819" processed_data_list <- targets::tar_read("processed_data_list_1819", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index 066bd27b7..a9dc591b1 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -4,7 +4,8 @@ library(createslf) year <- "1920" processed_data_list <- targets::tar_read("processed_data_list_1920", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index 8354f49ae..37708ee8b 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -4,7 +4,8 @@ library(createslf) year <- "2021" processed_data_list <- targets::tar_read("processed_data_list_2021", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index 4057770d1..47400e2d1 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -4,7 +4,8 @@ library(createslf) year <- "2122" processed_data_list <- targets::tar_read("processed_data_list_2122", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index 5df7b5db6..e64a57f32 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -4,7 +4,8 @@ library(createslf) year <- "2223" processed_data_list <- targets::tar_read("processed_data_list_2223", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index af9a3efe5..4a7f0ad29 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -4,7 +4,8 @@ library(createslf) year <- "2324" processed_data_list <- targets::tar_read("processed_data_list_2324", - store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets")) + store = fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") +) # Run episode file create_episode_file(processed_data_list, year = year) %>% From 702225fd6d31ca64d30067bc1b42c2c0dab4313f Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 22 Jan 2024 14:40:00 +0000 Subject: [PATCH 018/105] Use CHI parameter for ep/indiv tests --- R/process_tests_episode_file.R | 12 ++---------- R/process_tests_individual_file.R | 12 ++---------- 2 files changed, 4 insertions(+), 20 deletions(-) diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R index eaa946e3e..6b66cd655 100644 --- a/R/process_tests_episode_file.R +++ b/R/process_tests_episode_file.R @@ -73,15 +73,7 @@ produce_episode_file_tests <- function( test_flags <- data %>% dplyr::group_by(.data$recid) %>% # use functions to create HB and partnership flags - dplyr::mutate( - unique_anon_chi = dplyr::lag(.data$anon_chi) != .data$anon_chi, - n_missing_anon_chi = is_missing(.data$anon_chi), - n_males = .data$gender == 1L, - n_females = .data$gender == 2L, - n_postcode = !is.na(.data$postcode) | !.data$postcode == "", - n_missing_postcode = is_missing(.data$postcode), - missing_dob = is.na(.data$dob) - ) %>% + create_demog_test_flags(chi = anon_chi) %>% create_hb_test_flags(.data$hbtreatcode) %>% create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>% create_hscp_test_flags(.data$hscp2018) %>% @@ -111,7 +103,7 @@ produce_episode_file_tests <- function( test_flags <- test_flags %>% # keep variables for comparison - dplyr::select("unique_anon_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum", group_by = "recid") diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index bbd13948c..35ad7443a 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -60,19 +60,11 @@ produce_individual_file_tests <- function(data) { test_flags <- data %>% # use functions to create HB and partnership flags - dplyr::mutate( - unique_anon_chi = dplyr::lag(.data$anon_chi) != .data$anon_chi, - n_missing_anon_chi = is_missing(.data$anon_chi), - n_males = .data$gender == 1L, - n_females = .data$gender == 2L, - n_postcode = !is.na(.data$postcode) | !.data$postcode == "", - n_missing_postcode = is_missing(.data$postcode), - missing_dob = is.na(.data$dob) - ) %>% + create_demog_test_flags(chi = anon_chi) %>% create_hb_test_flags(.data$hbrescode) %>% create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>% # keep variables for comparison - dplyr::select(c("unique_anon_chi":dplyr::last_col())) %>% + dplyr::select(c("unique_chi":dplyr::last_col())) %>% # use function to sum new test flags calculate_measures(measure = "sum") From d0fb3cdcb8b4120244d748544c70f910fe35ec31 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 22 Jan 2024 14:48:35 +0000 Subject: [PATCH 019/105] Use CHI parameter for extract tests (chi) --- R/process_tests_alarms_telecare.R | 4 ++-- R/process_tests_care_home.R | 4 ++-- R/process_tests_cmh.R | 4 ++-- R/process_tests_district_nursing.R | 4 ++-- R/process_tests_home_care.R | 4 ++-- R/process_tests_homelessness.R | 4 ++-- R/process_tests_nrs_deaths.R | 4 ++-- R/process_tests_prescribing.R | 4 ++-- R/process_tests_sc_demographics.R | 2 +- R/process_tests_sds.R | 4 ++-- R/produce_sc_all_episodes_tests.R | 4 ++-- R/produce_source_extract_tests.R | 4 ++-- 12 files changed, 23 insertions(+), 23 deletions(-) diff --git a/R/process_tests_alarms_telecare.R b/R/process_tests_alarms_telecare.R index d7f9fa699..3c70c8cab 100644 --- a/R/process_tests_alarms_telecare.R +++ b/R/process_tests_alarms_telecare.R @@ -37,14 +37,14 @@ produce_source_at_tests <- function(data, max_min_vars = c("record_keydate1", "record_keydate2")) { test_flags <- data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate( n_at_alarms = .data$smrtype == "AT-Alarm", n_at_telecare = .data$smrtype == "AT-Tele" ) %>% create_lca_test_flags(.data$sc_send_lca) %>% # remove variables that won't be summed - dplyr::select(.data$valid_chi:.data$West_Lothian) %>% + dplyr::select(.data$unique_chi:.data$West_Lothian) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_care_home.R b/R/process_tests_care_home.R index 2032c2473..a46071c22 100644 --- a/R/process_tests_care_home.R +++ b/R/process_tests_care_home.R @@ -47,7 +47,7 @@ produce_source_ch_tests <- function(data, )) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate( n_episodes = 1L, ch_name_missing = is.na(.data$ch_name), @@ -60,7 +60,7 @@ produce_source_ch_tests <- function(data, ) %>% create_lca_test_flags(.data$sc_send_lca) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_cmh.R b/R/process_tests_cmh.R index 09a17bdbb..4a34c08f9 100644 --- a/R/process_tests_cmh.R +++ b/R/process_tests_cmh.R @@ -43,11 +43,11 @@ process_tests_cmh <- function(data, year) { produce_source_cmh_tests <- function(data) { test_flags <- data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% create_hb_test_flags(hb_var = .data$hbrescode) %>% dplyr::mutate(n_episodes = 1L) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_district_nursing.R b/R/process_tests_district_nursing.R index d3d55a15a..8f428b954 100644 --- a/R/process_tests_district_nursing.R +++ b/R/process_tests_district_nursing.R @@ -65,11 +65,11 @@ produce_source_dn_tests <- function(data, )) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% create_hb_test_flags(.data$hbtreatcode) %>% create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>% # keep variables for comparison - dplyr::select(.data$valid_chi:.data$NHS_Lanarkshire_cost) %>% + dplyr::select(.data$unique_chi:.data$NHS_Lanarkshire_cost) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_home_care.R b/R/process_tests_home_care.R index c1af63e97..a8ee76672 100644 --- a/R/process_tests_home_care.R +++ b/R/process_tests_home_care.R @@ -49,7 +49,7 @@ produce_source_hc_tests <- function(data, )) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate( n_episodes = 1L, hc_per = dplyr::if_else(.data$smrtype == "HC-Per", 1L, 0L), @@ -61,7 +61,7 @@ produce_source_hc_tests <- function(data, ) %>% create_lca_test_flags(.data$sc_send_lca) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_homelessness.R b/R/process_tests_homelessness.R index 4d49f1aa4..0b2c33880 100644 --- a/R/process_tests_homelessness.R +++ b/R/process_tests_homelessness.R @@ -38,10 +38,10 @@ produce_slf_homelessness_tests <- function(data, test_flags <- data %>% dplyr::arrange(.data$chi) %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% create_lca_test_flags(.data$hl1_sending_lca) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_nrs_deaths.R b/R/process_tests_nrs_deaths.R index c1a963dcf..3796476c5 100644 --- a/R/process_tests_nrs_deaths.R +++ b/R/process_tests_nrs_deaths.R @@ -38,10 +38,10 @@ process_tests_nrs_deaths <- function(data, year) { produce_source_nrs_tests <- function(data) { test_flags <- data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate(n_deaths = 1L) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_prescribing.R b/R/process_tests_prescribing.R index bac0e3c52..8a7b0e73f 100644 --- a/R/process_tests_prescribing.R +++ b/R/process_tests_prescribing.R @@ -41,10 +41,10 @@ process_tests_prescribing <- function(data, year) { produce_source_pis_tests <- function(data) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate(n_episodes = 1L) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_sc_demographics.R b/R/process_tests_sc_demographics.R index dfb110aa9..b503969ef 100644 --- a/R/process_tests_sc_demographics.R +++ b/R/process_tests_sc_demographics.R @@ -36,7 +36,7 @@ process_tests_sc_demographics <- function(data) { produce_sc_demog_lookup_tests <- function(data) { data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate( n_missing_sending_loc = is.na(.data$sending_location), n_missing_sc_id = is.na(.data$social_care_id) diff --git a/R/process_tests_sds.R b/R/process_tests_sds.R index f624f504b..ce6de656b 100644 --- a/R/process_tests_sds.R +++ b/R/process_tests_sds.R @@ -35,10 +35,10 @@ produce_source_sds_tests <- function(data, max_min_vars = c("record_keydate1", "record_keydate2")) { test_flags <- data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% create_lca_test_flags(.data$sc_send_lca) %>% # remove variables that won't be summed - dplyr::select("valid_chi":"West_Lothian") %>% + dplyr::select("unique_chi":"West_Lothian") %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/produce_sc_all_episodes_tests.R b/R/produce_sc_all_episodes_tests.R index efe980cd4..4c5f736bb 100644 --- a/R/produce_sc_all_episodes_tests.R +++ b/R/produce_sc_all_episodes_tests.R @@ -10,7 +10,7 @@ produce_sc_all_episodes_tests <- function(data) { data %>% # create test flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% dplyr::mutate( n_missing_sending_loc = dplyr::if_else( is.na(.data$sending_location), @@ -24,7 +24,7 @@ produce_sc_all_episodes_tests <- function(data) { ) ) %>% # keep variables for comparison - dplyr::select(c("valid_chi":dplyr::last_col())) %>% + dplyr::select(c("unique_chi":dplyr::last_col())) %>% # use function to sum new test flags calculate_measures(measure = "sum") } diff --git a/R/produce_source_extract_tests.R b/R/produce_source_extract_tests.R index d9a07c893..13b33d549 100644 --- a/R/produce_source_extract_tests.R +++ b/R/produce_source_extract_tests.R @@ -33,7 +33,7 @@ produce_source_extract_tests <- function(data, add_hscp_count = TRUE) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags() %>% + create_demog_test_flags(chi = chi) %>% create_hb_test_flags(.data$hbtreatcode) %>% create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) @@ -43,7 +43,7 @@ produce_source_extract_tests <- function(data, test_flags <- test_flags %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select("unique_chi":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") From bbf28dd6ad0f535800e17bc16d5abd8ea08f4811 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 23 Jan 2024 11:09:42 +0000 Subject: [PATCH 020/105] Change test sheet names to lowercase --- R/process_tests_acute.R | 2 +- R/process_tests_ae.R | 2 +- R/process_tests_alarms_telecare.R | 2 +- R/process_tests_care_home.R | 2 +- R/process_tests_cmh.R | 2 +- R/process_tests_delayed_discharges.R | 2 +- R/process_tests_gp_ooh.R | 2 +- R/process_tests_home_care.R | 2 +- R/process_tests_homelessness.R | 2 +- R/process_tests_maternity.R | 2 +- R/process_tests_mental_health.R | 2 +- R/process_tests_nrs_deaths.R | 2 +- R/process_tests_outpatients.R | 2 +- R/process_tests_prescribing.R | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git a/R/process_tests_acute.R b/R/process_tests_acute.R index 759d866b7..8a974e442 100644 --- a/R/process_tests_acute.R +++ b/R/process_tests_acute.R @@ -18,7 +18,7 @@ process_tests_acute <- function(data, year) { old_data = produce_source_extract_tests(old_data), new_data = produce_source_extract_tests(data) ) %>% - write_tests_xlsx(sheet_name = "01B", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "01b", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_ae.R b/R/process_tests_ae.R index 5bcd6a3c9..2ec97b8d0 100644 --- a/R/process_tests_ae.R +++ b/R/process_tests_ae.R @@ -21,7 +21,7 @@ process_tests_ae <- function(data, year) { max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net") ) ) %>% - write_tests_xlsx(sheet_name = "AE2", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "ae2", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_alarms_telecare.R b/R/process_tests_alarms_telecare.R index d7f9fa699..c99aaa857 100644 --- a/R/process_tests_alarms_telecare.R +++ b/R/process_tests_alarms_telecare.R @@ -18,7 +18,7 @@ process_tests_alarms_telecare <- function(data, year) { ) comparison %>% - write_tests_xlsx(sheet_name = "AT", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "at", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_care_home.R b/R/process_tests_care_home.R index 2032c2473..dd54bd98b 100644 --- a/R/process_tests_care_home.R +++ b/R/process_tests_care_home.R @@ -15,7 +15,7 @@ process_tests_care_home <- function(data, year) { old_data = produce_source_ch_tests(old_data), new_data = produce_source_ch_tests(data) ) %>% - write_tests_xlsx(sheet_name = "CH", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "ch", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_cmh.R b/R/process_tests_cmh.R index 09a17bdbb..7ddec1657 100644 --- a/R/process_tests_cmh.R +++ b/R/process_tests_cmh.R @@ -20,7 +20,7 @@ process_tests_cmh <- function(data, year) { old_data = produce_source_cmh_tests(old_data), new_data = produce_source_cmh_tests(data) ) %>% - write_tests_xlsx(sheet_name = "CMH", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "cmh", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_delayed_discharges.R b/R/process_tests_delayed_discharges.R index c2370eb76..86a60968d 100644 --- a/R/process_tests_delayed_discharges.R +++ b/R/process_tests_delayed_discharges.R @@ -18,7 +18,7 @@ process_tests_delayed_discharges <- function(data, year) { old_data = produce_source_dd_tests(old_data), new_data = produce_source_dd_tests(data) ) %>% - write_tests_xlsx(sheet_name = "DD", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "dd", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_gp_ooh.R b/R/process_tests_gp_ooh.R index fd3ec5f59..6ce3ab5e5 100644 --- a/R/process_tests_gp_ooh.R +++ b/R/process_tests_gp_ooh.R @@ -19,7 +19,7 @@ process_tests_gp_ooh <- function(data, year) { sum_mean_vars = "cost" ) ) %>% - write_tests_xlsx(sheet_name = "GPOoH", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "gpooh", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_home_care.R b/R/process_tests_home_care.R index c1af63e97..f39cf8f5d 100644 --- a/R/process_tests_home_care.R +++ b/R/process_tests_home_care.R @@ -17,7 +17,7 @@ process_tests_home_care <- function(data, year) { ) comparison %>% - write_tests_xlsx(sheet_name = "home_care", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "hc", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_homelessness.R b/R/process_tests_homelessness.R index 4d49f1aa4..9c70161e1 100644 --- a/R/process_tests_homelessness.R +++ b/R/process_tests_homelessness.R @@ -16,7 +16,7 @@ process_tests_homelessness <- function(data, year) { old_data = produce_slf_homelessness_tests(old_data), new_data = produce_slf_homelessness_tests(data) ) %>% - write_tests_xlsx(sheet_name = "HL1", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "hl1", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_maternity.R b/R/process_tests_maternity.R index 90f0ec449..39d7c8aa1 100644 --- a/R/process_tests_maternity.R +++ b/R/process_tests_maternity.R @@ -15,7 +15,7 @@ process_tests_maternity <- function(data, year) { old_data = produce_source_extract_tests(old_data), new_data = produce_source_extract_tests(data) ) %>% - write_tests_xlsx(sheet_name = "02B", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "02b", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_mental_health.R b/R/process_tests_mental_health.R index 96283d47b..5b5cb9001 100644 --- a/R/process_tests_mental_health.R +++ b/R/process_tests_mental_health.R @@ -15,7 +15,7 @@ process_tests_mental_health <- function(data, year) { old_data = produce_source_extract_tests(old_data), new_data = produce_source_extract_tests(data) ) %>% - write_tests_xlsx(sheet_name = "04B", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "04b", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_nrs_deaths.R b/R/process_tests_nrs_deaths.R index c1a963dcf..cbfff2d54 100644 --- a/R/process_tests_nrs_deaths.R +++ b/R/process_tests_nrs_deaths.R @@ -15,7 +15,7 @@ process_tests_nrs_deaths <- function(data, year) { old_data = produce_source_nrs_tests(old_data), new_data = produce_source_nrs_tests(data) ) %>% - write_tests_xlsx(sheet_name = "NRS", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "nrs", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R index 5787e6884..6a377fcf2 100644 --- a/R/process_tests_outpatients.R +++ b/R/process_tests_outpatients.R @@ -23,7 +23,7 @@ process_tests_outpatients <- function(data, year) { add_hscp_count = FALSE ) ) %>% - write_tests_xlsx(sheet_name = "00B", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "00b", year, workbook_name = "extract") return(comparison) } diff --git a/R/process_tests_prescribing.R b/R/process_tests_prescribing.R index bac0e3c52..003f00091 100644 --- a/R/process_tests_prescribing.R +++ b/R/process_tests_prescribing.R @@ -15,7 +15,7 @@ process_tests_prescribing <- function(data, year) { old_data = produce_source_pis_tests(old_data), new_data = produce_source_pis_tests(data) ) %>% - write_tests_xlsx(sheet_name = "PIS", year, workbook_name = "extract") + write_tests_xlsx(sheet_name = "pis", year, workbook_name = "extract") return(comparison) } From b3d826bb1ff034b1ba5573299490a0ee1f5b6071 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 23 Jan 2024 11:10:37 +0000 Subject: [PATCH 021/105] Change date to lowercase --- R/write_tests_xlsx.R | 3 +++ 1 file changed, 3 insertions(+) diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index c6a962857..f05f20025 100644 --- a/R/write_tests_xlsx.R +++ b/R/write_tests_xlsx.R @@ -89,6 +89,9 @@ write_tests_xlsx <- function(comparison_data, # add a new sheet for tests date_today <- format(Sys.Date(), "%d_%b") + + date_today<- stringr::str_to_lower(date_today) + sheet_name_dated <- ifelse( is.null(year), stringr::str_glue("{sheet_name}_{date_today}"), From 4ca03b7f738056c618a8a18adb11d5db7c483d1c Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Tue, 23 Jan 2024 11:12:23 +0000 Subject: [PATCH 022/105] Update documentation --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5123289dd..4bb0c6f18 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,4 +73,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.0 From 0c61266f191f4392b9d9c7da14c962151c1acb31 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 23 Jan 2024 13:48:18 +0000 Subject: [PATCH 023/105] new data pipeline with targets remove create_individual_files from targets and append it to run_targets script --- _targets.R | 34 +++++++++++++++++----------------- run_targets_1718.R | 22 ++++++++++++++++++++++ run_targets_1819.R | 22 ++++++++++++++++++++++ run_targets_1920.R | 22 ++++++++++++++++++++++ run_targets_2021.R | 22 ++++++++++++++++++++++ run_targets_2122.R | 22 ++++++++++++++++++++++ run_targets_2223.R | 22 ++++++++++++++++++++++ run_targets_2324.R | 22 ++++++++++++++++++++++ 8 files changed, 171 insertions(+), 17 deletions(-) create mode 100644 run_targets_1718.R create mode 100644 run_targets_1819.R create mode 100644 run_targets_1920.R create mode 100644 run_targets_2021.R create mode 100644 run_targets_2122.R create mode 100644 run_targets_2223.R create mode 100644 run_targets_2324.R diff --git a/_targets.R b/_targets.R index 81adbf7c2..a3450d3f8 100644 --- a/_targets.R +++ b/_targets.R @@ -591,23 +591,23 @@ list( data = episode_file, year = year ) - ), - tar_target( - individual_file, - create_individual_file( - episode_file = episode_file, - year = year, - homelessness_lookup = homelessness_lookup, - write_to_disk = write_to_disk - ) - ), - tar_target( - individual_file_tests, - process_tests_individual_file( - data = individual_file, - year = year - ) - ) # , + )#, + # tar_target( + # individual_file, + # create_individual_file( + # episode_file = episode_file, + # year = year, + # homelessness_lookup = homelessness_lookup, + # write_to_disk = write_to_disk + # ) + # ), + # tar_target( + # individual_file_tests, + # process_tests_individual_file( + # data = individual_file, + # year = year + # ) + # ) # , # tar_target( # episode_file_dataset, # arrow::write_dataset( diff --git a/run_targets_1718.R b/run_targets_1718.R new file mode 100644 index 000000000..a205b4356 --- /dev/null +++ b/run_targets_1718.R @@ -0,0 +1,22 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year = "1718" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1718")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +individual_file = create_individual_file(episode_file, year = year) + +individual_file_test = individual_file %>% + process_tests_individual_file(year = year) diff --git a/run_targets_1819.R b/run_targets_1819.R new file mode 100644 index 000000000..f6ac6074d --- /dev/null +++ b/run_targets_1819.R @@ -0,0 +1,22 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year = "1819" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1819")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +individual_file = create_individual_file(episode_file, year = year) + +individual_file_test = individual_file %>% + process_tests_individual_file(year = year) diff --git a/run_targets_1920.R b/run_targets_1920.R new file mode 100644 index 000000000..55928ff38 --- /dev/null +++ b/run_targets_1920.R @@ -0,0 +1,22 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year = "1920" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1920")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +individual_file = create_individual_file(episode_file, year = year) + +individual_file_test = individual_file %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2021.R b/run_targets_2021.R new file mode 100644 index 000000000..f8865bb11 --- /dev/null +++ b/run_targets_2021.R @@ -0,0 +1,22 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year = "2021" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2021")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +individual_file = create_individual_file(episode_file, year = year) + +individual_file_test = individual_file %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2122.R b/run_targets_2122.R new file mode 100644 index 000000000..e833ff886 --- /dev/null +++ b/run_targets_2122.R @@ -0,0 +1,22 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year = "2122" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2122")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +individual_file = create_individual_file(episode_file, year = year) + +individual_file_test = individual_file %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2223.R b/run_targets_2223.R new file mode 100644 index 000000000..10d9f42ea --- /dev/null +++ b/run_targets_2223.R @@ -0,0 +1,22 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year = "2223" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2223")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +individual_file = create_individual_file(episode_file, year = year) + +individual_file_test = individual_file %>% + process_tests_individual_file(year = year) diff --git a/run_targets_2324.R b/run_targets_2324.R new file mode 100644 index 000000000..7fe78b3bd --- /dev/null +++ b/run_targets_2324.R @@ -0,0 +1,22 @@ +library(targets) + +Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") + +year = "2324" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2324")) +) + +# use targets to create individual files due to RAM limit +library(createslf) + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +individual_file = create_individual_file(episode_file, year = year) + +individual_file_test = individual_file %>% + process_tests_individual_file(year = year) From 524219109da87911230b43ea76e7cc4c118bae78 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 23 Jan 2024 13:52:48 +0000 Subject: [PATCH 024/105] minor changes --- run_targets_1718.R | 4 +--- run_targets_1819.R | 4 +--- run_targets_1920.R | 4 +--- run_targets_2021.R | 4 +--- run_targets_2122.R | 4 +--- run_targets_2223.R | 4 +--- run_targets_2324.R | 4 +--- 7 files changed, 7 insertions(+), 21 deletions(-) diff --git a/run_targets_1718.R b/run_targets_1718.R index a205b4356..762f3898f 100644 --- a/run_targets_1718.R +++ b/run_targets_1718.R @@ -16,7 +16,5 @@ library(createslf) episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -individual_file = create_individual_file(episode_file, year = year) - -individual_file_test = individual_file %>% +create_individual_file(episode_file, year = year) %>% process_tests_individual_file(year = year) diff --git a/run_targets_1819.R b/run_targets_1819.R index f6ac6074d..e01fa2f9b 100644 --- a/run_targets_1819.R +++ b/run_targets_1819.R @@ -16,7 +16,5 @@ library(createslf) episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -individual_file = create_individual_file(episode_file, year = year) - -individual_file_test = individual_file %>% +create_individual_file(episode_file, year = year) %>% process_tests_individual_file(year = year) diff --git a/run_targets_1920.R b/run_targets_1920.R index 55928ff38..197200f33 100644 --- a/run_targets_1920.R +++ b/run_targets_1920.R @@ -16,7 +16,5 @@ library(createslf) episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -individual_file = create_individual_file(episode_file, year = year) - -individual_file_test = individual_file %>% +create_individual_file(episode_file, year = year) %>% process_tests_individual_file(year = year) diff --git a/run_targets_2021.R b/run_targets_2021.R index f8865bb11..612c0bfd9 100644 --- a/run_targets_2021.R +++ b/run_targets_2021.R @@ -16,7 +16,5 @@ library(createslf) episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -individual_file = create_individual_file(episode_file, year = year) - -individual_file_test = individual_file %>% +create_individual_file(episode_file, year = year) %>% process_tests_individual_file(year = year) diff --git a/run_targets_2122.R b/run_targets_2122.R index e833ff886..6fa91fd9a 100644 --- a/run_targets_2122.R +++ b/run_targets_2122.R @@ -16,7 +16,5 @@ library(createslf) episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -individual_file = create_individual_file(episode_file, year = year) - -individual_file_test = individual_file %>% +create_individual_file(episode_file, year = year) %>% process_tests_individual_file(year = year) diff --git a/run_targets_2223.R b/run_targets_2223.R index 10d9f42ea..e5919be6b 100644 --- a/run_targets_2223.R +++ b/run_targets_2223.R @@ -16,7 +16,5 @@ library(createslf) episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -individual_file = create_individual_file(episode_file, year = year) - -individual_file_test = individual_file %>% +create_individual_file(episode_file, year = year) %>% process_tests_individual_file(year = year) diff --git a/run_targets_2324.R b/run_targets_2324.R index 7fe78b3bd..8c259660f 100644 --- a/run_targets_2324.R +++ b/run_targets_2324.R @@ -16,7 +16,5 @@ library(createslf) episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -individual_file = create_individual_file(episode_file, year = year) - -individual_file_test = individual_file %>% +create_individual_file(episode_file, year = year) %>% process_tests_individual_file(year = year) From 45651a2c72b468e3cbf94b84ab3217df25ac464d Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 23 Jan 2024 13:54:48 +0000 Subject: [PATCH 025/105] Style code --- _targets.R | 2 +- run_targets_1718.R | 2 +- run_targets_1819.R | 2 +- run_targets_1920.R | 2 +- run_targets_2021.R | 2 +- run_targets_2122.R | 2 +- run_targets_2223.R | 2 +- run_targets_2324.R | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/_targets.R b/_targets.R index a3450d3f8..15d2584bb 100644 --- a/_targets.R +++ b/_targets.R @@ -591,7 +591,7 @@ list( data = episode_file, year = year ) - )#, + ) # , # tar_target( # individual_file, # create_individual_file( diff --git a/run_targets_1718.R b/run_targets_1718.R index 762f3898f..488918e1d 100644 --- a/run_targets_1718.R +++ b/run_targets_1718.R @@ -2,7 +2,7 @@ library(targets) Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") -year = "1718" +year <- "1718" # use targets for the process until testing episode files tar_make_future( diff --git a/run_targets_1819.R b/run_targets_1819.R index e01fa2f9b..7c63807e8 100644 --- a/run_targets_1819.R +++ b/run_targets_1819.R @@ -2,7 +2,7 @@ library(targets) Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") -year = "1819" +year <- "1819" # use targets for the process until testing episode files tar_make_future( diff --git a/run_targets_1920.R b/run_targets_1920.R index 197200f33..d3361a34c 100644 --- a/run_targets_1920.R +++ b/run_targets_1920.R @@ -2,7 +2,7 @@ library(targets) Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") -year = "1920" +year <- "1920" # use targets for the process until testing episode files tar_make_future( diff --git a/run_targets_2021.R b/run_targets_2021.R index 612c0bfd9..efcfaed7a 100644 --- a/run_targets_2021.R +++ b/run_targets_2021.R @@ -2,7 +2,7 @@ library(targets) Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") -year = "2021" +year <- "2021" # use targets for the process until testing episode files tar_make_future( diff --git a/run_targets_2122.R b/run_targets_2122.R index 6fa91fd9a..e92d75c7d 100644 --- a/run_targets_2122.R +++ b/run_targets_2122.R @@ -2,7 +2,7 @@ library(targets) Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") -year = "2122" +year <- "2122" # use targets for the process until testing episode files tar_make_future( diff --git a/run_targets_2223.R b/run_targets_2223.R index e5919be6b..f5c93ee2f 100644 --- a/run_targets_2223.R +++ b/run_targets_2223.R @@ -2,7 +2,7 @@ library(targets) Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") -year = "2223" +year <- "2223" # use targets for the process until testing episode files tar_make_future( diff --git a/run_targets_2324.R b/run_targets_2324.R index 8c259660f..5e3885bc2 100644 --- a/run_targets_2324.R +++ b/run_targets_2324.R @@ -2,7 +2,7 @@ library(targets) Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf") -year = "2324" +year <- "2324" # use targets for the process until testing episode files tar_make_future( From 0e69e503643c08ee33a559ac8e7010c118c164f8 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 24 Jan 2024 13:24:51 +0000 Subject: [PATCH 026/105] Update documentation --- DESCRIPTION | 2 +- man/calculate_stay.Rd | 4 ++-- man/compute_mid_year_age.Rd | 4 ++-- man/convert_date_to_numeric.Rd | 4 ++-- man/convert_numeric_to_date.Rd | 4 ++-- man/end_fy.Rd | 2 +- man/end_fy_quarter.Rd | 2 +- man/end_next_fy_quarter.Rd | 4 ++-- man/fy_interval.Rd | 4 ++-- man/is_date_in_fyyear.Rd | 4 ++-- man/last_date_month.Rd | 4 ++-- man/midpoint_fy.Rd | 4 ++-- man/next_fy.Rd | 4 ++-- man/start_fy.Rd | 2 +- man/start_fy_quarter.Rd | 2 +- man/start_next_fy_quarter.Rd | 6 +++--- 16 files changed, 28 insertions(+), 28 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4bb0c6f18..3a75852e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,4 +73,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.0 +RoxygenNote: 7.3.1 diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd index 43b7bd166..5e9266b10 100644 --- a/man/calculate_stay.Rd +++ b/man/calculate_stay.Rd @@ -34,16 +34,16 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd index 142fa4aab..5a50370e0 100644 --- a/man/compute_mid_year_age.Rd +++ b/man/compute_mid_year_age.Rd @@ -31,16 +31,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd index 5511fec84..b67eaa778 100644 --- a/man/convert_date_to_numeric.Rd +++ b/man/convert_date_to_numeric.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd index f786e0319..a09b7b9b9 100644 --- a/man/convert_numeric_to_date.Rd +++ b/man/convert_numeric_to_date.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_fy.Rd b/man/end_fy.Rd index 2925ffe60..6220f5f32 100644 --- a/man/end_fy.Rd +++ b/man/end_fy.Rd @@ -34,8 +34,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd index 0efe9624a..26c439a04 100644 --- a/man/end_fy_quarter.Rd +++ b/man/end_fy_quarter.Rd @@ -33,8 +33,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd index f9cc1720a..702446e82 100644 --- a/man/end_next_fy_quarter.Rd +++ b/man/end_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd index 12d1d36bb..00b9ea52c 100644 --- a/man/fy_interval.Rd +++ b/man/fy_interval.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd index 97a0f3639..e74bd5734 100644 --- a/man/is_date_in_fyyear.Rd +++ b/man/is_date_in_fyyear.Rd @@ -41,15 +41,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd index f52305356..3d3b9544e 100644 --- a/man/last_date_month.Rd +++ b/man/last_date_month.Rd @@ -25,15 +25,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd index 7bac9b6b3..2363df773 100644 --- a/man/midpoint_fy.Rd +++ b/man/midpoint_fy.Rd @@ -27,15 +27,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/next_fy.Rd b/man/next_fy.Rd index 19e1193f4..7524c5f11 100644 --- a/man/next_fy.Rd +++ b/man/next_fy.Rd @@ -27,15 +27,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/start_fy.Rd b/man/start_fy.Rd index 4996bfb72..9951af2ec 100644 --- a/man/start_fy.Rd +++ b/man/start_fy.Rd @@ -27,8 +27,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd index f5729dcb0..9936736a8 100644 --- a/man/start_fy_quarter.Rd +++ b/man/start_fy_quarter.Rd @@ -26,8 +26,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd index 098f0bf73..fdac297a7 100644 --- a/man/start_next_fy_quarter.Rd +++ b/man/start_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, -\code{\link{start_fy}()} +\code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()} } \concept{date functions} From cbf5ae4cd6f6d6f12ee0de18a01313abd3aaa3df Mon Sep 17 00:00:00 2001 From: SwiftySalmon Date: Wed, 24 Jan 2024 13:58:36 +0000 Subject: [PATCH 027/105] Update documentation --- DESCRIPTION | 2 +- man/calculate_stay.Rd | 4 ++-- man/compute_mid_year_age.Rd | 4 ++-- man/convert_date_to_numeric.Rd | 4 ++-- man/convert_numeric_to_date.Rd | 4 ++-- man/end_fy.Rd | 2 +- man/end_fy_quarter.Rd | 2 +- man/end_next_fy_quarter.Rd | 4 ++-- man/fy_interval.Rd | 4 ++-- man/is_date_in_fyyear.Rd | 4 ++-- man/last_date_month.Rd | 4 ++-- man/midpoint_fy.Rd | 4 ++-- man/next_fy.Rd | 4 ++-- man/start_fy.Rd | 2 +- man/start_fy_quarter.Rd | 2 +- man/start_next_fy_quarter.Rd | 6 +++--- 16 files changed, 28 insertions(+), 28 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 4bb0c6f18..3a75852e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,4 +73,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.0 +RoxygenNote: 7.3.1 diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd index 43b7bd166..5e9266b10 100644 --- a/man/calculate_stay.Rd +++ b/man/calculate_stay.Rd @@ -34,16 +34,16 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd index 142fa4aab..5a50370e0 100644 --- a/man/compute_mid_year_age.Rd +++ b/man/compute_mid_year_age.Rd @@ -31,16 +31,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd index 5511fec84..b67eaa778 100644 --- a/man/convert_date_to_numeric.Rd +++ b/man/convert_date_to_numeric.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd index f786e0319..a09b7b9b9 100644 --- a/man/convert_numeric_to_date.Rd +++ b/man/convert_numeric_to_date.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_fy.Rd b/man/end_fy.Rd index 2925ffe60..6220f5f32 100644 --- a/man/end_fy.Rd +++ b/man/end_fy.Rd @@ -34,8 +34,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd index 0efe9624a..26c439a04 100644 --- a/man/end_fy_quarter.Rd +++ b/man/end_fy_quarter.Rd @@ -33,8 +33,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd index f9cc1720a..702446e82 100644 --- a/man/end_next_fy_quarter.Rd +++ b/man/end_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd index 12d1d36bb..00b9ea52c 100644 --- a/man/fy_interval.Rd +++ b/man/fy_interval.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd index 97a0f3639..e74bd5734 100644 --- a/man/is_date_in_fyyear.Rd +++ b/man/is_date_in_fyyear.Rd @@ -41,15 +41,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd index f52305356..3d3b9544e 100644 --- a/man/last_date_month.Rd +++ b/man/last_date_month.Rd @@ -25,15 +25,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd index 7bac9b6b3..2363df773 100644 --- a/man/midpoint_fy.Rd +++ b/man/midpoint_fy.Rd @@ -27,15 +27,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/next_fy.Rd b/man/next_fy.Rd index 19e1193f4..7524c5f11 100644 --- a/man/next_fy.Rd +++ b/man/next_fy.Rd @@ -27,15 +27,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/start_fy.Rd b/man/start_fy.Rd index 4996bfb72..9951af2ec 100644 --- a/man/start_fy.Rd +++ b/man/start_fy.Rd @@ -27,8 +27,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd index f5729dcb0..9936736a8 100644 --- a/man/start_fy_quarter.Rd +++ b/man/start_fy_quarter.Rd @@ -26,8 +26,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd index 098f0bf73..fdac297a7 100644 --- a/man/start_next_fy_quarter.Rd +++ b/man/start_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, -\code{\link{start_fy}()} +\code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()} } \concept{date functions} From 3055d54f80f75b3b8c29306116a0ad83837d8645 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Mon, 29 Jan 2024 09:56:34 +0000 Subject: [PATCH 028/105] Style code --- R/write_tests_xlsx.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index f05f20025..ffe86f48f 100644 --- a/R/write_tests_xlsx.R +++ b/R/write_tests_xlsx.R @@ -90,7 +90,7 @@ write_tests_xlsx <- function(comparison_data, # add a new sheet for tests date_today <- format(Sys.Date(), "%d_%b") - date_today<- stringr::str_to_lower(date_today) + date_today <- stringr::str_to_lower(date_today) sheet_name_dated <- ifelse( is.null(year), From 2acc38f2daa07c15d863cef5a20241431d75b48d Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 30 Jan 2024 15:00:55 +0000 Subject: [PATCH 029/105] undo sc_send_lca bit --- R/create_episode_file.R | 3 --- R/process_lookup_sc_client.R | 3 --- 2 files changed, 6 deletions(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 376173c97..493d71bd3 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -31,9 +31,6 @@ create_episode_file <- function( sc_client = read_file(get_sc_client_lookup_path(year)), write_to_disk = TRUE, anon_chi_out = TRUE) { - sc_client <- sc_client %>% - dplyr::select(-sc_send_lca) - processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble())) episode_file <- dplyr::bind_rows(processed_data_list) %>% diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index 58b72ebab..e64d4b6ba 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -150,9 +150,6 @@ process_lookup_sc_client <- )) == "Not Known")) %>% dplyr::arrange(chi, count_not_known) %>% dplyr::distinct(chi, .keep_all = TRUE) %>% - dplyr::mutate( - sc_send_lca = convert_sc_sending_location_to_lca(sending_location) - ) %>% dplyr::select(-sending_location) if (write_to_disk) { From ce77f750de4b3e7633e6f9af10bd914ac8afecff Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 30 Jan 2024 15:09:28 +0000 Subject: [PATCH 030/105] Add code for running years available --- R/process_lookup_homelessness.R | 6 ++++++ R/process_lookup_sc_client.R | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R index 7137c6393..ddd589c4b 100644 --- a/R/process_lookup_homelessness.R +++ b/R/process_lookup_homelessness.R @@ -13,6 +13,12 @@ create_homelessness_lookup <- function( year, homelessness_data = read_file(get_source_extract_path(year, "homelessness"))) { + + # Specify years available for running + if (year < "1617") { + return(NULL) + } + homelessness_lookup <- homelessness_data %>% dplyr::distinct(.data$chi, .data$record_keydate1, .data$record_keydate2) %>% tidyr::drop_na(.data$chi) %>% diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index e64d4b6ba..e60d36473 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -20,6 +20,12 @@ process_lookup_sc_client <- col_select = c("sending_location", "social_care_id", "chi") ), write_to_disk = TRUE) { + + # Specify years available for running + if (year < "1718") { + return(NULL) + } + client_clean <- data %>% # Replace 'unknown' responses with NA dplyr::mutate( From 9f16a688da28188fd26134f9bbcf8f31a4dd7dde Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 30 Jan 2024 15:12:11 +0000 Subject: [PATCH 031/105] Update `_targets.R` script for running old years --- _targets.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/_targets.R b/_targets.R index 15d2584bb..817491755 100644 --- a/_targets.R +++ b/_targets.R @@ -19,7 +19,8 @@ tar_option_set( memory = "persistent" # default option ) -years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324") +years_to_run <- c("1415", "1516", "1617","1718", "1819", "1920", + "2021", "2122", "2223", "2324") list( tar_rds(write_to_disk, TRUE), From 81c78f5c8e382b7fd4303cdb897f2f23a729f665 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Tue, 30 Jan 2024 15:14:06 +0000 Subject: [PATCH 032/105] Style code --- R/process_lookup_homelessness.R | 1 - R/process_lookup_sc_client.R | 1 - _targets.R | 6 ++++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R index ddd589c4b..ee03809d1 100644 --- a/R/process_lookup_homelessness.R +++ b/R/process_lookup_homelessness.R @@ -13,7 +13,6 @@ create_homelessness_lookup <- function( year, homelessness_data = read_file(get_source_extract_path(year, "homelessness"))) { - # Specify years available for running if (year < "1617") { return(NULL) diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index e60d36473..784982339 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -20,7 +20,6 @@ process_lookup_sc_client <- col_select = c("sending_location", "social_care_id", "chi") ), write_to_disk = TRUE) { - # Specify years available for running if (year < "1718") { return(NULL) diff --git a/_targets.R b/_targets.R index 817491755..4bbcd10f3 100644 --- a/_targets.R +++ b/_targets.R @@ -19,8 +19,10 @@ tar_option_set( memory = "persistent" # default option ) -years_to_run <- c("1415", "1516", "1617","1718", "1819", "1920", - "2021", "2122", "2223", "2324") +years_to_run <- c( + "1415", "1516", "1617", "1718", "1819", "1920", + "2021", "2122", "2223", "2324" +) list( tar_rds(write_to_disk, TRUE), From 334a2bbbffe676f93fcaedc4fdb2d82fcf7b9443 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 31 Jan 2024 12:14:20 +0000 Subject: [PATCH 033/105] Update `check_year_valid` for running old years --- R/check_year_valid.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/check_year_valid.R b/R/check_year_valid.R index 51c66e1b0..990487e39 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -34,9 +34,9 @@ check_year_valid <- function( )) { if (year <= "1415" && type %in% c("dn", "sparra")) { return(FALSE) - } else if (year <= "1516" && type %in% c("cmh", "homelessness")) { + } else if (year <= "1516" && type %in% c("cmh", "homelessness", "dd")) { return(FALSE) - } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at")) { + } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at", "client")) { return(FALSE) } else if (year <= "1718" && type %in% "hhg") { return(FALSE) From dc716036319034afccb6b96163819a6dab9a36b1 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 31 Jan 2024 12:36:56 +0000 Subject: [PATCH 034/105] Use `check_year_valid` where no data for old yrs --- R/create_episode_file.R | 6 ++++++ R/link_delayed_discharge_eps.R | 6 ++++++ R/process_lookup_homelessness.R | 12 ++++++++++++ 3 files changed, 24 insertions(+) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 493d71bd3..f280fac35 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -435,6 +435,12 @@ join_sc_client <- function(data, year, sc_client = read_file(get_sc_client_lookup_path(year)), file_type = c("episode", "individual")) { + + if (!check_year_valid(year, type = "client")) { + data_file <- data + return(data_file) + } + if (file_type == "episode") { # Match on client variables by chi data_file <- data %>% diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index b4c3b2f5b..dcf627024 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -12,6 +12,12 @@ link_delayed_discharge_eps <- function( episode_file, year, dd_data = read_file(get_source_extract_path(year, "dd"))) { + + if (!check_year_valid(year, type = "dd")) { + episode_file <- episode_file + return(episode_file) + } + episode_file <- episode_file %>% dplyr::mutate( # remember to revoke the cij_end_date with dummy_cij_end diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R index ee03809d1..df6c1fdd3 100644 --- a/R/process_lookup_homelessness.R +++ b/R/process_lookup_homelessness.R @@ -40,6 +40,12 @@ create_homelessness_lookup <- function( #' @export add_homelessness_flag <- function(data, year, lookup = create_homelessness_lookup(year)) { + + if (!check_year_valid(year, type = "homelessness")) { + data <- data + return(data) + } + data <- data %>% dplyr::left_join( lookup %>% @@ -64,6 +70,12 @@ add_homelessness_flag <- function(data, year, #' @return the final data as a [tibble][tibble::tibble-package]. #' @export add_homelessness_date_flags <- function(data, year, lookup = create_homelessness_lookup(year)) { + + if (!check_year_valid(year, type = "homelessness")) { + data <- data + return(data) + } + lookup <- lookup %>% dplyr::filter(!(is.na(.data$record_keydate2))) %>% dplyr::rename( From 85af5f4246958078c67faee3fd2bdea2eb81498a Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 31 Jan 2024 12:40:38 +0000 Subject: [PATCH 035/105] Style code --- R/create_episode_file.R | 1 - R/link_delayed_discharge_eps.R | 1 - R/process_lookup_homelessness.R | 2 -- 3 files changed, 4 deletions(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index f280fac35..4db7436e9 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -435,7 +435,6 @@ join_sc_client <- function(data, year, sc_client = read_file(get_sc_client_lookup_path(year)), file_type = c("episode", "individual")) { - if (!check_year_valid(year, type = "client")) { data_file <- data return(data_file) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index dcf627024..be9c221cf 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -12,7 +12,6 @@ link_delayed_discharge_eps <- function( episode_file, year, dd_data = read_file(get_source_extract_path(year, "dd"))) { - if (!check_year_valid(year, type = "dd")) { episode_file <- episode_file return(episode_file) diff --git a/R/process_lookup_homelessness.R b/R/process_lookup_homelessness.R index df6c1fdd3..9ccf226c4 100644 --- a/R/process_lookup_homelessness.R +++ b/R/process_lookup_homelessness.R @@ -40,7 +40,6 @@ create_homelessness_lookup <- function( #' @export add_homelessness_flag <- function(data, year, lookup = create_homelessness_lookup(year)) { - if (!check_year_valid(year, type = "homelessness")) { data <- data return(data) @@ -70,7 +69,6 @@ add_homelessness_flag <- function(data, year, #' @return the final data as a [tibble][tibble::tibble-package]. #' @export add_homelessness_date_flags <- function(data, year, lookup = create_homelessness_lookup(year)) { - if (!check_year_valid(year, type = "homelessness")) { data <- data return(data) From 30cb567fe1df6cfab6e0818017a0e6aef5014c51 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 5 Feb 2024 13:41:25 +0000 Subject: [PATCH 036/105] Fix pick variables This was not taking the correct variables, leading to NSUs being assigned psychiatry --- R/create_service_use_lookup.R | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/R/create_service_use_lookup.R b/R/create_service_use_lookup.R index 4acbfc507..242e0b351 100644 --- a/R/create_service_use_lookup.R +++ b/R/create_service_use_lookup.R @@ -908,7 +908,13 @@ assign_cohort_names <- function(data) { # Situation where no cost is greater than another, # so the maximum is the same as the mean .data$cost_max == rowSums( - dplyr::pick("psychiatry_cost":"residential_care_cost") + dplyr::pick(c( + "psychiatry_cost", "maternity_cost", "geriatric_cost", + "elective_inpatient_cost", "limited_daycases_cost", + "routine_daycase_cost", "single_emergency_cost", + "multiple_emergency_cost", "prescribing_cost", + "outpatient_cost", "ae2_cost", "residential_care_cost" + )) ) / 12.0 ~ "Unassigned", .data$cost_max == .data$psychiatry_cost ~ "Psychiatry", .data$cost_max == .data$maternity_cost ~ "Maternity", From 744bbc024b5cba09ebe9514cd36ad794b1e24fff Mon Sep 17 00:00:00 2001 From: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Date: Wed, 7 Feb 2024 14:28:21 +0000 Subject: [PATCH 037/105] SC Demographics and SDS (#900) * Style code * # read in sc demographics different variables - removed extract date as not accurate, using chi over upi after discussion with social care data management. Added in date of death just for fun. * social care demographics first draft removed a lot of the submitted variables and instead using chi variables from chi seeding. Other changes: - Fill in missing values, - create flag for latest social care id (one from database is not accurate), this makes sure that each chi only has ONE sc id as the latest to stop it creating duplicates - change postcode to choose chi over submitted * Style code * had a github error? Not sure what happened but commiting first draft of sc demographics * Style code * first draft sds. No major changes - only how demographics is matched on and how latest social care id is selected * Update documentation * demographics - add sending location to group by * Style code * Update documentation * Added ungroup() * Remove comments * Remove comments * Style code --------- Co-authored-by: SwiftySalmon Co-authored-by: marjom02 Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> Co-authored-by: Jennit07 Co-authored-by: Zihao Li --- R/fix_sc_dates.R | 8 +-- R/process_lookup_sc_demographics.R | 97 +++++++++++++++++++----------- R/process_sc_all_sds.R | 19 ++++-- R/read_lookup_sc_demographics.R | 15 +++-- R/read_sc_all_sds.R | 8 +-- R/replace_sc_id_with_latest.R | 25 +++----- man/fix_sc_end_dates.Rd | 2 +- 7 files changed, 98 insertions(+), 76 deletions(-) diff --git a/R/fix_sc_dates.R b/R/fix_sc_dates.R index c636980a6..117acbaab 100644 --- a/R/fix_sc_dates.R +++ b/R/fix_sc_dates.R @@ -9,7 +9,7 @@ #' @return A date vector with replaced end dates fix_sc_start_dates <- function(start_date, period_start) { # Fix sds_start_date is missing by setting start_date to be the start of - # financial year + # financial period start_date <- dplyr::if_else( is.na(start_date), period_start, @@ -30,12 +30,12 @@ fix_sc_start_dates <- function(start_date, period_start) { #' @param period Social care latest submission period. #' #' @return A date vector with replaced end dates -fix_sc_end_dates <- function(start_date, end_date, period) { +fix_sc_end_dates <- function(start_date, end_date, period_end_date) { # Fix sds_end_date is earlier than sds_start_date by setting end_date to be # the end of financial year end_date <- dplyr::if_else( start_date > end_date, - end_fy(year = stringr::str_sub(period, 1L, 4L), "alternate"), + period_end_date, end_date ) @@ -57,7 +57,7 @@ fix_sc_end_dates <- function(start_date, end_date, period) { #' @return A date vector with replaced end dates fix_sc_missing_end_dates <- function(end_date, period_end) { # Fix sds_end_date is earlier than sds_start_date by setting end_date to be - # the end of financial year + # the end of financial period end_date <- dplyr::if_else( is.na(end_date), period_end, diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R index 8c363f547..96adc985e 100644 --- a/R/process_lookup_sc_demographics.R +++ b/R/process_lookup_sc_demographics.R @@ -28,30 +28,46 @@ process_lookup_sc_demographics <- function( dplyr::pull(.data$pc7) - # Data Cleaning --------------------------------------- - + # Fill in missing data and flag latest cases to keep --------------------------------------- sc_demog <- data %>% - dplyr::mutate( - # use chi if upi is NA - upi = dplyr::coalesce(.data$upi, .data$chi_upi), - # check gender code - replace code 99 with 9 - submitted_gender = replace(.data$submitted_gender, .data$submitted_gender == 99L, 9L) + dplyr::rename( + chi = chi_upi, + gender = chi_gender_code, + dob = chi_date_of_birth ) %>% + # fill in missing demographic details + dplyr::arrange(period, social_care_id) %>% + dplyr::group_by(social_care_id, sending_location) %>% + tidyr::fill(chi, .direction = ("updown")) %>% + tidyr::fill(dob, .direction = ("updown")) %>% + tidyr::fill(date_of_death, .direction = ("updown")) %>% + tidyr::fill(gender, .direction = ("updown")) %>% + tidyr::fill(chi_postcode, .direction = ("updown")) %>% + tidyr::fill(submitted_postcode, .direction = ("updown")) %>% + dplyr::ungroup() %>% + # format postcodes using `phsmethods` + dplyr::mutate(dplyr::across(tidyselect::contains("postcode"), ~ phsmethods::format_postcode(.x, format = "pc7"))) # are sc postcodes even used anywhere? + + + # flag unique cases of chi and sc_id, and flag the latest record (sc_demographics latest flag is not accurate) + sc_demog <- sc_demog %>% + dplyr::group_by(chi, sending_location) %>% + dplyr::mutate(latest = dplyr::last(period)) %>% # flag latest period for chi + dplyr::group_by(chi, social_care_id, sending_location) %>% + dplyr::mutate(latest_sc_id = dplyr::last(period)) %>% # flag latest period for social care + dplyr::group_by(chi, sending_location) %>% + dplyr::mutate(last_sc_id = dplyr::last(social_care_id)) %>% dplyr::mutate( - # use CHI sex if available - gender = dplyr::if_else( - is.na(.data$chi_gender_code) | .data$chi_gender_code == 9L, - .data$submitted_gender, - .data$chi_gender_code - ), - # Use CHI DoB if available - dob = dplyr::coalesce(.data$chi_date_of_birth, .data$submitted_date_of_birth) + latest_flag = ifelse((latest == period & last_sc_id == social_care_id) | is.na(chi), 1, 0), + keep = ifelse(latest_sc_id == period, 1, 0) ) %>% - # format postcodes using `phsmethods` - dplyr::mutate(dplyr::across( - tidyselect::contains("postcode"), - ~ phsmethods::format_postcode(.x, format = "pc7") - )) + dplyr::ungroup() + + sc_demog <- sc_demog %>% + dplyr::select(-period, -latest_record_flag, -latest, -last_sc_id, -latest_sc_id) %>% + dplyr::distinct() + + # postcodes --------------------------------------------------------------- # count number of na postcodes na_postcodes <- sc_demog %>% @@ -69,29 +85,32 @@ process_lookup_sc_demographics <- function( ~ dplyr::if_else(stringr::str_detect(.x, uk_pc_regexp), .x, NA) )) %>% dplyr::select( - "latest_record_flag", - "extract_date", "sending_location", "social_care_id", - "upi", + "chi", "gender", "dob", + "date_of_death", "submitted_postcode", - "chi_postcode" + "chi_postcode", + "keep", + "latest_flag" ) %>% # check if submitted_postcode matches with postcode lookup dplyr::mutate( - valid_pc = .data$submitted_postcode %in% valid_spd_postcodes + valid_pc_submitted = .data$submitted_postcode %in% valid_spd_postcodes, + valid_pc_chi = .data$chi_postcode %in% valid_spd_postcodes ) %>% # use submitted_postcode if valid, otherwise use chi_postcode dplyr::mutate(postcode = dplyr::case_when( - (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ .data$submitted_postcode, - (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ .data$chi_postcode + (!is.na(.data$chi_postcode) & .data$valid_pc_chi) ~ .data$chi_postcode, + ((is.na(.data$chi_postcode) | !(.data$valid_pc_chi)) & !(is.na(.data$submitted_postcode)) & .data$valid_pc_submitted) ~ .data$submitted_postcode, + (is.na(.data$submitted_postcode) & !.data$valid_pc_submitted) ~ .data$chi_postcode )) %>% dplyr::mutate(postcode_type = dplyr::case_when( - (!is.na(.data$submitted_postcode) & .data$valid_pc) ~ "submitted", - (is.na(.data$submitted_postcode) & !.data$valid_pc) ~ "chi", - (is.na(.data$submitted_postcode) & is.na(.data$chi_postcode)) ~ "missing" + (postcode == chi_postcode) ~ "chi", + (postcode == submitted_postcode) ~ "submitted", + (is.na(.data$submitted_postcode) & is.na(.data$chi_postcode) | is.na(.data$postcode)) ~ "missing" )) # Check where the postcodes are coming from @@ -102,26 +121,32 @@ process_lookup_sc_demographics <- function( na_replaced_postcodes <- sc_demog %>% dplyr::count(dplyr::across(tidyselect::ends_with("_postcode"), ~ is.na(.x))) - sc_demog_lookup <- sc_demog %>% + dplyr::filter(keep == 1) %>% # filter to only keep latest record for sc id and chi + dplyr::select(-postcode_type, -valid_pc_submitted, -valid_pc_chi, -submitted_postcode, -chi_postcode) %>% + dplyr::distinct() %>% # group by sending location and ID - dplyr::group_by(.data$sending_location, .data$social_care_id) %>% + dplyr::group_by(.data$sending_location, .data$chi, .data$social_care_id, .data$latest_flag) %>% # arrange so latest submissions are last dplyr::arrange( .data$sending_location, .data$social_care_id, - .data$latest_record_flag, - .data$extract_date + .data$latest_flag ) %>% # summarise to select the last (non NA) submission dplyr::summarise( - chi = dplyr::last(.data$upi), gender = dplyr::last(.data$gender), dob = dplyr::last(.data$dob), - postcode = dplyr::last(.data$postcode) + postcode = dplyr::last(.data$postcode), + date_of_death = dplyr::last(.data$date_of_death) ) %>% dplyr::ungroup() + # check to make sure all cases of chi are still there + dplyr::n_distinct(sc_demog_lookup$chi) # 524810 + dplyr::n_distinct(sc_demog_lookup$social_care_id) # 636404 + + if (write_to_disk) { write_file( sc_demog_lookup, diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R index f9ca52f24..453db3e40 100644 --- a/R/process_sc_all_sds.R +++ b/R/process_sc_all_sds.R @@ -15,14 +15,17 @@ process_sc_all_sds <- function( sc_demog_lookup, write_to_disk = TRUE) { # Match on demographics data (chi, gender, dob and postcode) - matched_sds_data <- data %>% - dplyr::left_join( + matched_sds_data <- data %>% # + dplyr::filter(.data$sds_start_date_after_period_end_date != 1) %>% + dplyr::right_join( sc_demog_lookup, by = c("sending_location", "social_care_id") ) %>% # when multiple social_care_id from sending_location for single CHI # replace social_care_id with latest - replace_sc_id_with_latest() + replace_sc_id_with_latest() %>% + dplyr::select(-latest_sc_id, -latest_flag, -sds_start_date_after_period_end_date) %>% + dplyr::distinct() # Data Cleaning --------------------------------------- sds_full_clean <- matched_sds_data %>% @@ -50,7 +53,7 @@ process_sc_all_sds <- function( .data$sds_start_date, .data$sds_period_start_date ), - # If SDS end date is missing, assign end of FY + # If SDS end date is missing, assign end of financial period sds_end_date = fix_sc_missing_end_dates( .data$sds_end_date, .data$sds_period_end_date @@ -59,14 +62,19 @@ process_sc_all_sds <- function( sds_end_date = fix_sc_end_dates( .data$sds_start_date, .data$sds_end_date, - .data$period + .data$sds_period_end_date ) ) %>% + dplyr::select( + -sds_period_start_date, -sds_period_end_date, + -sds_start_date_after_end_date + ) %>% # rename for matching source variables dplyr::rename( record_keydate1 = .data$sds_start_date, record_keydate2 = .data$sds_end_date ) %>% + dplyr::distinct() %>% # Pivot longer on sds option variables tidyr::pivot_longer( cols = tidyselect::contains("sds_option_"), @@ -103,6 +111,7 @@ process_sc_all_sds <- function( ) %>% dplyr::arrange(.data$period, .data$record_keydate1, + .data$record_keydate2, .by_group = TRUE ) %>% # Create a flag for episodes that are going to be merged diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R index fcdde5417..fe9a5e71f 100644 --- a/R/read_lookup_sc_demographics.R +++ b/R/read_lookup_sc_demographics.R @@ -12,16 +12,15 @@ read_lookup_sc_demographics <- function(sc_connection = phs_db_connection(dsn = ) %>% dplyr::select( "latest_record_flag", - "extract_date", + "period", "sending_location", + "sending_location_name", "social_care_id", - "upi", "chi_upi", - "submitted_postcode", - "chi_postcode", - "submitted_date_of_birth", "chi_date_of_birth", - "submitted_gender", + "date_of_death", + "chi_postcode", + "submitted_postcode", "chi_gender_code" ) %>% dplyr::collect() %>% @@ -29,10 +28,10 @@ read_lookup_sc_demographics <- function(sc_connection = phs_db_connection(dsn = dplyr::across(c( "latest_record_flag", "sending_location", - "submitted_gender", "chi_gender_code" ), as.integer) - ) + ) %>% + dplyr::distinct() return(sc_demog) } diff --git a/R/read_sc_all_sds.R b/R/read_sc_all_sds.R index 18c5b52ec..ab9bb20e1 100644 --- a/R/read_sc_all_sds.R +++ b/R/read_sc_all_sds.R @@ -22,9 +22,8 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR "sds_option_1", "sds_option_2", "sds_option_3", - "sds_start_date_after_end_date", - "sds_start_date_after_period_end_date", - "sds_end_date_not_within_period" + "sds_start_date_after_end_date", # get fixed + "sds_start_date_after_period_end_date" # get removed ) %>% dplyr::collect() %>% dplyr::distinct() %>% @@ -33,8 +32,7 @@ read_sc_all_sds <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPR "sds_option_1", "sds_option_2", "sds_option_3" - ), as.integer)) %>% - dplyr::filter(.data$sds_start_date_after_period_end_date != 1) + ), as.integer)) return(sds_full_data) } diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R index 73c1a3706..2c32bbb93 100644 --- a/R/replace_sc_id_with_latest.R +++ b/R/replace_sc_id_with_latest.R @@ -7,33 +7,23 @@ replace_sc_id_with_latest <- function(data) { # Check for required variables check_variables_exist( data, - c("sending_location", "social_care_id", "chi", "period") + c("sending_location", "social_care_id", "chi", "latest_flag") ) # select variables we need filter_data <- data %>% dplyr::select( - "sending_location", "social_care_id", "chi", "period" + "sending_location", "social_care_id", "chi", "latest_flag" ) %>% - dplyr::filter(!(is.na(.data$chi))) + dplyr::filter(!(is.na(.data$chi))) %>% + dplyr::distinct() change_sc_id <- filter_data %>% - # Sort (by sending_location, chi and period) for unique chi/sending location - dplyr::arrange( - .data$sending_location, - .data$chi, - dplyr::desc(.data$period) - ) %>% - # Find the latest sc_id for each chi/sending location by keeping latest period - dplyr::distinct( - .data$sending_location, - .data$chi, - .keep_all = TRUE - ) %>% + dplyr::filter(latest_flag == 1) %>% # Rename for latest sc id dplyr::rename(latest_sc_id = "social_care_id") %>% - # drop period for matching - dplyr::select(-"period") + # drop latest_flag for matching + dplyr::select(-"latest_flag") return_data <- change_sc_id %>% # Match back onto data @@ -41,6 +31,7 @@ replace_sc_id_with_latest <- function(data) { by = c("sending_location", "chi"), multiple = "all" ) %>% + dplyr::filter(!(is.na(period))) %>% # Overwrite sc id with the latest dplyr::mutate( social_care_id = dplyr::if_else( diff --git a/man/fix_sc_end_dates.Rd b/man/fix_sc_end_dates.Rd index 1bf808bea..041751319 100644 --- a/man/fix_sc_end_dates.Rd +++ b/man/fix_sc_end_dates.Rd @@ -4,7 +4,7 @@ \alias{fix_sc_end_dates} \title{Fix sc end dates} \usage{ -fix_sc_end_dates(start_date, end_date, period) +fix_sc_end_dates(start_date, end_date, period_end_date) } \arguments{ \item{start_date}{A vector containing dates.} From cd8b35948abf588a8eea7fa0474be8e8cd8c03a0 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Wed, 7 Feb 2024 15:32:47 +0000 Subject: [PATCH 038/105] Sc all at speedup (#904) * speed up process_sc_all_alarms_telecare function with data.table package * Update documentation --------- Co-authored-by: lizihao-anu Co-authored-by: Megan McNicol <43570769+SwiftySalmon@users.noreply.github.com> Co-authored-by: Jennit07 <67372904+Jennit07@users.noreply.github.com> --- R/process_sc_all_alarms_telecare.R | 181 +++++++++++++++++------------ 1 file changed, 104 insertions(+), 77 deletions(-) diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R index 988d1f3e7..bc417a8cd 100644 --- a/R/process_sc_all_alarms_telecare.R +++ b/R/process_sc_all_alarms_telecare.R @@ -17,58 +17,85 @@ process_sc_all_alarms_telecare <- function( write_to_disk = TRUE) { # Data Cleaning----------------------------------------------------- - replaced_dates <- data %>% - # If the end date is missing, set this to the end of the period - dplyr::mutate( - service_end_date = fix_sc_missing_end_dates( - .data$service_end_date, - .data$period_end_date - ), - # If the start_date is missing, set this to the start of the period - service_start_date = fix_sc_start_dates( - .data$service_start_date, - .data$period_start_date - ), - # Fix service_end_date if earlier than service_start_date by setting end_date to the end of fy - service_end_date = fix_sc_end_dates( - .data$service_start_date, - .data$service_end_date, - .data$period - ) + # Convert to data.table + data.table::setDT(data) + data.table::setDT(sc_demog_lookup) + + # Fix dates and create new variables + data[ + , + service_end_date := fix_sc_missing_end_dates( + service_end_date, + period_end_date ) + ] + data[ + , + service_start_date := fix_sc_start_dates( + service_start_date, + period_start_date + ) + ] + data[ + , + service_end_date := fix_sc_end_dates( + service_start_date, + service_end_date, + period + ) + ] - at_full_clean <- replaced_dates %>% - # rename for matching source variables - dplyr::rename( - record_keydate1 = "service_start_date", - record_keydate2 = "service_end_date" - ) %>% - # Include source variables - dplyr::mutate( - recid = "AT", - smrtype = dplyr::case_when( - .data$service_type == 1L ~ "AT-Alarm", - .data$service_type == 2L ~ "AT-Tele" + # Rename columns + data.table::setnames( + data, + old = c("service_start_date", "service_end_date"), + new = c("record_keydate1", "record_keydate2") + ) + + # Additional mutations + data[ + , + c( + "recid", + "smrtype", + "sc_send_lca" + ) := list( + "AT", + data.table::fcase( + service_type == 1L, + "AT-Alarm", + service_type == 2L, + "AT-Tele", + default, + NA_character_ ), - # Create person id variable - person_id = stringr::str_glue("{sending_location}-{social_care_id}"), - # Use function for creating sc send lca variables - sc_send_lca = convert_sc_sending_location_to_lca(.data$sending_location) - ) %>% - # Match on demographics data (chi, gender, dob and postcode) - dplyr::left_join( - sc_demog_lookup, - by = c("sending_location", "social_care_id") - ) %>% - # when multiple social_care_id from sending_location for single CHI - # replace social_care_id with latest - replace_sc_id_with_latest() + convert_sc_sending_location_to_lca(sending_location) + ) + ] + data$person_id <- paste0( + data$sending_location, + "-", + data$social_care_id + ) + + # Join with sc_demog_lookup + data <- sc_demog_lookup[data, on = .(sending_location, social_care_id)] - # Deal with episodes which have a package across quarters. - qtr_merge <- at_full_clean %>% - # use as.data.table to change the data format to data.table to accelerate - data.table::as.data.table() %>% + # Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function) + data <- replace_sc_id_with_latest(data) + + # Deal with episodes that have a package across quarters + data[, pkg_count := seq_len(.N), by = .( + sending_location, + social_care_id, + record_keydate1, + smrtype, + period + )] + + # Order data before summarizing + data <- data %>% dplyr::group_by( .data$sending_location, .data$social_care_id, @@ -76,38 +103,38 @@ process_sc_all_alarms_telecare <- function( .data$smrtype, .data$period ) %>% - # Create a count for the package number across episodes - dplyr::mutate(pkg_count = dplyr::row_number()) %>% # Sort prior to merging dplyr::arrange(.by_group = TRUE) %>% - # group for merging episodes - dplyr::group_by( - .data$sending_location, - .data$social_care_id, - .data$record_keydate1, - .data$smrtype, - .data$pkg_count - ) %>% - # merge episodes with packages across quarters - # drop variables not needed - dplyr::summarise( - sending_location = dplyr::last(.data$sending_location), - social_care_id = dplyr::last(.data$social_care_id), - sc_latest_submission = dplyr::last(.data$period), - record_keydate1 = dplyr::last(.data$record_keydate1), - record_keydate2 = dplyr::last(.data$record_keydate2), - smrtype = dplyr::last(.data$smrtype), - pkg_count = dplyr::last(.data$pkg_count), - chi = dplyr::last(.data$chi), - gender = dplyr::last(.data$gender), - dob = dplyr::last(.data$dob), - postcode = dplyr::last(.data$postcode), - recid = dplyr::last(.data$recid), - person_id = dplyr::last(.data$person_id), - sc_send_lca = dplyr::last(.data$sc_send_lca) - ) %>% - # change the data format from data.table to data.frame - tibble::as_tibble() + dplyr::ungroup() %>% + data.table::as.data.table() + + # Summarize to merge episodes + qtr_merge <- data[, .( + sending_location = data.table::last(sending_location), + social_care_id = data.table::last(social_care_id), + sc_latest_submission = data.table::last(period), + record_keydate1 = data.table::last(record_keydate1), + record_keydate2 = data.table::last(record_keydate2), + smrtype = data.table::last(smrtype), + pkg_count = data.table::last(pkg_count), + chi = data.table::last(chi), + gender = data.table::last(gender), + dob = data.table::last(dob), + postcode = data.table::last(postcode), + recid = data.table::last(recid), + person_id = data.table::last(person_id), + sc_send_lca = data.table::last(sc_send_lca) + ), by = .( + sending_location, + social_care_id, + record_keydate1, + smrtype, + pkg_count + )] + + # Convert back to data.frame if necessary + qtr_merge <- as.data.frame(qtr_merge) + if (write_to_disk) { write_file( From b1a9523623b740144098418d59891228a005e74e Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 7 Feb 2024 16:40:51 +0000 Subject: [PATCH 039/105] Add case_when statement for `high_cc` cohort --- R/create_demographic_lookup.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/create_demographic_lookup.R b/R/create_demographic_lookup.R index 2b252a151..d0e0c9988 100644 --- a/R/create_demographic_lookup.R +++ b/R/create_demographic_lookup.R @@ -344,18 +344,21 @@ assign_d_cohort_high_cc <- function(dementia, liver, cancer, spec) { - high_cc <- + high_cc <- dplyr::case_when( + spec == "G5" ~ TRUE, # FOR FUTURE: PhysicalandSensoryDisabilityClientGroup or LearningDisabilityClientGroup = "Y", # then high_cc_cohort = TRUE # FOR FUTURE: Care home removed, here's the code: .data$recid = "CH" & age < 65 - rowSums(dplyr::pick(c( + (rowSums(dplyr::pick(c( "dementia", "hefailure", "refailure", "liver", "cancer" - )), na.rm = TRUE) >= 1L | - spec == "G5" + )), na.rm = TRUE) >= 1L) ~ TRUE, + .default = FALSE + ) + return(high_cc) } From 6829c1acb55586050a786edc478c11281e675eb6 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Mon, 12 Feb 2024 12:04:34 +0000 Subject: [PATCH 040/105] Bug - `high_cc` in demographic cohort showing `NAs` instead of `TRUE/FALSE` (#911) Add case_when statement for `high_cc` cohort --- R/create_demographic_lookup.R | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/R/create_demographic_lookup.R b/R/create_demographic_lookup.R index 2b252a151..d0e0c9988 100644 --- a/R/create_demographic_lookup.R +++ b/R/create_demographic_lookup.R @@ -344,18 +344,21 @@ assign_d_cohort_high_cc <- function(dementia, liver, cancer, spec) { - high_cc <- + high_cc <- dplyr::case_when( + spec == "G5" ~ TRUE, # FOR FUTURE: PhysicalandSensoryDisabilityClientGroup or LearningDisabilityClientGroup = "Y", # then high_cc_cohort = TRUE # FOR FUTURE: Care home removed, here's the code: .data$recid = "CH" & age < 65 - rowSums(dplyr::pick(c( + (rowSums(dplyr::pick(c( "dementia", "hefailure", "refailure", "liver", "cancer" - )), na.rm = TRUE) >= 1L | - spec == "G5" + )), na.rm = TRUE) >= 1L) ~ TRUE, + .default = FALSE + ) + return(high_cc) } From c7a140068bc1376cd8ae3951e96a1aca74bcd7fd Mon Sep 17 00:00:00 2001 From: marjom02 Date: Tue, 13 Feb 2024 11:55:27 +0000 Subject: [PATCH 041/105] added a casewhen to update property type description for homelessness --- R/process_extract_homelessness.R | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R index 3211f0fb7..3b9756183 100644 --- a/R/process_extract_homelessness.R +++ b/R/process_extract_homelessness.R @@ -100,6 +100,36 @@ process_extract_homelessness <- function( ) ) ) %>% + dplyr::mutate(property_type_code = as.character(property_type_code)) %>% + dplyr::mutate( + property_type_code = dplyr::case_when( + property_type_code == "1" ~ "1 - Own Property - LA Tenancy", + property_type_code == "2" ~ "2 - Own Property - RSL Tenancy", + property_type_code == "3" ~ "3 - Own Property - private rented tenancy", + property_type_code == "4" ~ "4 - Own Property - tenancy secured through employment/tied house", + property_type_code == "5" ~ "5 - Own Property - owning/buying", + property_type_code == "6" ~ "6 - Parental / family home / relatives", + property_type_code == "7" ~ " 7 - Friends / partners", + property_type_code == "8" ~ "8 - Armed Services Accommodation", + property_type_code == "9" ~ "9 - Prison", + property_type_code == "10" ~ "10 - Hospital", + property_type_code == "11" ~ "11 - Children's residential accommodation (looked after by the local authority)", + property_type_code == "12" ~ "12 - Supported accommodation", + property_type_code == "13" ~ "13 - Hostel (unsupported)", + property_type_code == "14" ~ "14 - Bed & Breakfast", + property_type_code == "15" ~ "15 - Caravan / mobile home", + property_type_code == "16" ~ "16 - Long-term roofless", + property_type_code == "17" ~ "17 - Long-term sofa surfing", + property_type_code == "18" ~ "18 - Other", + property_type_code == "19" ~ "19 - Not known / refused", + property_type_code == "20" ~ "20 - Own property - Shared ownership/Shared equity/ LCHO", + property_type_code == "21" ~ "21 - Lodger", + property_type_code == "22" ~ "22 - Shared Property - Private Rented Sector", + property_type_code == "23" ~ "23 - Shared Property - Local Authority", + property_type_code == "24" ~ "24 - Shared Property - RSL", + TRUE ~ property_type_code + ) + ) %>% dplyr::left_join( la_code_lookup, by = dplyr::join_by("sending_local_authority_code_9" == "CA") @@ -117,7 +147,7 @@ process_extract_homelessness <- function( if (!is.null(completeness_data)) { filtered_data <- data %>% dplyr::left_join(completeness_data, - by = c("year", "sending_local_authority_name") + by = c("year", "sending_local_authority_name") ) %>% dplyr::filter( dplyr::between(.data[["pct_complete_all"]], 0.90, 1.05) | From ea192202d8b57c2182ac0d01c2761c14745abc0a Mon Sep 17 00:00:00 2001 From: SwiftySalmon Date: Tue, 13 Feb 2024 11:58:45 +0000 Subject: [PATCH 042/105] Update documentation --- DESCRIPTION | 2 +- man/calculate_stay.Rd | 4 ++-- man/compute_mid_year_age.Rd | 4 ++-- man/convert_date_to_numeric.Rd | 4 ++-- man/convert_numeric_to_date.Rd | 4 ++-- man/end_fy.Rd | 2 +- man/end_fy_quarter.Rd | 2 +- man/end_next_fy_quarter.Rd | 4 ++-- man/fy_interval.Rd | 4 ++-- man/is_date_in_fyyear.Rd | 4 ++-- man/last_date_month.Rd | 4 ++-- man/midpoint_fy.Rd | 4 ++-- man/next_fy.Rd | 4 ++-- man/start_fy.Rd | 2 +- man/start_fy_quarter.Rd | 2 +- man/start_next_fy_quarter.Rd | 6 +++--- 16 files changed, 28 insertions(+), 28 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5123289dd..3a75852e2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,4 +73,4 @@ Encoding: UTF-8 Language: en-GB LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.2.3 +RoxygenNote: 7.3.1 diff --git a/man/calculate_stay.Rd b/man/calculate_stay.Rd index 43b7bd166..5e9266b10 100644 --- a/man/calculate_stay.Rd +++ b/man/calculate_stay.Rd @@ -34,16 +34,16 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/compute_mid_year_age.Rd b/man/compute_mid_year_age.Rd index 142fa4aab..5a50370e0 100644 --- a/man/compute_mid_year_age.Rd +++ b/man/compute_mid_year_age.Rd @@ -31,16 +31,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_date_to_numeric.Rd b/man/convert_date_to_numeric.Rd index 5511fec84..b67eaa778 100644 --- a/man/convert_date_to_numeric.Rd +++ b/man/convert_date_to_numeric.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/convert_numeric_to_date.Rd b/man/convert_numeric_to_date.Rd index f786e0319..a09b7b9b9 100644 --- a/man/convert_numeric_to_date.Rd +++ b/man/convert_numeric_to_date.Rd @@ -24,16 +24,16 @@ Other date functions: \code{\link{calculate_stay}()}, \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_fy.Rd b/man/end_fy.Rd index 2925ffe60..6220f5f32 100644 --- a/man/end_fy.Rd +++ b/man/end_fy.Rd @@ -34,8 +34,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_fy_quarter.Rd b/man/end_fy_quarter.Rd index 0efe9624a..26c439a04 100644 --- a/man/end_fy_quarter.Rd +++ b/man/end_fy_quarter.Rd @@ -33,8 +33,8 @@ Other date functions: \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/end_next_fy_quarter.Rd b/man/end_next_fy_quarter.Rd index f9cc1720a..702446e82 100644 --- a/man/end_next_fy_quarter.Rd +++ b/man/end_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/fy_interval.Rd b/man/fy_interval.Rd index 12d1d36bb..00b9ea52c 100644 --- a/man/fy_interval.Rd +++ b/man/fy_interval.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/is_date_in_fyyear.Rd b/man/is_date_in_fyyear.Rd index 97a0f3639..e74bd5734 100644 --- a/man/is_date_in_fyyear.Rd +++ b/man/is_date_in_fyyear.Rd @@ -41,15 +41,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/last_date_month.Rd b/man/last_date_month.Rd index f52305356..3d3b9544e 100644 --- a/man/last_date_month.Rd +++ b/man/last_date_month.Rd @@ -25,15 +25,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/midpoint_fy.Rd b/man/midpoint_fy.Rd index 7bac9b6b3..2363df773 100644 --- a/man/midpoint_fy.Rd +++ b/man/midpoint_fy.Rd @@ -27,15 +27,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/next_fy.Rd b/man/next_fy.Rd index 19e1193f4..7524c5f11 100644 --- a/man/next_fy.Rd +++ b/man/next_fy.Rd @@ -27,15 +27,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, -\code{\link{start_fy_quarter}()}, \code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()}, \code{\link{start_next_fy_quarter}()} } \concept{date functions} diff --git a/man/start_fy.Rd b/man/start_fy.Rd index 4996bfb72..9951af2ec 100644 --- a/man/start_fy.Rd +++ b/man/start_fy.Rd @@ -27,8 +27,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_fy_quarter.Rd b/man/start_fy_quarter.Rd index f5729dcb0..9936736a8 100644 --- a/man/start_fy_quarter.Rd +++ b/man/start_fy_quarter.Rd @@ -26,8 +26,8 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, diff --git a/man/start_next_fy_quarter.Rd b/man/start_next_fy_quarter.Rd index 098f0bf73..fdac297a7 100644 --- a/man/start_next_fy_quarter.Rd +++ b/man/start_next_fy_quarter.Rd @@ -26,15 +26,15 @@ Other date functions: \code{\link{compute_mid_year_age}()}, \code{\link{convert_date_to_numeric}()}, \code{\link{convert_numeric_to_date}()}, -\code{\link{end_fy_quarter}()}, \code{\link{end_fy}()}, +\code{\link{end_fy_quarter}()}, \code{\link{end_next_fy_quarter}()}, \code{\link{fy_interval}()}, \code{\link{is_date_in_fyyear}()}, \code{\link{last_date_month}()}, \code{\link{midpoint_fy}()}, \code{\link{next_fy}()}, -\code{\link{start_fy_quarter}()}, -\code{\link{start_fy}()} +\code{\link{start_fy}()}, +\code{\link{start_fy_quarter}()} } \concept{date functions} From a634ea74a511d4cee207bcc256fb5b7078c094d1 Mon Sep 17 00:00:00 2001 From: SwiftySalmon Date: Tue, 13 Feb 2024 11:59:37 +0000 Subject: [PATCH 043/105] Style code --- R/process_extract_homelessness.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R index 3b9756183..04d7082e7 100644 --- a/R/process_extract_homelessness.R +++ b/R/process_extract_homelessness.R @@ -147,7 +147,7 @@ process_extract_homelessness <- function( if (!is.null(completeness_data)) { filtered_data <- data %>% dplyr::left_join(completeness_data, - by = c("year", "sending_local_authority_name") + by = c("year", "sending_local_authority_name") ) %>% dplyr::filter( dplyr::between(.data[["pct_complete_all"]], 0.90, 1.05) | From 14cde166bffd7d1d9ac77c8732407fe17b5268d0 Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Fri, 16 Feb 2024 09:06:50 +0000 Subject: [PATCH 044/105] Bug - deal with missing variables (#914) * Add missing sc variables for no sc data * Fix code for including `_inc_dna` variables * Remove commented line --- R/add_hri_variables.R | 2 +- R/aggregate_by_chi.R | 4 ++-- R/create_episode_file.R | 7 +++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/R/add_hri_variables.R b/R/add_hri_variables.R index 710324646..519ce3694 100644 --- a/R/add_hri_variables.R +++ b/R/add_hri_variables.R @@ -82,7 +82,7 @@ add_hri_variables <- function( "mh_episodes", "gls_episodes", "op_newcons_attendances", - # op_newcons_dnas, + "op_newcons_dnas", "ae_attendances", "pis_paid_items", "ooh_cases" diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R index 8d9dff96d..6f5032242 100644 --- a/R/aggregate_by_chi.R +++ b/R/aggregate_by_chi.R @@ -89,6 +89,7 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { "episodes", "beddays", "cost", + "_dnas", "attendances", "attend", "contacts", @@ -109,8 +110,7 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { vars_start_with( episode_file, "sds_option" - ), - "health_net_cost_inc_dnas" + ) ) cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")] if (exclude_sc_var) { diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 493d71bd3..a9503e83c 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -143,8 +143,9 @@ create_episode_file <- function( episode_file <- episode_file %>% dplyr::mutate( ch_chi_cis = NA, - sc_id_cis = NA, + ch_sc_id_cis = NA, ch_name = NA, + ch_postcode = NA, ch_adm_reason = NA, ch_provider = NA, ch_nursing = NA, @@ -159,7 +160,9 @@ create_episode_file <- function( hc_cost_q4 = NA, hc_provider = NA, hc_reablement = NA, - sds_option_4 = NA, + person_id = NA, + sc_latest_submission = NA, + sc_send_lca = NA, sc_living_alone = NA, sc_support_from_unpaid_carer = NA, sc_social_worker = NA, From 625402b52f717b1f6b35344f7fb1ebaf4a9cecff Mon Sep 17 00:00:00 2001 From: Jennit07 <67372904+Jennit07@users.noreply.github.com> Date: Fri, 16 Feb 2024 09:07:48 +0000 Subject: [PATCH 045/105] Bug - Fix get pop path failing and preventing the indiv file from running. (#913) Fix bug - pop file paths breaking indiv file --- R/add_keep_population_flag.R | 2 +- R/get_lookup_paths.R | 2 +- Rmarkdown/costs_district_nursing.Rmd | 2 +- tests/testthat/test-get_lookup_paths.R | 8 +++----- 4 files changed, 6 insertions(+), 8 deletions(-) diff --git a/R/add_keep_population_flag.R b/R/add_keep_population_flag.R index 6050b278f..d418ac18c 100644 --- a/R/add_keep_population_flag.R +++ b/R/add_keep_population_flag.R @@ -15,7 +15,7 @@ add_keep_population_flag <- function(individual_file, year) { } else { ## Obtain the population estimates for Locality AgeGroup and Gender. pop_estimates <- - readr::read_rds(get_datazone_pop_path("DataZone2011_pop_est_2011_2021.rds")) %>% + readr::read_rds(get_pop_path(type = "datazone")) %>% dplyr::select(year, datazone2011, sex, age0:age90plus) # Step 1: Obtain the population estimates for Locality, AgeGroup, and Gender diff --git a/R/get_lookup_paths.R b/R/get_lookup_paths.R index fe35a7d2f..7df5c52e2 100644 --- a/R/get_lookup_paths.R +++ b/R/get_lookup_paths.R @@ -126,7 +126,7 @@ get_pop_path <- function(file_name = NULL, "intzone" ~ stringr::str_glue("IntZone_pop_est_2011_\\d+?\\.{ext}") ) - datazone_pop_path <- get_file_path( + pop_path <- get_file_path( directory = pop_dir, file_name = file_name, ext = ext, diff --git a/Rmarkdown/costs_district_nursing.Rmd b/Rmarkdown/costs_district_nursing.Rmd index e3c9bba13..fb198bccb 100644 --- a/Rmarkdown/costs_district_nursing.Rmd +++ b/Rmarkdown/costs_district_nursing.Rmd @@ -75,7 +75,7 @@ dn_raw_costs_contacts <- left_join(dn_raw_contacts, # Of the two HSCPs, Argyll and Bute provides the # District Nursing data which is 27% of the population. -population_lookup <- read_file(get_datazone_pop_path("HSCP2019_pop_est_1981_2021.rds")) %>% +population_lookup <- read_file(get_pop_path(type = "datazone")) %>% # Select only the HSCPs for NHS Highland & years since 2015 filter( hscp2019 %in% c("S37000004", "S37000016"), diff --git a/tests/testthat/test-get_lookup_paths.R b/tests/testthat/test-get_lookup_paths.R index c56752b03..29d538cc1 100644 --- a/tests/testthat/test-get_lookup_paths.R +++ b/tests/testthat/test-get_lookup_paths.R @@ -48,13 +48,11 @@ test_that("SIMD file path returns as expected", { test_that("population estimates file path returns as expected", { suppressMessages({ - expect_s3_class(get_datazone_pop_path(), "fs_path") + expect_s3_class(get_pop_path(type = "datazone"), "fs_path") - expect_equal(fs::path_ext(get_datazone_pop_path()), "rds") + expect_equal(fs::path_ext(get_pop_path(type = "datazone")), "rds") - expect_match(get_datazone_pop_path(), "DataZone2011_pop_est_2001_\\d+?") - - expect_true(fs::file_exists(get_datazone_pop_path())) + expect_true(fs::file_exists(get_pop_path(type = "datazone"))) }) }) From 36c5e74ed28444a7f44eff390bd96009bb0f0b51 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 16 Feb 2024 09:21:13 +0000 Subject: [PATCH 046/105] correct file hscp file path --- Rmarkdown/costs_district_nursing.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Rmarkdown/costs_district_nursing.Rmd b/Rmarkdown/costs_district_nursing.Rmd index fb198bccb..59b8353f8 100644 --- a/Rmarkdown/costs_district_nursing.Rmd +++ b/Rmarkdown/costs_district_nursing.Rmd @@ -75,7 +75,7 @@ dn_raw_costs_contacts <- left_join(dn_raw_contacts, # Of the two HSCPs, Argyll and Bute provides the # District Nursing data which is 27% of the population. -population_lookup <- read_file(get_pop_path(type = "datazone")) %>% +population_lookup <- read_file(get_pop_path(type = "hscp")) %>% # Select only the HSCPs for NHS Highland & years since 2015 filter( hscp2019 %in% c("S37000004", "S37000016"), From 9d67429a28cd31d4a109a2a5d37cca750083a2a0 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 16 Feb 2024 13:47:12 +0000 Subject: [PATCH 047/105] Declare missing variables for older years --- R/create_episode_file.R | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 9189ff783..d686c7a43 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -170,8 +170,32 @@ create_episode_file <- function( sc_meals = NA, sc_day_care = NA ) + } else if (!check_year_valid(year, type = "homelessness")) { + episode_file <- episode_file %>% + dplyr::mutate( + hl1_application_ref = NA, + hl1_sending_lca = NA, + hl1_property_type = NA, + hl1_reason_ftm = NA + ) + } else if (!check_year_valid(year, type = "dd")) { + episode_file <- episode_file %>% + dplyr::mutate( + delay_end_reason = NA, + primary_delay_reason = NA, + secondary_delay_reason = NA, + dd_responsible_lca = NA, + dd_quality = NA + ) + } else if (!check_year_valid(year, type = "dn")) { + episode_file <- episode_file %>% + dplyr::mutate( + ccm = NA, + totalnodncontacts = NA + ) } + if (anon_chi_out) { episode_file <- slfhelper::get_anon_chi(episode_file) } From 0844a2ff8b562b07bceb99b16a5eed1111bc1431 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 22 Jul 2024 10:33:43 +0100 Subject: [PATCH 048/105] setup targets scripts for old years --- Run_SLF_Files_targets/run_targets_1415.R | 10 ++++++++++ Run_SLF_Files_targets/run_targets_1516.R | 10 ++++++++++ Run_SLF_Files_targets/run_targets_1617.R | 10 ++++++++++ 3 files changed, 30 insertions(+) create mode 100644 Run_SLF_Files_targets/run_targets_1415.R create mode 100644 Run_SLF_Files_targets/run_targets_1516.R create mode 100644 Run_SLF_Files_targets/run_targets_1617.R diff --git a/Run_SLF_Files_targets/run_targets_1415.R b/Run_SLF_Files_targets/run_targets_1415.R new file mode 100644 index 000000000..e0d8a3e5c --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_1415.R @@ -0,0 +1,10 @@ +library(targets) + +year <- "1415" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1415")) +) + diff --git a/Run_SLF_Files_targets/run_targets_1516.R b/Run_SLF_Files_targets/run_targets_1516.R new file mode 100644 index 000000000..2a94b78b6 --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_1516.R @@ -0,0 +1,10 @@ +library(targets) + +year <- "1516" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1516")) +) + diff --git a/Run_SLF_Files_targets/run_targets_1617.R b/Run_SLF_Files_targets/run_targets_1617.R new file mode 100644 index 000000000..c5f6bf5ab --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_1617.R @@ -0,0 +1,10 @@ +library(targets) + +year <- "1617" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("1617")) +) + From 753d10ec786b826c99d3648eb5c212689946b485 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Mon, 22 Jul 2024 09:35:40 +0000 Subject: [PATCH 049/105] Style code --- Run_SLF_Files_targets/run_targets_1415.R | 1 - Run_SLF_Files_targets/run_targets_1516.R | 1 - Run_SLF_Files_targets/run_targets_1617.R | 1 - 3 files changed, 3 deletions(-) diff --git a/Run_SLF_Files_targets/run_targets_1415.R b/Run_SLF_Files_targets/run_targets_1415.R index e0d8a3e5c..a37068c0d 100644 --- a/Run_SLF_Files_targets/run_targets_1415.R +++ b/Run_SLF_Files_targets/run_targets_1415.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1415")) ) - diff --git a/Run_SLF_Files_targets/run_targets_1516.R b/Run_SLF_Files_targets/run_targets_1516.R index 2a94b78b6..7930d5bb5 100644 --- a/Run_SLF_Files_targets/run_targets_1516.R +++ b/Run_SLF_Files_targets/run_targets_1516.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1516")) ) - diff --git a/Run_SLF_Files_targets/run_targets_1617.R b/Run_SLF_Files_targets/run_targets_1617.R index c5f6bf5ab..16361f71e 100644 --- a/Run_SLF_Files_targets/run_targets_1617.R +++ b/Run_SLF_Files_targets/run_targets_1617.R @@ -7,4 +7,3 @@ tar_make_future( # it does not recognise `contains(year)` names = (targets::contains("1617")) ) - From 881fdf6a54c7c9acbfe63e5093677878b6bcd74c Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 24 Jul 2024 15:12:01 +0100 Subject: [PATCH 050/105] Include `check_year_valid` for sc client path --- R/get_sc_lookup_paths.R | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/R/get_sc_lookup_paths.R b/R/get_sc_lookup_paths.R index d201f416f..ec98a878f 100644 --- a/R/get_sc_lookup_paths.R +++ b/R/get_sc_lookup_paths.R @@ -44,5 +44,9 @@ get_sc_client_lookup_path <- function(year, update = latest_update(), ...) { ... ) + if (!check_year_valid(year, type = "client")) { + return(get_dummy_boxi_extract_path()) + } + return(sc_client_lookup_path) } From 8b0da55abe7c54a96cf6405898f41a43b589f98f Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 24 Jul 2024 17:18:45 +0100 Subject: [PATCH 051/105] Add check year valid to join sc client --- R/create_episode_file.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 0c632fd09..209a5f3be 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -487,6 +487,11 @@ join_sc_client <- function(data, file_type = c("episode", "individual")) { cli::cli_alert_info("Join social care client function started at {Sys.time()}") + if (!check_year_valid(year, type = "client")) { + data_file <- data + return(data_file) + } + if (file_type == "episode") { # Match on client variables by chi data_file <- data %>% From db24e8476ade8d59f7b2e771325595fe070b3a3f Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 24 Jul 2024 17:20:47 +0100 Subject: [PATCH 052/105] Add if else statement --- R/get_sc_lookup_paths.R | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/R/get_sc_lookup_paths.R b/R/get_sc_lookup_paths.R index ec98a878f..90a08e7e1 100644 --- a/R/get_sc_lookup_paths.R +++ b/R/get_sc_lookup_paths.R @@ -38,15 +38,18 @@ get_sc_demog_lookup_path <- function(update = latest_update(), ...) { #' @family social care lookup file paths #' @seealso [get_file_path()] for the generic function. get_sc_client_lookup_path <- function(year, update = latest_update(), ...) { - sc_client_lookup_path <- get_file_path( - directory = fs::path(get_slf_dir(), "Social_care", "processed_sc_client_lookup"), - file_name = stringr::str_glue("anon-sc_client_lookup_{year}_{update}.parquet"), - ... - ) - if (!check_year_valid(year, type = "client")) { return(get_dummy_boxi_extract_path()) + } else { + sc_client_lookup_path <- get_file_path( + directory = fs::path( + get_slf_dir(), + "Social_care", + "processed_sc_client_lookup" + ), + file_name = stringr::str_glue("anon-sc_client_lookup_{year}_{update}.parquet"), + ... + ) + return(sc_client_lookup_path) } - - return(sc_client_lookup_path) } From eaccd43899ddb4cf7194474081cd0caa1976706b Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 24 Jul 2024 17:22:24 +0100 Subject: [PATCH 053/105] WIP - TO DO - fix dummy path for `get_chi()` --- R/get_boxi_extract_path.R | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index a1c59b4f2..5a8cde916 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -86,9 +86,36 @@ get_boxi_extract_path <- function( #' #' @return an [fs::path()] to a dummy file which can be used with targets. get_dummy_boxi_extract_path <- function() { - get_file_path( + + dummy_data <- data.frame(anon_chi = "DUMMY") %>% + write_file(get_file_path( + directory = get_dev_dir(), + file_name = "dummy_data.parquet" + )) + + dummy_path <- get_file_path( directory = get_dev_dir(), - file_name = ".dummy", - create = TRUE + file_name = "dummy_data.parquet" ) + + return(dummy_path) } + +#' #' Get a path to a dummy file +#' #' +#' #' @return an [fs::path()] to a dummy file which can be used with targets. +#' get_dummy_chi_path <- function() { +#' data_path <- get_file_path( +#' directory = get_dev_dir(), +#' file_name = ".dummy", +#' create = TRUE +#' ) +#' +#' data <- read_file(data_path) %>% +#' as.data.frame() %>% +#' dplyr::mutate(anon_chi = NA_character_) %>% +#' slfhelper::get_chi() +#' +#' return(data_path) +#' +#' } From bcd55adf43ca508da5b348f6b8fb45ac3800b862 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 24 Jul 2024 16:23:59 +0000 Subject: [PATCH 054/105] Style code --- R/get_boxi_extract_path.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index 5a8cde916..a50c3e8ef 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -86,12 +86,11 @@ get_boxi_extract_path <- function( #' #' @return an [fs::path()] to a dummy file which can be used with targets. get_dummy_boxi_extract_path <- function() { - dummy_data <- data.frame(anon_chi = "DUMMY") %>% write_file(get_file_path( - directory = get_dev_dir(), - file_name = "dummy_data.parquet" - )) + directory = get_dev_dir(), + file_name = "dummy_data.parquet" + )) dummy_path <- get_file_path( directory = get_dev_dir(), From 0ec4d9d5161ec3b6a55584d57937cad1a5a4e8d2 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 25 Jul 2024 10:13:39 +0100 Subject: [PATCH 055/105] update dummy data file to read empty tibble --- R/get_boxi_extract_path.R | 28 ++-------------------------- R/read_file.R | 2 +- 2 files changed, 3 insertions(+), 27 deletions(-) diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index a50c3e8ef..9c21cabe9 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -86,35 +86,11 @@ get_boxi_extract_path <- function( #' #' @return an [fs::path()] to a dummy file which can be used with targets. get_dummy_boxi_extract_path <- function() { - dummy_data <- data.frame(anon_chi = "DUMMY") %>% - write_file(get_file_path( - directory = get_dev_dir(), - file_name = "dummy_data.parquet" - )) - dummy_path <- get_file_path( directory = get_dev_dir(), - file_name = "dummy_data.parquet" + file_name = ".dummy", + create = TRUE ) return(dummy_path) } - -#' #' Get a path to a dummy file -#' #' -#' #' @return an [fs::path()] to a dummy file which can be used with targets. -#' get_dummy_chi_path <- function() { -#' data_path <- get_file_path( -#' directory = get_dev_dir(), -#' file_name = ".dummy", -#' create = TRUE -#' ) -#' -#' data <- read_file(data_path) %>% -#' as.data.frame() %>% -#' dplyr::mutate(anon_chi = NA_character_) %>% -#' slfhelper::get_chi() -#' -#' return(data_path) -#' -#' } diff --git a/R/read_file.R b/R/read_file.R index be0a6fc65..fa4960804 100644 --- a/R/read_file.R +++ b/R/read_file.R @@ -29,7 +29,7 @@ read_file <- function(path, col_select = NULL, as_data_frame = TRUE, ...) { # Return an empty tibble if trying to read the dummy path if (path == get_dummy_boxi_extract_path()) { - return(tibble::tibble()) + return(tibble::tibble(anon_chi = NA_character_)) } ext <- fs::path_ext(path) From 6af1e415805d048ee02eada5dcf379ec02181c15 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 25 Jul 2024 15:24:43 +0100 Subject: [PATCH 056/105] Update `check_year_valid` --- R/check_year_valid.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/check_year_valid.R b/R/check_year_valid.R index b4eb3872e..52e312028 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -17,6 +17,7 @@ check_year_valid <- function( "ch", "client", "cmh", + "cost_dna", "dd", "deaths", "dn", @@ -36,7 +37,7 @@ check_year_valid <- function( return(FALSE) } else if (year <= "1516" && type %in% c("cmh", "homelessness", "dd")) { return(FALSE) - } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at", "client")) { + } else if (year <= "1617" && type %in% c("ch", "hc", "sds", "at", "client", "cost_dna")) { return(FALSE) } else if (year <= "1718" && type %in% "hhg") { return(FALSE) From 7322df2ee9e7fe354f846e7680ae4058a3b59bb4 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 25 Jul 2024 15:25:21 +0100 Subject: [PATCH 057/105] Update declared `NA` variables --- R/create_episode_file.R | 37 +++++++++++++++++++++++++++++-------- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 209a5f3be..03bc92e36 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -175,26 +175,41 @@ create_episode_file <- function( sc_social_worker = NA, sc_type_of_housing = NA, sc_meals = NA, - sc_day_care = NA + sc_day_care = NA, + social_care_id = NA ) - } else if (!check_year_valid(year, type = "homelessness")) { + } + + if (!check_year_valid(year, type = "homelessness")) { episode_file <- episode_file %>% dplyr::mutate( + hl1_12_months_post_app = NA, + hl1_12_months_pre_app = NA, + hl1_6after_ep = NA, + hl1_6before_ep = NA, hl1_application_ref = NA, - hl1_sending_lca = NA, + hl1_completeness = NA, + hl1_during_ep = NA, + hl1_in_fy = NA, hl1_property_type = NA, - hl1_reason_ftm = NA + hl1_reason_ftm = NA, + hl1_sending_lca = NA ) - } else if (!check_year_valid(year, type = "dd")) { + } + + if (!check_year_valid(year, type = "dd")) { episode_file <- episode_file %>% dplyr::mutate( + cij_delay = NA, + dd_quality = NA, + dd_responsible_lca = NA, delay_end_reason = NA, primary_delay_reason = NA, secondary_delay_reason = NA, - dd_responsible_lca = NA, - dd_quality = NA ) - } else if (!check_year_valid(year, type = "dn")) { + } + + if (!check_year_valid(year, type = "dn")) { episode_file <- episode_file %>% dplyr::mutate( ccm = NA, @@ -202,6 +217,12 @@ create_episode_file <- function( ) } + if (!check_year_valid(year, type = "cost_dna")) { + episode_file <- episode_file %>% + dplyr::mutate( + cost_total_net_inc_dnas = NA + ) + } if (anon_chi_out) { episode_file <- slfhelper::get_anon_chi(episode_file) From faf564da22ac509721a8eea6206ca9687e1f7728 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Thu, 25 Jul 2024 14:27:27 +0000 Subject: [PATCH 058/105] Update documentation --- man/check_year_valid.Rd | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/man/check_year_valid.Rd b/man/check_year_valid.Rd index 91c29861e..59960da30 100644 --- a/man/check_year_valid.Rd +++ b/man/check_year_valid.Rd @@ -6,9 +6,9 @@ \usage{ check_year_valid( year, - type = c("acute", "ae", "at", "ch", "client", "cmh", "dd", "deaths", "dn", "gpooh", - "hc", "homelessness", "hhg", "maternity", "mh", "nsu", "outpatients", "pis", "sds", - "sparra") + type = c("acute", "ae", "at", "ch", "client", "cmh", "cost_dna", "dd", "deaths", "dn", + "gpooh", "hc", "homelessness", "hhg", "maternity", "mh", "nsu", "outpatients", "pis", + "sds", "sparra") ) } \arguments{ From 41500ef800d4f90d05765f0b8f71e07e158b5287 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 31 Jul 2024 12:39:51 +0100 Subject: [PATCH 059/105] declare `count_not_known` as NA --- R/create_episode_file.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 03bc92e36..cd06b5f3e 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -176,7 +176,8 @@ create_episode_file <- function( sc_type_of_housing = NA, sc_meals = NA, sc_day_care = NA, - social_care_id = NA + social_care_id = NA, + count_not_known = NA ) } From ac62956b7da9723a06221da79619abebfc16ce3a Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 31 Jul 2024 15:43:40 +0100 Subject: [PATCH 060/105] supply year as default in `aggregate_by_chi` --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index dc15fcb0e..f826294d1 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -78,7 +78,7 @@ create_individual_file <- function( if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { individual_file <- individual_file %>% - aggregate_by_chi(exclude_sc_var = TRUE) + aggregate_by_chi(year = year, exclude_sc_var = TRUE) } else { individual_file <- individual_file %>% aggregate_ch_episodes() %>% From 51dc1ce91a62f1f6a4e0cd31f19ad87ec6278870 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 2 Aug 2024 10:00:43 +0100 Subject: [PATCH 061/105] Decalre unused variables --- R/create_individual_file.R | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index f826294d1..cb76d5566 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -115,6 +115,9 @@ create_individual_file <- function( hc_personal_hours = NA, hc_non_personal_hours = NA, hc_reablement_hours = NA, + hc_non_personal_hours_cost = NA, + hc_personal_hours_cost = NA, + hc_reablement_hours_cost = NA, at_alarms = NA, at_telecare = NA, sds_option_1 = NA, @@ -125,10 +128,20 @@ create_individual_file <- function( sc_support_from_unpaid_carer = NA, sc_social_worker = NA, sc_meals = NA, - sc_day_care = NA + sc_day_care = NA, + sc_type_of_housing= NA, + count_not_known = NA, + sc_latest_submission = NA, + social_care_id = NA ) } + if (!check_year_valid(year, type = "homelessness")) { + individual_file <- individual_file %>% + dplyr::mutate(hl1_in_fy = NA) + } + + if (anon_chi_out) { individual_file <- individual_file %>% tidyr::replace_na(list(chi = "")) %>% From 976e74b09c0002c8c754d01cfa1d4f2197cddc6a Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Fri, 2 Aug 2024 09:02:29 +0000 Subject: [PATCH 062/105] Style code --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index cb76d5566..6d6c13f19 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -129,7 +129,7 @@ create_individual_file <- function( sc_social_worker = NA, sc_meals = NA, sc_day_care = NA, - sc_type_of_housing= NA, + sc_type_of_housing = NA, count_not_known = NA, sc_latest_submission = NA, social_care_id = NA From b0b12d3ae6125c48f405588612ee37d01228efb5 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 2 Aug 2024 10:15:17 +0100 Subject: [PATCH 063/105] Update sc client with sept update new code --- R/process_lookup_sc_client.R | 158 +++++++++++++++++++++-------------- 1 file changed, 96 insertions(+), 62 deletions(-) diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index 3420f466f..4be1e4f77 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -20,33 +20,29 @@ process_lookup_sc_client <- slfhelper::get_chi() %>% dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")), write_to_disk = TRUE) { - # Specify years available for running - if (year < "1718") { - return(NULL) - } - - client_clean <- data %>% - # Replace 'unknown' responses with NA - dplyr::mutate( - dplyr::across( - c( - "support_from_unpaid_carer", - "social_worker", - "meals", - "living_alone", - "day_care" - ), - dplyr::na_if, - 9L - ), - type_of_housing = dplyr::na_if(.data$type_of_housing, 6L) + # Match to demographics lookup to get CHI + sc_client_demographics <- data %>% + dplyr::right_join( + sc_demographics, + by = c("sending_location", "social_care_id") ) %>% - dplyr::group_by(.data$sending_location, .data$social_care_id) %>% + # need period for the replace sc id with latest function + dplyr::mutate(period = ifelse(!(is.na(.data$financial_quarter)), paste0(.data$financial_year, "Q", financial_quarter), + financial_year + )) %>% + replace_sc_id_with_latest() %>% + # remove cases with no data in client + dplyr::filter(!(is.na(.data$financial_year))) %>% + dplyr::select(-.data$latest_sc_id, -.data$latest_flag, -.data$period) + + + client_clean <- sc_client_demographics %>% + dplyr::group_by(.data$sending_location, .data$social_care_id, .data$chi) %>% # summarise to take last submission dplyr::summarise(dplyr::across( c( "dementia", - "mental_health_problems", + "mental_health_disorders", "learning_disability", "physical_and_sensory_disability", "drugs", @@ -75,19 +71,9 @@ process_lookup_sc_client <- "social_worker", "meals", "living_alone", - "day_care" - ), - tidyr::replace_na, - 9L - ), - type_of_housing = tidyr::replace_na(.data$type_of_housing, 6L) - ) %>% - # factor labels - dplyr::mutate( - dplyr::across( - c( + "day_care", "dementia", - "mental_health_problems", + "mental_health_disorders", "learning_disability", "physical_and_sensory_disability", "drugs", @@ -97,37 +83,64 @@ process_lookup_sc_client <- "elderly_frail", "neurological_condition", "autism", - "other_vulnerable_groups" + "other_vulnerable_groups", + "type_of_housing" ), - factor, - levels = c(0L, 1L), - labels = c("No", "Yes") - ), + tidyr::replace_na, 9L + ) + ) %>% + # factor labels + dplyr::mutate( dplyr::across( c( "living_alone", "support_from_unpaid_carer", "social_worker", "meals", - "day_care" + "day_care", + "dementia", + "mental_health_disorders", + "learning_disability", + "physical_and_sensory_disability", + "drugs", + "alcohol", + "palliative_care", + "carer", + "elderly_frail", + "neurological_condition", + "autism", + "other_vulnerable_groups" ), factor, levels = c(0L, 1L, 9L), labels = c("No", "Yes", "Not Known") ), type_of_housing = factor(.data$type_of_housing, - levels = 1L:6L + levels = 1L:9L, + labels = c( + "Mainstream", # 1 + "Supported", # 2 + "Long Stay Care Home", # 3 + "Hospital or other medical establishment", # 4 + "Homeless", # 5 + "Penal Institutions", # 6 + "Not Known", # 7 + "Other", # 8 + "Not Known" # 9 + ) ) ) %>% # rename variables dplyr::rename_with( - .cols = -c("sending_location", "social_care_id"), + .cols = -c("sending_location", "social_care_id", "chi"), .fn = ~ paste0("sc_", .x) ) + sc_client_lookup <- client_clean %>% # reorder dplyr::select( + "chi", "sending_location", "social_care_id", "sc_living_alone", @@ -135,29 +148,50 @@ process_lookup_sc_client <- "sc_social_worker", "sc_type_of_housing", "sc_meals", - "sc_day_care" - ) + "sc_day_care", + "sc_dementia", + "sc_learning_disability", + "sc_mental_health_disorders", + "sc_physical_and_sensory_disability", + "sc_drugs", + "sc_alcohol", + "sc_palliative_care", + "sc_carer", + "sc_elderly_frail", + "sc_neurological_condition", + "sc_autism", + "sc_other_vulnerable_groups" + ) %>% + create_person_id() + - # Match to demographics lookup to get CHI - sc_client_lookup <- sc_client_lookup %>% - dplyr::left_join( - sc_demographics, - by = c("sending_location", "social_care_id") - ) sc_client_lookup <- dplyr::mutate(sc_client_lookup, - count_not_known = rowSums( - dplyr::select(sc_client_lookup, tidyr::all_of( - c( - "sc_living_alone", - "sc_support_from_unpaid_carer", - "sc_social_worker", - "sc_meals", - "sc_day_care" - ) - )) == "Not Known", - na.rm = TRUE - ) + count_not_known = rowSums( + dplyr::select(sc_client_lookup, tidyr::all_of( + c( + "sc_living_alone", + "sc_support_from_unpaid_carer", + "sc_social_worker", + "sc_type_of_housing", + "sc_meals", + "sc_day_care", + "sc_dementia", + "sc_learning_disability", + "sc_mental_health_disorders", + "sc_physical_and_sensory_disability", + "sc_drugs", + "sc_alcohol", + "sc_palliative_care", + "sc_carer", + "sc_elderly_frail", + "sc_neurological_condition", + "sc_autism", + "sc_other_vulnerable_groups" + ) + )) == "Not Known", + na.rm = TRUE + ) ) %>% dplyr::arrange(.data$chi, .data$count_not_known) %>% dplyr::distinct(.data$chi, .keep_all = TRUE) %>% From 0f568fb7e3400ed45f0718984aff8107f1b84ed4 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 2 Aug 2024 10:16:04 +0100 Subject: [PATCH 064/105] Specify code for running older years --- R/process_lookup_sc_client.R | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index 4be1e4f77..eb3748697 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -20,6 +20,12 @@ process_lookup_sc_client <- slfhelper::get_chi() %>% dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")), write_to_disk = TRUE) { + + # Specify years available for running + if (year < "1718") { + return(NULL) + } + # Match to demographics lookup to get CHI sc_client_demographics <- data %>% dplyr::right_join( From e7130586a6730857d169e419b1b4acdf21826d4e Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Fri, 2 Aug 2024 09:23:14 +0000 Subject: [PATCH 065/105] Style code --- R/process_lookup_sc_client.R | 77 ++++++++++++++++++------------------ 1 file changed, 38 insertions(+), 39 deletions(-) diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index eb3748697..0f20804fa 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -20,7 +20,6 @@ process_lookup_sc_client <- slfhelper::get_chi() %>% dplyr::select(c("sending_location", "social_care_id", "chi", "latest_flag")), write_to_disk = TRUE) { - # Specify years available for running if (year < "1718") { return(NULL) @@ -34,7 +33,7 @@ process_lookup_sc_client <- ) %>% # need period for the replace sc id with latest function dplyr::mutate(period = ifelse(!(is.na(.data$financial_quarter)), paste0(.data$financial_year, "Q", financial_quarter), - financial_year + financial_year )) %>% replace_sc_id_with_latest() %>% # remove cases with no data in client @@ -122,18 +121,18 @@ process_lookup_sc_client <- labels = c("No", "Yes", "Not Known") ), type_of_housing = factor(.data$type_of_housing, - levels = 1L:9L, - labels = c( - "Mainstream", # 1 - "Supported", # 2 - "Long Stay Care Home", # 3 - "Hospital or other medical establishment", # 4 - "Homeless", # 5 - "Penal Institutions", # 6 - "Not Known", # 7 - "Other", # 8 - "Not Known" # 9 - ) + levels = 1L:9L, + labels = c( + "Mainstream", # 1 + "Supported", # 2 + "Long Stay Care Home", # 3 + "Hospital or other medical establishment", # 4 + "Homeless", # 5 + "Penal Institutions", # 6 + "Not Known", # 7 + "Other", # 8 + "Not Known" # 9 + ) ) ) %>% # rename variables @@ -173,31 +172,31 @@ process_lookup_sc_client <- sc_client_lookup <- dplyr::mutate(sc_client_lookup, - count_not_known = rowSums( - dplyr::select(sc_client_lookup, tidyr::all_of( - c( - "sc_living_alone", - "sc_support_from_unpaid_carer", - "sc_social_worker", - "sc_type_of_housing", - "sc_meals", - "sc_day_care", - "sc_dementia", - "sc_learning_disability", - "sc_mental_health_disorders", - "sc_physical_and_sensory_disability", - "sc_drugs", - "sc_alcohol", - "sc_palliative_care", - "sc_carer", - "sc_elderly_frail", - "sc_neurological_condition", - "sc_autism", - "sc_other_vulnerable_groups" - ) - )) == "Not Known", - na.rm = TRUE - ) + count_not_known = rowSums( + dplyr::select(sc_client_lookup, tidyr::all_of( + c( + "sc_living_alone", + "sc_support_from_unpaid_carer", + "sc_social_worker", + "sc_type_of_housing", + "sc_meals", + "sc_day_care", + "sc_dementia", + "sc_learning_disability", + "sc_mental_health_disorders", + "sc_physical_and_sensory_disability", + "sc_drugs", + "sc_alcohol", + "sc_palliative_care", + "sc_carer", + "sc_elderly_frail", + "sc_neurological_condition", + "sc_autism", + "sc_other_vulnerable_groups" + ) + )) == "Not Known", + na.rm = TRUE + ) ) %>% dplyr::arrange(.data$chi, .data$count_not_known) %>% dplyr::distinct(.data$chi, .keep_all = TRUE) %>% From 298cfe75125df86be39c0de73e591a448ee3c543 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 7 Aug 2024 10:06:45 +0100 Subject: [PATCH 066/105] Add Running SLF files manually scripts --- .../run_episode_file_1415.R | 81 +++++++++++++++++++ .../run_episode_file_1516.R | 79 ++++++++++++++++++ .../run_episode_file_1617.R | 79 ++++++++++++++++++ .../run_individual_file_1415.R | 9 +++ .../run_individual_file_1516.R | 9 +++ .../run_individual_file_1617.R | 9 +++ 6 files changed, 266 insertions(+) create mode 100644 Run_SLF_Files_manually/run_episode_file_1415.R create mode 100644 Run_SLF_Files_manually/run_episode_file_1516.R create mode 100644 Run_SLF_Files_manually/run_episode_file_1617.R create mode 100644 Run_SLF_Files_manually/run_individual_file_1415.R create mode 100644 Run_SLF_Files_manually/run_individual_file_1516.R create mode 100644 Run_SLF_Files_manually/run_individual_file_1617.R diff --git a/Run_SLF_Files_manually/run_episode_file_1415.R b/Run_SLF_Files_manually/run_episode_file_1415.R new file mode 100644 index 000000000..5a921a709 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_1415.R @@ -0,0 +1,81 @@ +library(targets) +library(createslf) + +year <- "1415" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1415", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1415", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1415", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1415", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1415", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1415", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1415", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1415", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1415", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1415", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1415", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1415", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1415", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1415", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1415", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1415", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year) %>% + process_tests_episode_file(year = year) + +## End of Script ## + + diff --git a/Run_SLF_Files_manually/run_episode_file_1516.R b/Run_SLF_Files_manually/run_episode_file_1516.R new file mode 100644 index 000000000..711e767b2 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_1516.R @@ -0,0 +1,79 @@ +library(targets) +library(createslf) + +year <- "1516" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1516", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1516", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1516", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1516", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1516", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1516", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1516", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1516", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1516", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1516", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1516", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1516", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1516", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1516", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1516", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1516", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year)## %>% + #process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1617.R b/Run_SLF_Files_manually/run_episode_file_1617.R new file mode 100644 index 000000000..21066de14 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_1617.R @@ -0,0 +1,79 @@ +library(targets) +library(createslf) + +year <- "1617" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_1617", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_1617", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1617", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_1617", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_1617", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_1617", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_1617", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_1617", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_1617", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_1617", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_1617", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_1617", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_1617", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_1617", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_1617", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_1617", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year) ##%>% + #process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_individual_file_1415.R b/Run_SLF_Files_manually/run_individual_file_1415.R new file mode 100644 index 000000000..70aa2bfca --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_1415.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "1415" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1516.R b/Run_SLF_Files_manually/run_individual_file_1516.R new file mode 100644 index 000000000..179e228cb --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_1516.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "1516" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year)# %>% + #process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1617.R b/Run_SLF_Files_manually/run_individual_file_1617.R new file mode 100644 index 000000000..dc7a41c1f --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_1617.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "1617" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) #%>% + #process_tests_individual_file(year = year) From 5588584a7940c092b95e5523069d8e45666f3140 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Wed, 7 Aug 2024 09:08:49 +0000 Subject: [PATCH 067/105] Style code --- Run_SLF_Files_manually/run_episode_file_1415.R | 2 -- Run_SLF_Files_manually/run_episode_file_1516.R | 4 ++-- Run_SLF_Files_manually/run_episode_file_1617.R | 4 ++-- Run_SLF_Files_manually/run_individual_file_1516.R | 4 ++-- Run_SLF_Files_manually/run_individual_file_1617.R | 4 ++-- 5 files changed, 8 insertions(+), 10 deletions(-) diff --git a/Run_SLF_Files_manually/run_episode_file_1415.R b/Run_SLF_Files_manually/run_episode_file_1415.R index 5a921a709..b5a2eab38 100644 --- a/Run_SLF_Files_manually/run_episode_file_1415.R +++ b/Run_SLF_Files_manually/run_episode_file_1415.R @@ -77,5 +77,3 @@ create_episode_file(processed_data_list, year = year) %>% process_tests_episode_file(year = year) ## End of Script ## - - diff --git a/Run_SLF_Files_manually/run_episode_file_1516.R b/Run_SLF_Files_manually/run_episode_file_1516.R index 711e767b2..59c7ddc63 100644 --- a/Run_SLF_Files_manually/run_episode_file_1516.R +++ b/Run_SLF_Files_manually/run_episode_file_1516.R @@ -73,7 +73,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year)## %>% - #process_tests_episode_file(year = year) +create_episode_file(processed_data_list, year = year) ## %>% +# process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_episode_file_1617.R b/Run_SLF_Files_manually/run_episode_file_1617.R index 21066de14..b10372be6 100644 --- a/Run_SLF_Files_manually/run_episode_file_1617.R +++ b/Run_SLF_Files_manually/run_episode_file_1617.R @@ -73,7 +73,7 @@ processed_data_list <- list( ) # Run episode file -create_episode_file(processed_data_list, year = year) ##%>% - #process_tests_episode_file(year = year) +create_episode_file(processed_data_list, year = year) ## %>% +# process_tests_episode_file(year = year) ## End of Script ## diff --git a/Run_SLF_Files_manually/run_individual_file_1516.R b/Run_SLF_Files_manually/run_individual_file_1516.R index 179e228cb..aace110c4 100644 --- a/Run_SLF_Files_manually/run_individual_file_1516.R +++ b/Run_SLF_Files_manually/run_individual_file_1516.R @@ -5,5 +5,5 @@ year <- "1516" episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year)# %>% - #process_tests_individual_file(year = year) +create_individual_file(episode_file, year = year) # %>% +# process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1617.R b/Run_SLF_Files_manually/run_individual_file_1617.R index dc7a41c1f..9789d0b8e 100644 --- a/Run_SLF_Files_manually/run_individual_file_1617.R +++ b/Run_SLF_Files_manually/run_individual_file_1617.R @@ -5,5 +5,5 @@ year <- "1617" episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) #%>% - #process_tests_individual_file(year = year) +create_individual_file(episode_file, year = year) # %>% +# process_tests_individual_file(year = year) From 0746ee6e1235e641a10d9d736a2138e8b7285879 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Fri, 23 Aug 2024 15:51:45 +0100 Subject: [PATCH 068/105] update write_tests_xlsx --- R/process_tests_episode_file.R | 2 +- R/process_tests_individual_file.R | 2 +- R/write_tests_xlsx.R | 47 +++++++++++++++++++++++-------- 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R index ccf8e495c..289b43686 100644 --- a/R/process_tests_episode_file.R +++ b/R/process_tests_episode_file.R @@ -31,7 +31,7 @@ process_tests_episode_file <- function(data, year) { recid = TRUE ) %>% dplyr::arrange(.data[["recid"]]) %>% - write_tests_xlsx(sheet_name = "ep_file", year, workbook_name = "ep_file") + write_tests_xlsx(sheet_name = stringr::str_glue({"ep_file_{year}"}), workbook_name = "ep_file") return(comparison) } diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 9643a4f3f..4283ec029 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -34,7 +34,7 @@ process_tests_individual_file <- function(data, year) { old_data = produce_individual_file_tests(old_data), new_data = produce_individual_file_tests(data) ) %>% - write_tests_xlsx(sheet_name = "indiv_file", year, workbook_name = "indiv_file") + write_tests_xlsx(sheet_name = stringr::str_glue({"indiv_file_{year}"}), workbook_name = "indiv_file") return(comparison) } diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index ec3cc5705..a924d51de 100644 --- a/R/write_tests_xlsx.R +++ b/R/write_tests_xlsx.R @@ -24,17 +24,42 @@ write_tests_xlsx <- function(comparison_data, "cross_year" )) { # Set up the workbook ---- - tests_workbook_name <- dplyr::case_when( - is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_ep_file_tests"), - !is.null(year) & workbook_name == "ep_file" ~ stringr::str_glue(latest_update(), "_{year}_ep_file_tests"), - is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_indiv_file_tests"), - !is.null(year) & workbook_name == "indiv_file" ~ stringr::str_glue(latest_update(), "_{year}_indiv_file_tests"), - is.null(year) & workbook_name == "lookup" ~ stringr::str_glue(latest_update(), "_lookups_tests"), - is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"), - is.null(year) & workbook_name == "cross_year" ~ stringr::str_glue(latest_update(), "_cross_year_tests"), - !is.null(year) & workbook_name == "sandpit" ~ stringr::str_glue(latest_update(), "_sandpit_extract_tests"), - !is.null(year) & workbook_name == "extract" ~ stringr::str_glue(latest_update(), "_{year}_extract_tests") - ) + if (workbook_name == "ep_file") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_ep_file_tests") + } + } + if (workbook_name == "indiv_file") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_indiv_file_tests") + } + } + if (workbook_name == "lookup") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_lookups_tests") + } + } + if (workbook_name == "sandpit") { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_sandpit_extract_tests") + } + if (workbook_name == "cross_year") { + if (is.null(year)) { + tests_workbook_name <- + stringr::str_glue(latest_update(), "_cross_year_tests") + } + } + if (workbook_name == "extract") { + if (is.null(year)) { + } else{ + tests_workbook_name <- + stringr::str_glue(latest_update(), "_{year}_extract_tests") + } + } + tests_workbook_path <- fs::path( get_slf_dir(), From 3dfbc8e376ec2dac0bd598d3f9b9d5c4e16d56df Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Fri, 23 Aug 2024 15:53:11 +0100 Subject: [PATCH 069/105] update process_refined_death --- R/process_refined_death.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/process_refined_death.R b/R/process_refined_death.R index 48f14fd43..58ed1b29d 100644 --- a/R/process_refined_death.R +++ b/R/process_refined_death.R @@ -54,7 +54,7 @@ process_refined_death <- function( if (write_to_disk) { write_file( refined_death, - get_combined_slf_deaths_lookup_path() + get_combined_slf_deaths_lookup_path(create = TRUE) ) } From 1bfe269063c6c53334223c4ce5d478d515d436a8 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 26 Aug 2024 07:43:03 +0100 Subject: [PATCH 070/105] fix tests by removing get_chi --- R/process_tests_sc_all_at_episodes.R | 3 --- R/process_tests_sc_all_ch_episodes.R | 3 --- R/process_tests_sc_all_hc_episodes.R | 3 --- R/process_tests_sc_all_sds_episodes.R | 3 --- 4 files changed, 12 deletions(-) diff --git a/R/process_tests_sc_all_at_episodes.R b/R/process_tests_sc_all_at_episodes.R index c23a4f6ed..8b5580334 100644 --- a/R/process_tests_sc_all_at_episodes.R +++ b/R/process_tests_sc_all_at_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_at_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_sc_all_episodes_tests( read_file(get_sc_at_episodes_path(update = previous_update())) diff --git a/R/process_tests_sc_all_ch_episodes.R b/R/process_tests_sc_all_ch_episodes.R index d42eca2c7..7e9655c06 100644 --- a/R/process_tests_sc_all_ch_episodes.R +++ b/R/process_tests_sc_all_ch_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_ch_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_sc_all_episodes_tests( read_file(get_sc_ch_episodes_path(update = previous_update())) diff --git a/R/process_tests_sc_all_hc_episodes.R b/R/process_tests_sc_all_hc_episodes.R index d037e7908..7194790c0 100644 --- a/R/process_tests_sc_all_hc_episodes.R +++ b/R/process_tests_sc_all_hc_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_hc_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_sc_all_episodes_tests( read_file(get_sc_hc_episodes_path(update = previous_update())) diff --git a/R/process_tests_sc_all_sds_episodes.R b/R/process_tests_sc_all_sds_episodes.R index 91c32d450..cf87a671c 100644 --- a/R/process_tests_sc_all_sds_episodes.R +++ b/R/process_tests_sc_all_sds_episodes.R @@ -10,9 +10,6 @@ #' #' @export process_tests_sc_all_sds_episodes <- function(data) { - data <- data %>% - slfhelper::get_chi() - comparison <- produce_test_comparison( old_data = produce_sc_all_episodes_tests( read_file(get_sc_sds_episodes_path(update = previous_update())) From cf2a547868767fa3acffc0cd610317e12837a546 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 26 Aug 2024 07:43:21 +0100 Subject: [PATCH 071/105] add 2425 --- Run_SLF_Files_targets/run_targets_2425.R | 9 +++++++++ _targets.R | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 Run_SLF_Files_targets/run_targets_2425.R diff --git a/Run_SLF_Files_targets/run_targets_2425.R b/Run_SLF_Files_targets/run_targets_2425.R new file mode 100644 index 000000000..fe849ede8 --- /dev/null +++ b/Run_SLF_Files_targets/run_targets_2425.R @@ -0,0 +1,9 @@ +library(targets) + +year <- "2425" + +# use targets for the process until testing episode files +tar_make_future( + # it does not recognise `contains(year)` + names = (targets::contains("2425")) +) diff --git a/_targets.R b/_targets.R index 0fea087ca..0f8556309 100644 --- a/_targets.R +++ b/_targets.R @@ -19,7 +19,7 @@ tar_option_set( memory = "persistent" # default option ) -years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324") +years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324", "2425") list( tar_rds(write_to_disk, TRUE), From c5e7c7faabf04750190b8fc96c23d81d9ec06645 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Mon, 26 Aug 2024 06:46:41 +0000 Subject: [PATCH 072/105] Style code --- R/process_tests_episode_file.R | 4 +++- R/process_tests_individual_file.R | 4 +++- R/write_tests_xlsx.R | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R index 289b43686..c45992938 100644 --- a/R/process_tests_episode_file.R +++ b/R/process_tests_episode_file.R @@ -31,7 +31,9 @@ process_tests_episode_file <- function(data, year) { recid = TRUE ) %>% dplyr::arrange(.data[["recid"]]) %>% - write_tests_xlsx(sheet_name = stringr::str_glue({"ep_file_{year}"}), workbook_name = "ep_file") + write_tests_xlsx(sheet_name = stringr::str_glue({ + "ep_file_{year}" + }), workbook_name = "ep_file") return(comparison) } diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 4283ec029..900ce7f03 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -34,7 +34,9 @@ process_tests_individual_file <- function(data, year) { old_data = produce_individual_file_tests(old_data), new_data = produce_individual_file_tests(data) ) %>% - write_tests_xlsx(sheet_name = stringr::str_glue({"indiv_file_{year}"}), workbook_name = "indiv_file") + write_tests_xlsx(sheet_name = stringr::str_glue({ + "indiv_file_{year}" + }), workbook_name = "indiv_file") return(comparison) } diff --git a/R/write_tests_xlsx.R b/R/write_tests_xlsx.R index a924d51de..6847cc977 100644 --- a/R/write_tests_xlsx.R +++ b/R/write_tests_xlsx.R @@ -54,7 +54,7 @@ write_tests_xlsx <- function(comparison_data, } if (workbook_name == "extract") { if (is.null(year)) { - } else{ + } else { tests_workbook_name <- stringr::str_glue(latest_update(), "_{year}_extract_tests") } From a57f9931da0fa085bd29349e61ba0058f225e9ae Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 26 Aug 2024 15:19:03 +0100 Subject: [PATCH 073/105] fix NA matches in refined_death --- R/process_sc_all_care_home.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index 524be1b2f..ff5e35ffe 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -203,7 +203,8 @@ process_sc_all_care_home <- function( # match ch_episode data with deaths data matched_deaths_data <- ch_episode %>% dplyr::left_join(refined_death, - by = "chi" + by = "chi", + na_matches = "never" ) %>% # compare discharge date with NRS and CHI death date # if either of the dates are 5 or fewer days before discharge From ecd019d75ac844a4bd5661cce630c5185422ab9d Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 27 Aug 2024 10:37:24 +0100 Subject: [PATCH 074/105] move latest_cost_year() to cost_uplift() --- R/00-update_refs.R | 13 ------------- R/cost_uplift.R | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/R/00-update_refs.R b/R/00-update_refs.R index a4a21ea73..33022edf6 100644 --- a/R/00-update_refs.R +++ b/R/00-update_refs.R @@ -64,19 +64,6 @@ get_dd_period <- function() { "Jul16_Jun24" } -#' The latest financial year for Cost uplift setting -#' -#' @description Get the latest year for cost uplift -#' -#' @return The financial year format -#' -#' @export -#' -#' @family initialisation -latest_cost_year <- function() { - "2324" -} - #' The year list for slf to update #' #' @description Get the vector of years to update slf diff --git a/R/cost_uplift.R b/R/cost_uplift.R index f14600da6..abbbd9b5a 100644 --- a/R/cost_uplift.R +++ b/R/cost_uplift.R @@ -86,3 +86,21 @@ lookup_uplift <- function(data) { return(data) } + +#' The latest financial year for Cost uplift setting +#' +#' @description Get the latest year for cost uplift +#' latest_cost_year() is hard coded in cost_uplift(). +#' 2223 is not changed automatically with time passes. +#' It is changed only when we get a new instruction from somewhere about cost uplift. +#' Do not change unless specific instructions. +#' Changing this means that we need to change cost_uplift(). +#' +#' @return The financial year format +#' +#' @export +#' +#' @family initialisation +latest_cost_year <- function() { + "2223" +} From 9d5bd1242afa591cd5f2bc18da5e0bcdedc3e46f Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 27 Aug 2024 10:38:00 +0100 Subject: [PATCH 075/105] improve automation --- _targets.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/_targets.R b/_targets.R index 0f8556309..0377e487b 100644 --- a/_targets.R +++ b/_targets.R @@ -19,7 +19,7 @@ tar_option_set( memory = "persistent" # default option ) -years_to_run <- c("1718", "1819", "1920", "2021", "2122", "2223", "2324", "2425") +years_to_run <- createslf::years_to_run() list( tar_rds(write_to_disk, TRUE), From 2f5e0a037fc4a00ce6a127ca899ccf2b59d2881f Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 27 Aug 2024 09:41:58 +0000 Subject: [PATCH 076/105] Update documentation --- man/latest_cost_year.Rd | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/man/latest_cost_year.Rd b/man/latest_cost_year.Rd index 0f50b3ac6..0045b4efb 100644 --- a/man/latest_cost_year.Rd +++ b/man/latest_cost_year.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/00-update_refs.R +% Please edit documentation in R/cost_uplift.R \name{latest_cost_year} \alias{latest_cost_year} \title{The latest financial year for Cost uplift setting} @@ -11,6 +11,11 @@ The financial year format } \description{ Get the latest year for cost uplift +latest_cost_year() is hard coded in cost_uplift(). +2223 is not changed automatically with time passes. +It is changed only when we get a new instruction from somewhere about cost uplift. +Do not change unless specific instructions. +Changing this means that we need to change cost_uplift(). } \seealso{ Other initialisation: From 45ddf9ab8fa02c4c480c185b54397eac74875653 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 27 Aug 2024 11:24:18 +0100 Subject: [PATCH 077/105] fix `cij_ppa` in DD data --- R/create_episode_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 7909e2e7f..efb7c33a9 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -124,8 +124,8 @@ create_episode_file <- function( fill_missing_cij_markers() %>% add_homelessness_flag(year, lookup = homelessness_lookup) %>% add_homelessness_date_flags(year, lookup = homelessness_lookup) %>% - add_ppa_flag() %>% link_delayed_discharge_eps(year, dd_data) %>% + add_ppa_flag() %>% add_nsu_cohort(year, nsu_cohort) %>% match_on_ltcs(year, ltc_data) %>% correct_demographics(year) %>% From 500b166732b7e46525159f4b1b5b0adff0298b1e Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 27 Aug 2024 14:54:03 +0100 Subject: [PATCH 078/105] fix bugs of dd and populate cij_delay back to episodes --- R/create_episode_file.R | 2 +- R/link_delayed_discharge_eps.R | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index efb7c33a9..7909e2e7f 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -124,8 +124,8 @@ create_episode_file <- function( fill_missing_cij_markers() %>% add_homelessness_flag(year, lookup = homelessness_lookup) %>% add_homelessness_date_flags(year, lookup = homelessness_lookup) %>% - link_delayed_discharge_eps(year, dd_data) %>% add_ppa_flag() %>% + link_delayed_discharge_eps(year, dd_data) %>% add_nsu_cohort(year, nsu_cohort) %>% match_on_ltcs(year, ltc_data) %>% correct_demographics(year) %>% diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index a28ee3b0f..8a4ee5e9b 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -286,6 +286,10 @@ link_delayed_discharge_eps <- function( )) %>% dplyr::group_by(.data$chi, .data$cij_marker) %>% dplyr::mutate(cij_delay = max(.data$has_delay)) %>% + dplyr::mutate(cij_delay = dplyr::if_else(cij_delay == "0", + FALSE, + TRUE, + missing = NA)) %>% dplyr::ungroup() %>% # add yearstay and monthly beddays # count_last = TRUE because DD counts last day and not the first @@ -299,6 +303,7 @@ link_delayed_discharge_eps <- function( yearstay = rowSums(dplyr::pick(dplyr::ends_with("_beddays"))) ) %>% # tidy up and rename columns to match the format of episode files + # add variables that you want to keep dplyr::select( "year" = "year_dd", "recid" = "recid_dd", @@ -317,6 +322,7 @@ link_delayed_discharge_eps <- function( "primary_delay_reason", "secondary_delay_reason", "cij_marker", + "cij_ppa", "cij_start_date", "cij_end_date", "cij_pattype_code", @@ -345,7 +351,16 @@ link_delayed_discharge_eps <- function( "dummy_cij_end" ) ) - ) + ) %>% + # populate cij_delay dd details back to ep + dplyr::group_by(chi, cij_marker) %>% + dplyr::mutate(has_dd = any(recid == "DD"), + delay_dd = any(cij_delay)) %>% + dplyr::ungroup() %>% + dplyr::mutate(cij_delay = dplyr::if_else(has_dd, + delay_dd, + cij_delay)) %>% + dplyr::select(-c("has_dd", "delay_dd")) return(linked_data) } From 24dea0bbf7dcfad602d97b3858dbd34273fb0ea3 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 27 Aug 2024 13:55:41 +0000 Subject: [PATCH 079/105] Style code --- R/link_delayed_discharge_eps.R | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index 8a4ee5e9b..aacb0235f 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -287,9 +287,10 @@ link_delayed_discharge_eps <- function( dplyr::group_by(.data$chi, .data$cij_marker) %>% dplyr::mutate(cij_delay = max(.data$has_delay)) %>% dplyr::mutate(cij_delay = dplyr::if_else(cij_delay == "0", - FALSE, - TRUE, - missing = NA)) %>% + FALSE, + TRUE, + missing = NA + )) %>% dplyr::ungroup() %>% # add yearstay and monthly beddays # count_last = TRUE because DD counts last day and not the first @@ -354,12 +355,15 @@ link_delayed_discharge_eps <- function( ) %>% # populate cij_delay dd details back to ep dplyr::group_by(chi, cij_marker) %>% - dplyr::mutate(has_dd = any(recid == "DD"), - delay_dd = any(cij_delay)) %>% + dplyr::mutate( + has_dd = any(recid == "DD"), + delay_dd = any(cij_delay) + ) %>% dplyr::ungroup() %>% dplyr::mutate(cij_delay = dplyr::if_else(has_dd, - delay_dd, - cij_delay)) %>% + delay_dd, + cij_delay + )) %>% dplyr::select(-c("has_dd", "delay_dd")) return(linked_data) From 2f2fd94937ba00450ffc6e07b7c1442da12a05fc Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 27 Aug 2024 15:39:40 +0100 Subject: [PATCH 080/105] keep all variable for delayed discharge episodes --- R/link_delayed_discharge_eps.R | 39 ++++++++++++---------------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index aacb0235f..9b96ac63c 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -304,39 +304,26 @@ link_delayed_discharge_eps <- function( yearstay = rowSums(dplyr::pick(dplyr::ends_with("_beddays"))) ) %>% # tidy up and rename columns to match the format of episode files - # add variables that you want to keep + # keep all variables dplyr::select( + -c( + "year", + "recid", + "record_keydate1", + "record_keydate2", + "postcode", + "hbtreatcode", + "spec" + ) + ) %>% + dplyr::rename( "year" = "year_dd", "recid" = "recid_dd", "record_keydate1" = "record_keydate1_dd", "record_keydate2" = "record_keydate2_dd", - "smrtype", - "chi", - "gender", - "dob", - "age", - "gpprac", "postcode" = "postcode_dd", - "dd_responsible_lca", "hbtreatcode" = "hbtreatcode_dd", - "delay_end_reason", - "primary_delay_reason", - "secondary_delay_reason", - "cij_marker", - "cij_ppa", - "cij_start_date", - "cij_end_date", - "cij_pattype_code", - "cij_ipdc", - "cij_admtype", - "cij_adm_spec", - "cij_dis_spec", - "cij_delay", - "location", - "spec" = "spec_dd", - "dd_quality", - dplyr::ends_with("_beddays"), - "yearstay" + "spec" = "spec_dd" ) %>% # Combine DD with episode data dplyr::bind_rows( From 394865439b7b4206716e3cb0f4ed1b4f974bae00 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 27 Aug 2024 18:30:47 +0100 Subject: [PATCH 081/105] remove dummy variable names from dd_date --- R/link_delayed_discharge_eps.R | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index 9b96ac63c..266e7c960 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -14,6 +14,8 @@ link_delayed_discharge_eps <- function( dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi()) { cli::cli_alert_info("Link delayed discharge to episode file function started at {Sys.time()}") + names_ep = names(episode_file) + episode_file <- episode_file %>% dplyr::mutate( # remember to revoke the cij_end_date with dummy_cij_end @@ -304,16 +306,32 @@ link_delayed_discharge_eps <- function( yearstay = rowSums(dplyr::pick(dplyr::ends_with("_beddays"))) ) %>% # tidy up and rename columns to match the format of episode files - # keep all variables + # keep variables from ep files dplyr::select( -c( + "ep_file_row_id", "year", "recid", "record_keydate1", "record_keydate2", "postcode", "hbtreatcode", - "spec" + "location", + "spec", + ## following are dummy variables + "cij_start_date_lower", + "cij_end_date_upper", + "cij_end_month", + "is_dummy_cij_start", + "dummy_cij_start", + "is_dummy_cij_end", + "dummy_cij_end", + "datediff_start", + "datediff_end", + "has_delay", + "is_dummy_keydate2", + "dummy_keydate2", + "dummy_id" ) ) %>% dplyr::rename( @@ -323,7 +341,8 @@ link_delayed_discharge_eps <- function( "record_keydate2" = "record_keydate2_dd", "postcode" = "postcode_dd", "hbtreatcode" = "hbtreatcode_dd", - "spec" = "spec_dd" + "spec" = "spec_dd", + "location" = "location_dd" ) %>% # Combine DD with episode data dplyr::bind_rows( From 08c58f7e716498b87cc7340da262c5e5f665e817 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 27 Aug 2024 17:32:46 +0000 Subject: [PATCH 082/105] Style code --- R/link_delayed_discharge_eps.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index 266e7c960..b80b35807 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -14,7 +14,7 @@ link_delayed_discharge_eps <- function( dd_data = read_file(get_source_extract_path(year, "dd")) %>% slfhelper::get_chi()) { cli::cli_alert_info("Link delayed discharge to episode file function started at {Sys.time()}") - names_ep = names(episode_file) + names_ep <- names(episode_file) episode_file <- episode_file %>% dplyr::mutate( From f3a90a7a57b56f984cf62b1c52ab73c473e028f6 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 28 Aug 2024 10:22:08 +0100 Subject: [PATCH 083/105] remove `deceased_boxi` variable - bug --- R/add_activity_after_death_flag.R | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index a45e4296a..3d9b234f4 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -26,7 +26,7 @@ add_activity_after_death_flag <- function( by = "chi", suffix = c("", "_boxi") ) %>% - dplyr::filter(.data$deceased == TRUE | .data$deceased_boxi == TRUE) %>% + dplyr::filter(.data$deceased == TRUE) %>% dplyr::distinct() @@ -72,16 +72,6 @@ add_activity_after_death_flag <- function( )) - # Check and print error message for records which already are TRUE for the deceased variable in the episode file, but this doesn't match the - # BOXI deceased variable - check_deceased_match <- flag_data %>% - dplyr::filter(.data$deceased != .data$deceased_boxi) - - if (nrow(check_deceased_match) != 0) { - warning("There were records in the episode file which have a deceased variable which does not match the BOXI NRS deceased variable") - } - - # Fill in date of death if missing in the episode file but available in BOXI lookup, due to historic dates of death not being carried # over from previous financial years flag_data <- flag_data %>% From baadb99dad9df3b0708f391be32b3f9c8b4cde4d Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 28 Aug 2024 14:35:59 +0100 Subject: [PATCH 084/105] remove `create_person_id`. Its matched in client --- R/process_sc_all_care_home.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index ff5e35ffe..c0411cb39 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -368,7 +368,6 @@ process_sc_all_care_home <- function( ch_data_final <- adm_reason_recoded %>% - create_person_id() %>% dplyr::rename( record_keydate1 = "ch_admission_date", record_keydate2 = "ch_discharge_date", From bfed6379d067204988bbb96d5715138c70828b25 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 28 Aug 2024 14:41:52 +0100 Subject: [PATCH 085/105] remove `create_person_id` --- R/process_sc_all_home_care.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/process_sc_all_home_care.R b/R/process_sc_all_home_care.R index 52cab0568..352c4fff3 100644 --- a/R/process_sc_all_home_care.R +++ b/R/process_sc_all_home_care.R @@ -194,8 +194,6 @@ process_sc_all_home_care <- function( TRUE ~ "HC-Unknown" ) ) %>% - # person_id - create_person_id(type = "SC") %>% # compute lca variable from sending_location dplyr::mutate( sc_send_lca = convert_sc_sending_location_to_lca(.data$sending_location) From 0e19c5f31b2293494a699db9c8ef2c025f2a7a79 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Wed, 28 Aug 2024 16:20:31 +0100 Subject: [PATCH 086/105] Update `run_slf_manually` scripts --- .../run_episode_file_1718.R | 4 - .../run_episode_file_1819.R | 4 - .../run_episode_file_1920.R | 4 - .../run_episode_file_2021.R | 4 - .../run_episode_file_2122.R | 4 - .../run_episode_file_2223.R | 4 - .../run_episode_file_2324.R | 4 - .../run_episode_file_2425.R | 75 +++++++++++++++++++ .../run_individual_file_2425.R | 9 +++ 9 files changed, 84 insertions(+), 28 deletions(-) create mode 100644 Run_SLF_Files_manually/run_episode_file_2425.R create mode 100644 Run_SLF_Files_manually/run_individual_file_2425.R diff --git a/Run_SLF_Files_manually/run_episode_file_1718.R b/Run_SLF_Files_manually/run_episode_file_1718.R index f679ea669..b405b5b6e 100644 --- a/Run_SLF_Files_manually/run_episode_file_1718.R +++ b/Run_SLF_Files_manually/run_episode_file_1718.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_1718", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_1718", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_1718", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_1819.R b/Run_SLF_Files_manually/run_episode_file_1819.R index d7a65690e..fb3227512 100644 --- a/Run_SLF_Files_manually/run_episode_file_1819.R +++ b/Run_SLF_Files_manually/run_episode_file_1819.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_1819", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_1819", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_1819", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_1920.R b/Run_SLF_Files_manually/run_episode_file_1920.R index e3c2ebeb0..e2e21bdac 100644 --- a/Run_SLF_Files_manually/run_episode_file_1920.R +++ b/Run_SLF_Files_manually/run_episode_file_1920.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_1920", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_1920", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_1920", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_2021.R b/Run_SLF_Files_manually/run_episode_file_2021.R index c66f4572d..cf98e80de 100644 --- a/Run_SLF_Files_manually/run_episode_file_2021.R +++ b/Run_SLF_Files_manually/run_episode_file_2021.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_2021", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_2021", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_2021", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_2122.R b/Run_SLF_Files_manually/run_episode_file_2122.R index cde974be2..3bcbf2466 100644 --- a/Run_SLF_Files_manually/run_episode_file_2122.R +++ b/Run_SLF_Files_manually/run_episode_file_2122.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_2122", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_2122", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_2122", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_2223.R b/Run_SLF_Files_manually/run_episode_file_2223.R index ee83082f1..af0447eed 100644 --- a/Run_SLF_Files_manually/run_episode_file_2223.R +++ b/Run_SLF_Files_manually/run_episode_file_2223.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_2223", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_2223", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_2223", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_2324.R b/Run_SLF_Files_manually/run_episode_file_2324.R index 508689f6d..bdf16e0f8 100644 --- a/Run_SLF_Files_manually/run_episode_file_2324.R +++ b/Run_SLF_Files_manually/run_episode_file_2324.R @@ -18,10 +18,6 @@ processed_data_list <- list( "source_cmh_extract_2324", store = targets_store ), - cmh = targets::tar_read( - "source_cmh_extract_2324", - store = targets_store - ), dn = targets::tar_read( "source_dn_extract_2324", store = targets_store diff --git a/Run_SLF_Files_manually/run_episode_file_2425.R b/Run_SLF_Files_manually/run_episode_file_2425.R new file mode 100644 index 000000000..699c197b3 --- /dev/null +++ b/Run_SLF_Files_manually/run_episode_file_2425.R @@ -0,0 +1,75 @@ +library(targets) +library(createslf) + +year <- "2425" + +targets_store <- fs::path("/conf/sourcedev/Source_Linkage_File_Updates/", "_targets") + +processed_data_list <- list( + acute = targets::tar_read( + "source_acute_extract_2425", + store = targets_store + ), + ae = targets::tar_read( + "source_ae_extract_2425", + store = targets_store + ), + cmh = targets::tar_read( + "source_cmh_extract_2425", + store = targets_store + ), + dn = targets::tar_read( + "source_dn_extract_2425", + store = targets_store + ), + deaths = targets::tar_read( + "source_nrs_deaths_extract_2425", + store = targets_store + ), + homelessness = targets::tar_read( + "source_homelessness_extract_2425", + store = targets_store + ), + maternity = targets::tar_read( + "source_maternity_extract_2425", + store = targets_store + ), + mental_health = targets::tar_read( + "source_mental_health_extract_2425", + store = targets_store + ), + outpatients = targets::tar_read( + "source_outpatients_extract_2425", + store = targets_store + ), + gp_ooh = targets::tar_read( + "source_ooh_extract_2425", + store = targets_store + ), + prescribing = targets::tar_read( + "source_prescribing_extract_2425", + store = targets_store + ), + care_home = targets::tar_read( + "source_sc_care_home_2425", + store = targets_store + ), + home_care = targets::tar_read( + "source_sc_home_care_2425", + store = targets_store + ), + at = targets::tar_read( + "source_sc_alarms_tele_2425", + store = targets_store + ), + sds = targets::tar_read( + "source_sc_sds_2425", + store = targets_store + ) +) + +# Run episode file +create_episode_file(processed_data_list, year = year) %>% + process_tests_episode_file(year = year) + +## End of Script ## diff --git a/Run_SLF_Files_manually/run_individual_file_2425.R b/Run_SLF_Files_manually/run_individual_file_2425.R new file mode 100644 index 000000000..843eb505c --- /dev/null +++ b/Run_SLF_Files_manually/run_individual_file_2425.R @@ -0,0 +1,9 @@ +library(createslf) + +year <- "2425" + +episode_file <- arrow::read_parquet(get_slf_episode_path(year)) + +# Run individual file +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) From 5948fb0634dc9617474c8df0a8fd144fa16c4d98 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Thu, 29 Aug 2024 11:50:16 +0100 Subject: [PATCH 087/105] further remove person_id --- R/process_extract_alarms_telecare.R | 2 +- R/process_extract_care_home.R | 2 +- R/process_extract_home_care.R | 2 +- R/process_extract_sds.R | 2 +- R/process_sc_all_alarms_telecare.R | 12 ++++++------ R/process_sc_all_care_home.R | 2 +- R/process_sc_all_sds.R | 12 ++++++------ 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/R/process_extract_alarms_telecare.R b/R/process_extract_alarms_telecare.R index 6c481c3a4..9d47dd5f0 100644 --- a/R/process_extract_alarms_telecare.R +++ b/R/process_extract_alarms_telecare.R @@ -41,7 +41,7 @@ process_extract_alarms_telecare <- function( "smrtype", "chi", "dob", - "person_id", + # "person_id", "gender", "postcode", "sc_send_lca", diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R index db7997061..dbf817af4 100644 --- a/R/process_extract_care_home.R +++ b/R/process_extract_care_home.R @@ -115,7 +115,7 @@ process_extract_care_home <- function( "recid", "smrtype", "chi", - "person_id", + # "person_id", "dob", "gender", "postcode", diff --git a/R/process_extract_home_care.R b/R/process_extract_home_care.R index 831496bd2..651be172d 100644 --- a/R/process_extract_home_care.R +++ b/R/process_extract_home_care.R @@ -96,7 +96,7 @@ process_extract_home_care <- function( "cost_total_net", "hc_provider", "hc_reablement", - "person_id" + # "person_id" ) %>% slfhelper::get_anon_chi() diff --git a/R/process_extract_sds.R b/R/process_extract_sds.R index ce317c8b9..f8e5f8579 100644 --- a/R/process_extract_sds.R +++ b/R/process_extract_sds.R @@ -41,7 +41,7 @@ process_extract_sds <- function( "smrtype", "chi", "dob", - "person_id", + # "person_id", "gender", "postcode", "sc_send_lca", diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R index c583fa8a7..aafc3d727 100644 --- a/R/process_sc_all_alarms_telecare.R +++ b/R/process_sc_all_alarms_telecare.R @@ -87,11 +87,11 @@ process_sc_all_alarms_telecare <- function( # Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function) data <- replace_sc_id_with_latest(data) - data$person_id <- paste0( - data$sending_location, - "-", - data$social_care_id - ) + # data$person_id <- paste0( + # data$sending_location, + # "-", + # data$social_care_id + # ) # Deal with episodes that have a package across quarters data[, pkg_count := seq_len(.N), by = list( @@ -125,7 +125,7 @@ process_sc_all_alarms_telecare <- function( dob = data.table::last(dob), postcode = data.table::last(postcode), recid = data.table::last(recid), - person_id = data.table::last(person_id), + # person_id = data.table::last(person_id), sc_send_lca = data.table::last(sc_send_lca) ), by = list( sending_location, diff --git a/R/process_sc_all_care_home.R b/R/process_sc_all_care_home.R index c0411cb39..5478d50cc 100644 --- a/R/process_sc_all_care_home.R +++ b/R/process_sc_all_care_home.R @@ -385,7 +385,7 @@ process_sc_all_care_home <- function( )) %>% dplyr::select( "chi", - "person_id", + # "person_id", "gender", "dob", "postcode", diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R index 5306c0956..c5b7d43eb 100644 --- a/R/process_sc_all_sds.R +++ b/R/process_sc_all_sds.R @@ -128,11 +128,11 @@ process_sc_all_sds <- function( "SDS", convert_sc_sending_location_to_lca(sending_location) )] - sds_full_clean_long$person_id <- paste0( - sds_full_clean_long$sending_location, - "-", - sds_full_clean_long$social_care_id - ) + # sds_full_clean_long$person_id <- paste0( + # sds_full_clean_long$sending_location, + # "-", + # sds_full_clean_long$social_care_id + # ) # Group, arrange and create flags for episodes sds_full_clean_long[, @@ -176,7 +176,7 @@ process_sc_all_sds <- function( dob = data.table::last(dob), postcode = data.table::last(postcode), recid = data.table::last(recid), - person_id = data.table::last(person_id), + # person_id = data.table::last(person_id), sc_send_lca = data.table::last(sc_send_lca) ), by = list(sending_location, social_care_id, smrtype, episode_counter)] rm(sds_full_clean_long) From d54dafc71e6afa5f9ddcc84899e679ab9c826c53 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Fri, 30 Aug 2024 17:38:19 +0100 Subject: [PATCH 088/105] fix duplicate row introduced by adding death --- R/add_activity_after_death_flag.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index 3d9b234f4..034210bc3 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -84,13 +84,15 @@ add_activity_after_death_flag <- function( final_data <- data %>% dplyr::left_join( flag_data, + # TODO: this join_by is not 100% accurate. Consider use ep_file_row_id to join by = c("year", "chi", "record_keydate1", "record_keydate2"), na_matches = "never" ) %>% dplyr::mutate(death_date = lubridate::as_date(ifelse(is.na(death_date) & !(is.na(death_date_boxi)), death_date_boxi, death_date ))) %>% - dplyr::select(-death_date_boxi) + dplyr::select(-death_date_boxi) %>% + dplyr::distinct(ep_file_row_id, .keep_all = TRUE) From 8d6f3e785f1bed42a96d5bc5becb994cfae5342a Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Fri, 30 Aug 2024 17:39:39 +0100 Subject: [PATCH 089/105] remove duplicated chi when joining death data --- R/join_deaths_data.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/join_deaths_data.R b/R/join_deaths_data.R index d2fc51b91..5e61a2082 100644 --- a/R/join_deaths_data.R +++ b/R/join_deaths_data.R @@ -15,7 +15,8 @@ join_deaths_data <- function( return( data %>% dplyr::left_join( - slf_deaths_lookup, + slf_deaths_lookup %>% + dplyr::distinct(chi, .keep_all = TRUE), by = "chi", na_matches = "never", relationship = "many-to-one" From bd01b2839561a4978c0a7a91d4974bff6ec0c8af Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Fri, 30 Aug 2024 17:47:12 +0100 Subject: [PATCH 090/105] TODO: check distinct death data by chi while keeping chi==NA records --- R/process_refined_death.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/process_refined_death.R b/R/process_refined_death.R index 58ed1b29d..dc7663221 100644 --- a/R/process_refined_death.R +++ b/R/process_refined_death.R @@ -50,6 +50,7 @@ process_refined_death <- function( fy = phsmethods::extract_fin_year(death_date), fy = as.character(paste0(substr(fy, 3, 4), substr(fy, 6, 7))) ) + # TODO: check distinct death data by chi while keeping chi==NA records if (write_to_disk) { write_file( From fc6404fbce961db3a32d1df9c3424096a05a4854 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 2 Sep 2024 14:05:06 +0100 Subject: [PATCH 091/105] add parameter for year --- R/create_individual_file.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index dc15fcb0e..f826294d1 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -78,7 +78,7 @@ create_individual_file <- function( if (!check_year_valid(year, type = c("ch", "hc", "at", "sds"))) { individual_file <- individual_file %>% - aggregate_by_chi(exclude_sc_var = TRUE) + aggregate_by_chi(year = year, exclude_sc_var = TRUE) } else { individual_file <- individual_file %>% aggregate_ch_episodes() %>% From 026148276eff08ff792f3094e0cb63c805d2c8a2 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 2 Sep 2024 16:36:59 +0100 Subject: [PATCH 092/105] fix duplicate in add_activity_after_death_flag --- R/add_activity_after_death_flag.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index 034210bc3..5e800c80b 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -92,7 +92,7 @@ add_activity_after_death_flag <- function( death_date_boxi, death_date ))) %>% dplyr::select(-death_date_boxi) %>% - dplyr::distinct(ep_file_row_id, .keep_all = TRUE) + dplyr::distinct() From 746da2b42433d38f3cbb3aa3e028b14d0311d7eb Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 3 Sep 2024 13:30:56 +0100 Subject: [PATCH 093/105] Update `check_year_valid` --- R/check_year_valid.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/check_year_valid.R b/R/check_year_valid.R index 2197d8c0e..217aa1c2b 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -46,7 +46,7 @@ check_year_valid <- function( return(FALSE) } else if (year >= "2425" && type %in% "sparra") { return(FALSE) - } else if (year >= "2425" && type %in% c("ch", "hc", "sds", "at")) { + } else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at")) { return(FALSE) } From 57c7521075cd3b5e7635f8a71cb6a37d439a9903 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 9 Sep 2024 12:29:23 +0100 Subject: [PATCH 094/105] Declare DN variables --- R/create_episode_file.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 7909e2e7f..999a00b8c 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -179,6 +179,13 @@ create_episode_file <- function( ) } + if (!check_year_valid(year, type = "dn")) { + episode_file <- episode_file %>% + dplyr::mutate( + ccm = NA, + total_no_dn_contacts = NA) + } + if (anon_chi_out) { episode_file <- slfhelper::get_anon_chi(episode_file) } From a5fed7f4380b2976d52d46a6a8d3252723ee1fe4 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Mon, 9 Sep 2024 11:32:30 +0000 Subject: [PATCH 095/105] Style code --- R/create_episode_file.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index 999a00b8c..dd22dbc1d 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -183,7 +183,8 @@ create_episode_file <- function( episode_file <- episode_file %>% dplyr::mutate( ccm = NA, - total_no_dn_contacts = NA) + total_no_dn_contacts = NA + ) } if (anon_chi_out) { From 97110d9052331e561d4738672bdf49681982e84b Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 10 Sep 2024 14:33:05 +0100 Subject: [PATCH 096/105] Declare client variables --- R/create_episode_file.R | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index b41153259..a856f7ddf 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -177,7 +177,20 @@ create_episode_file <- function( sc_meals = NA, sc_day_care = NA, social_care_id = NA, - count_not_known = NA + count_not_known = NA, + sc_dementia = NA, + sc_learning_disability = NA, + sc_mental_health_disorders = NA, + sc_physical_and_sensory_disability = NA, + sc_drugs = NA, + sc_alcohol = NA, + sc_palliative_care = NA, + sc_carer = NA, + sc_elderly_frail = NA, + sc_neurological_condition = NA, + sc_autism = NA, + sc_other_vulnerable_groups = NA, + ch_provider_description = NA ) } @@ -214,7 +227,7 @@ create_episode_file <- function( episode_file <- episode_file %>% dplyr::mutate( ccm = NA, - totalnodncontacts = NA + total_no_dn_contacts = NA ) } From 1bb5fa96e1c070affe8bdc6ce1a2eb812d19330c Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Tue, 10 Sep 2024 14:35:43 +0100 Subject: [PATCH 097/105] remove extra dd variables --- R/link_delayed_discharge_eps.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index 4ebf56da7..d4162b619 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -374,7 +374,7 @@ link_delayed_discharge_eps <- function( delay_dd, cij_delay )) %>% - dplyr::select(-c("has_dd", "delay_dd")) + dplyr::select(-c("has_dd", "delay_dd", "original_admission_date", "amended_dates")) return(linked_data) } From 71cc2596972e7ddd0db283c44221017ee7422e4a Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Thu, 12 Sep 2024 11:48:49 +0100 Subject: [PATCH 098/105] remove redundant variables --- R/process_lookup_sc_client.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index 0f20804fa..91c08632d 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -200,7 +200,7 @@ process_lookup_sc_client <- ) %>% dplyr::arrange(.data$chi, .data$count_not_known) %>% dplyr::distinct(.data$chi, .keep_all = TRUE) %>% - dplyr::select(-.data$sending_location) %>% + dplyr::select(-.data$sending_location, -.data$count_not_known) %>% slfhelper::get_anon_chi() if (write_to_disk) { From f30508b3efe14e15e5fb5e8b9159fa2fd736dd67 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 12 Sep 2024 14:54:58 +0100 Subject: [PATCH 099/105] remove fy variable --- R/add_activity_after_death_flag.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index 5e800c80b..5fa0f145f 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -91,7 +91,7 @@ add_activity_after_death_flag <- function( dplyr::mutate(death_date = lubridate::as_date(ifelse(is.na(death_date) & !(is.na(death_date_boxi)), death_date_boxi, death_date ))) %>% - dplyr::select(-death_date_boxi) %>% + dplyr::select(-death_date_boxi, -fy) %>% dplyr::distinct() From c32546d537c7abd36d92bb49f175c0cb852a2513 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Thu, 12 Sep 2024 14:58:48 +0100 Subject: [PATCH 100/105] Remove redundant variable `count_not_known` --- R/create_episode_file.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index a856f7ddf..ecb6fc126 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -177,7 +177,6 @@ create_episode_file <- function( sc_meals = NA, sc_day_care = NA, social_care_id = NA, - count_not_known = NA, sc_dementia = NA, sc_learning_disability = NA, sc_mental_health_disorders = NA, From 615ab24c37594a08a209d86d67e3950092250ee8 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 13 Sep 2024 09:04:33 +0100 Subject: [PATCH 101/105] Remove duplicate code --- R/create_episode_file.R | 8 -------- 1 file changed, 8 deletions(-) diff --git a/R/create_episode_file.R b/R/create_episode_file.R index ecb6fc126..18807882b 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -237,14 +237,6 @@ create_episode_file <- function( ) } - if (!check_year_valid(year, type = "dn")) { - episode_file <- episode_file %>% - dplyr::mutate( - ccm = NA, - total_no_dn_contacts = NA - ) - } - if (anon_chi_out) { episode_file <- slfhelper::get_anon_chi(episode_file) } From cb9b930d4adcb90512a1d1a9fdd106ba24103a75 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Fri, 13 Sep 2024 12:36:05 +0100 Subject: [PATCH 102/105] revert commit - remove fy --- R/add_activity_after_death_flag.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/add_activity_after_death_flag.R b/R/add_activity_after_death_flag.R index 5fa0f145f..5e800c80b 100644 --- a/R/add_activity_after_death_flag.R +++ b/R/add_activity_after_death_flag.R @@ -91,7 +91,7 @@ add_activity_after_death_flag <- function( dplyr::mutate(death_date = lubridate::as_date(ifelse(is.na(death_date) & !(is.na(death_date_boxi)), death_date_boxi, death_date ))) %>% - dplyr::select(-death_date_boxi, -fy) %>% + dplyr::select(-death_date_boxi) %>% dplyr::distinct() From c4123d2152dc16206d3c41f846f9ea968ed98416 Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 16 Sep 2024 16:41:25 +0100 Subject: [PATCH 103/105] update manual run --- Run_SLF_Files_manually/run_individual_file_1516.R | 4 ++-- Run_SLF_Files_manually/run_individual_file_1617.R | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Run_SLF_Files_manually/run_individual_file_1516.R b/Run_SLF_Files_manually/run_individual_file_1516.R index aace110c4..b2dbc4213 100644 --- a/Run_SLF_Files_manually/run_individual_file_1516.R +++ b/Run_SLF_Files_manually/run_individual_file_1516.R @@ -5,5 +5,5 @@ year <- "1516" episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) # %>% -# process_tests_individual_file(year = year) +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1617.R b/Run_SLF_Files_manually/run_individual_file_1617.R index 9789d0b8e..3523eb1d7 100644 --- a/Run_SLF_Files_manually/run_individual_file_1617.R +++ b/Run_SLF_Files_manually/run_individual_file_1617.R @@ -5,5 +5,5 @@ year <- "1617" episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) # %>% -# process_tests_individual_file(year = year) +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) From 2be85412e3d2869ddd3a079b069a3b9b8aae154d Mon Sep 17 00:00:00 2001 From: Jennifer Thom Date: Mon, 16 Sep 2024 16:41:55 +0100 Subject: [PATCH 104/105] declare missing sc variables indiv file --- R/create_individual_file.R | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 6d6c13f19..273761efc 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -132,7 +132,20 @@ create_individual_file <- function( sc_type_of_housing = NA, count_not_known = NA, sc_latest_submission = NA, - social_care_id = NA + social_care_id = NA, + person_id = NA, + sc_alcohol = NA, + sc_autism = NA, + sc_carer = NA, + sc_dementia = NA, + sc_drugs = NA, + sc_elderly_frail = NA, + sc_learning_disability = NA, + sc_mental_health_disorders = NA, + sc_neurological_condition = NA, + sc_other_vulnerable_groups = NA, + sc_palliative_care = NA, + sc_physical_and_sensory_disability = NA ) } From 1dd71830cb69fc98f5507cc69980d7c81a806396 Mon Sep 17 00:00:00 2001 From: Jennit07 Date: Mon, 16 Sep 2024 15:45:08 +0000 Subject: [PATCH 105/105] Style code --- Run_SLF_Files_manually/run_individual_file_1516.R | 4 ++-- Run_SLF_Files_manually/run_individual_file_1617.R | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Run_SLF_Files_manually/run_individual_file_1516.R b/Run_SLF_Files_manually/run_individual_file_1516.R index b2dbc4213..8e8dae906 100644 --- a/Run_SLF_Files_manually/run_individual_file_1516.R +++ b/Run_SLF_Files_manually/run_individual_file_1516.R @@ -5,5 +5,5 @@ year <- "1516" episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file -create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) +create_individual_file(episode_file, year = year) %>% + process_tests_individual_file(year = year) diff --git a/Run_SLF_Files_manually/run_individual_file_1617.R b/Run_SLF_Files_manually/run_individual_file_1617.R index 3523eb1d7..255e4e674 100644 --- a/Run_SLF_Files_manually/run_individual_file_1617.R +++ b/Run_SLF_Files_manually/run_individual_file_1617.R @@ -6,4 +6,4 @@ episode_file <- arrow::read_parquet(get_slf_episode_path(year)) # Run individual file create_individual_file(episode_file, year = year) %>% - process_tests_individual_file(year = year) + process_tests_individual_file(year = year)