diff --git a/.Rbuildignore b/.Rbuildignore index 168a3e006..2cab1bda6 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -22,3 +22,4 @@ ^_targets\.R$ ^_targets\.yaml$ ^_SPSS_archived$ +^run_targets_ diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 87300a6a1..473e0304d 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -91,6 +91,7 @@ hjust hms homecare homev +hscdiip hscp hscpnames IDPC @@ -107,6 +108,7 @@ keyring keytime keytimex kis +lazydt lgl los ltc @@ -166,6 +168,7 @@ readxl reasonwait recid refsource +renviron rlang rmarkdown roxygen diff --git a/NAMESPACE b/NAMESPACE index 6c4f3cd52..d0323b8c4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,6 +13,7 @@ export(convert_hscp_to_hscpnames) export(convert_numeric_to_date) export(convert_sending_location_to_lca) export(convert_year_to_fyyear) +export(create_episode_file) export(create_individual_file) export(create_service_use_cohorts) export(end_fy) @@ -155,7 +156,7 @@ export(read_sc_all_alarms_telecare) export(read_sc_all_care_home) export(read_sc_all_home_care) export(read_sc_all_sds) -export(run_episode_file) +export(setup_keyring) export(start_fy) export(start_fy_quarter) export(start_next_fy_quarter) diff --git a/NEWS.md b/NEWS.md index cbcb62079..2a3453eea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,48 @@ -# March 2023 Update - Unreleased +# September 2023 Update - Unreleased +* Update of 2017/18 onwards to include bug fixes within the files. +* New 2023/24 files. +* New NSU cohort for the 2022/23 file. +* Re-addition of: + * HRIs in the individual file. + * Homelessness flags. +* Bug fixes: + * Blank `datazone` in A&E. This was caused by PC8-format postcodes failing to match the SLF postcode lookup and has been fixed. + * Large increase in preventable beddays. This was caused by a difference between the SPSS and R logic; the SPSS logic is now used, which + brings the difference down to `3.3%`. + * Issue where the `locality` column showed the literal value "locality" in each row instead of the true locality. This has now been fixed. + * Duplicated CHI numbers in the individual file. The issue was identified when trying to include HRIs and has now been corrected. +* Internal changes to SLF development: + * `DN` and `CMH` data are now archived in an HSCDIIP folder as the BOXI datamart for these extracts has been closed down. The function `get_boxi_extract_path` has been updated to reflect this. + * Tests updated to include an `HSCP` count. + * Tests created for the `Delayed Discharges` extract and the `Social care Client lookup`. +# June 2023 Update - Released 24-Jul-2023 +* 2011/12 -> 2013/14 – These files have not been altered, other than to make them available in a new file type (parquet). +* 2017/18 – These files have been recreated using our new R pipeline, but the data has not changed. We did this so that we would have a good comparator file. +* 2018/19 -> 2022/23 – These files have been recreated using the R pipeline and are also using updated data (as in a ‘normal’ update). +* Files changed into parquet format. +* SLFhelper updated. +* Removal of `keydate1_dateformat` and `keydate2_dateformat`. +* `dd_responsible_lca` – This variable now uses CA2019 codes instead of the 2-digit ‘old’ LCA code. +* Preventable beddays - we were not able to calculate these correctly. +* Death fixes not included. +* Variables are not ordered in R as they were in SPSS. +* End of HHG. +* New variable `ch_postcode`. +* Renamed the variables `cost_total_net_incdnas`, `ooh_outcome.1`, `ooh_outcome.2`, `ooh_outcome.3`, `ooh_outcome.4`, `totalnodncontacts`.
+* HRI's not included. +* Homelessness flags not included. +* Keep_population flag not included. + + +# March 2023 Update - Released 10-Mar-2023 +* 2021/22 episode and individual files refreshed with updated activity. +* 2022/23 file updated and contains data up to the end of Q3. +* Social care data is available for 2022/23. +* Typo in the variable name `ooh_covid_assessment` +* Next update in May as a test run in R but won't be released. +* Next release in June. + # December 2022 Update - Released 07-Dec-2022 * Now using the 2022v2 Scottish Postcode Directory. * Now using the 2020 Urban Rural classifications (instead of the older 2016 ones), this means variables such as `URx_2016` will now be called `URx_2020`. diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R index 99da03ba8..db12f7a9e 100644 --- a/R/aggregate_by_chi.R +++ b/R/aggregate_by_chi.R @@ -126,9 +126,9 @@ aggregate_by_chi <- function(episode_file) { individual_file_cols6 <- episode_file[, .( preventable_beddays = ifelse( - max(cij_ppa, na.rm = TRUE), - max(cij_end_date) - min(cij_start_date), - NA_real_ + any(cij_ppa, na.rm = TRUE), + as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))), + NA_integer_ ) ), # cij_marker has been renamed as cij_total @@ -203,12 +203,19 @@ aggregate_ch_episodes <- function(episode_file) { data.table::setDT(episode_file) # Perform grouping and aggregation - episode_file <- episode_file[, `:=`( - ch_no_cost = max(ch_no_cost), - ch_ep_start = min(record_keydate1), - ch_ep_end = max(ch_ep_end), - ch_cost_per_day = mean(ch_cost_per_day) - ), by = c("chi", "ch_chi_cis")] + episode_file[, c( + "ch_no_cost", + "ch_ep_start", + "ch_ep_end", + "ch_cost_per_day" + ) := list( + max(ch_no_cost), + min(record_keydate1), + max(ch_ep_end), + mean(ch_cost_per_day) + ), + by = c("chi", "ch_chi_cis") + ] # Convert back to tibble if needed episode_file <- tibble::as_tibble(episode_file) diff --git a/R/check_year_valid.R b/R/check_year_valid.R index d170cd5b5..1361eb47e 100644 --- a/R/check_year_valid.R +++ b/R/check_year_valid.R @@ -42,7 +42,7 @@ check_year_valid <- function( return(FALSE) } else if (year >= "2122" && type %in% c("CMH", "DN")) { return(FALSE) - } else if (year >= "2223" && type %in% "NSU") { + } else if (year >= "2324" && type %in% "NSU") { return(FALSE) } else if (year >= "2324" && type %in% c("SPARRA", "HHG")) { return(FALSE) diff --git a/R/compute_mid_year_age.R b/R/compute_mid_year_age.R index 01bfaf5d5..0e2483cf7 100644 --- a/R/compute_mid_year_age.R +++ b/R/compute_mid_year_age.R @@ -20,7 +20,7 @@ compute_mid_year_age <- function(fyyear, dob) { age_intervals <- lubridate::interval( start = dob, - end = as.Date(midpoint_fy(fyyear)) + end = midpoint_fy(fyyear) ) ages <- lubridate::as.period(age_intervals)$year diff --git a/R/convert_sending_location_to_lca.R b/R/convert_sending_location_to_lca.R index 6e9c577c0..ff7e51db1 100644 --- a/R/convert_sending_location_to_lca.R +++ b/R/convert_sending_location_to_lca.R @@ -9,7 +9,7 @@ #' @export #' #' @examples -#' sending_location <- c("100", "120") +#' sending_location <- c(100, 120) #' convert_sending_location_to_lca(sending_location) #' #' @family code functions @@ -18,38 +18,40 @@ convert_sending_location_to_lca <- function(sending_location) { lca <- dplyr::case_match( sending_location, - "100" ~ "01", # Aberdeen City - "110" ~ "02", # Aberdeenshire - "120" ~ "03", # Angus - "130" ~ "04", # Argyll and Bute - "355" ~ "05", # Scottish Borders - "150" ~ "06", # Clackmannanshire - "395" ~ "07", # West Dumbartonshire - 
"170" ~ "08", # Dumfries and Galloway - "180" ~ "09", # Dundee City - "190" ~ "10", # East Ayrshire - "200" ~ "11", # East Dunbartonshire - "210" ~ "12", # East Lothian - "220" ~ "13", # East Renfrewshire - "230" ~ "14", # City of Edinburgh - "240" ~ "15", # Falkirk - "250" ~ "16", # Fife - "260" ~ "17", # Glasgow City - "270" ~ "18", # Highland - "280" ~ "19", # Inverclyde - "290" ~ "20", # Midlothian - "300" ~ "21", # Moray - "310" ~ "22", # North Ayrshire - "320" ~ "23", # North Lanarkshire - "330" ~ "24", # Orkney Islands - "340" ~ "25", # Perth and Kinross - "350" ~ "26", # Renfrewshire - "360" ~ "27", # Shetland Islands - "370" ~ "28", # South Ayrshire - "380" ~ "29", # South Lanarkshire - "390" ~ "30", # Stirling - "400" ~ "31", # West Lothian - "235" ~ "32" # Na_h_Eileanan_Siar + 100L ~ "01", # Aberdeen City + 110L ~ "02", # Aberdeenshire + 120L ~ "03", # Angus + 130L ~ "04", # Argyll and Bute + 355L ~ "05", # Scottish Borders + 150L ~ "06", # Clackmannanshire + 395L ~ "07", # West Dunbartonshire + 170L ~ "08", # Dumfries and Galloway + 180L ~ "09", # Dundee City + 190L ~ "10", # East Ayrshire + 200L ~ "11", # East Dunbartonshire + 210L ~ "12", # East Lothian + 220L ~ "13", # East Renfrewshire + 230L ~ "14", # City of Edinburgh + 240L ~ "15", # Falkirk + 250L ~ "16", # Fife + 260L ~ "17", # Glasgow City + 270L ~ "18", # Highland + 280L ~ "19", # Inverclyde + 290L ~ "20", # Midlothian + 300L ~ "21", # Moray + 310L ~ "22", # North Ayrshire + 320L ~ "23", # North Lanarkshire + 330L ~ "24", # Orkney Islands + 340L ~ "25", # Perth and Kinross + 350L ~ "26", # Renfrewshire + 360L ~ "27", # Shetland Islands + 370L ~ "28", # South Ayrshire + 380L ~ "29", # South Lanarkshire + 390L ~ "30", # Stirling + 400L ~ "31", # West Lothian + 235L ~ "32", # Na_h_Eileanan_Siar + .default = NA_character_ ) + return(lca) } diff --git a/R/run_episode_file.R b/R/create_episode_file.R similarity index 97% rename from R/run_episode_file.R rename to R/create_episode_file.R index 19c2481f2..bad42be5e 100644 --- a/R/run_episode_file.R +++ b/R/create_episode_file.R @@ -1,16 +1,15 @@ -#' Produce the Source Episode file +#' Create the Source Episode file #' #' @param processed_data_list containing data from processed extracts. #' @param year The year to process, in FY format. #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. #' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output -#' (instead of chi) +#' (instead of chi). #' -#' @return a [tibble][tibble::tibble-package] containing the episode file +#' @return the Source Episode file as a [tibble][tibble::tibble-package]. #' @export -#' -run_episode_file <- function( +create_episode_file <- function( processed_data_list, year, write_to_disk = TRUE, @@ -134,7 +133,7 @@ run_episode_file <- function( #' Store the unneeded episode file variables #' #' @param data The in-progress episode file data. -#' @inheritParams run_episode_file +#' @inheritParams create_episode_file #' @param vars_to_keep a character vector of the variables to keep, all others #' will be stored. #' @@ -172,7 +171,7 @@ store_ep_file_vars <- function(data, year, vars_to_keep) { #' Load the unneeded episode file variables #' -#' @inheritParams run_episode_file +#' @inheritParams create_episode_file #' @inheritParams store_ep_file_vars #' #' @return The full SLF data. 
diff --git a/R/create_individual_file.R b/R/create_individual_file.R index 84dbd28ee..436f1c8d7 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -1,11 +1,11 @@ -#' Create individual file +#' Create the Source Individual file #' -#' @description Creates individual file from episode file +#' @description Creates the individual file from the episode file. #' -#' @param episode_file Tibble containing episodic data +#' @param episode_file Tibble containing episodic data. #' @param anon_chi_in (Default:TRUE) Is `anon_chi` used in the input -#' (instead of chi) -#' @inheritParams run_episode_file +#' (instead of chi). +#' @inheritParams create_episode_file #' #' @return The processed individual file #' @export @@ -61,7 +61,7 @@ create_individual_file <- function( remove_blank_chi() %>% add_cij_columns() %>% add_all_columns() %>% - aggregate_ch_episodes_zihao() %>% + aggregate_ch_episodes() %>% clean_up_ch(year) %>% recode_gender() %>% aggregate_by_chi() %>% @@ -741,13 +741,25 @@ join_sc_client <- function( sc_demographics %>% dplyr::select("sending_location", "social_care_id", "chi"), by = c("sending_location", "social_care_id") - ) + ) %>% + dplyr::mutate(count_not_known = rowSums(dplyr::select(., all_of( + c( + "sc_living_alone", + "sc_support_from_unpaid_carer", + "sc_social_worker", + "sc_meals", + "sc_day_care" + ) + )) == "Not Known")) %>% + dplyr::arrange(chi, count_not_known) %>% + dplyr::distinct(chi, .keep_all = TRUE) # Match on client variables by chi individual_file <- individual_file %>% dplyr::left_join( join_client_demog, - by = "chi" + by = "chi", + relationship = "one-to-one" ) %>% dplyr::select(!c("sending_location", "social_care_id", "sc_latest_submission")) diff --git a/R/get_boxi_extract_path.R b/R/get_boxi_extract_path.R index 60dd7857a..6096525e5 100644 --- a/R/get_boxi_extract_path.R +++ b/R/get_boxi_extract_path.R @@ -29,7 +29,11 @@ get_boxi_extract_path <- function( )) { type <- match.arg(type) - year_dir <- get_year_dir(year, extracts_dir = TRUE) + if (type %in% c("DN", "CMH")) { + dir <- fs::path(get_slf_dir(), "Archived_data") + } else { + dir <- get_year_dir(year, extracts_dir = TRUE) + } if (!check_year_valid(year, type)) { return(get_dummy_boxi_extract_path()) @@ -53,11 +57,12 @@ get_boxi_extract_path <- function( ) boxi_extract_path_csv_gz <- fs::path( - year_dir, + dir, stringr::str_glue("{file_name}-20{year}.csv.gz") ) + boxi_extract_path_csv <- fs::path( - year_dir, + dir, stringr::str_glue("{file_name}-20{year}.csv") ) diff --git a/R/get_connection_PHS_database.R b/R/get_connection_PHS_database.R index 5973e0003..0a528f45b 100644 --- a/R/get_connection_PHS_database.R +++ b/R/get_connection_PHS_database.R @@ -1,22 +1,32 @@ #' Open a connection to a PHS database #' -#' @description Opens a connection to PHS database to allow data to be collected +#' @description Opens a connection to PHS database given a Data Source Name +#' (DSN) it will try to get the username, asking for input if in an interactive +#' session. It will also use [keyring][keyring::keyring-package] to find +#' an existing keyring called 'createslf' which should contain a `db_password` +#' key with the users database password. #' -#' @param dsn The Data Source Name passed on to `odbc::dbconnect` -#' the dsn must be setup first. e.g. SMRA or DVPROD +#' @param dsn The Data Source Name (DSN) passed on to [odbc::dbConnect()] +#' the DSN must be set up first. e.g. 
`SMRA` or `DVPROD` #' @param username The username to use for authentication, -#' if not supplied it first will check the environment variable -#' and finally ask the user for input. +#' if not supplied it will try to find it automatically and if possible ask the +#' user for input. #' -#' @return a connection to the specified dsn +#' @return a connection to the specified Data Source. #' @export -#' -phs_db_connection <- function(dsn, username = Sys.getenv("USER")) { - # Collect username from the environment - username <- Sys.getenv("USER") +phs_db_connection <- function(dsn, username) { + if (missing(username)) { + # Collect username if possible + username <- dplyr::case_when( + Sys.info()["USER"] != "unknown" ~ Sys.info()["USER"], + Sys.getenv("USER") != "" ~ Sys.getenv("USER"), + system2("whoami", stdout = TRUE) != "" ~ system2("whoami", stdout = TRUE), + .default = NA + ) + } - # Check the username is not empty and take input if not - if (is.na(username) || username == "") { + # If the username is missing try to get input from the user + if (is.na(username)) { if (rlang::is_interactive()) { username <- rstudioapi::showPrompt( title = "Username", @@ -24,46 +34,219 @@ phs_db_connection <- function(dsn, username = Sys.getenv("USER")) { default = "" ) } else { - cli::cli_abort("No username found, you should supply one with {.arg username}") + cli::cli_abort( + c( + "x" = "No username found, you can use the {.arg username} argument.", + "i" = "Alternatively, add {.code USER = \"\"} to your + {.file .Renviron} file." + ) + ) } } - # TODO improve error messages and provide instructions for setting up keyring - # Add the following code to R profile. - # Sys.setenv("CREATESLF_KEYRING_PASS" = "createslf"), - # keyring_create("createslf", password = Sys.getenv("CREATESLF_KEYRING_PASS")), - # key_set(keyring = "createslf", service = "db_password") + # Check the status of keyring + # Does the 'createslf' keyring exist + keyring_exists <- "createslf" %in% keyring::keyring_list()[["keyring"]] - if (!("createslf" %in% keyring::keyring_list()[["keyring"]])) { - cli::cli_abort("The {.val createslf} keyring does not exist.") + # Does the 'db_password' key exist in the 'createslf' keyring + if (keyring_exists) { + key_exists <- "db_password" %in% keyring::key_list(keyring = "createslf")[["service"]] + } else { + key_exists <- FALSE } - if (!("db_password" %in% keyring::key_list(keyring = "createslf")[["service"]])) { - cli::cli_abort("{.val db_password} is missing from the {.val createslf} keyring.") - } + # Does the 'CREATESLF_KEYRING_PASS' environment variable exist + env_var_pass_exists <- Sys.getenv("CREATESLF_KEYRING_PASS") != "" - if (Sys.getenv("CREATESLF_KEYRING_PASS") == "") { - cli::cli_abort("You must have the password to unlock the {.val createslf} keyring in your environment as - {.envvar CREATESLF_KEYRING_PASS}. 
Please set this up in your {.file .Renviron} or {.file .Rprofile}") + if (!all(keyring_exists, key_exists, env_var_pass_exists)) { + if (rlang::is_interactive()) { + setup_keyring( + keyring = "createslf", + key = "db_password", + keyring_exists = keyring_exists, + key_exists = key_exists, + env_var_pass_exists = env_var_pass_exists + ) + } else { + if (any(keyring_exists, key_exists, env_var_pass_exists)) { + cli::cli_abort( + c( + "x" = "Your keyring needs to be set up, run:", + "{.code setup_keyring(keyring = \"createslf\", key = \"db_password\", + keyring_exists = {keyring_exists}, key_exists = {key_exists}, + env_var_pass_exists = {env_var_pass_exists})}" + ) + ) + } else { + cli::cli_abort( + c( + "x" = "Your keyring needs to be set up, run:", + "{.code setup_keyring(keyring = \"createslf\", + key = \"db_password\")}" + ) + ) + } + } } - keyring::keyring_unlock(keyring = "createslf", password = Sys.getenv("CREATESLF_KEYRING_PASS")) - - if (keyring::keyring_is_locked(keyring = "createslf")) { - cli::cli_abort("Keyring is locked. To unlock createslf keyring, please use {.fun keyring::keyring_unlock}") + if (env_var_pass_exists) { + keyring::keyring_unlock( + keyring = "createslf", + password = Sys.getenv("CREATESLF_KEYRING_PASS") + ) + } else { + keyring::keyring_unlock( + keyring = "createslf", + password = rstudioapi::askForPassword( + prompt = "Enter the password for the keyring you just created." + ) + ) } # Create the connection - password_text <- stringr::str_glue("{dsn} password for user: {username}") db_connection <- odbc::dbConnect( odbc::odbc(), dsn = dsn, uid = username, - pwd = keyring::key_get(keyring = "createslf", service = "db_password") + pwd = keyring::key_get( + keyring = "createslf", + service = "db_password" + ) ) keyring::keyring_lock(keyring = "createslf") return(db_connection) } + +#' Interactively set up the keyring +#' +#' @description +#' This is meant to be used with [phs_db_connection()], it can only be used +#' interactively i.e. not in targets or in a workbench job. +#' +#' With the default options it will go through the steps to set up a keyring +#' which can be used to supply passwords to [odbc::dbConnect()] (or others) in a +#' secure and seamless way. +#' +#' 1. Create an .Renviron file in the project and add a password (for the +#' keyring) to it. +#' 2. Create a keyring with the password - Since we have saved the password as +#' an environment variable it can be picked unlocked and used automatically. +#' 3. Add the database password to the keyring. +#' +#' +#' @param keyring Name of the keyring +#' @param key Name of the key +#' @param keyring_exists Does the keyring already exist +#' @param key_exists Does the key already exist +#' @param env_var_pass_exists Does the password for the keyring already exist +#' in the environment. +#' +#' @return NULL (invisibly) +#' @export +setup_keyring <- function( + keyring = "createslf", + key = "db_password", + keyring_exists = FALSE, + key_exists = FALSE, + env_var_pass_exists = FALSE) { + # First we need the password as an environment variable + if (!env_var_pass_exists) { + if (Sys.getenv("CREATESLF_KEYRING_PASS") != "") { + cli::cli_alert_warning( + "{.env CREATESLF_KEYRING_PASS} already exists in the environment, you + will need to clean this up manually if it's not correct." 
+ ) + keyring_password <- Sys.getenv("CREATESLF_KEYRING_PASS") + } else if ( + any(stringr::str_detect( + readr::read_lines(".Renviron"), + "^CREATESLF_KEYRING_PASS\\s*?=\\s*?['\"].+?['\"]$" + )) + + ) { + cli::cli_abort( + "Your {.file .Renviron} file looks ok, try restarting your session." + ) + } else { + keyring_password <- rstudioapi::askForPassword( + prompt = stringr::str_glue( + "Enter a password for the '{keyring}' keyring, this should + not be your LDAP / database password." + ) + ) + if (is.null(keyring_password)) { + cli::cli_abort("No keyring password entered.") + } + if (!fs::file_exists(".Renviron")) { + cli::cli_alert_success("Creating an {.file .Renviron} file.") + } + + renviron_text <- stringr::str_glue( + "CREATESLF_KEYRING_PASS = \"{keyring_password}\"" + ) + + readr::write_lines( + x = renviron_text, + file = ".Renviron", + append = TRUE + ) + + cli::cli_alert_success( + "Added {.code {renviron_text}} to the {.file .Renviron} file." + ) + + cli::cli_alert_info("You will need to restart your R session.") + } + } else { + keyring_password <- Sys.getenv("CREATESLF_KEYRING_PASS") + } + + # If the keyring doesn't exist create it now. + if (!keyring_exists) { + if (keyring %in% keyring::keyring_list()[["keyring"]]) { + cli::cli_alert_warning( + "The {keyring} keyring already exists, you will be asked to + overwrite it." + ) + } + keyring::keyring_create( + keyring = keyring, + password = keyring_password + ) + + cli::cli_alert_success( + "Created the '{keyring}' keyring with {.fun keyring::keyring_create}." + ) + } + + # If we just created the keyring it will already be unlocked + keyring::keyring_unlock( + keyring = keyring, + password = keyring_password + ) + + # Now add the password to the keyring + if (!key_exists) { + keyring::key_set( + keyring = keyring, + service = key, + prompt = "Enter you LDAP password for database connections." + ) + + cli::cli_alert_success( + "Added the '{key}' key to the '{keyring}' keyring with + {.fun keyring::keyring_set}." + ) + } + + keyring::keyring_lock(keyring = keyring) + + cli::cli_alert_success( + "The keyring should now be set up correctly." + ) + + return(invisible(NULL)) +} diff --git a/R/get_existing_data_for_tests.R b/R/get_existing_data_for_tests.R index 91fa2293e..1dd0b350e 100644 --- a/R/get_existing_data_for_tests.R +++ b/R/get_existing_data_for_tests.R @@ -46,7 +46,7 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") { slf_data <- suppressMessages(slfhelper::read_slf_episode( year = year, recids = recids, - columns = variable_names + col_select = variable_names )) if ("hscp2018" %in% variable_names) { slf_data <- dplyr::rename(slf_data, "hscp" = "hscp2018") @@ -54,7 +54,7 @@ get_existing_data_for_tests <- function(new_data, file_version = "episode") { } else { slf_data <- suppressMessages(slfhelper::read_slf_individual( year = year, - columns = variable_names + col_select = variable_names )) } diff --git a/R/get_source_extract_path.R b/R/get_source_extract_path.R index 1816ceb25..4cb5eef44 100644 --- a/R/get_source_extract_path.R +++ b/R/get_source_extract_path.R @@ -10,27 +10,34 @@ #' @export #' #' @family extract file paths -get_source_extract_path <- function(year, - type = c( - "Acute", - "AE", - "AT", - "CH", - "Client", - "CMH", - "DD", - "Deaths", - "DN", - "GPOoH", - "HC", - "Homelessness", - "Maternity", - "MH", - "Outpatients", - "PIS", - "SDS" - ), - ...) 
{ +get_source_extract_path <- function( + year, + type = c( + "Acute", + "AE", + "AT", + "CH", + "Client", + "CMH", + "DD", + "Deaths", + "DN", + "GPOoH", + "HC", + "Homelessness", + "Maternity", + "MH", + "Outpatients", + "PIS", + "SDS" + ), + ...) { + if (year %in% type) { + cli::cli_abort("{.val {year}} was supplied to the {.arg year} argument.") + } + + year <- check_year_format(year) + type <- match.arg(type) if (!check_year_valid(year, type)) { diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R index 4358a0ba0..f4fb7d3e5 100644 --- a/R/process_extract_homelessness.R +++ b/R/process_extract_homelessness.R @@ -146,13 +146,14 @@ process_extract_homelessness <- function( ) if (write_to_disk) { - final_data %>% - write_file(get_file_path( - get_year_dir(year), - stringr::str_glue("homelessness_for_source-20{year}"), - ext = "rds", + write_file( + final_data, + get_source_extract_path( + year = year, + type = "Homelessness", check_mode = "write" - )) + ) + ) } return(final_data) diff --git a/R/process_lookup_postcode.R b/R/process_lookup_postcode.R index a17a0c97e..f9f1d47f4 100644 --- a/R/process_lookup_postcode.R +++ b/R/process_lookup_postcode.R @@ -53,7 +53,7 @@ process_lookup_postcode <- function(spd_path = get_spd_path(), tidyselect::matches("datazone\\d{4}$") ) %>% dplyr::mutate( - locality = tidyr::replace_na("locality", "No Locality Information") + locality = tidyr::replace_na(.data$locality, "No Locality Information") ) diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 2c93f243e..a9d193465 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -64,9 +64,8 @@ produce_individual_file_tests <- function(data) { create_demog_test_flags() %>% create_hb_test_flags(.data$hbrescode) %>% create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>% - create_hscp_test_flags(.data$hscp2018) %>% # keep variables for comparison - dplyr::select("valid_chi":dplyr::last_col()) %>% + dplyr::select(c("valid_chi":dplyr::last_col())) %>% # use function to sum new test flags calculate_measures(measure = "sum") @@ -86,7 +85,9 @@ produce_individual_file_tests <- function(data) { min_max_measures <- data %>% calculate_measures( - vars = "health_net_cost", + vars = c( + "health_net_cost" + ), measure = "min-max" ) @@ -99,11 +100,18 @@ produce_individual_file_tests <- function(data) { measure = "sum" ) + dup_chi <- data.frame( + measure = "duplicated chi number", + value = duplicated(data$chi) %>% + sum() %>% as.integer() + ) + join_output <- list( test_flags, all_measures, min_max_measures, - sum_measures + sum_measures, + dup_chi ) %>% purrr::reduce(dplyr::full_join, by = c("measure", "value")) diff --git a/R/process_tests_outpatients.R b/R/process_tests_outpatients.R index f8a7a6a2e..5ab3e82db 100644 --- a/R/process_tests_outpatients.R +++ b/R/process_tests_outpatients.R @@ -12,11 +12,13 @@ process_tests_outpatients <- function(data, year) { comparison <- produce_test_comparison( old_data = produce_source_extract_tests(old_data, sum_mean_vars = "cost", - max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net") + max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"), + add_hscp_count = FALSE ), new_data = produce_source_extract_tests(data, sum_mean_vars = "cost", - max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net") + max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net"), + add_hscp_count = FALSE ) ) %>% 
write_tests_xlsx(sheet_name = "00B", year) diff --git a/R/produce_source_extract_tests.R b/R/produce_source_extract_tests.R index 10f842fc6..7f8feda92 100644 --- a/R/produce_source_extract_tests.R +++ b/R/produce_source_extract_tests.R @@ -13,6 +13,7 @@ #' (data is from [get_source_extract_path()]) #' @param sum_mean_vars variables used when selecting 'all' measures from [calculate_measures()] #' @param max_min_vars variables used when selecting 'min-max' from [calculate_measures()] +#' @param add_hscp_count Default set to TRUE. For use where `hscp variable` is not available, specify FALSE. #' #' @return a dataframe with a count of each flag #' from [calculate_measures()] @@ -28,13 +29,19 @@ produce_source_extract_tests <- function(data, max_min_vars = c( "record_keydate1", "record_keydate2", "cost_total_net", "yearstay" - )) { + ), + add_hscp_count = TRUE) { test_flags <- data %>% # use functions to create HB and partnership flags create_demog_test_flags() %>% create_hb_test_flags(.data$hbtreatcode) %>% - create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>% - create_hscp_test_flags(.data$hscp) %>% + create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) + + if (add_hscp_count) { + test_flags <- create_hscp_test_flags(test_flags, .data$hscp) + } + + test_flags <- test_flags %>% # keep variables for comparison dplyr::select("valid_chi":dplyr::last_col()) %>% # use function to sum new test flags diff --git a/R/read_sc_all_alarms_telecare.R b/R/read_sc_all_alarms_telecare.R index ac3ac206d..2c7bd03db 100644 --- a/R/read_sc_all_alarms_telecare.R +++ b/R/read_sc_all_alarms_telecare.R @@ -22,21 +22,19 @@ read_sc_all_alarms_telecare <- function(sc_dvprod_connection = phs_db_connection "service_start_date", "service_end_date" ) %>% - # fix bad period (2017, 2020 & 2021) + dplyr::collect() %>% + # fix bad period (2017, 2020, 2021, and so on) dplyr::mutate( - period = dplyr::case_match( - .data$period, - "2017" ~ "2017Q4", - "2020" ~ "2020Q4", - "2021" ~ "2021Q4", - .default = .data$period + period = dplyr::if_else( + grepl("\\d{4}$", .data$period), + paste0(.data$period, "Q4"), + .data$period ) ) %>% dplyr::mutate( dplyr::across(c("sending_location", "service_type"), ~ as.integer(.x)) ) %>% - dplyr::arrange(.data$sending_location, .data$social_care_id) %>% - dplyr::collect() + dplyr::arrange(.data$sending_location, .data$social_care_id) return(at_full_data) } diff --git a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R index d33dfbe49..ea6f81bfc 100644 --- a/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R +++ b/_SPSS_archived/All_years/02-Lookups/99_extract_NSU_data.R @@ -6,7 +6,7 @@ library(glue) nsu_dir <- path("/conf/hscdiip/SLF_Extracts/NSU") # Change the year -fin_year <- "1516" +fin_year <- "2223" db_connection <- odbc::dbConnect( odbc::odbc(), @@ -16,7 +16,7 @@ db_connection <- odbc::dbConnect( ) # Check the table name and change if required. 
-table <- dbplyr::in_schema("ROBERM18", "FINAL_2") +table <- dbplyr::in_schema("ROBERM18", "FINAL_1") # Read NSU data nsu_data <- @@ -35,9 +35,11 @@ nsu_data <- collect() # Write out the data -file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.zsav")) +file_path <- path(nsu_dir, glue("All_CHIs_20{fin_year}.parquet")) # This will archive the existing file for later comparison if (file_exists(file_path)) { - file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.zsav"))) + file_copy(file_path, path(nsu_dir, glue("All_CHIs_20{fin_year}_OLD.parquet"))) } -write_sav(nsu_data, file_path, compress = TRUE) + +nsu_data %>% + arrow::write_parquet(file_path, compression = "zstd", compression_level = 10) diff --git a/_targets.R b/_targets.R index 9698cf27e..db26477ef 100644 --- a/_targets.R +++ b/_targets.R @@ -545,7 +545,7 @@ list( ), tar_target( episode_file, - run_episode_file( + create_episode_file( processed_data_list, year, write_to_disk diff --git a/man/add_acute_columns.Rd b/man/add_acute_columns.Rd index 52ba071b6..c2659f821 100644 --- a/man/add_acute_columns.Rd +++ b/man/add_acute_columns.Rd @@ -7,7 +7,7 @@ add_acute_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_ae_columns.Rd b/man/add_ae_columns.Rd index 9b7099513..fdc31b7ff 100644 --- a/man/add_ae_columns.Rd +++ b/man/add_ae_columns.Rd @@ -7,7 +7,7 @@ add_ae_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_all_columns.Rd b/man/add_all_columns.Rd index d502e95c3..1d2e587db 100644 --- a/man/add_all_columns.Rd +++ b/man/add_all_columns.Rd @@ -7,7 +7,7 @@ add_all_columns(episode_file) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Add new columns based on SMRType and recid which follow a pattern diff --git a/man/add_at_columns.Rd b/man/add_at_columns.Rd index e05ea9101..af978530a 100644 --- a/man/add_at_columns.Rd +++ b/man/add_at_columns.Rd @@ -7,7 +7,7 @@ add_at_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_ch_columns.Rd b/man/add_ch_columns.Rd index 4938f7690..a036a257e 100644 --- a/man/add_ch_columns.Rd +++ b/man/add_ch_columns.Rd @@ -7,7 +7,7 @@ add_ch_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} diff --git a/man/add_cij_columns.Rd b/man/add_cij_columns.Rd index 7d00e6299..c48c1a3ef 100644 --- a/man/add_cij_columns.Rd +++ b/man/add_cij_columns.Rd @@ -7,7 +7,7 @@ add_cij_columns(episode_file) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Add new columns related to CIJ diff --git a/man/add_cmh_columns.Rd b/man/add_cmh_columns.Rd index a1d82cba6..a1cb74abb 100644 --- a/man/add_cmh_columns.Rd +++ b/man/add_cmh_columns.Rd @@ -7,7 +7,7 @@ add_cmh_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_dd_columns.Rd b/man/add_dd_columns.Rd index 08d9c0fe4..11e85fdc7 100644 --- a/man/add_dd_columns.Rd +++ b/man/add_dd_columns.Rd @@ -7,7 +7,7 @@ add_dd_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_dn_columns.Rd b/man/add_dn_columns.Rd index bf6af008f..ffdf59a82 100644 --- a/man/add_dn_columns.Rd +++ b/man/add_dn_columns.Rd @@ -7,7 +7,7 @@ add_dn_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_gls_columns.Rd b/man/add_gls_columns.Rd index e71dc755b..6ab7e9645 100644 --- a/man/add_gls_columns.Rd +++ b/man/add_gls_columns.Rd @@ -7,7 +7,7 @@ add_gls_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_hc_columns.Rd b/man/add_hc_columns.Rd index 95d8f1d3b..a58f226ec 100644 --- a/man/add_hc_columns.Rd +++ b/man/add_hc_columns.Rd @@ -7,7 +7,7 @@ add_hc_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_hl1_columns.Rd b/man/add_hl1_columns.Rd index 7600db5e9..24fc714e9 100644 --- a/man/add_hl1_columns.Rd +++ b/man/add_hl1_columns.Rd @@ -7,7 +7,7 @@ add_hl1_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_ipdc_cols.Rd b/man/add_ipdc_cols.Rd index 0f91cbd90..bd630b9d3 100644 --- a/man/add_ipdc_cols.Rd +++ b/man/add_ipdc_cols.Rd @@ -7,7 +7,7 @@ add_ipdc_cols(episode_file, prefix, condition, ipdc_d = TRUE, elective = TRUE) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} diff --git a/man/add_mat_columns.Rd b/man/add_mat_columns.Rd index aae729323..5faab0dc1 100644 --- a/man/add_mat_columns.Rd +++ b/man/add_mat_columns.Rd @@ -7,7 +7,7 @@ add_mat_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_mh_columns.Rd b/man/add_mh_columns.Rd index 3c50c6cb8..c587c490a 100644 --- a/man/add_mh_columns.Rd +++ b/man/add_mh_columns.Rd @@ -7,7 +7,7 @@ add_mh_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_nrs_columns.Rd b/man/add_nrs_columns.Rd index 9d7b3f8bf..b41201a57 100644 --- a/man/add_nrs_columns.Rd +++ b/man/add_nrs_columns.Rd @@ -7,7 +7,7 @@ add_nrs_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_nsu_columns.Rd b/man/add_nsu_columns.Rd index 6a54bbcbf..5aed481f0 100644 --- a/man/add_nsu_columns.Rd +++ b/man/add_nsu_columns.Rd @@ -7,7 +7,7 @@ add_nsu_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_ooh_columns.Rd b/man/add_ooh_columns.Rd index 01814ab6d..f1e6b63f5 100644 --- a/man/add_ooh_columns.Rd +++ b/man/add_ooh_columns.Rd @@ -7,7 +7,7 @@ add_ooh_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_op_columns.Rd b/man/add_op_columns.Rd index 08c4419e2..9fb8bc158 100644 --- a/man/add_op_columns.Rd +++ b/man/add_op_columns.Rd @@ -7,7 +7,7 @@ add_op_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_pis_columns.Rd b/man/add_pis_columns.Rd index b582acf2e..836218da0 100644 --- a/man/add_pis_columns.Rd +++ b/man/add_pis_columns.Rd @@ -7,7 +7,7 @@ add_pis_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/add_sds_columns.Rd b/man/add_sds_columns.Rd index d5a5fb2cf..c06b88527 100644 --- a/man/add_sds_columns.Rd +++ b/man/add_sds_columns.Rd @@ -7,7 +7,7 @@ add_sds_columns(episode_file, prefix, condition) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. 
"Acute"} diff --git a/man/add_standard_cols.Rd b/man/add_standard_cols.Rd index 744aa49de..4392157d2 100644 --- a/man/add_standard_cols.Rd +++ b/man/add_standard_cols.Rd @@ -13,7 +13,7 @@ add_standard_cols( ) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{prefix}{Prefix to add to related columns, e.g. "Acute"} diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd index 013123902..1585accbb 100644 --- a/man/aggregate_by_chi.Rd +++ b/man/aggregate_by_chi.Rd @@ -7,7 +7,7 @@ aggregate_by_chi(episode_file) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Aggregate episode file by CHI to convert into diff --git a/man/aggregate_ch_episodes.Rd b/man/aggregate_ch_episodes.Rd index 1c955d666..3223e6d25 100644 --- a/man/aggregate_ch_episodes.Rd +++ b/man/aggregate_ch_episodes.Rd @@ -7,7 +7,7 @@ aggregate_ch_episodes(episode_file) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Aggregate CH variables by CHI and CIS. diff --git a/man/clean_up_ch.Rd b/man/clean_up_ch.Rd index 0182c84e8..c0c61966d 100644 --- a/man/clean_up_ch.Rd +++ b/man/clean_up_ch.Rd @@ -7,7 +7,7 @@ clean_up_ch(episode_file, year) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{year}{The year to process, in FY format.} } diff --git a/man/convert_sending_location_to_lca.Rd b/man/convert_sending_location_to_lca.Rd index 8c7a29088..78bf475ba 100644 --- a/man/convert_sending_location_to_lca.Rd +++ b/man/convert_sending_location_to_lca.Rd @@ -17,7 +17,7 @@ Convert Social Care Sending Location Codes into the Local Council Authority Codes. 
} \examples{ -sending_location <- c("100", "120") +sending_location <- c(100, 120) convert_sending_location_to_lca(sending_location) } diff --git a/man/correct_cij_vars.Rd b/man/correct_cij_vars.Rd index 97a7f046f..558514dc6 100644 --- a/man/correct_cij_vars.Rd +++ b/man/correct_cij_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{correct_cij_vars} \alias{correct_cij_vars} \title{Correct the CIJ variables} diff --git a/man/create_cohort_lookups.Rd b/man/create_cohort_lookups.Rd index f0ad267aa..109869074 100644 --- a/man/create_cohort_lookups.Rd +++ b/man/create_cohort_lookups.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{create_cohort_lookups} \alias{create_cohort_lookups} \title{Create the cohort lookups} diff --git a/man/create_cost_inc_dna.Rd b/man/create_cost_inc_dna.Rd index 69e7e37b5..47c38b176 100644 --- a/man/create_cost_inc_dna.Rd +++ b/man/create_cost_inc_dna.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{create_cost_inc_dna} \alias{create_cost_inc_dna} \title{Create cost total net inc DNA} diff --git a/man/run_episode_file.Rd b/man/create_episode_file.Rd similarity index 63% rename from man/run_episode_file.Rd rename to man/create_episode_file.Rd index 59d5fea1d..e1bda32b9 100644 --- a/man/run_episode_file.Rd +++ b/man/create_episode_file.Rd @@ -1,10 +1,10 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R -\name{run_episode_file} -\alias{run_episode_file} -\title{Produce the Source Episode file} +% Please edit documentation in R/create_episode_file.R +\name{create_episode_file} +\alias{create_episode_file} +\title{Create the Source Episode file} \usage{ -run_episode_file( +create_episode_file( processed_data_list, year, write_to_disk = TRUE, @@ -20,11 +20,11 @@ run_episode_file( \code{TRUE} i.e. write the data to disk.} \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output -(instead of chi)} +(instead of chi).} } \value{ -a \link[tibble:tibble-package]{tibble} containing the episode file +the Source Episode file as a \link[tibble:tibble-package]{tibble}. } \description{ -Produce the Source Episode file +Create the Source Episode file } diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd index fa759e7b1..4fd9a4a53 100644 --- a/man/create_individual_file.Rd +++ b/man/create_individual_file.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/create_individual_file.R \name{create_individual_file} \alias{create_individual_file} -\title{Create individual file} +\title{Create the Source Individual file} \usage{ create_individual_file( episode_file, @@ -13,7 +13,7 @@ create_individual_file( ) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} \item{year}{The year to process, in FY format.} @@ -21,14 +21,14 @@ create_individual_file( \code{TRUE} i.e. 
write the data to disk.} \item{anon_chi_in}{(Default:TRUE) Is \code{anon_chi} used in the input -(instead of chi)} +(instead of chi).} \item{anon_chi_out}{(Default:TRUE) Should \code{anon_chi} be used in the output -(instead of chi)} +(instead of chi).} } \value{ The processed individual file } \description{ -Creates individual file from episode file +Creates the individual file from the episode file. } diff --git a/man/fill_missing_cij_markers.Rd b/man/fill_missing_cij_markers.Rd index 03b64217e..4795eed7a 100644 --- a/man/fill_missing_cij_markers.Rd +++ b/man/fill_missing_cij_markers.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{fill_missing_cij_markers} \alias{fill_missing_cij_markers} \title{Fill any missing CIJ markers for records that should have them} diff --git a/man/join_cohort_lookups.Rd b/man/join_cohort_lookups.Rd index 445dcd7c0..15a860a36 100644 --- a/man/join_cohort_lookups.Rd +++ b/man/join_cohort_lookups.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{join_cohort_lookups} \alias{join_cohort_lookups} \title{Join cohort lookups} diff --git a/man/load_ep_file_vars.Rd b/man/load_ep_file_vars.Rd index cee9cc440..509b0e00c 100644 --- a/man/load_ep_file_vars.Rd +++ b/man/load_ep_file_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{load_ep_file_vars} \alias{load_ep_file_vars} \title{Load the unneeded episode file variables} diff --git a/man/phs_db_connection.Rd b/man/phs_db_connection.Rd index 93e73ee55..8ff9d0a32 100644 --- a/man/phs_db_connection.Rd +++ b/man/phs_db_connection.Rd @@ -4,19 +4,23 @@ \alias{phs_db_connection} \title{Open a connection to a PHS database} \usage{ -phs_db_connection(dsn, username = Sys.getenv("USER")) +phs_db_connection(dsn, username) } \arguments{ -\item{dsn}{The Data Source Name passed on to \code{odbc::dbconnect} -the dsn must be setup first. e.g. SMRA or DVPROD} +\item{dsn}{The Data Source Name (DSN) passed on to \code{\link[odbc:dbConnect-OdbcDriver-method]{odbc::dbConnect()}} +the DSN must be set up first. e.g. \code{SMRA} or \code{DVPROD}} \item{username}{The username to use for authentication, -if not supplied it first will check the environment variable -and finally ask the user for input.} +if not supplied it will try to find it automatically and if possible ask the +user for input.} } \value{ -a connection to the specified dsn +a connection to the specified Data Source. } \description{ -Opens a connection to PHS database to allow data to be collected +Opens a connection to PHS database given a Data Source Name +(DSN) it will try to get the username, asking for input if in an interactive +session. It will also use \link[keyring:keyring-package]{keyring} to find +an existing keyring called 'createslf' which should contain a \code{db_password} +key with the users database password. 
} diff --git a/man/produce_source_extract_tests.Rd b/man/produce_source_extract_tests.Rd index 679132127..97984103a 100644 --- a/man/produce_source_extract_tests.Rd +++ b/man/produce_source_extract_tests.Rd @@ -7,7 +7,8 @@ produce_source_extract_tests( data, sum_mean_vars = c("beddays", "cost", "yearstay"), - max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay") + max_min_vars = c("record_keydate1", "record_keydate2", "cost_total_net", "yearstay"), + add_hscp_count = TRUE ) } \arguments{ @@ -17,6 +18,8 @@ produce_source_extract_tests( \item{sum_mean_vars}{variables used when selecting 'all' measures from \code{\link[=calculate_measures]{calculate_measures()}}} \item{max_min_vars}{variables used when selecting 'min-max' from \code{\link[=calculate_measures]{calculate_measures()}}} + +\item{add_hscp_count}{Default set to TRUE. For use where \verb{hscp variable} is not available, specify FALSE.} } \value{ a dataframe with a count of each flag diff --git a/man/recode_gender.Rd b/man/recode_gender.Rd index 526d2829d..aaa28e6eb 100644 --- a/man/recode_gender.Rd +++ b/man/recode_gender.Rd @@ -7,7 +7,7 @@ recode_gender(episode_file) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Recode gender to 1.5 if 0 or 9. diff --git a/man/remove_blank_chi.Rd b/man/remove_blank_chi.Rd index 9cba40a8f..b290dd1e7 100644 --- a/man/remove_blank_chi.Rd +++ b/man/remove_blank_chi.Rd @@ -7,7 +7,7 @@ remove_blank_chi(episode_file) } \arguments{ -\item{episode_file}{Tibble containing episodic data} +\item{episode_file}{Tibble containing episodic data.} } \description{ Convert blank strings to NA and remove NAs from CHI column diff --git a/man/setup_keyring.Rd b/man/setup_keyring.Rd new file mode 100644 index 000000000..c40ef31c1 --- /dev/null +++ b/man/setup_keyring.Rd @@ -0,0 +1,44 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/get_connection_PHS_database.R +\name{setup_keyring} +\alias{setup_keyring} +\title{Interactively set up the keyring} +\usage{ +setup_keyring( + keyring = "createslf", + key = "db_password", + keyring_exists = FALSE, + key_exists = FALSE, + env_var_pass_exists = FALSE +) +} +\arguments{ +\item{keyring}{Name of the keyring} + +\item{key}{Name of the key} + +\item{keyring_exists}{Does the keyring already exist} + +\item{key_exists}{Does the key already exist} + +\item{env_var_pass_exists}{Does the password for the keyring already exist +in the environment.} +} +\value{ +NULL (invisibly) +} +\description{ +This is meant to be used with \code{\link[=phs_db_connection]{phs_db_connection()}}, it can only be used +interactively i.e. not in targets or in a workbench job. + +With the default options it will go through the steps to set up a keyring +which can be used to supply passwords to \code{\link[odbc:dbConnect-OdbcDriver-method]{odbc::dbConnect()}} (or others) in a +secure and seamless way. +\enumerate{ +\item Create an .Renviron file in the project and add a password (for the +keyring) to it. +\item Create a keyring with the password - Since we have saved the password as +an environment variable it can be picked unlocked and used automatically. +\item Add the database password to the keyring. 
+} +} diff --git a/man/store_ep_file_vars.Rd b/man/store_ep_file_vars.Rd index 06316aac1..880266d58 100644 --- a/man/store_ep_file_vars.Rd +++ b/man/store_ep_file_vars.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/run_episode_file.R +% Please edit documentation in R/create_episode_file.R \name{store_ep_file_vars} \alias{store_ep_file_vars} \title{Store the unneeded episode file variables} diff --git a/run_targets_1718.R b/run_targets_1718.R new file mode 100644 index 000000000..ebc58895f --- /dev/null +++ b/run_targets_1718.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("1718")) +) diff --git a/run_targets_1819.R b/run_targets_1819.R new file mode 100644 index 000000000..83bbcedef --- /dev/null +++ b/run_targets_1819.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("1819")) +) diff --git a/run_targets_1920.R b/run_targets_1920.R new file mode 100644 index 000000000..1640d1900 --- /dev/null +++ b/run_targets_1920.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("1920")) +) diff --git a/run_targets_2021.R b/run_targets_2021.R new file mode 100644 index 000000000..80749e81a --- /dev/null +++ b/run_targets_2021.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2021")) +) diff --git a/run_targets_2122.R b/run_targets_2122.R new file mode 100644 index 000000000..aa95d7b24 --- /dev/null +++ b/run_targets_2122.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2122")) +) diff --git a/run_targets_2223.R b/run_targets_2223.R new file mode 100644 index 000000000..2ded7d5fd --- /dev/null +++ b/run_targets_2223.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2223")) +) diff --git a/run_targets_2324.R b/run_targets_2324.R new file mode 100644 index 000000000..b875984f4 --- /dev/null +++ b/run_targets_2324.R @@ -0,0 +1,4 @@ +library(targets) +tar_make_future( + names = (targets::contains("2324")) +) diff --git a/tests/testthat/_snaps/convert_sending_location_to_lca.md b/tests/testthat/_snaps/convert_sending_location_to_lca.md index 464ff2d37..1fa02dc14 100644 --- a/tests/testthat/_snaps/convert_sending_location_to_lca.md +++ b/tests/testthat/_snaps/convert_sending_location_to_lca.md @@ -1,10 +1,10 @@ # Can convert a SC sending location to lca code Code - convert_sending_location_to_lca(c("100", "110", "120", "130", "355", "150", - "395", "170", "180", "190", "200", "210", "220", "230", "240", "250", "260", - "270", "280", "290", "300", "310", "320", "330", "340", "350", "360", "370", - "380", "390", "400", "235", "999", "0", NA)) + convert_sending_location_to_lca(c(100L, 110L, 120L, 130L, 355L, 150L, 395L, + 170L, 180L, 190L, 200L, 210L, 220L, 230L, 240L, 250L, 260L, 270L, 280L, 290L, + 300L, 310L, 320L, 330L, 340L, 350L, 360L, 370L, 380L, 390L, 400L, 235L, 999L, + 0L, NA_integer_)) Output [1] "01" "02" "03" "04" "05" "06" "07" "08" "09" "10" "11" "12" "13" "14" "15" [16] "16" "17" "18" "19" "20" "21" "22" "23" "24" "25" "26" "27" "28" "29" "30" diff --git a/tests/testthat/test-check_year_valid.R b/tests/testthat/test-check_year_valid.R index ca0738c89..eda74dbdf 100644 --- a/tests/testthat/test-check_year_valid.R +++ b/tests/testthat/test-check_year_valid.R @@ -49,7 +49,8 @@ test_that("Check year valid works for specific datasets ", { expect_true(check_year_valid("1920", "NSU")) expect_true(check_year_valid("2021", "NSU")) 
expect_true(check_year_valid("2122", "NSU")) - expect_false(check_year_valid("2223", "NSU")) + expect_true(check_year_valid("2223", "NSU")) + expect_false(check_year_valid("2324", "NSU")) # SPARRA expect_false(check_year_valid("1415", "SPARRA")) diff --git a/tests/testthat/test-compute_mid_year_age.R b/tests/testthat/test-compute_mid_year_age.R new file mode 100644 index 000000000..a4a542b9e --- /dev/null +++ b/tests/testthat/test-compute_mid_year_age.R @@ -0,0 +1,16 @@ +test_that("Accurately compute mid year age", { + expect_equal( + compute_mid_year_age("1718", lubridate::make_date("2000")), + phsmethods::age_calculate( + lubridate::make_date("2000"), + lubridate::make_date("2017", 9L, 30L) + ) + ) + expect_equal( + compute_mid_year_age("2021", lubridate::make_date("1999") + 1:1000), + phsmethods::age_calculate( + lubridate::make_date("1999") + 1:1000, + lubridate::make_date("2020", 9L, 30L) + ) + ) +}) diff --git a/tests/testthat/test-convert_sending_location_to_lca.R b/tests/testthat/test-convert_sending_location_to_lca.R index 0bc67668e..eb66802a6 100644 --- a/tests/testthat/test-convert_sending_location_to_lca.R +++ b/tests/testthat/test-convert_sending_location_to_lca.R @@ -2,42 +2,51 @@ test_that("Can convert a SC sending location to lca code", { expect_snapshot( convert_sending_location_to_lca( c( - "100", - "110", - "120", - "130", - "355", - "150", - "395", - "170", - "180", - "190", - "200", - "210", - "220", - "230", - "240", - "250", - "260", - "270", - "280", - "290", - "300", - "310", - "320", - "330", - "340", - "350", - "360", - "370", - "380", - "390", - "400", - "235", - "999", - "0", - NA + 100L, + 110L, + 120L, + 130L, + 355L, + 150L, + 395L, + 170L, + 180L, + 190L, + 200L, + 210L, + 220L, + 230L, + 240L, + 250L, + 260L, + 270L, + 280L, + 290L, + 300L, + 310L, + 320L, + 330L, + 340L, + 350L, + 360L, + 370L, + 380L, + 390L, + 400L, + 235L, + 999L, + 0L, + NA_integer_ ) ) ) }) + +test_that("Errors on unexpected input", { + expect_error( + convert_sending_location_to_lca("100") + ) + expect_error( + convert_sending_location_to_lca(c("100", 99L)) + ) +}) diff --git a/tests/testthat/test-get_file_paths.R b/tests/testthat/test-get_file_paths.R index 2bec746f7..a3b29a290 100644 --- a/tests/testthat/test-get_file_paths.R +++ b/tests/testthat/test-get_file_paths.R @@ -1,3 +1,28 @@ +test_that("Errors properly", { + expect_error( + get_file_path(directory = "foo", file_name = "bar"), + "The directory .+? does not exist\\." + ) + + expect_error( + get_file_path( + directory = ".", + file_name_regexp = "targets", + check_mode = "write" + ), + "`check_mode = \"write\"` can't be used" + ) +}) + +test_that("Can do check exists", { + expect_false(get_file_path( + directory = ".", + file_name = "foo.R", + check_mode = "exists" + )) +}) + + skip_on_ci() slf_updates_dir <- fs::path(