From 9b13f094d7f6d2d838ab35c1ee1a044794733583 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Mon, 15 Apr 2024 09:42:10 +0100 Subject: [PATCH] 934 review tests on createslf package (#942) * update documentation * Update sc connection name * Update documentation * update tests * skip tests on git * [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/8521649016/attempts/1https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/8521649016/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/942#issuecomment-2031713390 Signed-off-by: check-spelling-bot on-behalf-of: @check-spelling * fix targets pipeline sc_send_lca no longer available in sc_client_lookup and hence corrupt the targets pipeline * fix R CMD check notes no visible binding and param * Style code * fix typo * [check-spelling] Update metadata Update for https://github.com/Public-Health-Scotland/source-linkage-files/actions/runs/8541148819/attempts/1 Accepted in https://github.com/Public-Health-Scotland/source-linkage-files/pull/942#issuecomment-2034903438 Signed-off-by: check-spelling-bot on-behalf-of: @check-spelling * update spelling * speeling action * Revert "speeling action" This reverts commit f9ffcf9a70e611663ba57e73ffd2971d56d96beb. * spelling update fix "ignored-expect-variant" * spelling update fix "ignored-expect-variant" --------- Signed-off-by: check-spelling-bot Co-authored-by: Jennifer Thom Co-authored-by: Jennit07 Co-authored-by: lizihao-anu --- .github/actions/spelling/allow.txt | 2 +- .github/actions/spelling/expect.txt | 28 +++++------ R/add_hri_variables.R | 51 +++++++++---------- R/add_keep_population_flag.R | 71 ++++++++++++++------------- R/aggregate_by_chi.R | 21 ++++++-- R/create_demog_test_flags.R | 7 ++- R/create_episode_file.R | 8 +-- R/create_individual_file.R | 1 + R/fix_sc_dates.R | 9 ++-- R/get_lookup_paths.R | 1 + R/get_sandpit_extract_path.R | 2 + R/link_delayed_discharge_eps.R | 2 + R/process_extract_care_home.R | 2 - R/process_extract_homelessness.R | 53 ++++++++++---------- R/process_lookup_sc_client.R | 31 +++++++----- R/process_lookup_sc_demographics.R | 48 +++++++++--------- R/process_sc_all_alarms_telecare.R | 15 ++++-- R/process_sc_all_sds.R | 26 ++++++---- R/process_tests_alarms_telecare.R | 2 +- R/process_tests_care_home.R | 2 +- R/process_tests_cmh.R | 2 +- R/process_tests_district_nursing.R | 2 +- R/process_tests_episode_file.R | 2 +- R/process_tests_home_care.R | 2 +- R/process_tests_homelessness.R | 2 +- R/process_tests_individual_file.R | 4 +- R/process_tests_nrs_deaths.R | 2 +- R/process_tests_prescribing.R | 2 +- R/process_tests_sc_client_lookup.R | 9 ++-- R/process_tests_sc_demographics.R | 2 +- R/process_tests_sds.R | 2 +- R/produce_sc_all_episodes_tests.R | 2 +- R/produce_source_extract_tests.R | 2 +- R/read_lookup_sc_demographics.R | 6 +-- R/replace_sc_id_with_latest.R | 4 +- man/add_hri_variables.Rd | 2 + man/aggregate_by_chi.Rd | 4 +- man/create_episode_file.Rd | 4 ++ man/create_individual_file.Rd | 2 + man/fix_sc_end_dates.Rd | 2 +- man/fix_sc_missing_end_dates.Rd | 6 +-- man/fix_sc_start_dates.Rd | 2 +- man/get_pop_path.Rd | 2 + man/get_sandpit_extract_path.Rd | 4 ++ man/join_sc_client.Rd | 4 +- man/process_extract_care_home.Rd | 3 -- man/process_extract_homelessness.Rd | 2 + man/process_lookup_sc_client.Rd | 2 + man/produce_tests_sc_client_lookup.Rd | 2 - man/read_lookup_sc_demographics.Rd | 6 ++- tests/testthat/_snaps/get_dd_path.md | 2 +- tests/testthat/test-read_file.R | 2 + tests/testthat/test-write_file.R | 2 + 53 files changed, 276 insertions(+), 204 deletions(-) diff --git a/.github/actions/spelling/allow.txt b/.github/actions/spelling/allow.txt index 4b9786da7..f23cd6eac 100644 --- a/.github/actions/spelling/allow.txt +++ b/.github/actions/spelling/allow.txt @@ -8,4 +8,4 @@ McMahon Moohan Scougal Tayside -Zihao \ No newline at end of file +Zihao diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt index 7617421ef..d4124911f 100644 --- a/.github/actions/spelling/expect.txt +++ b/.github/actions/spelling/expect.txt @@ -9,7 +9,6 @@ atlassian attendcat aut bedday -beddays birthtime bodyloc boxi @@ -61,16 +60,16 @@ dischto disdest dminutes dna -dnas dontrun downup dplyr dsn dtplyr -DVPROD dvprod envir +fcase feb +fifelse fileext Finalise fst @@ -80,31 +79,32 @@ fyyear geogs ggplot github -GLS gls gms GPOo gpprac gss hbnames +hbp hbpraccode hbrescode hbtreatcode hbtreatname hci HCP -HHG hhg hjust hms homecare homev +hri hscdiip hscp hscpnames htmlwidgets IDPC infyyear +intzone ipdc issuenumber itle @@ -119,11 +119,12 @@ keytime keytimex kis lazydt +lcap +LCHO lgl lintr los ltc -ltcs lubridate magrittr markdownguide @@ -131,7 +132,7 @@ Matern Mcbride mcmahon MMMYY -monthflag +MONTHFLAG mpat multiday multisession @@ -140,7 +141,6 @@ NAs newcons nhs nhshosp -NRS nrs nsu odbc @@ -161,9 +161,9 @@ pkgdown placeinc plics PMS +popluation postcodes PPAs -Prac prac praccode ptypes @@ -172,7 +172,7 @@ quickstart rankdir Rbuildignore rcmdcheck -rdd +RDD rds reabl reablement @@ -190,29 +190,28 @@ roxygen Rprofile Rscript rspm -RStudio rstudio rstudioapi Rtype +scoial +scotp SDcols seealso selfharm setkeyv setnafill setnames +setorder Siar sigfac simd -SLF slf slfhelper smr SMRA smrtype -SPARRA sparra spd -SPSS spss stadm starwars @@ -223,7 +222,6 @@ submis tadm tarchetypes tbl -Telecare telecare testthat thom diff --git a/R/add_hri_variables.R b/R/add_hri_variables.R index 519ce3694..3765e1d4d 100644 --- a/R/add_hri_variables.R +++ b/R/add_hri_variables.R @@ -22,15 +22,15 @@ flag_non_scottish_residents <- function( # of the postcode pc_areas <- slf_pc_lookup %>% dplyr::mutate( - pc_area = stringr::str_match(postcode, "^[A-Z]{1,3}"), + pc_area = stringr::str_match(.data$postcode, "^[A-Z]{1,3}"), scot_flag = TRUE ) %>% - dplyr::distinct(pc_area, scot_flag) + dplyr::distinct(.data$pc_area, .data$scot_flag) # Create a flag, 'keep_flag', to determine whether individuals are Scottish # residents or not return_data <- data %>% - dplyr::mutate(pc_area = stringr::str_match(postcode, "^[A-Z]{1,3}")) %>% + dplyr::mutate(pc_area = stringr::str_match(.data$postcode, "^[A-Z]{1,3}")) %>% dplyr::left_join(pc_areas, by = "pc_area") %>% dplyr::mutate( dummy_postcode = .data$postcode %in% c("BF010AA", "NF1 1AB", "NK010AA") | @@ -58,6 +58,7 @@ flag_non_scottish_residents <- function( #' @param data An SLF individual file. #' @param slf_pc_lookup The Source postcode lookup, defaults #' to [get_slf_postcode_path()] read using [read_file()]. +#' @param chi_variable string, claiming chi or anon_chi. #' #' @return The individual file with HRI variables matched on #' @export @@ -88,42 +89,42 @@ add_hri_variables <- function( "ooh_cases" ) %>% flag_non_scottish_residents(slf_pc_lookup = slf_pc_lookup) %>% - dplyr::filter(scottish_resident == 0L) %>% + dplyr::filter(.data$scottish_resident == 0L) %>% # Scotland cost and proportion dplyr::mutate( - scotland_cost = sum(health_net_cost), - scotland_pct = (health_net_cost / scotland_cost) * 100 + scotland_cost = sum(.data$health_net_cost), + scotland_pct = (.data$health_net_cost / .data$scotland_cost) * 100 ) %>% - dplyr::arrange(dplyr::desc(health_net_cost)) %>% - dplyr::mutate(hri_scotp = cumsum(scotland_pct)) %>% + dplyr::arrange(dplyr::desc(.data$health_net_cost)) %>% + dplyr::mutate(hri_scotp = cumsum(.data$scotland_pct)) %>% # Health Board - dplyr::group_by(hbrescode) %>% + dplyr::group_by(.data$hbrescode) %>% dplyr::mutate( - hb_cost = sum(health_net_cost), - hb_pct = (health_net_cost / hb_cost) * 100 + hb_cost = sum(.data$health_net_cost), + hb_pct = (.data$health_net_cost / .data$hb_cost) * 100 ) %>% - dplyr::arrange(dplyr::desc(health_net_cost), .by_group = TRUE) %>% - dplyr::mutate(hri_hbp = cumsum(hb_pct)) %>% + dplyr::arrange(dplyr::desc(.data$health_net_cost), .by_group = TRUE) %>% + dplyr::mutate(hri_hbp = cumsum(.data$hb_pct)) %>% dplyr::ungroup() %>% # LCA - dplyr::group_by(lca) %>% + dplyr::group_by(.data$lca) %>% dplyr::mutate( - lca_cost = sum(health_net_cost), - lca_pct = (health_net_cost / lca_cost) * 100 + lca_cost = sum(.data$health_net_cost), + lca_pct = (.data$health_net_cost / .data$lca_cost) * 100 ) %>% - dplyr::arrange(dplyr::desc(health_net_cost), .by_group = TRUE) %>% - dplyr::mutate(hri_lcap = cumsum(lca_pct)) %>% + dplyr::arrange(dplyr::desc(.data$health_net_cost), .by_group = TRUE) %>% + dplyr::mutate(hri_lcap = cumsum(.data$lca_pct)) %>% dplyr::ungroup() %>% # Add HRI flags dplyr::mutate( - hri_scot = hri_scotp <= 50.0, - hri_hb = hri_hbp <= 50.0, - hri_lca = hri_lcap <= 50.0, + hri_scot = .data$hri_scotp <= 50.0, + hri_hb = .data$hri_hbp <= 50.0, + hri_lca = .data$hri_lcap <= 50.0, # Deal with potential missing variables - hri_hb = dplyr::if_else(is_missing(hbrescode), FALSE, hri_hb), - hri_hbp = dplyr::if_else(is_missing(hbrescode), NA, hri_hbp), - hri_lca = dplyr::if_else(is_missing(lca), FALSE, hri_lca), - hri_lcap = dplyr::if_else(is_missing(lca), NA, hri_lcap) + hri_hb = dplyr::if_else(is_missing(.data$hbrescode), FALSE, .data$hri_hb), + hri_hbp = dplyr::if_else(is_missing(.data$hbrescode), NA, .data$hri_hbp), + hri_lca = dplyr::if_else(is_missing(.data$lca), FALSE, .data$hri_lca), + hri_lcap = dplyr::if_else(is_missing(.data$lca), NA, .data$hri_lcap) ) %>% # Select only required variables for the lookup dplyr::select( diff --git a/R/add_keep_population_flag.R b/R/add_keep_population_flag.R index d418ac18c..6f2470f53 100644 --- a/R/add_keep_population_flag.R +++ b/R/add_keep_population_flag.R @@ -16,27 +16,32 @@ add_keep_population_flag <- function(individual_file, year) { ## Obtain the population estimates for Locality AgeGroup and Gender. pop_estimates <- readr::read_rds(get_pop_path(type = "datazone")) %>% - dplyr::select(year, datazone2011, sex, age0:age90plus) + dplyr::select( + .data$year, + .data$datazone2011, + .data$sex, + .data$age0:.data$age90plus + ) # Step 1: Obtain the population estimates for Locality, AgeGroup, and Gender # Select out the estimates for the year of interest. # if we don't have estimates for this year (and so have to use previous year). year_available <- pop_estimates %>% - dplyr::pull(year) %>% + dplyr::pull(.data$year) %>% unique() if (calendar_year %in% year_available) { pop_estimates <- pop_estimates %>% - dplyr::filter(year == calendar_year) + dplyr::filter(.data$year == calendar_year) } else { previous_year <- sort(year_available, decreasing = TRUE)[1] pop_estimates <- pop_estimates %>% - dplyr::filter(year == previous_year) + dplyr::filter(.data$year == previous_year) } pop_estimates <- pop_estimates %>% # Recode gender to make it match source. - dplyr::mutate(sex = dplyr::if_else(sex == "M", 1, 2)) %>% + dplyr::mutate(sex = dplyr::if_else(.data$sex == "M", 1, 2)) %>% dplyr::rename( "age90" = "age90plus", "gender" = "sex" @@ -47,22 +52,22 @@ add_keep_population_flag <- function(individual_file, year) { values_to = "population_estimate", cols = "age0":"age90" ) %>% - dplyr::mutate(age = as.integer(age)) %>% - add_age_group(age) %>% + dplyr::mutate(age = as.integer(.data$age)) %>% + add_age_group(.data$age) %>% dplyr::left_join( readr::read_rds(get_locality_path()) %>% - dplyr::select("locality" = "hscp_locality", datazone2011), + dplyr::select("locality" = "hscp_locality", .data$datazone2011), by = "datazone2011" ) %>% - dplyr::group_by(locality, age_group, gender) %>% - dplyr::summarize(population_estimate = sum(population_estimate)) %>% + dplyr::group_by(.data$locality, .data$age_group, .data$gender) %>% + dplyr::summarize(population_estimate = sum(.data$population_estimate)) %>% dplyr::ungroup() # Step 2: Work out the current population sizes in the SLF for Locality, AgeGroup, and Gender # Work out the current population sizes in the SLF for Locality AgeGroup and Gender. individual_file <- individual_file %>% - dplyr::mutate(age = as.integer(age)) %>% - add_age_group(age) + dplyr::mutate(age = as.integer(.data$age)) %>% + add_age_group(.data$age) set.seed(100) @@ -71,41 +76,41 @@ add_keep_population_flag <- function(individual_file, year) { # If they don't have a locality, they're no good as we won't have an estimate to match them against. # Same for age and gender. nsu_keep_lookup <- individual_file %>% - dplyr::filter(gender == 1 | gender == 2) %>% - dplyr::filter(!is.na(locality), !is.na(age)) %>% + dplyr::filter(.data$gender == 1 | .data$gender == 2) %>% + dplyr::filter(!is.na(.data$locality), !is.na(.data$age)) %>% dplyr::mutate( # Flag service users who were dead at the mid year date. - flag_to_remove = dplyr::if_else(death_date <= mid_year & nsu == 0, 1, 0), + flag_to_remove = dplyr::if_else(.data$death_date <= mid_year & .data$nsu == 0, 1, 0), # If the death date is missing, keep those people. - flag_to_remove = dplyr::if_else(is.na(death_date), 0, flag_to_remove), + flag_to_remove = dplyr::if_else(is.na(.data$death_date), 0, .data$flag_to_remove), # If they are a non-service-user we want to keep them - flag_to_remove = dplyr::if_else(nsu == 1, 0, flag_to_remove) + flag_to_remove = dplyr::if_else(.data$nsu == 1, 0, .data$flag_to_remove) ) %>% # Remove anyone who was flagged as 1 from above. - dplyr::filter(flag_to_remove == 0) %>% + dplyr::filter(.data$flag_to_remove == 0) %>% # Calculate the populations of the whole SLF and of the NSU. - dplyr::group_by(locality, age_group, gender) %>% + dplyr::group_by(.data$locality, .data$age_group, .data$gender) %>% dplyr::mutate( - nsu_population = sum(nsu), + nsu_population = sum(.data$nsu), total_source_population = dplyr::n() ) %>% - dplyr::filter(nsu == 1) %>% + dplyr::filter(.data$nsu == 1) %>% dplyr::left_join(pop_estimates, by = c("locality", "age_group", "gender") ) %>% dplyr::mutate( - difference = total_source_population - population_estimate, - new_nsu_figure = nsu_population - difference, - scaling_factor = new_nsu_figure / nsu_population, - scaling_factor = dplyr::case_when(scaling_factor < 0 ~ 0, - scaling_factor > 1 ~ 1, - .default = scaling_factor + difference = .data$total_source_population - .data$population_estimate, + new_nsu_figure = .data$nsu_population - .data$difference, + scaling_factor = .data$new_nsu_figure / .data$nsu_population, + scaling_factor = dplyr::case_when(.data$scaling_factor < 0 ~ 0, + .data$scaling_factor > 1 ~ 1, + .default = .data$scaling_factor ), - keep_nsu = rbinom(nsu_population, 1, scaling_factor) + keep_nsu = stats::rbinom(.data$nsu_population, 1, .data$scaling_factor) ) %>% - dplyr::filter(keep_nsu == 1L) %>% + dplyr::filter(.data$keep_nsu == 1L) %>% dplyr::ungroup() %>% - dplyr::select(-flag_to_remove) + dplyr::select(-.data$flag_to_remove) # step 3: match the flag back onto the slf individual_file <- individual_file %>% @@ -113,13 +118,13 @@ add_keep_population_flag <- function(individual_file, year) { by = "chi", suffix = c("", ".y") ) %>% - dplyr::select(-contains(".y")) %>% + dplyr::select(-tidyselect::contains(".y")) %>% dplyr::rename("keep_population" = "keep_nsu") %>% dplyr::mutate( # Flag all non-NSUs as Keep. - keep_population = dplyr::if_else(nsu == 0, 1, keep_population), + keep_population = dplyr::if_else(.data$nsu == 0, 1, .data$keep_population), # If the flag is missing they must be a non-keep NSU so set to 0. - keep_population = dplyr::if_else(is.na(keep_population), 0, keep_population), + keep_population = dplyr::if_else(is.na(.data$keep_population), 0, .data$keep_population), ) %>% dplyr::select( -c( diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R index d207b221a..ff5cbaad1 100644 --- a/R/aggregate_by_chi.R +++ b/R/aggregate_by_chi.R @@ -5,11 +5,21 @@ #' #' @importFrom data.table .N #' @importFrom data.table .SD +#' @param year financial year, string, eg "1920" +#' @param exclude_sc_var Boolean, whether exclude social care variables #' #' @inheritParams create_individual_file aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") + # recommended by `data.table` team to tackle the issue + # "no visible binding for global variable" + gender <- + chi <- + cij_ppa <- + cij_end_date <- cij_start_date <- preventable_beddays <- NULL + + # Convert to data.table data.table::setDT(episode_file) @@ -134,7 +144,7 @@ aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) { data.table::setnafill(episode_file, fill = 0L, cols = cols5) # compute individual_file_cols1 <- episode_file[, - .(gender = mean(gender)), + list(gender = mean(gender)), by = "chi" ] individual_file_cols2 <- episode_file[, @@ -162,7 +172,7 @@ aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) { by = "chi" ] individual_file_cols6 <- episode_file[, - .( + list( preventable_beddays = ifelse( any(cij_ppa, na.rm = TRUE), as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))), @@ -173,7 +183,7 @@ aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) { by = c("chi", "cij_total") ] individual_file_cols6 <- individual_file_cols6[, - .( + list( preventable_beddays = sum(preventable_beddays, na.rm = TRUE) ), by = "chi" @@ -238,6 +248,11 @@ vars_contain <- function(data, vars, ignore_case = FALSE) { aggregate_ch_episodes <- function(episode_file) { cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}") + # recommended by `data.table` team to tackle the issue + # "no visible binding for global variable" + ch_no_cost <- + record_keydate1 <- ch_ep_end <- ch_cost_per_day <- anon_chi <- NULL + # Convert to data.table data.table::setDT(episode_file) diff --git a/R/create_demog_test_flags.R b/R/create_demog_test_flags.R index b909679d9..7b9ca36bc 100644 --- a/R/create_demog_test_flags.R +++ b/R/create_demog_test_flags.R @@ -10,11 +10,13 @@ #' #' @family flag functions create_demog_test_flags <- function(data, chi = c(chi, anon_chi)) { - data %>% + anon_chi <- NULL + data <- data %>% dplyr::arrange({{ chi }}) %>% # create test flags dplyr::mutate( unique_chi = dplyr::lag({{ chi }}) != {{ chi }}, + # first value of unique_chi is always NA because of lag() n_missing_chi = is_missing({{ chi }}), n_males = .data$gender == 1L, n_females = .data$gender == 2L, @@ -22,4 +24,7 @@ create_demog_test_flags <- function(data, chi = c(chi, anon_chi)) { n_missing_postcode = is_missing(.data$postcode), missing_dob = is.na(.data$dob) ) + # fix first value always NA, and it should always be TRUE + data[1, "unique_chi"] <- TRUE + return(data) } diff --git a/R/create_episode_file.R b/R/create_episode_file.R index a9503e83c..f6443291f 100644 --- a/R/create_episode_file.R +++ b/R/create_episode_file.R @@ -2,16 +2,18 @@ #' #' @param processed_data_list containing data from processed extracts. #' @param year The year to process, in FY format. +#' @param homelessness_lookup the lookup file for homelessness +#' @param sc_client scoial care lookup file #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. +#' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output +#' (instead of chi) #' @inheritParams add_nsu_cohort #' @inheritParams fill_geographies #' @inheritParams join_cohort_lookups #' @inheritParams join_deaths_data #' @inheritParams match_on_ltcs #' @inheritParams link_delayed_discharge_eps -#' @param anon_chi_out (Default:TRUE) Should `anon_chi` be used in the output -#' (instead of chi) #' #' @return a [tibble][tibble::tibble-package] containing the episode file #' @export @@ -430,7 +432,7 @@ join_cohort_lookups <- function( #' #' @description Match on sc client variables. #' -#' @param individual_file the processed individual file +#' @param data the processed individual file #' @param year financial year. #' @param sc_client SC client lookup #' @param file_type episode or individual file diff --git a/R/create_individual_file.R b/R/create_individual_file.R index e5b0fd2fd..c98531310 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -3,6 +3,7 @@ #' @description Creates the individual file from the episode file. #' #' @param episode_file Tibble containing episodic data. +#' @param homelessness_lookup the lookup file for homelessness #' @param anon_chi_in (Default:TRUE) Is `anon_chi` used in the input #' (instead of chi). #' @inheritParams create_episode_file diff --git a/R/fix_sc_dates.R b/R/fix_sc_dates.R index 117acbaab..11cb4b764 100644 --- a/R/fix_sc_dates.R +++ b/R/fix_sc_dates.R @@ -4,7 +4,7 @@ #' Set this to the start of the fyear #' #' @param start_date A vector containing dates. -#' @param period Social care latest submission period. +#' @param period_start the beginning date of Social care latest submission period. #' #' @return A date vector with replaced end dates fix_sc_start_dates <- function(start_date, period_start) { @@ -27,7 +27,7 @@ fix_sc_start_dates <- function(start_date, period_start) { #' #' @param start_date A vector containing dates. #' @param end_date A vector containing dates. -#' @param period Social care latest submission period. +#' @param period_end_date the last date of Social care latest submission period. #' #' @return A date vector with replaced end dates fix_sc_end_dates <- function(start_date, end_date, period_end_date) { @@ -45,14 +45,13 @@ fix_sc_end_dates <- function(start_date, end_date, period_end_date) { -#' Fix sc end dates +#' Fix sc missing end dates #' #' @description Fix social care end dates when the end date is earlier than the #' start date. Set this to the end of the fyear #' -#' @param start_date A vector containing dates. #' @param end_date A vector containing dates. -#' @param period Social care latest submission period. +#' @param period_end the last date of Social care latest submission period. #' #' @return A date vector with replaced end dates fix_sc_missing_end_dates <- function(end_date, period_end) { diff --git a/R/get_lookup_paths.R b/R/get_lookup_paths.R index 7df5c52e2..dad2d1afa 100644 --- a/R/get_lookup_paths.R +++ b/R/get_lookup_paths.R @@ -100,6 +100,7 @@ get_simd_path <- function(file_name = NULL, ext = "parquet") { #' @description Get the path to the populations estimates #' #' @inheritParams get_file_path +#' @param type population type datazone, or hscp, or ca, or hb, or interzone #' #' @return An [fs::path()] to the populations estimates file #' @export diff --git a/R/get_sandpit_extract_path.R b/R/get_sandpit_extract_path.R index 9d8089122..170de7537 100644 --- a/R/get_sandpit_extract_path.R +++ b/R/get_sandpit_extract_path.R @@ -2,8 +2,10 @@ #' #' @description Get the file path for sandpit extracts #' +#' @param year financial year in string class #' @param update The update month to use, #' defaults to [latest_update()] +#' @param type sandpit extract type at, ch, hc, sds, client, or demographics #' #' @param ... additional arguments passed to [get_file_path()] #' diff --git a/R/link_delayed_discharge_eps.R b/R/link_delayed_discharge_eps.R index b4c3b2f5b..0d3030a1b 100644 --- a/R/link_delayed_discharge_eps.R +++ b/R/link_delayed_discharge_eps.R @@ -48,6 +48,8 @@ link_delayed_discharge_eps <- function( dummy_id = dplyr::row_number() ) + # fix the issue "no visible binding for global variable x, y" + x <- y <- NULL by_dd <- dplyr::join_by( "chi", x$record_keydate1 >= y$dummy_cij_start, diff --git a/R/process_extract_care_home.R b/R/process_extract_care_home.R index 8675bf0c6..06305ec0f 100644 --- a/R/process_extract_care_home.R +++ b/R/process_extract_care_home.R @@ -7,8 +7,6 @@ #' @param data The full processed data which will be selected from to create #' the year specific data. #' @param year The year to process, in FY format. -#' @param client_lookup The Social Care Client lookup, created by -#' [process_lookup_sc_client()]. #' @param ch_costs The Care Home costs lookup #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. diff --git a/R/process_extract_homelessness.R b/R/process_extract_homelessness.R index 04d7082e7..59541cf4e 100644 --- a/R/process_extract_homelessness.R +++ b/R/process_extract_homelessness.R @@ -9,6 +9,7 @@ #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. #' @param update The update to use (default is [latest_update()]). +#' @param la_code_lookup get local authority using opendata. #' @param sg_pub_path The path to the SG pub figures (default is #' [get_sg_homelessness_pub_path()]). #' @@ -100,34 +101,34 @@ process_extract_homelessness <- function( ) ) ) %>% - dplyr::mutate(property_type_code = as.character(property_type_code)) %>% + dplyr::mutate(property_type_code = as.character(.data$property_type_code)) %>% dplyr::mutate( property_type_code = dplyr::case_when( - property_type_code == "1" ~ "1 - Own Property - LA Tenancy", - property_type_code == "2" ~ "2 - Own Property - RSL Tenancy", - property_type_code == "3" ~ "3 - Own Property - private rented tenancy", - property_type_code == "4" ~ "4 - Own Property - tenancy secured through employment/tied house", - property_type_code == "5" ~ "5 - Own Property - owning/buying", - property_type_code == "6" ~ "6 - Parental / family home / relatives", - property_type_code == "7" ~ " 7 - Friends / partners", - property_type_code == "8" ~ "8 - Armed Services Accommodation", - property_type_code == "9" ~ "9 - Prison", - property_type_code == "10" ~ "10 - Hospital", - property_type_code == "11" ~ "11 - Children's residential accommodation (looked after by the local authority)", - property_type_code == "12" ~ "12 - Supported accommodation", - property_type_code == "13" ~ "13 - Hostel (unsupported)", - property_type_code == "14" ~ "14 - Bed & Breakfast", - property_type_code == "15" ~ "15 - Caravan / mobile home", - property_type_code == "16" ~ "16 - Long-term roofless", - property_type_code == "17" ~ "17 - Long-term sofa surfing", - property_type_code == "18" ~ "18 - Other", - property_type_code == "19" ~ "19 - Not known / refused", - property_type_code == "20" ~ "20 - Own property - Shared ownership/Shared equity/ LCHO", - property_type_code == "21" ~ "21 - Lodger", - property_type_code == "22" ~ "22 - Shared Property - Private Rented Sector", - property_type_code == "23" ~ "23 - Shared Property - Local Authority", - property_type_code == "24" ~ "24 - Shared Property - RSL", - TRUE ~ property_type_code + .data$property_type_code == "1" ~ "1 - Own Property - LA Tenancy", + .data$property_type_code == "2" ~ "2 - Own Property - RSL Tenancy", + .data$property_type_code == "3" ~ "3 - Own Property - private rented tenancy", + .data$property_type_code == "4" ~ "4 - Own Property - tenancy secured through employment/tied house", + .data$property_type_code == "5" ~ "5 - Own Property - owning/buying", + .data$property_type_code == "6" ~ "6 - Parental / family home / relatives", + .data$property_type_code == "7" ~ " 7 - Friends / partners", + .data$property_type_code == "8" ~ "8 - Armed Services Accommodation", + .data$property_type_code == "9" ~ "9 - Prison", + .data$property_type_code == "10" ~ "10 - Hospital", + .data$property_type_code == "11" ~ "11 - Children's residential accommodation (looked after by the local authority)", + .data$property_type_code == "12" ~ "12 - Supported accommodation", + .data$property_type_code == "13" ~ "13 - Hostel (unsupported)", + .data$property_type_code == "14" ~ "14 - Bed & Breakfast", + .data$property_type_code == "15" ~ "15 - Caravan / mobile home", + .data$property_type_code == "16" ~ "16 - Long-term roofless", + .data$property_type_code == "17" ~ "17 - Long-term sofa surfing", + .data$property_type_code == "18" ~ "18 - Other", + .data$property_type_code == "19" ~ "19 - Not known / refused", + .data$property_type_code == "20" ~ "20 - Own property - Shared ownership/Shared equity/ LCHO", + .data$property_type_code == "21" ~ "21 - Lodger", + .data$property_type_code == "22" ~ "22 - Shared Property - Private Rented Sector", + .data$property_type_code == "23" ~ "23 - Shared Property - Local Authority", + .data$property_type_code == "24" ~ "24 - Shared Property - RSL", + TRUE ~ .data$property_type_code ) ) %>% dplyr::left_join( diff --git a/R/process_lookup_sc_client.R b/R/process_lookup_sc_client.R index e64d4b6ba..f27229e6d 100644 --- a/R/process_lookup_sc_client.R +++ b/R/process_lookup_sc_client.R @@ -6,6 +6,7 @@ #' #' @param data The extract to process #' @param year The year to process +#' @param sc_demographics social care demographics file #' @param write_to_disk (optional) Should the data be written to disk default is #' `TRUE` i.e. write the data to disk. #' @@ -138,19 +139,25 @@ process_lookup_sc_client <- dplyr::left_join( sc_demographics, by = c("sending_location", "social_care_id") - ) %>% - dplyr::mutate(count_not_known = rowSums(dplyr::select(., all_of( - c( - "sc_living_alone", - "sc_support_from_unpaid_carer", - "sc_social_worker", - "sc_meals", - "sc_day_care" + ) + sc_client_lookup <- + dplyr::mutate(sc_client_lookup, + count_not_known = rowSums( + dplyr::select(sc_client_lookup, tidyr::all_of( + c( + "sc_living_alone", + "sc_support_from_unpaid_carer", + "sc_social_worker", + "sc_meals", + "sc_day_care" + ) + )) == "Not Known", + na.rm = TRUE ) - )) == "Not Known")) %>% - dplyr::arrange(chi, count_not_known) %>% - dplyr::distinct(chi, .keep_all = TRUE) %>% - dplyr::select(-sending_location) + ) %>% + dplyr::arrange(.data$chi, .data$count_not_known) %>% + dplyr::distinct(.data$chi, .keep_all = TRUE) %>% + dplyr::select(-.data$sending_location) if (write_to_disk) { write_file( diff --git a/R/process_lookup_sc_demographics.R b/R/process_lookup_sc_demographics.R index 96adc985e..77418aeba 100644 --- a/R/process_lookup_sc_demographics.R +++ b/R/process_lookup_sc_demographics.R @@ -31,19 +31,19 @@ process_lookup_sc_demographics <- function( # Fill in missing data and flag latest cases to keep --------------------------------------- sc_demog <- data %>% dplyr::rename( - chi = chi_upi, - gender = chi_gender_code, - dob = chi_date_of_birth + chi = .data$chi_upi, + gender = .data$chi_gender_code, + dob = .data$chi_date_of_birth ) %>% # fill in missing demographic details - dplyr::arrange(period, social_care_id) %>% - dplyr::group_by(social_care_id, sending_location) %>% - tidyr::fill(chi, .direction = ("updown")) %>% - tidyr::fill(dob, .direction = ("updown")) %>% - tidyr::fill(date_of_death, .direction = ("updown")) %>% - tidyr::fill(gender, .direction = ("updown")) %>% - tidyr::fill(chi_postcode, .direction = ("updown")) %>% - tidyr::fill(submitted_postcode, .direction = ("updown")) %>% + dplyr::arrange(.data$period, .data$social_care_id) %>% + dplyr::group_by(.data$social_care_id, .data$sending_location) %>% + tidyr::fill(.data$chi, .direction = ("updown")) %>% + tidyr::fill(.data$dob, .direction = ("updown")) %>% + tidyr::fill(.data$date_of_death, .direction = ("updown")) %>% + tidyr::fill(.data$gender, .direction = ("updown")) %>% + tidyr::fill(.data$chi_postcode, .direction = ("updown")) %>% + tidyr::fill(.data$submitted_postcode, .direction = ("updown")) %>% dplyr::ungroup() %>% # format postcodes using `phsmethods` dplyr::mutate(dplyr::across(tidyselect::contains("postcode"), ~ phsmethods::format_postcode(.x, format = "pc7"))) # are sc postcodes even used anywhere? @@ -51,20 +51,20 @@ process_lookup_sc_demographics <- function( # flag unique cases of chi and sc_id, and flag the latest record (sc_demographics latest flag is not accurate) sc_demog <- sc_demog %>% - dplyr::group_by(chi, sending_location) %>% - dplyr::mutate(latest = dplyr::last(period)) %>% # flag latest period for chi - dplyr::group_by(chi, social_care_id, sending_location) %>% - dplyr::mutate(latest_sc_id = dplyr::last(period)) %>% # flag latest period for social care - dplyr::group_by(chi, sending_location) %>% - dplyr::mutate(last_sc_id = dplyr::last(social_care_id)) %>% + dplyr::group_by(.data$chi, .data$sending_location) %>% + dplyr::mutate(latest = dplyr::last(.data$period)) %>% # flag latest period for chi + dplyr::group_by(.data$chi, .data$social_care_id, .data$sending_location) %>% + dplyr::mutate(latest_sc_id = dplyr::last(.data$period)) %>% # flag latest period for social care + dplyr::group_by(.data$chi, .data$sending_location) %>% + dplyr::mutate(last_sc_id = dplyr::last(.data$social_care_id)) %>% dplyr::mutate( - latest_flag = ifelse((latest == period & last_sc_id == social_care_id) | is.na(chi), 1, 0), - keep = ifelse(latest_sc_id == period, 1, 0) + latest_flag = ifelse((.data$latest == .data$period & .data$last_sc_id == .data$social_care_id) | is.na(.data$chi), 1, 0), + keep = ifelse(.data$latest_sc_id == .data$period, 1, 0) ) %>% dplyr::ungroup() sc_demog <- sc_demog %>% - dplyr::select(-period, -latest_record_flag, -latest, -last_sc_id, -latest_sc_id) %>% + dplyr::select(-.data$period, -.data$latest_record_flag, -.data$latest, -.data$last_sc_id, -.data$latest_sc_id) %>% dplyr::distinct() # postcodes --------------------------------------------------------------- @@ -108,8 +108,8 @@ process_lookup_sc_demographics <- function( (is.na(.data$submitted_postcode) & !.data$valid_pc_submitted) ~ .data$chi_postcode )) %>% dplyr::mutate(postcode_type = dplyr::case_when( - (postcode == chi_postcode) ~ "chi", - (postcode == submitted_postcode) ~ "submitted", + (.data$postcode == .data$chi_postcode) ~ "chi", + (.data$postcode == .data$submitted_postcode) ~ "submitted", (is.na(.data$submitted_postcode) & is.na(.data$chi_postcode) | is.na(.data$postcode)) ~ "missing" )) @@ -122,8 +122,8 @@ process_lookup_sc_demographics <- function( dplyr::count(dplyr::across(tidyselect::ends_with("_postcode"), ~ is.na(.x))) sc_demog_lookup <- sc_demog %>% - dplyr::filter(keep == 1) %>% # filter to only keep latest record for sc id and chi - dplyr::select(-postcode_type, -valid_pc_submitted, -valid_pc_chi, -submitted_postcode, -chi_postcode) %>% + dplyr::filter(.data$keep == 1) %>% # filter to only keep latest record for sc id and chi + dplyr::select(-.data$postcode_type, -.data$valid_pc_submitted, -.data$valid_pc_chi, -.data$submitted_postcode, -.data$chi_postcode) %>% dplyr::distinct() %>% # group by sending location and ID dplyr::group_by(.data$sending_location, .data$chi, .data$social_care_id, .data$latest_flag) %>% diff --git a/R/process_sc_all_alarms_telecare.R b/R/process_sc_all_alarms_telecare.R index 77877d584..a861fd92c 100644 --- a/R/process_sc_all_alarms_telecare.R +++ b/R/process_sc_all_alarms_telecare.R @@ -17,6 +17,13 @@ process_sc_all_alarms_telecare <- function( write_to_disk = TRUE) { # Data Cleaning----------------------------------------------------- + # fix "no visible binding for global variable" + service_end_date <- period_end_date <- service_start_date <- service_type <- + default <- sending_location <- social_care_id <- pkg_count <- + record_keydate1 <- smrtype <- period <- record_keydate2 <- chi <- + gender <- dob <- postcode <- recid <- person_id <- sc_send_lca <- + period_start_date <- NULL + # Convert to data.table data.table::setDT(data) data.table::setDT(sc_demog_lookup) @@ -75,7 +82,7 @@ process_sc_all_alarms_telecare <- function( ] # RIGHT_JOIN with sc_demog_lookup - data <- data[sc_demog_lookup, on = .(sending_location, social_care_id)] + data <- data[sc_demog_lookup, on = list(sending_location, social_care_id)] # Replace social_care_id with latest if needed (assuming replace_sc_id_with_latest is a custom function) data <- replace_sc_id_with_latest(data) @@ -87,7 +94,7 @@ process_sc_all_alarms_telecare <- function( ) # Deal with episodes that have a package across quarters - data[, pkg_count := seq_len(.N), by = .( + data[, pkg_count := seq_len(.N), by = list( sending_location, social_care_id, record_keydate1, @@ -110,7 +117,7 @@ process_sc_all_alarms_telecare <- function( data.table::as.data.table() # Summarize to merge episodes - qtr_merge <- data[, .( + qtr_merge <- data[, list( sc_latest_submission = data.table::last(period), record_keydate2 = data.table::last(record_keydate2), chi = data.table::last(chi), @@ -120,7 +127,7 @@ process_sc_all_alarms_telecare <- function( recid = data.table::last(recid), person_id = data.table::last(person_id), sc_send_lca = data.table::last(sc_send_lca) - ), by = .( + ), by = list( sending_location, social_care_id, record_keydate1, diff --git a/R/process_sc_all_sds.R b/R/process_sc_all_sds.R index a1a1db24a..79fa24d1a 100644 --- a/R/process_sc_all_sds.R +++ b/R/process_sc_all_sds.R @@ -24,13 +24,13 @@ process_sc_all_sds <- function( # when multiple social_care_id from sending_location for single CHI # replace social_care_id with latest replace_sc_id_with_latest() %>% - dplyr::select(-sds_start_date_after_period_end_date) %>% + dplyr::select(-.data$sds_start_date_after_period_end_date) %>% dplyr::distinct() %>% # sds_options may contain only a few NA, replace NA by 0 dplyr::mutate( - sds_option_1 = tidyr::replace_na(sds_option_1, 0), - sds_option_2 = tidyr::replace_na(sds_option_2, 0), - sds_option_3 = tidyr::replace_na(sds_option_3, 0) + sds_option_1 = tidyr::replace_na(.data$sds_option_1, 0), + sds_option_2 = tidyr::replace_na(.data$sds_option_2, 0), + sds_option_3 = tidyr::replace_na(.data$sds_option_3, 0) ) # Data Cleaning --------------------------------------- @@ -38,6 +38,14 @@ process_sc_all_sds <- function( sds_full_clean <- data.table::as.data.table(matched_sds_data) rm(matched_sds_data) + # fix "no visible binding for global variable" + sds_option_4 <- sds_start_date <- sds_period_start_date <- sds_end_date <- + sds_period_end_date <- received <- sds_option <- sending_location <- + period <- record_keydate1 <- record_keydate2 <- social_care_id <- + smrtype <- period_rank <- record_keydate1_rank <- record_keydate2_rank <- + distinct_episode <- episode_counter <- chi <- gender <- dob <- postcode <- + recid <- person_id <- sc_send_lca <- NULL + # Deal with SDS option 4 # Convert option flags into logical T/F cols_sds_option <- grep( @@ -137,7 +145,7 @@ process_sc_all_sds <- function( rank(record_keydate1), rank(record_keydate2) ), - by = .(sending_location, social_care_id, smrtype) + by = list(sending_location, social_care_id, smrtype) ] data.table::setorder( sds_full_clean_long, @@ -150,16 +158,16 @@ process_sc_all_sds <- function( distinct_episode := (data.table::shift(record_keydate2, type = "lag") < record_keydate1) %>% tidyr::replace_na(TRUE), - by = .(sending_location, social_care_id, smrtype) + by = list(sending_location, social_care_id, smrtype) ] sds_full_clean_long[, episode_counter := cumsum(distinct_episode), - by = .(sending_location, social_care_id, smrtype) + by = list(sending_location, social_care_id, smrtype) ] # Merge episodes by episode counter - final_data <- sds_full_clean_long[, .( + final_data <- sds_full_clean_long[, list( sc_latest_submission = data.table::last(period), record_keydate1 = min(record_keydate1), record_keydate2 = max(record_keydate2), @@ -170,7 +178,7 @@ process_sc_all_sds <- function( recid = data.table::last(recid), person_id = data.table::last(person_id), sc_send_lca = data.table::last(sc_send_lca) - ), by = .(sending_location, social_care_id, smrtype, episode_counter)] + ), by = list(sending_location, social_care_id, smrtype, episode_counter)] rm(sds_full_clean_long) # Drop episode_counter and convert back to data.frame if needed diff --git a/R/process_tests_alarms_telecare.R b/R/process_tests_alarms_telecare.R index 52daef496..0b2524d55 100644 --- a/R/process_tests_alarms_telecare.R +++ b/R/process_tests_alarms_telecare.R @@ -37,7 +37,7 @@ produce_source_at_tests <- function(data, max_min_vars = c("record_keydate1", "record_keydate2")) { test_flags <- data %>% # create test flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% dplyr::mutate( n_at_alarms = .data$smrtype == "AT-Alarm", n_at_telecare = .data$smrtype == "AT-Tele" diff --git a/R/process_tests_care_home.R b/R/process_tests_care_home.R index 21ef3e5c9..f75908d69 100644 --- a/R/process_tests_care_home.R +++ b/R/process_tests_care_home.R @@ -47,7 +47,7 @@ produce_source_ch_tests <- function(data, )) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% dplyr::mutate( n_episodes = 1L, ch_name_missing = is.na(.data$ch_name), diff --git a/R/process_tests_cmh.R b/R/process_tests_cmh.R index dde710c00..4d246399f 100644 --- a/R/process_tests_cmh.R +++ b/R/process_tests_cmh.R @@ -43,7 +43,7 @@ process_tests_cmh <- function(data, year) { produce_source_cmh_tests <- function(data) { test_flags <- data %>% # create test flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% create_hb_test_flags(hb_var = .data$hbrescode) %>% dplyr::mutate(n_episodes = 1L) %>% # keep variables for comparison diff --git a/R/process_tests_district_nursing.R b/R/process_tests_district_nursing.R index b354fde09..6c890cfb4 100644 --- a/R/process_tests_district_nursing.R +++ b/R/process_tests_district_nursing.R @@ -58,7 +58,7 @@ produce_source_dn_tests <- function(data, )) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% create_hb_test_flags(.data$hbtreatcode) %>% create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>% # keep variables for comparison diff --git a/R/process_tests_episode_file.R b/R/process_tests_episode_file.R index 6f2c73fcb..f70324f95 100644 --- a/R/process_tests_episode_file.R +++ b/R/process_tests_episode_file.R @@ -73,7 +73,7 @@ produce_episode_file_tests <- function( test_flags <- data %>% dplyr::group_by(.data$recid) %>% # use functions to create HB and partnership flags - create_demog_test_flags(chi = anon_chi) %>% + create_demog_test_flags(chi = .data$anon_chi) %>% create_hb_test_flags(.data$hbtreatcode) %>% create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) %>% create_hscp_test_flags(.data$hscp2018) %>% diff --git a/R/process_tests_home_care.R b/R/process_tests_home_care.R index 3ac8329e6..c06d9832f 100644 --- a/R/process_tests_home_care.R +++ b/R/process_tests_home_care.R @@ -49,7 +49,7 @@ produce_source_hc_tests <- function(data, )) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% dplyr::mutate( n_episodes = 1L, hc_per = dplyr::if_else(.data$smrtype == "HC-Per", 1L, 0L), diff --git a/R/process_tests_homelessness.R b/R/process_tests_homelessness.R index e4078d227..db409ac66 100644 --- a/R/process_tests_homelessness.R +++ b/R/process_tests_homelessness.R @@ -38,7 +38,7 @@ produce_slf_homelessness_tests <- function(data, test_flags <- data %>% dplyr::arrange(.data$chi) %>% # create test flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% create_lca_test_flags(.data$hl1_sending_lca) %>% # keep variables for comparison dplyr::select("unique_chi":dplyr::last_col()) %>% diff --git a/R/process_tests_individual_file.R b/R/process_tests_individual_file.R index 3770d6d26..9643a4f3f 100644 --- a/R/process_tests_individual_file.R +++ b/R/process_tests_individual_file.R @@ -60,7 +60,7 @@ produce_individual_file_tests <- function(data) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags(chi = anon_chi) %>% + create_demog_test_flags(chi = .data$anon_chi) %>% create_hb_test_flags(.data$hbrescode) %>% create_hb_cost_test_flags(.data$hbrescode, .data$health_net_cost) %>% # keep variables for comparison @@ -77,7 +77,7 @@ produce_individual_file_tests <- function(data) { "attendances", "admissions", "cases", - "consulations" + "consultations" ), measure = "all" ) diff --git a/R/process_tests_nrs_deaths.R b/R/process_tests_nrs_deaths.R index d87fbde7b..f3d47b18d 100644 --- a/R/process_tests_nrs_deaths.R +++ b/R/process_tests_nrs_deaths.R @@ -38,7 +38,7 @@ process_tests_nrs_deaths <- function(data, year) { produce_source_nrs_tests <- function(data) { test_flags <- data %>% # create test flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% dplyr::mutate(n_deaths = 1L) %>% # keep variables for comparison dplyr::select("unique_chi":dplyr::last_col()) %>% diff --git a/R/process_tests_prescribing.R b/R/process_tests_prescribing.R index 3ad838255..415b896f4 100644 --- a/R/process_tests_prescribing.R +++ b/R/process_tests_prescribing.R @@ -41,7 +41,7 @@ process_tests_prescribing <- function(data, year) { produce_source_pis_tests <- function(data) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% dplyr::mutate(n_episodes = 1L) %>% # keep variables for comparison dplyr::select("unique_chi":dplyr::last_col()) %>% diff --git a/R/process_tests_sc_client_lookup.R b/R/process_tests_sc_client_lookup.R index 0e4e0cef9..0bd0a7bad 100644 --- a/R/process_tests_sc_client_lookup.R +++ b/R/process_tests_sc_client_lookup.R @@ -28,15 +28,15 @@ process_tests_sc_client_lookup <- function(data, year) { #' #' @param data new or old data for testing summary flags #' (data is from [get_source_extract_path()]) -#' @param max_min_vars variables used when selecting 'min-max' from [calculate_measures()] #' @return a dataframe with a count of each flag. #' #' @family social care test functions produce_tests_sc_client_lookup <- function(data) { test_flags <- data %>% # create test flags - create_sending_location_test_flags(.data$sc_send_lca) %>% - dplyr::arrange(.data$sc_send_lca, .data$social_care_id) %>% + # create_sending_location_test_flags(.data$sc_send_lca) %>% + # dplyr::arrange(.data$sc_send_lca, .data$social_care_id) %>% + dplyr::arrange(.data$social_care_id) %>% dplyr::mutate( unique_sc_id = dplyr::lag(.data$social_care_id) != .data$social_care_id, n_sc_living_alone_yes = .data$sc_living_alone == "Yes", @@ -56,7 +56,8 @@ produce_tests_sc_client_lookup <- function(data) { n_sc_day_care_not_known = .data$sc_day_care == "Not Known", ) %>% # remove variables that won't be summed - dplyr::select("Aberdeen_City":dplyr::last_col()) %>% + # dplyr::select("Aberdeen_City":dplyr::last_col()) %>% + dplyr::select("unique_sc_id":dplyr::last_col()) %>% # use function to sum new test flags calculate_measures(measure = "sum") diff --git a/R/process_tests_sc_demographics.R b/R/process_tests_sc_demographics.R index b503969ef..6150a4e62 100644 --- a/R/process_tests_sc_demographics.R +++ b/R/process_tests_sc_demographics.R @@ -36,7 +36,7 @@ process_tests_sc_demographics <- function(data) { produce_sc_demog_lookup_tests <- function(data) { data %>% # create test flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% dplyr::mutate( n_missing_sending_loc = is.na(.data$sending_location), n_missing_sc_id = is.na(.data$social_care_id) diff --git a/R/process_tests_sds.R b/R/process_tests_sds.R index c972a3a6f..437b137f9 100644 --- a/R/process_tests_sds.R +++ b/R/process_tests_sds.R @@ -35,7 +35,7 @@ produce_source_sds_tests <- function(data, max_min_vars = c("record_keydate1", "record_keydate2")) { test_flags <- data %>% # create test flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% create_lca_test_flags(.data$sc_send_lca) %>% # remove variables that won't be summed dplyr::select("unique_chi":"West_Lothian") %>% diff --git a/R/produce_sc_all_episodes_tests.R b/R/produce_sc_all_episodes_tests.R index 4c5f736bb..c2720a928 100644 --- a/R/produce_sc_all_episodes_tests.R +++ b/R/produce_sc_all_episodes_tests.R @@ -10,7 +10,7 @@ produce_sc_all_episodes_tests <- function(data) { data %>% # create test flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% dplyr::mutate( n_missing_sending_loc = dplyr::if_else( is.na(.data$sending_location), diff --git a/R/produce_source_extract_tests.R b/R/produce_source_extract_tests.R index 13b33d549..e2431d4bb 100644 --- a/R/produce_source_extract_tests.R +++ b/R/produce_source_extract_tests.R @@ -33,7 +33,7 @@ produce_source_extract_tests <- function(data, add_hscp_count = TRUE) { test_flags <- data %>% # use functions to create HB and partnership flags - create_demog_test_flags(chi = chi) %>% + create_demog_test_flags(chi = .data$chi) %>% create_hb_test_flags(.data$hbtreatcode) %>% create_hb_cost_test_flags(.data$hbtreatcode, .data$cost_total_net) diff --git a/R/read_lookup_sc_demographics.R b/R/read_lookup_sc_demographics.R index 020542baa..cb3cea3c2 100644 --- a/R/read_lookup_sc_demographics.R +++ b/R/read_lookup_sc_demographics.R @@ -1,13 +1,13 @@ #' Read SC demographics #' -#' @param sc_connection Connection to the sc platform +#' @param sc_dvprod_connection Connection to the sc platform #' #' @return a [tibble][tibble::tibble-package] #' @export #' -read_lookup_sc_demographics <- function(sc_connection = phs_db_connection(dsn = "DVPROD")) { +read_lookup_sc_demographics <- function(sc_dvprod_connection = phs_db_connection(dsn = "DVPROD")) { sc_demog <- dplyr::tbl( - sc_connection, + sc_dvprod_connection, dbplyr::in_schema("social_care_2", "demographic_snapshot") ) %>% dplyr::select( diff --git a/R/replace_sc_id_with_latest.R b/R/replace_sc_id_with_latest.R index 2c32bbb93..8e815d46b 100644 --- a/R/replace_sc_id_with_latest.R +++ b/R/replace_sc_id_with_latest.R @@ -19,7 +19,7 @@ replace_sc_id_with_latest <- function(data) { dplyr::distinct() change_sc_id <- filter_data %>% - dplyr::filter(latest_flag == 1) %>% + dplyr::filter(.data$latest_flag == 1) %>% # Rename for latest sc id dplyr::rename(latest_sc_id = "social_care_id") %>% # drop latest_flag for matching @@ -31,7 +31,7 @@ replace_sc_id_with_latest <- function(data) { by = c("sending_location", "chi"), multiple = "all" ) %>% - dplyr::filter(!(is.na(period))) %>% + dplyr::filter(!(is.na(.data$period))) %>% # Overwrite sc id with the latest dplyr::mutate( social_care_id = dplyr::if_else( diff --git a/man/add_hri_variables.Rd b/man/add_hri_variables.Rd index 131a00f6b..9e0d92b45 100644 --- a/man/add_hri_variables.Rd +++ b/man/add_hri_variables.Rd @@ -13,6 +13,8 @@ add_hri_variables( \arguments{ \item{data}{An SLF individual file.} +\item{chi_variable}{string, claiming chi or anon_chi.} + \item{slf_pc_lookup}{The Source postcode lookup, defaults to \code{\link[=get_slf_postcode_path]{get_slf_postcode_path()}} read using \code{\link[=read_file]{read_file()}}.} } diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd index 16bf7d792..94b1eef0b 100644 --- a/man/aggregate_by_chi.Rd +++ b/man/aggregate_by_chi.Rd @@ -9,7 +9,9 @@ aggregate_by_chi(episode_file, year, exclude_sc_var = FALSE) \arguments{ \item{episode_file}{Tibble containing episodic data.} -\item{year}{The year to process, in FY format.} +\item{year}{financial year, string, eg "1920"} + +\item{exclude_sc_var}{Boolean, whether exclude social care variables} } \description{ Aggregate episode file by CHI to convert into diff --git a/man/create_episode_file.Rd b/man/create_episode_file.Rd index a45209918..1b4c54186 100644 --- a/man/create_episode_file.Rd +++ b/man/create_episode_file.Rd @@ -27,6 +27,8 @@ create_episode_file( \item{dd_data}{The processed DD extract} +\item{homelessness_lookup}{the lookup file for homelessness} + \item{nsu_cohort}{The NSU data for the year} \item{ltc_data}{The LTC data for the year} @@ -37,6 +39,8 @@ create_episode_file( \item{slf_deaths_lookup}{The SLF deaths lookup.} +\item{sc_client}{scoial care lookup file} + \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. write the data to disk.} diff --git a/man/create_individual_file.Rd b/man/create_individual_file.Rd index e8c46ad47..bfa584d54 100644 --- a/man/create_individual_file.Rd +++ b/man/create_individual_file.Rd @@ -18,6 +18,8 @@ create_individual_file( \item{year}{The year to process, in FY format.} +\item{homelessness_lookup}{the lookup file for homelessness} + \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. write the data to disk.} diff --git a/man/fix_sc_end_dates.Rd b/man/fix_sc_end_dates.Rd index 041751319..a41308571 100644 --- a/man/fix_sc_end_dates.Rd +++ b/man/fix_sc_end_dates.Rd @@ -11,7 +11,7 @@ fix_sc_end_dates(start_date, end_date, period_end_date) \item{end_date}{A vector containing dates.} -\item{period}{Social care latest submission period.} +\item{period_end_date}{the last date of Social care latest submission period.} } \value{ A date vector with replaced end dates diff --git a/man/fix_sc_missing_end_dates.Rd b/man/fix_sc_missing_end_dates.Rd index 513fc4cb3..ec8404a53 100644 --- a/man/fix_sc_missing_end_dates.Rd +++ b/man/fix_sc_missing_end_dates.Rd @@ -2,16 +2,14 @@ % Please edit documentation in R/fix_sc_dates.R \name{fix_sc_missing_end_dates} \alias{fix_sc_missing_end_dates} -\title{Fix sc end dates} +\title{Fix sc missing end dates} \usage{ fix_sc_missing_end_dates(end_date, period_end) } \arguments{ \item{end_date}{A vector containing dates.} -\item{start_date}{A vector containing dates.} - -\item{period}{Social care latest submission period.} +\item{period_end}{the last date of Social care latest submission period.} } \value{ A date vector with replaced end dates diff --git a/man/fix_sc_start_dates.Rd b/man/fix_sc_start_dates.Rd index 519759c5f..aae0ccb9f 100644 --- a/man/fix_sc_start_dates.Rd +++ b/man/fix_sc_start_dates.Rd @@ -9,7 +9,7 @@ fix_sc_start_dates(start_date, period_start) \arguments{ \item{start_date}{A vector containing dates.} -\item{period}{Social care latest submission period.} +\item{period_start}{the beginning date of Social care latest submission period.} } \value{ A date vector with replaced end dates diff --git a/man/get_pop_path.Rd b/man/get_pop_path.Rd index ce9c0409c..751ab0275 100644 --- a/man/get_pop_path.Rd +++ b/man/get_pop_path.Rd @@ -14,6 +14,8 @@ get_pop_path( \item{file_name}{The file name (with extension if not supplied to \code{ext})} \item{ext}{The extension (type of the file) - optional} + +\item{type}{population type datazone, or hscp, or ca, or hb, or interzone} } \value{ An \code{\link[fs:path]{fs::path()}} to the populations estimates file diff --git a/man/get_sandpit_extract_path.Rd b/man/get_sandpit_extract_path.Rd index c938b45ea..0f0584c5c 100644 --- a/man/get_sandpit_extract_path.Rd +++ b/man/get_sandpit_extract_path.Rd @@ -12,6 +12,10 @@ get_sandpit_extract_path( ) } \arguments{ +\item{type}{sandpit extract type at, ch, hc, sds, client, or demographics} + +\item{year}{financial year in string class} + \item{update}{The update month to use, defaults to \code{\link[=latest_update]{latest_update()}}} diff --git a/man/join_sc_client.Rd b/man/join_sc_client.Rd index fee2aa737..c79a62782 100644 --- a/man/join_sc_client.Rd +++ b/man/join_sc_client.Rd @@ -12,13 +12,13 @@ join_sc_client( ) } \arguments{ +\item{data}{the processed individual file} + \item{year}{financial year.} \item{sc_client}{SC client lookup} \item{file_type}{episode or individual file} - -\item{individual_file}{the processed individual file} } \description{ Match on sc client variables. diff --git a/man/process_extract_care_home.Rd b/man/process_extract_care_home.Rd index f2d1e5154..269ae1e7d 100644 --- a/man/process_extract_care_home.Rd +++ b/man/process_extract_care_home.Rd @@ -16,9 +16,6 @@ the year specific data.} \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. write the data to disk.} - -\item{client_lookup}{The Social Care Client lookup, created by -\code{\link[=process_lookup_sc_client]{process_lookup_sc_client()}}.} } \value{ the final data as a \link[tibble:tibble-package]{tibble}. diff --git a/man/process_extract_homelessness.Rd b/man/process_extract_homelessness.Rd index 1d7d3d1a7..405da34bb 100644 --- a/man/process_extract_homelessness.Rd +++ b/man/process_extract_homelessness.Rd @@ -23,6 +23,8 @@ process_extract_homelessness( \item{update}{The update to use (default is \code{\link[=latest_update]{latest_update()}}).} +\item{la_code_lookup}{get local authority using opendata.} + \item{sg_pub_path}{The path to the SG pub figures (default is \code{\link[=get_sg_homelessness_pub_path]{get_sg_homelessness_pub_path()}}).} } diff --git a/man/process_lookup_sc_client.Rd b/man/process_lookup_sc_client.Rd index 19cafe0a1..aa9cbed18 100644 --- a/man/process_lookup_sc_client.Rd +++ b/man/process_lookup_sc_client.Rd @@ -17,6 +17,8 @@ process_lookup_sc_client( \item{year}{The year to process} +\item{sc_demographics}{social care demographics file} + \item{write_to_disk}{(optional) Should the data be written to disk default is \code{TRUE} i.e. write the data to disk.} } diff --git a/man/produce_tests_sc_client_lookup.Rd b/man/produce_tests_sc_client_lookup.Rd index c1610f490..abda87cec 100644 --- a/man/produce_tests_sc_client_lookup.Rd +++ b/man/produce_tests_sc_client_lookup.Rd @@ -9,8 +9,6 @@ produce_tests_sc_client_lookup(data) \arguments{ \item{data}{new or old data for testing summary flags (data is from \code{\link[=get_source_extract_path]{get_source_extract_path()}})} - -\item{max_min_vars}{variables used when selecting 'min-max' from \code{\link[=calculate_measures]{calculate_measures()}}} } \value{ a dataframe with a count of each flag. diff --git a/man/read_lookup_sc_demographics.Rd b/man/read_lookup_sc_demographics.Rd index 3bda889fe..6c7dd049e 100644 --- a/man/read_lookup_sc_demographics.Rd +++ b/man/read_lookup_sc_demographics.Rd @@ -4,10 +4,12 @@ \alias{read_lookup_sc_demographics} \title{Read SC demographics} \usage{ -read_lookup_sc_demographics(sc_connection = phs_db_connection(dsn = "DVPROD")) +read_lookup_sc_demographics( + sc_dvprod_connection = phs_db_connection(dsn = "DVPROD") +) } \arguments{ -\item{sc_connection}{Connection to the sc platform} +\item{sc_dvprod_connection}{Connection to the sc platform} } \value{ a \link[tibble:tibble-package]{tibble} diff --git a/tests/testthat/_snaps/get_dd_path.md b/tests/testthat/_snaps/get_dd_path.md index e3f77eba9..76de0fb7b 100644 --- a/tests/testthat/_snaps/get_dd_path.md +++ b/tests/testthat/_snaps/get_dd_path.md @@ -3,7 +3,7 @@ Code dplyr::glimpse(latest_dd_file, width = 0) Output - Rows: 191,700 + Rows: 198,751 Columns: 14 $ cennum ~ $ MONTHFLAG ~ diff --git a/tests/testthat/test-read_file.R b/tests/testthat/test-read_file.R index e823180fb..ecd39acec 100644 --- a/tests/testthat/test-read_file.R +++ b/tests/testthat/test-read_file.R @@ -1,3 +1,5 @@ +skip_on_ci() + test_that("read_file works", { rds_path <- tempfile(fileext = ".rds") rds_gz_path <- tempfile(fileext = ".rds.gz") diff --git a/tests/testthat/test-write_file.R b/tests/testthat/test-write_file.R index 8620f694b..7f7991c57 100644 --- a/tests/testthat/test-write_file.R +++ b/tests/testthat/test-write_file.R @@ -1,3 +1,5 @@ +skip_on_ci() + test_that("write_file works", { rds_path <- tempfile(fileext = ".rds") parquet_path <- tempfile(fileext = ".parquet")