Merge branch 'development' into refactor/separate_opendata

Public-Health-Scotland · Sep 26, 2023 · c80e28c · c80e28c
2 parents 18873f3 + 2f6f25c
commit c80e28c
Show file tree

Hide file tree

Showing 140 changed files with 997 additions and 493 deletions.
diff --git a/.github/actions/spelling/expect.txt b/.github/actions/spelling/expect.txt
@@ -91,6 +91,7 @@ hjust
 hms
 homecare
 homev
+hscdiip
 hscp
 hscpnames
 IDPC
@@ -107,6 +108,7 @@ keyring
 keytime
 keytimex
 kis
+lazydt
 lgl
 los
 ltc
@@ -166,6 +168,7 @@ readxl
 reasonwait
 recid
 refsource
+renviron
 rlang
 rmarkdown
 roxygen

diff --git a/.github/workflows/lint-changed-files.yaml b/.github/workflows/lint-changed-files.yaml
@@ -2,7 +2,7 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   pull_request:
-    branches: [main-R, master, main]
+    branches: [master, main, development]
 
 name: lint-changed-files
 

diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml
@@ -2,9 +2,9 @@
 # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 on:
   push:
-    branches: [master, main, main-R]
+    branches: [master, main, development]
   pull_request:
-    branches: [master, main, main-R]
+    branches: [master, main, development]
 
 name: test-coverage
 

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -53,6 +53,8 @@ Imports:
     slfhelper (>= 0.10.0),
     stringdist (>= 0.9.10),
     stringr (>= 1.5.0),
+    tarchetypes (>= 0.7.6),
+    targets (>= 1.2.0),
     tibble (>= 3.2.1),
     tidyr (>= 1.3.0),
     tidyselect (>= 1.2.0),
@@ -61,8 +63,6 @@ Suggests:
     covr (>= 3.6.1),
     roxygen2 (>= 7.2.3),
     spelling (>= 2.2),
-    tarchetypes (>= 0.7.5),
-    targets (>= 0.14.3),
     testthat (>= 3.1.7)
 Remotes: 
     Public-Health-Scotland/phsmethods,

diff --git a/NAMESPACE b/NAMESPACE
@@ -13,6 +13,7 @@ export(convert_hscp_to_hscpnames)
 export(convert_numeric_to_date)
 export(convert_sending_location_to_lca)
 export(convert_year_to_fyyear)
+export(create_episode_file)
 export(create_individual_file)
 export(create_service_use_cohorts)
 export(end_fy)
@@ -155,7 +156,7 @@ export(read_sc_all_alarms_telecare)
 export(read_sc_all_care_home)
 export(read_sc_all_home_care)
 export(read_sc_all_sds)
-export(run_episode_file)
+export(setup_keyring)
 export(start_fy)
 export(start_fy_quarter)
 export(start_next_fy_quarter)

diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,48 @@
-# March 2023 Update - Unreleased
+# September 2023 Update - Unreleased
+* Update of 2017/18 onwards to include bug fixes within the files. 
+* New 2023/24 files. 
+* New NSU cohort for 2022/23 file. 
+* Re addition of:
+  * HRIs in individual file.
+  * Homelessness Flags.
+* Bug fixes: 
+  * Blank `datazone` in A&E. This has been fixed and was due to PC8 postcode format matching onto SLF pc lookup. 
+  * Large increase in preventable beddays. This was caused due to an SPSS vs R logic difference. Uses SPSS logic which 
+    brings the difference down to `3.3%`. 
+  * Issue with `locality` which showed `locality` in each row instead of its true `locality`. This has now been fixed. 
+  * Duplicated CHI in the individual file. The issue was identified when trying to include HRIs. This has now been corrected. 
+* Internal changes to SLF development: 
+  * `DN` and `CMH` data are now archived in an HSCDIIP folder as the BOXI datamart is now closed down for these. Function `get_boxi_extract_path` has been updated to reflect this. 
+  * Tests updated to include `HSCP`count. 
+  * Tests created for `Delayed Discharges` extract and `Social care Client lookup`.
 
 
+# June 2023 Update - Released 24-Jul-2023
+* 2011/12 -> 2013/14 – These files have not been altered, other than to make them available in a new file type (parquet).
+* 2017/18 – These files have been recreated using our new R pipeline, but the data has not changed. We did this so that we would have a good comparator file.
+* 2018/19 -> 2022/23 – These files have been recreated using the R pipeline and are also using updated data (as in a ‘normal’ update).
+* Files changed into parquet format. 
+* SLFhelper updated. 
+* Removal of `keydate1_dateformat` and `keydate2_dateformat`.
+* `dd_responsible_lca` – This variable now uses CA2019 codes instead of the 2-digit ‘old’ LCA code.
+* Preventable beddays - not able to calculate these correctly. * Death fixes not included.
+* Variables not ordered in R like they used to be in SPSS.
+* End of HHG.
+* New variable `ch_postcode`.
+* rename of variables `cost_total_net_incdnas`, `ooh_outcome.1`, `ooh_outcome.2`, `ooh_outcome.3`, `ooh_outcome.4`, `totalnodncontacts`. 
+* HRI's not included. 
+* Homelessness flags not included. 
+* Keep_population flag not included. 
+
+
+# March 2023 Update - Released 10-Mar-2023
+* 2021/22 episode and individual files refreshed with updated activity.
+* 2022/23 file updated and contains data up to the end of Q3. 
+* Social care data is available for 2022/23. 
+* Typo in the variable name `ooh_covid_assessment`
+* Next update in May as a test run in R but won't be released. 
+* Next release in June. 
+
 # December 2022 Update - Released 07-Dec-2022
 * Now using the 2022v2 Scottish Postcode Directory.
 * Now using the 2020 Urban Rural classifications (instead of the older 2016 ones), this means variables such as `URx_2016` will now be called `URx_2020`.

diff --git a/R/add_nsu_cohort.R b/R/add_nsu_cohort.R
@@ -2,13 +2,17 @@
 #'
 #' @param data The input data frame
 #' @param year The year being processed
+#' @param nsu_cohort The NSU data for the year
 #'
 #' @return A data frame containing the Non-Service Users as additional rows
 #' @export
 #'
 #' @family episode file
 #' @seealso [get_nsu_path()]
-add_nsu_cohort <- function(data, year) {
+add_nsu_cohort <- function(
+    data,
+    year,
+    nsu_cohort = read_file(get_nsu_path(year))) {
   year_param <- year
 
   if (!check_year_valid(year, "NSU")) {
@@ -29,9 +33,9 @@ add_nsu_cohort <- function(data, year) {
     )
   )
 
-  matched <- dplyr::full_join(data,
-    # NSU cohort file
-    read_file(get_nsu_path(year)) %>%
+  matched <- dplyr::full_join(
+    data,
+    nsu_cohort %>%
       dplyr::mutate(
         dob = as.Date(.data[["dob"]]),
         gpprac = convert_eng_gpprac_to_dummy(.data[["gpprac"]])
@@ -110,7 +114,6 @@ add_nsu_cohort <- function(data, year) {
         .data[["chi"]]
       )
     ) %>%
-    # Remove the additional columns
     dplyr::select(-dplyr::contains("_nsu"), -"has_chi")
 
   return(return_df)

diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R
@@ -126,9 +126,9 @@ aggregate_by_chi <- function(episode_file) {
   individual_file_cols6 <- episode_file[,
     .(
       preventable_beddays = ifelse(
-        max(cij_ppa, na.rm = TRUE),
-        max(cij_end_date) - min(cij_start_date),
-        NA_real_
+        any(cij_ppa, na.rm = TRUE),
+        as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))),
+        NA_integer_
       )
     ),
     # cij_marker has been renamed as cij_total
@@ -203,12 +203,19 @@ aggregate_ch_episodes <- function(episode_file) {
   data.table::setDT(episode_file)
 
   # Perform grouping and aggregation
-  episode_file <- episode_file[, `:=`(
-    ch_no_cost = max(ch_no_cost),
-    ch_ep_start = min(record_keydate1),
-    ch_ep_end = max(ch_ep_end),
-    ch_cost_per_day = mean(ch_cost_per_day)
-  ), by = c("chi", "ch_chi_cis")]
+  episode_file[, c(
+    "ch_no_cost",
+    "ch_ep_start",
+    "ch_ep_end",
+    "ch_cost_per_day"
+  ) := list(
+    max(ch_no_cost),
+    min(record_keydate1),
+    max(ch_ep_end),
+    mean(ch_cost_per_day)
+  ),
+  by = c("chi", "ch_chi_cis")
+  ]
 
   # Convert back to tibble if needed
   episode_file <- tibble::as_tibble(episode_file)

diff --git a/R/check_year_valid.R b/R/check_year_valid.R
@@ -42,7 +42,7 @@ check_year_valid <- function(
     return(FALSE)
   } else if (year >= "2122" && type %in% c("CMH", "DN")) {
     return(FALSE)
-  } else if (year >= "2223" && type %in% "NSU") {
+  } else if (year >= "2324" && type %in% "NSU") {
     return(FALSE)
   } else if (year >= "2324" && type %in% c("SPARRA", "HHG")) {
     return(FALSE)

diff --git a/R/compute_mid_year_age.R b/R/compute_mid_year_age.R
@@ -20,7 +20,7 @@
 compute_mid_year_age <- function(fyyear, dob) {
   age_intervals <- lubridate::interval(
     start = dob,
-    end = as.Date(midpoint_fy(fyyear))
+    end = midpoint_fy(fyyear)
   )
 
   ages <- lubridate::as.period(age_intervals)$year

diff --git a/R/convert_sending_location_to_lca.R b/R/convert_sending_location_to_lca.R
@@ -9,7 +9,7 @@
 #' @export
 #'
 #' @examples
-#' sending_location <- c("100", "120")
+#' sending_location <- c(100, 120)
 #' convert_sending_location_to_lca(sending_location)
 #'
 #' @family code functions
@@ -18,38 +18,40 @@
 convert_sending_location_to_lca <- function(sending_location) {
   lca <- dplyr::case_match(
     sending_location,
-    "100" ~ "01", # Aberdeen City
-    "110" ~ "02", # Aberdeenshire
-    "120" ~ "03", # Angus
-    "130" ~ "04", # Argyll and Bute
-    "355" ~ "05", # Scottish Borders
-    "150" ~ "06", # Clackmannanshire
-    "395" ~ "07", # West Dumbartonshire
-    "170" ~ "08", # Dumfries and Galloway
-    "180" ~ "09", # Dundee City
-    "190" ~ "10", # East Ayrshire
-    "200" ~ "11", # East Dunbartonshire
-    "210" ~ "12", # East Lothian
-    "220" ~ "13", # East Renfrewshire
-    "230" ~ "14", # City of Edinburgh
-    "240" ~ "15", # Falkirk
-    "250" ~ "16", # Fife
-    "260" ~ "17", # Glasgow City
-    "270" ~ "18", # Highland
-    "280" ~ "19", # Inverclyde
-    "290" ~ "20", # Midlothian
-    "300" ~ "21", # Moray
-    "310" ~ "22", # North Ayrshire
-    "320" ~ "23", # North Lanarkshire
-    "330" ~ "24", # Orkney Islands
-    "340" ~ "25", # Perth and Kinross
-    "350" ~ "26", # Renfrewshire
-    "360" ~ "27", # Shetland Islands
-    "370" ~ "28", # South Ayrshire
-    "380" ~ "29", # South Lanarkshire
-    "390" ~ "30", # Stirling
-    "400" ~ "31", # West Lothian
-    "235" ~ "32" # Na_h_Eileanan_Siar
+    100L ~ "01", # Aberdeen City
+    110L ~ "02", # Aberdeenshire
+    120L ~ "03", # Angus
+    130L ~ "04", # Argyll and Bute
+    355L ~ "05", # Scottish Borders
+    150L ~ "06", # Clackmannanshire
+    395L ~ "07", # West Dunbartonshire
+    170L ~ "08", # Dumfries and Galloway
+    180L ~ "09", # Dundee City
+    190L ~ "10", # East Ayrshire
+    200L ~ "11", # East Dunbartonshire
+    210L ~ "12", # East Lothian
+    220L ~ "13", # East Renfrewshire
+    230L ~ "14", # City of Edinburgh
+    240L ~ "15", # Falkirk
+    250L ~ "16", # Fife
+    260L ~ "17", # Glasgow City
+    270L ~ "18", # Highland
+    280L ~ "19", # Inverclyde
+    290L ~ "20", # Midlothian
+    300L ~ "21", # Moray
+    310L ~ "22", # North Ayrshire
+    320L ~ "23", # North Lanarkshire
+    330L ~ "24", # Orkney Islands
+    340L ~ "25", # Perth and Kinross
+    350L ~ "26", # Renfrewshire
+    360L ~ "27", # Shetland Islands
+    370L ~ "28", # South Ayrshire
+    380L ~ "29", # South Lanarkshire
+    390L ~ "30", # Stirling
+    400L ~ "31", # West Lothian
+    235L ~ "32", # Na_h_Eileanan_Siar
+    .default = NA_character_
   )
+
   return(lca)
 }