From ff51536611038173483b8c72162f3d4b8fd180bb Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 12 Sep 2023 15:56:26 +0100 Subject: [PATCH 1/5] test commit push --- R/create_individual_file.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index df67f8c0f..b59c497f3 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -74,6 +74,8 @@ create_individual_file <- function( add_cij_columns() %>% add_all_columns() + adfasdfasdfasdfasdf + if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) { individual_file <- individual_file %>% aggregate_by_chi_no_sc() From 1af102e0b9b008bc177c001b17189a19c52cb5c9 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 12 Sep 2023 15:56:26 +0100 Subject: [PATCH 2/5] Revert "test commit push" This reverts commit ff51536611038173483b8c72162f3d4b8fd180bb. --- R/create_individual_file.R | 2 -- 1 file changed, 2 deletions(-) diff --git a/R/create_individual_file.R b/R/create_individual_file.R index b59c497f3..df67f8c0f 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -74,8 +74,6 @@ create_individual_file <- function( add_cij_columns() %>% add_all_columns() - adfasdfasdfasdfasdf - if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) { individual_file <- individual_file %>% aggregate_by_chi_no_sc() From d38fe2995a4e6aefecdbe338d80c9d35164d2919 Mon Sep 17 00:00:00 2001 From: Zihao Li Date: Tue, 12 Sep 2023 19:04:35 +0100 Subject: [PATCH 3/5] combine aggregate_by_chi with sc and without sc --- R/aggregate_by_chi.R | 275 +++++++++---------------------------- R/create_individual_file.R | 4 +- 2 files changed, 69 insertions(+), 210 deletions(-) diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R index d2f8ca799..676d59c41 100644 --- a/R/aggregate_by_chi.R +++ b/R/aggregate_by_chi.R @@ -7,7 +7,7 @@ #' @importFrom data.table .SD #' #' @inheritParams create_individual_file -aggregate_by_chi <- function(episode_file) { +aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") # Convert to data.table @@ -28,17 +28,23 @@ aggregate_by_chi <- function(episode_file) { ) ) - data.table::setnames( - episode_file, - c( - "ch_chi_cis", "cij_marker", "ooh_case_id" - # ,"hh_in_fy" - ), - c( - "ch_cis_episodes", "cij_total", "ooh_cases" - # ,"hl1_in_fy" + if (exclude_sc_var) { + data.table::setnames(episode_file, + c("cij_marker", + "ooh_case_id"), + c("cij_total", + "ooh_cases")) + } else{ + data.table::setnames( + episode_file, + c("ch_chi_cis", + "cij_marker", + "ooh_case_id"), + c("ch_cis_episodes", + "cij_total", + "ooh_cases") ) - ) + } # column specification, grouped by chi # columns to select last @@ -48,6 +54,9 @@ aggregate_by_chi <- function(episode_file) { "gpprac", vars_start_with(episode_file, "sc_") ) + if (exclude_sc_var) { + cols2 <- cols2[!(cols2 %in% vars_start_with(episode_file, "sc_"))] + } # columns to count unique rows cols3 <- c( "ch_cis_episodes", @@ -59,6 +68,9 @@ aggregate_by_chi <- function(episode_file) { "ooh_cases", "preventable_admissions" ) + if (exclude_sc_var) { + cols3 <- cols3[!(cols3 %in% "ch_cis_episodes")] + } # columns to sum up cols4 <- c( vars_end_with( @@ -84,61 +96,70 @@ aggregate_by_chi <- function(episode_file) { "pcc" ) ), - vars_start_with( - episode_file, - "sds_option" - ), + vars_start_with(episode_file, + "sds_option"), "health_net_cost_inc_dnas" ) cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")] + if (exclude_sc_var) { + cols4 <- + cols4[!(cols4 %in% c( + vars_end_with( + episode_file, + c("alarms", + "telecare")), + vars_start_with(episode_file, + "sds_option") + ))] + } # columns to select maximum cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy")) data.table::setnafill(episode_file, fill = 0L, cols = cols5) # compute individual_file_cols1 <- episode_file[, - .(gender = mean(gender)), - by = "chi" + .(gender = mean(gender)), + by = "chi" ] individual_file_cols2 <- episode_file[, - .SD[.N], - .SDcols = cols2, - by = "chi" + .SD[.N], + .SDcols = cols2, + by = "chi" ] individual_file_cols3 <- episode_file[, - lapply(.SD, function(x) { - data.table::uniqueN(x, na.rm = TRUE) - }), - .SDcols = cols3, - by = "chi" + lapply(.SD, function(x) { + data.table::uniqueN(x, na.rm = TRUE) + }), + .SDcols = cols3, + by = "chi" ] individual_file_cols4 <- episode_file[, - lapply(.SD, function(x) { - sum(x, na.rm = TRUE) - }), - .SDcols = cols4, - by = "chi" + lapply(.SD, function(x) { + sum(x, na.rm = TRUE) + }), + .SDcols = cols4, + by = "chi" ] individual_file_cols5 <- episode_file[, - lapply(.SD, function(x) max(x, na.rm = TRUE)), - .SDcols = cols5, - by = "chi" + lapply(.SD, function(x) max(x, na.rm = TRUE)), + .SDcols = cols5, + by = "chi" ] individual_file_cols6 <- episode_file[, - .( - preventable_beddays = ifelse( - any(cij_ppa, na.rm = TRUE), - as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))), - NA_integer_ - ) - ), - # cij_marker has been renamed as cij_total - by = c("chi", "cij_total") + .( + preventable_beddays = ifelse( + any(cij_ppa, na.rm = TRUE), + as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))), + NA_integer_ + ) + ), + # cij_marker has been renamed as cij_total + by = c("chi", "cij_total") ] individual_file_cols6 <- individual_file_cols6[, - .( - preventable_beddays = sum(preventable_beddays, na.rm = TRUE) - ), - by = "chi" + .( + preventable_beddays = sum(preventable_beddays, na.rm = TRUE) + ), + by = "chi" ] individual_file <- dplyr::bind_cols( @@ -222,165 +243,3 @@ aggregate_ch_episodes <- function(episode_file) { return(episode_file) } - - -#' Aggregate by CHI with no social care variables -#' -#' @description Aggregate episode file by CHI to convert into -#' individual file. Exclude social care variables for the creation of -#' the newest file with Q1 data. -#' -#' @importFrom data.table .N -#' @importFrom data.table .SD -#' -#' @inheritParams create_individual_file -aggregate_by_chi_no_sc <- function(episode_file) { - cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}") - - # Convert to data.table - data.table::setDT(episode_file) - - # Ensure all variable names are lowercase - data.table::setnames(episode_file, stringr::str_to_lower) - - # Sort the data - data.table::setkeyv( - episode_file, - c( - "chi", - "record_keydate1", - "keytime1", - "record_keydate2", - "keytime2" - ) - ) - - data.table::setnames( - episode_file, - c( - # "ch_chi_cis", - "cij_marker", - "ooh_case_id", - "hh_in_fy" - ), - c( - # "ch_cis_episodes", - "cij_total", - "ooh_cases", - "hl1_in_fy" - ) - ) - - # column specification, grouped by chi - # columns to select last - cols2 <- c( - "postcode", - "dob", - "gpprac" # , - # vars_start_with(episode_file, "sc_") - ) - # columns to count unique rows - cols3 <- c( - # "ch_cis_episodes", - "cij_total", - "cij_el", - "cij_non_el", - "cij_mat", - "cij_delay", - "ooh_cases", - "preventable_admissions" - ) - # columns to sum up - cols4 <- c( - vars_end_with( - episode_file, - c( - "episodes", - "beddays", - "cost", - "attendances", - "attend", - "contacts", - "hours", - # "alarms", - # "telecare", - "paid_items", - "advice", - "homev", - "time", - "assessment", - "other", - "dn", - "nhs24", - "pcc" - # ) - ), - # vars_start_with( - # episode_file, - # "sds_option" - ), - "health_net_cost_inc_dnas" - ) - # cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")] - # columns to select maximum - cols5 <- c("nsu", vars_contain(episode_file, "hl1_in_fy")) - data.table::setnafill(episode_file, fill = 0L, cols = cols5) - # compute - individual_file_cols1 <- episode_file[, - .(gender = mean(gender)), - by = "chi" - ] - individual_file_cols2 <- episode_file[, - .SD[.N], - .SDcols = cols2, - by = "chi" - ] - individual_file_cols3 <- episode_file[, - lapply(.SD, function(x) { - data.table::uniqueN(x, na.rm = TRUE) - }), - .SDcols = cols3, - by = "chi" - ] - individual_file_cols4 <- episode_file[, - lapply(.SD, function(x) { - sum(x, na.rm = TRUE) - }), - .SDcols = cols4, - by = "chi" - ] - individual_file_cols5 <- episode_file[, - lapply(.SD, function(x) max(x, na.rm = TRUE)), - .SDcols = cols5, - by = "chi" - ] - individual_file_cols6 <- episode_file[, - .( - preventable_beddays = ifelse( - any(cij_ppa, na.rm = TRUE), - as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))), - NA_integer_ - ) - ), - # cij_marker has been renamed as cij_total - by = c("chi", "cij_total") - ] - individual_file_cols6 <- individual_file_cols6[, - .( - preventable_beddays = sum(preventable_beddays, na.rm = TRUE) - ), - by = "chi" - ] - - individual_file <- dplyr::bind_cols( - individual_file_cols1, - individual_file_cols2[, chi := NULL], - individual_file_cols3[, chi := NULL], - individual_file_cols4[, chi := NULL], - individual_file_cols5[, chi := NULL], - individual_file_cols6[, chi := NULL] - ) - - # convert back to tibble - return(dplyr::as_tibble(individual_file)) -} diff --git a/R/create_individual_file.R b/R/create_individual_file.R index df67f8c0f..4dc389d6a 100644 --- a/R/create_individual_file.R +++ b/R/create_individual_file.R @@ -76,12 +76,12 @@ create_individual_file <- function( if (!check_year_valid(year, type = c("CH", "HC", "AT", "SDS"))) { individual_file <- individual_file %>% - aggregate_by_chi_no_sc() + aggregate_by_chi(exclude_sc_var = TRUE) } else { individual_file <- individual_file %>% aggregate_ch_episodes() %>% clean_up_ch(year) %>% - aggregate_by_chi() %>% + aggregate_by_chi(exclude_sc_var = FALSE) %>% join_sc_client(year) } From 7bba9d04c1b7846cb64eea3849f281cecfd34654 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Tue, 12 Sep 2023 18:08:02 +0000 Subject: [PATCH 4/5] Style code --- R/aggregate_by_chi.R | 113 +++++++++++++++++++++++++------------------ 1 file changed, 65 insertions(+), 48 deletions(-) diff --git a/R/aggregate_by_chi.R b/R/aggregate_by_chi.R index 676d59c41..8d9dff96d 100644 --- a/R/aggregate_by_chi.R +++ b/R/aggregate_by_chi.R @@ -29,20 +29,30 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { ) if (exclude_sc_var) { - data.table::setnames(episode_file, - c("cij_marker", - "ooh_case_id"), - c("cij_total", - "ooh_cases")) - } else{ data.table::setnames( episode_file, - c("ch_chi_cis", + c( + "cij_marker", + "ooh_case_id" + ), + c( + "cij_total", + "ooh_cases" + ) + ) + } else { + data.table::setnames( + episode_file, + c( + "ch_chi_cis", "cij_marker", - "ooh_case_id"), - c("ch_cis_episodes", + "ooh_case_id" + ), + c( + "ch_cis_episodes", "cij_total", - "ooh_cases") + "ooh_cases" + ) ) } @@ -96,8 +106,10 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { "pcc" ) ), - vars_start_with(episode_file, - "sds_option"), + vars_start_with( + episode_file, + "sds_option" + ), "health_net_cost_inc_dnas" ) cols4 <- cols4[!(cols4 %in% "ch_cis_episodes")] @@ -105,11 +117,16 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { cols4 <- cols4[!(cols4 %in% c( vars_end_with( - episode_file, - c("alarms", - "telecare")), - vars_start_with(episode_file, - "sds_option") + episode_file, + c( + "alarms", + "telecare" + ) + ), + vars_start_with( + episode_file, + "sds_option" + ) ))] } # columns to select maximum @@ -117,49 +134,49 @@ aggregate_by_chi <- function(episode_file, exclude_sc_var = FALSE) { data.table::setnafill(episode_file, fill = 0L, cols = cols5) # compute individual_file_cols1 <- episode_file[, - .(gender = mean(gender)), - by = "chi" + .(gender = mean(gender)), + by = "chi" ] individual_file_cols2 <- episode_file[, - .SD[.N], - .SDcols = cols2, - by = "chi" + .SD[.N], + .SDcols = cols2, + by = "chi" ] individual_file_cols3 <- episode_file[, - lapply(.SD, function(x) { - data.table::uniqueN(x, na.rm = TRUE) - }), - .SDcols = cols3, - by = "chi" + lapply(.SD, function(x) { + data.table::uniqueN(x, na.rm = TRUE) + }), + .SDcols = cols3, + by = "chi" ] individual_file_cols4 <- episode_file[, - lapply(.SD, function(x) { - sum(x, na.rm = TRUE) - }), - .SDcols = cols4, - by = "chi" + lapply(.SD, function(x) { + sum(x, na.rm = TRUE) + }), + .SDcols = cols4, + by = "chi" ] individual_file_cols5 <- episode_file[, - lapply(.SD, function(x) max(x, na.rm = TRUE)), - .SDcols = cols5, - by = "chi" + lapply(.SD, function(x) max(x, na.rm = TRUE)), + .SDcols = cols5, + by = "chi" ] individual_file_cols6 <- episode_file[, - .( - preventable_beddays = ifelse( - any(cij_ppa, na.rm = TRUE), - as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))), - NA_integer_ - ) - ), - # cij_marker has been renamed as cij_total - by = c("chi", "cij_total") + .( + preventable_beddays = ifelse( + any(cij_ppa, na.rm = TRUE), + as.integer(min(cij_end_date, end_fy(year)) - max(cij_start_date, start_fy(year))), + NA_integer_ + ) + ), + # cij_marker has been renamed as cij_total + by = c("chi", "cij_total") ] individual_file_cols6 <- individual_file_cols6[, - .( - preventable_beddays = sum(preventable_beddays, na.rm = TRUE) - ), - by = "chi" + .( + preventable_beddays = sum(preventable_beddays, na.rm = TRUE) + ), + by = "chi" ] individual_file <- dplyr::bind_cols( From 55300d52f6d826e2c7da4f0d422667ae88fe82b1 Mon Sep 17 00:00:00 2001 From: lizihao-anu Date: Wed, 13 Sep 2023 13:15:10 +0000 Subject: [PATCH 5/5] Update documentation --- man/aggregate_by_chi.Rd | 2 +- man/aggregate_by_chi_no_sc.Rd | 16 ---------------- 2 files changed, 1 insertion(+), 17 deletions(-) delete mode 100644 man/aggregate_by_chi_no_sc.Rd diff --git a/man/aggregate_by_chi.Rd b/man/aggregate_by_chi.Rd index 013123902..5d5983ce2 100644 --- a/man/aggregate_by_chi.Rd +++ b/man/aggregate_by_chi.Rd @@ -4,7 +4,7 @@ \alias{aggregate_by_chi} \title{Aggregate by CHI} \usage{ -aggregate_by_chi(episode_file) +aggregate_by_chi(episode_file, exclude_sc_var = FALSE) } \arguments{ \item{episode_file}{Tibble containing episodic data} diff --git a/man/aggregate_by_chi_no_sc.Rd b/man/aggregate_by_chi_no_sc.Rd deleted file mode 100644 index 02a20d709..000000000 --- a/man/aggregate_by_chi_no_sc.Rd +++ /dev/null @@ -1,16 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/aggregate_by_chi.R -\name{aggregate_by_chi_no_sc} -\alias{aggregate_by_chi_no_sc} -\title{Aggregate by CHI with no social care variables} -\usage{ -aggregate_by_chi_no_sc(episode_file) -} -\arguments{ -\item{episode_file}{Tibble containing episodic data} -} -\description{ -Aggregate episode file by CHI to convert into -individual file. Exclude social care variables for the creation of -the newest file with Q1 data. -}