Skip to content

Commit

Permalink
Merge branch 'december-2024' into clean-up-code
Browse files Browse the repository at this point in the history
  • Loading branch information
lizihao-anu authored Oct 15, 2024
2 parents 2bf59cb + d516ee2 commit dbb1095
Show file tree
Hide file tree
Showing 17 changed files with 200 additions and 140 deletions.
4 changes: 1 addition & 3 deletions R/add_activity_after_death_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ add_activity_after_death_flag <- function(
year,
deaths_data = read_file(get_combined_slf_deaths_lookup_path()) %>%
slfhelper::get_chi()) {
cli::cli_alert_info("Add activity after death flag function started at {Sys.time()}")

# to skip warnings no visible binding for global variable ‘.’
. <- NULL

Expand Down Expand Up @@ -94,7 +92,7 @@ add_activity_after_death_flag <- function(
dplyr::select(-death_date_boxi) %>%
dplyr::distinct()


cli::cli_alert_info("Add activity after death flag function finished at {Sys.time()}")

return(final_data)
}
Expand Down
2 changes: 2 additions & 0 deletions R/add_hri_variables.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ flag_non_scottish_residents <- function(
) %>%
dplyr::select(-"dummy_postcode", -"eng_prac")

cli::cli_alert_info("Add HRI variables function finished at {Sys.time()}")

return(return_data)
}

Expand Down
4 changes: 4 additions & 0 deletions R/add_keep_population_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ add_keep_population_flag <- function(individual_file, year) {
)
)
}

cli::cli_alert_info("Add keep population function finished at {Sys.time()}")

return(individual_file)
}


Expand Down
4 changes: 2 additions & 2 deletions R/add_nsu_cohort.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ add_nsu_cohort <- function(
data,
year,
nsu_cohort = read_file(get_nsu_path(year)) %>% slfhelper::get_chi()) {
cli::cli_alert_info("Add NSU cohort function started at {Sys.time()}")

year_param <- year

if (!check_year_valid(year, "nsu")) {
Expand Down Expand Up @@ -118,5 +116,7 @@ add_nsu_cohort <- function(
) %>%
dplyr::select(-dplyr::contains("_nsu"), -"has_chi")

cli::cli_alert_info("Add NSU cohort function finished at {Sys.time()}")

return(return_df)
}
4 changes: 2 additions & 2 deletions R/add_ppa_flag.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
#' @return A data frame to use as a lookup of PPAs
#' @family episode_file
add_ppa_flag <- function(data) {
cli::cli_alert_info("Add PPA flag function started at {Sys.time()}")

check_variables_exist(
data,
variables = c(
Expand Down Expand Up @@ -227,5 +225,7 @@ add_ppa_flag <- function(data) {
.data$cij_ppa
))

cli::cli_alert_info("Add PPA flag function finished at {Sys.time()}")

return(ppa_cij_data)
}
8 changes: 4 additions & 4 deletions R/aggregate_by_chi.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
#'
#' @inheritParams create_individual_file
aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) {
cli::cli_alert_info("Aggregate by CHI function started at {Sys.time()}")

# recommended by `data.table` team to tackle the issue
# "no visible binding for global variable"
gender <-
Expand Down Expand Up @@ -199,6 +197,8 @@ aggregate_by_chi <- function(episode_file, year, exclude_sc_var = FALSE) {
)
individual_file <- individual_file[, year := year]

cli::cli_alert_info("Aggregate by CHI function finished at {Sys.time()}")

# convert back to tibble
return(dplyr::as_tibble(individual_file))
}
Expand Down Expand Up @@ -246,8 +246,6 @@ vars_contain <- function(data, vars, ignore_case = FALSE) {
#'
#' @inheritParams create_individual_file
aggregate_ch_episodes <- function(episode_file) {
cli::cli_alert_info("Aggregate ch episodes function started at {Sys.time()}")

# recommended by `data.table` team to tackle the issue
# "no visible binding for global variable"
ch_no_cost <-
Expand All @@ -274,5 +272,7 @@ aggregate_ch_episodes <- function(episode_file) {
# Convert back to tibble if needed
episode_file <- tibble::as_tibble(episode_file)

cli::cli_alert_info("Aggregate ch episodes function finished at {Sys.time()}")

return(episode_file)
}
4 changes: 2 additions & 2 deletions R/check_year_valid.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ check_year_valid <- function(
return(FALSE)
} else if (year >= "2324" && type %in% c("nsu", "hhg")) {
return(FALSE)
} else if (year >= "2425" && type %in% "sparra") {
} else if (year >= "2425" && type %in% "nsu") {
return(FALSE)
} else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at")) {
} else if (year >= "2526" && type %in% c("ch", "hc", "sds", "at", "sparra")) {
return(FALSE)
}

Expand Down
4 changes: 2 additions & 2 deletions R/correct_demographics.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,6 @@
#'
#' @return episode files with updated date of birth and ages
correct_demographics <- function(data, year) {
cli::cli_alert_info("Correct demographics function started at {Sys.time()}")

# keep episodes with missing chi
data_no_chi <- data %>%
dplyr::filter(is_missing(.data$chi))
Expand Down Expand Up @@ -102,5 +100,7 @@ correct_demographics <- function(data, year) {
data_chi
)

cli::cli_alert_info("Correct demographics function finished at {Sys.time()}")

return(data)
}
4 changes: 2 additions & 2 deletions R/cost_uplift.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,6 @@
#' @return episode data with uplifted costs
#' @family episode_file
apply_cost_uplift <- function(data) {
cli::cli_alert_info("Apply cost uplift function started at {Sys.time()}")

data <- data %>%
# attach a uplift scale as the last column
lookup_uplift() %>%
Expand All @@ -29,6 +27,8 @@ apply_cost_uplift <- function(data) {
# remove the last uplift column
dplyr::select(-"uplift")

cli::cli_alert_info("Apply cost uplift function finished at {Sys.time()}")

return(data)
}

Expand Down
41 changes: 23 additions & 18 deletions R/create_episode_file.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ create_episode_file <- function(
sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(),
write_to_disk = TRUE,
anon_chi_out = TRUE) {
cli::cli_alert_info("Create episode file function started at {Sys.time()}")

processed_data_list <- purrr::discard(processed_data_list, ~ is.null(.x) | identical(.x, tibble::tibble()))

episode_file <- dplyr::bind_rows(processed_data_list) %>%
Expand Down Expand Up @@ -265,8 +267,6 @@ create_episode_file <- function(
#'
#' @return `data` with only the `vars_to_keep` kept
store_ep_file_vars <- function(data, year, vars_to_keep) {
cli::cli_alert_info("Store episode file variables function started at {Sys.time()}")

tempfile_path <- get_file_path(
directory = get_year_dir(year),
file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"),
Expand All @@ -289,6 +289,8 @@ store_ep_file_vars <- function(data, year, vars_to_keep) {
path = tempfile_path
)

cli::cli_alert_info("Store episode file variables function finished at {Sys.time()}")

return(
dplyr::select(
data,
Expand All @@ -304,8 +306,6 @@ store_ep_file_vars <- function(data, year, vars_to_keep) {
#'
#' @return The full SLF data.
load_ep_file_vars <- function(data, year) {
cli::cli_alert_info("Load episode file variable function started at {Sys.time()}")

tempfile_path <- get_file_path(
directory = get_year_dir(year),
file_name = stringr::str_glue("temp_ep_file_variable_store_{year}.parquet"),
Expand All @@ -324,6 +324,8 @@ load_ep_file_vars <- function(data, year) {

fs::file_delete(tempfile_path)

cli::cli_alert_info("Load episode file variable function finished at {Sys.time()}")

return(full_data)
}

Expand All @@ -333,8 +335,6 @@ load_ep_file_vars <- function(data, year) {
#'
#' @return A data frame with CIJ markers filled in for those missing.
fill_missing_cij_markers <- function(data) {
cli::cli_alert_info("Fill missing cij markers function started at {Sys.time()}")

fixable_data <- data %>%
dplyr::filter(
.data[["recid"]] %in% c("01B", "04B", "GLS", "02B", "DD") & !is.na(.data[["chi"]])
Expand Down Expand Up @@ -380,6 +380,8 @@ fill_missing_cij_markers <- function(data) {

return_data <- dplyr::bind_rows(non_fixable_data, fixed_data)

cli::cli_alert_info("Fill missing cij markers function finished at {Sys.time()}")

return(return_data)
}

Expand All @@ -389,14 +391,12 @@ fill_missing_cij_markers <- function(data) {
#'
#' @return The data with CIJ variables corrected.
correct_cij_vars <- function(data) {
cli::cli_alert_info("Correct cij variables function started at {Sys.time()}")

check_variables_exist(
data,
c("chi", "recid", "cij_admtype", "cij_pattype_code")
)

data %>%
data <- data %>%
# Change some values of cij_pattype_code based on cij_admtype
dplyr::mutate(
cij_admtype = dplyr::if_else(
Expand Down Expand Up @@ -424,6 +424,10 @@ correct_cij_vars <- function(data) {
9L ~ "Other"
)
)

cli::cli_alert_info("Correct cij variables function finished at {Sys.time()}")

return(data)
}

#' Create cost total net inc DNA
Expand All @@ -432,13 +436,11 @@ correct_cij_vars <- function(data) {
#'
#' @return The data with cost including dna.
create_cost_inc_dna <- function(data) {
cli::cli_alert_info("Create cost inc dna function started at {Sys.time()}")

check_variables_exist(data, c("cost_total_net", "attendance_status"))

# Create cost including DNAs and modify costs
# not including DNAs using cattend
data %>%
data <- data %>%
dplyr::mutate(
cost_total_net_inc_dnas = .data$cost_total_net,
# In the Cost_Total_Net column set the cost for
Expand All @@ -449,6 +451,10 @@ create_cost_inc_dna <- function(data) {
.data$cost_total_net
)
)

cli::cli_alert_info("Create cost inc dna function finished at {Sys.time()}")

return(data)
}

#' Create the cohort lookups
Expand All @@ -458,8 +464,6 @@ create_cost_inc_dna <- function(data) {
#'
#' @return The data unchanged (the cohorts are written to disk)
create_cohort_lookups <- function(data, year, update = latest_update()) {
cli::cli_alert_info("Create cohort lookups function started at {Sys.time()}")

create_demographic_cohorts(
data,
year,
Expand All @@ -474,6 +478,7 @@ create_cohort_lookups <- function(data, year, update = latest_update()) {
write_to_disk = TRUE
)

cli::cli_alert_info("Create cohort lookups function finished at {Sys.time()}")

return(data)
}
Expand All @@ -499,8 +504,6 @@ join_cohort_lookups <- function(
col_select = c("anon_chi", "service_use_cohort")
) %>%
slfhelper::get_chi()) {
cli::cli_alert_info("Join cohort lookups function started at {Sys.time()}")

join_cohort_lookups <- data %>%
dplyr::left_join(
demographic_cohort,
Expand All @@ -511,6 +514,8 @@ join_cohort_lookups <- function(
by = "chi"
)

cli::cli_alert_info("Join cohort lookups function finished at {Sys.time()}")

return(join_cohort_lookups)
}

Expand All @@ -527,8 +532,6 @@ join_sc_client <- function(data,
year,
sc_client = read_file(get_sc_client_lookup_path(year)) %>% slfhelper::get_chi(),
file_type = c("episode", "individual")) {
cli::cli_alert_info("Join social care client function started at {Sys.time()}")

if (!check_year_valid(year, type = "client")) {
data_file <- data
return(data_file)
Expand All @@ -551,5 +554,7 @@ join_sc_client <- function(data,
)
}

cli::cli_alert_info("Join social care client function finished at {Sys.time()}")

return(data_file)
}
Loading

0 comments on commit dbb1095

Please sign in to comment.