You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
# TODO social care demographics - decide what to do with non-validated chi and cases where dob does not match chi
# Data Cleaning ---------------------------------------# TODO social care demographics - tidy up code and make more efficient.sc_demog<-data %>%
dplyr::mutate(
# if one chi is missing then use the other# TODO social care demographics - decide what to do with cases where last 4 digits of chi are differentchi_upi= ifelse(is.na(chi_upi), upi, chi_upi),
upi= ifelse(is.na(upi), chi_upi, upi),
submitted_date_of_birth= ifelse(is.na(submitted_date_of_birth), chi_date_of_birth, submitted_date_of_birth),
chi_date_of_birth= ifelse(is.na(chi_date_of_birth), submitted_date_of_birth, chi_date_of_birth),
chi_date_of_birth=lubridate::as_date(chi_date_of_birth),
submitted_date_of_birth=lubridate::as_date(submitted_date_of_birth),
# check gender code - replace code 99 with 9=# use CHI sex if available# TODO social care demographics - check gender matches chi for extra validation checksubmitted_gender= replace(.data$submitted_gender, .data$submitted_gender==99L, 9L),
gender=dplyr::if_else(
is.na(.data$chi_gender_code) |.data$chi_gender_code==9L,
.data$submitted_gender,
.data$chi_gender_code
)
) %>%
# format postcodes using `phsmethods`dplyr::mutate(dplyr::across(
tidyselect::contains("postcode"),
~phsmethods::format_postcode(.x, format="pc7"))) %>%
dplyr::distinct() %>%
# if only one option is available for chi then choose thatdplyr::mutate(chi= ifelse(chi_upi==upi| is.na(upi), chi_upi,
ifelse(is.na(chi_upi), upi, NA)
)) %>%
dplyr::mutate(
# if only one option is available for DOB then choose thatdob= ifelse(chi_date_of_birth==submitted_date_of_birth| is.na(submitted_date_of_birth), chi_date_of_birth,
ifelse(is.na(chi_date_of_birth), submitted_date_of_birth, NA)),
dob=lubridate::as_date(dob)
) %>%
dplyr::arrange(chi, dob) %>%
dplyr::group_by(social_care_id, sending_location) %>%
tidyr::fill(chi, .direction= c("down")) %>%
tidyr::fill(dob, .direction= c("down")) %>%
dplyr::ungroup() %>%
# create string for DOB from CHI and the DOB to see if they match.dplyr::mutate(dob_from_chiupi= paste0(stringr::str_sub(chi, 1, 6))) %>%
dplyr::mutate(dob_from_dob= paste0(
stringr::str_sub(as.character(dob), 9, 10),
stringr::str_sub(as.character(dob), 6, 7),
stringr::str_sub(as.character(dob), 3, 4)
)) %>%
# validation flag. if dob goes with chi then flag as 1dplyr::mutate(chi_validation= ifelse(dob_from_chiupi==dob_from_dob, 1, 0))
# cases where all chi and dob are missing, nothing we can do about these and no validationmissing<-sc_demog %>%
dplyr::filter(is.na(chi_upi) & is.na(upi) & is.na(submitted_date_of_birth) & is.na(chi_date_of_birth) & is.na(chi) & is.na(dob)) %>%
dplyr::select(-dob_from_chiupi, -dob_from_dob, -chi_upi, -upi, -chi_date_of_birth, -submitted_date_of_birth)
# cases where chi and dob matchvalidated<-sc_demog %>%
dplyr::filter(chi_validation==1) %>%
dplyr::select(-dob_from_chiupi, -dob_from_dob, -chi_upi, -upi, -chi_date_of_birth, -submitted_date_of_birth)
# match on either dob to chisc_demog<-sc_demog %>%
dplyr::anti_join(missing) %>%
dplyr::filter(chi_validation!=1) %>%
# get dob from chi and submitted and see if either match with chidplyr::mutate(dob_from_chidob= paste0(
stringr::str_sub(as.character(chi_date_of_birth), 9, 10),
stringr::str_sub(as.character(chi_date_of_birth), 6, 7),
stringr::str_sub(as.character(chi_date_of_birth), 3, 4)
)) %>%
dplyr::mutate(dob_from_submitteddob= paste0(
stringr::str_sub(submitted_date_of_birth, 9, 10),
stringr::str_sub(as.character(submitted_date_of_birth), 6, 7),
stringr::str_sub(as.character(submitted_date_of_birth), 3, 4)
)) %>%
# if either dob matches with chi then use that dobdplyr::mutate(
dob= ifelse(dob_from_chiupi==dob_from_chidob, chi_date_of_birth, dob),
dob= ifelse(dob_from_chiupi==dob_from_submitteddob, submitted_date_of_birth, dob),
dob=lubridate::as_date(dob)
) %>%
dplyr::mutate(dob_from_dob= paste0(
stringr::str_sub(as.character(dob), 9, 10),
stringr::str_sub(as.character(dob), 6, 7),
stringr::str_sub(as.character(dob), 3, 4)
)) %>%
# if dob and chi match then flag as validateddplyr::mutate(chi_validation= ifelse(dob_from_chiupi==dob_from_dob, 1, 0)) %>%
dplyr::select(-dob_from_chidob, -dob_from_submitteddob)
# add the validated cases to validated dfvalidated<-validated %>%
rbind(sc_demog %>%
dplyr::filter(chi_validation==1) %>%
dplyr::select(-dob_from_chiupi, -dob_from_dob, -chi_upi, -upi, -chi_date_of_birth, -submitted_date_of_birth))
# match on dob to either chisc_demog<-sc_demog %>%
dplyr::filter(chi_validation!=1) %>%
# create dob from both chi numbersdplyr::mutate(dob_from_upi= paste0(stringr::str_sub(upi, 1, 6))) %>%
dplyr::mutate(dob_from_chi_upi= paste0(stringr::str_sub(chi_upi, 1, 6))) %>%
# use whichever one matchesdplyr::mutate(chi= ifelse(dob_from_chi_upi==dob_from_dob, chi_upi, chi)) %>%
dplyr::mutate(chi= ifelse(dob_from_upi==dob_from_dob, upi, chi)) %>%
dplyr::mutate(dob_from_chi= paste0(stringr::str_sub(chi, 1, 6))) %>%
# if chi and dob match then flag as validateddplyr::mutate(chi_validation= ifelse(dob_from_chi==dob_from_dob, 1, 0))
# all validated casesvalidated<-validated %>%
rbind(sc_demog %>%
dplyr::filter(chi_validation==1) %>%
dplyr::select(-dob_from_chiupi, -dob_from_upi, -dob_from_chi_upi, -dob_from_dob, -chi_upi, -upi, -chi_date_of_birth, -submitted_date_of_birth, -dob_from_chi))
# TODO social care demographics - decide what to do with non-validated chi and cases where dob does not match chi# Need to decide what to do with social care cases where the chi and the dob do not match.# this is why I have kept the validated/non-validated df seperate. Hoping we can get back and sort this out.sc_demog<-sc_demog %>%
dplyr::filter(chi_validation!=1) %>% # all unvalidated cases. most of these are due to missing chi or dob so there is no way to validate.dplyr::select(-dob_from_chiupi, -dob_from_upi, -dob_from_chi_upi, -dob_from_dob, -chi_upi, -upi, -chi_date_of_birth, -submitted_date_of_birth, -dob_from_chi) %>%
rbind(validated) %>%
rbind(missing)
# count number of na postcodesna_postcodes<-sc_demog %>%
The text was updated successfully, but these errors were encountered:
Need to decide what to do with social care cases where the chi and the dob do not match.
this is why I have kept the validated/non-validated df seperate. Hoping we can get back and sort this out.
source-linkage-files/R/process_lookup_sc_demographics.R
Line 150 in 1c75e3b
The text was updated successfully, but these errors were encountered: