Skip to content

Commit

Permalink
add dob_from_chi and age_from_chi
Browse files Browse the repository at this point in the history
  • Loading branch information
Tina815 committed Mar 21, 2022
1 parent 00a1d44 commit 5372c5b
Show file tree
Hide file tree
Showing 10 changed files with 361 additions and 44 deletions.
4 changes: 2 additions & 2 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# Generated by roxygen2: do not edit by hand

export(age_from_chi)
export(age_calculate)
export(age_from_chi)
export(age_group)
export(chi_check)
export(chi_pad)
export(create_age_groups)
export(extract_fin_year)
export(dob_from_chi)
export(extract_fin_year)
export(file_size)
export(fin_year)
export(format_postcode)
Expand Down
45 changes: 29 additions & 16 deletions R/dob_from_chi.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@
#' it will return NA
#'
#' @param chi_number a CHI number or a vector of CHI numbers with \code{character} class.
#' @param min_date,max_date optional min and/or max dates that the Date of Birth could take. Must be either length 1 for a 'fixed' date or the same length as \code{chi_number} for a date per CHI number e.g. an admission date.
#' @param min_date,max_date optional min and/or max dates that the Date of Birth could take as the century needs to be guessed.
#' Must be either length 1 for a 'fixed' date or the same length as \code{chi_number} for a date per CHI number.
#' min_date can be date based on common sense in the dataset, whilst max_date can be date when an event happens such as discharge date.
#' @param chi_check logical, optionally skip checking the CHI for validity which will be
#' faster but should only be used if you have previously checked the CHI(s), the default (TRUE) will to check the CHI numbers.
#' faster but should only be used if you have previously checked the CHI(s). The default (TRUE) will check the CHI numbers.
#'
#' @return a date vector of DoB. It will be the same length as \code{chi_number}.
#' @export
Expand All @@ -22,17 +24,17 @@
#' "0101405073",
#' "0101625707"
#' ), adm_date = as.Date(c(
#' "01-01-1950",
#' "01-01-2000",
#' "01-01-2020"
#' "1950-01-01",
#' "2000-01-01",
#' "2020-01-01"
#' )))
#'
#' data %>%
#' mutate(chi_dob = dob_from_chi(chi))
#'
#' data %>%
#' mutate(chi_dob = dob_from_chi(chi,
#' min_date = as.Date("01-01-1930"),
#' min_date = as.Date("1930-01-01"),
#' max_date = adm_date
#' ))
dob_from_chi <- function(chi_number, min_date = NULL, max_date = NULL, chi_check = TRUE) {
Expand All @@ -54,6 +56,16 @@ dob_from_chi <- function(chi_number, min_date = NULL, max_date = NULL, chi_check
# Default the max_date to today (person can't be born after today)
if (is.null(max_date)) max_date <- Sys.Date()

# Fill in today's date to where max_date is missing
if (any(is.na(max_date))) max_date[is.na(max_date)] <- Sys.Date()

# max_date should not be a future date
if (any(max_date > Sys.Date())) {
to_replace <- max_date > Sys.Date()
max_date[to_replace] <- Sys.Date()
warning("any max_date where it is a future date is changed to date of today")
}

# Default the min_date to 1 Jan 1900 (person can't be born before then)
# TODO - Find out what the earliest CHI date was?
if (is.null(min_date)) min_date <- as.Date("1900-01-01")
Expand Down Expand Up @@ -93,8 +105,10 @@ dob_from_chi <- function(chi_number, min_date = NULL, max_date = NULL, chi_check
guess_dob <- as.Date(dplyr::case_when(
is.na(date_1900) ~ date_2000,
is.na(date_2000) ~ date_1900,
date_1900 <= min_date ~ date_2000,
date_2000 >= max_date ~ date_1900
(date_2000 >= min_date & date_2000 <= max_date) &
!(date_1900 >= min_date & date_1900 <= max_date) ~ date_2000,
(date_1900 >= min_date & date_1900 <= max_date) &
!(date_2000 >= min_date & date_2000 <= max_date) ~ date_1900
))

new_na_count <- sum(is.na(guess_dob)) - na_count
Expand All @@ -115,7 +129,9 @@ dob_from_chi <- function(chi_number, min_date = NULL, max_date = NULL, chi_check
#'
#' @param chi_number a CHI number or a vector of CHI numbers with \code{character} class.
#' @param ref_date calculate the age at this date, default is to use \code{Sys.Date()} i.e. today.
#' @param min_age,max_age optional min and/or max dates that the Date of Birth could take. Must be either length 1 for a 'fixed' date or the same length as \code{chi_number} for a date per CHI number e.g. an admission date.
#' @param min_age,max_age optional min and/or max dates that the Date of Birth could take as the century needs to be guessed.
#' Must be either length 1 for a 'fixed' age or the same length as \code{chi_number} for an age per CHI number.
#' min_age can be age based on common sense in the dataset, whilst max_age can be age when an event happens such as the age at discharge.
#' @param chi_check logical, optionally skip checking the CHI for validity which will be
#' faster but should only be used if you have previously checked the CHI(s), the default (TRUE) will to check the CHI numbers.
#'
Expand All @@ -132,9 +148,9 @@ dob_from_chi <- function(chi_number, min_date = NULL, max_date = NULL, chi_check
#' "0101405073",
#' "0101625707"
#' ), dis_date = as.Date(c(
#' "01-01-1950",
#' "01-01-2000",
#' "01-01-2020"
#' "1950-01-01",
#' "2000-01-01",
#' "2020-01-01"
#' )))
#'
#' data %>%
Expand Down Expand Up @@ -178,10 +194,7 @@ age_from_chi <- function(chi_number, ref_date = NULL, min_age = 0, max_age = NUL
chi_check = chi_check
)

guess_age <- lubridate::interval(guess_dob, ref_date) %>%
lubridate::as.period() %>%
.$year %>%
as.integer()
guess_age <- age_calculate(guess_dob, ref_date)

return(guess_age)
}
62 changes: 60 additions & 2 deletions README.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ knitr::opts_chunk$set(
- `match_area()` converts geography codes into area names
- `format_postcode()` formats improperly recorded postcodes
- `qtr()`, `qtr_end()`, `qtr_next()` and `qtr_prev()` assign a date to a quarter
- `age_calculate` calculate age between two dates
- `age_calculate` calculates age between two dates
- `dob_from_chi` extracts Date of Birth (DoB) from the CHI number
- `age_from_chi` extracts age from the CHI number

`phsmethods` can be used on both the [server](https://rstudio.nhsnss.scot.nhs.uk/) and desktop versions of RStudio.

Expand Down Expand Up @@ -186,18 +188,74 @@ qtr_prev(f, format = "short")
my_date <- lubridate::ymd("2020-02-29")
end_date <- lubridate::ymd("2022-02-21")
# Change the argument of date_class can make a difference.
age_calculate(my_date, end_date, round_down = FALSE, date_class = "period") * 365.25
age_calculate(my_date, end_date, round_down = FALSE, date_class = "duration") * 365.25
# For a start date in leap year, age increases on 1st March every year.
leap1 <- lubridate::ymd("2020-02-29")
leap2 <- lubridate::ymd("2022-02-28")
age_calculate(leap1, leap2, date_class = "period")
```

### dob_from_chi
```{r dob_from_chi}
dob_from_chi("0101336489")
library(tibble)
library(dplyr)
data <- tibble(chi = c(
"0101336489",
"0101405073",
"0101625707"
), adm_date = as.Date(c(
"1950-01-01",
"2000-01-01",
"2020-01-01"
)))
data %>%
mutate(chi_dob = dob_from_chi(chi))
data %>%
mutate(chi_dob = dob_from_chi(chi,
min_date = as.Date("1930-01-01"),
max_date = adm_date
))
```

### age_from_chi
```{r age_from_chi}
age_from_chi("0101336489")
library(tibble)
library(dplyr)
data <- tibble(chi = c(
"0101336489",
"0101405073",
"0101625707"
), dis_date = as.Date(c(
"1950-01-01",
"2000-01-01",
"2020-01-01"
)))
data %>%
mutate(chi_age = age_from_chi(chi))
data %>%
mutate(chi_age = age_from_chi(chi, min_age = 18, max_age = 65))
data %>%
mutate(chi_age = age_from_chi(chi,
ref_date = dis_date
))
```

## Contributing to phsmethods

At present, the maintainers of this package are [David Caldwell](https://github.com/davidc92) and [Lucinda Lawrie](https://github.com/lucindalawrie).
At present, the maintainer of this package is [Tina Fu](https://github.com/Tina815).

This package is intended to be in continuous development and contributions may be made by anyone within PHS. If you would like to make a contribution, please first create an [issue](https://github.com/Public-Health-Scotland/phsmethods/issues) on GitHub and assign **both** of the package maintainers to it. This is to ensure that no duplication of effort occurs in the case of multiple people having the same idea. The package maintainers will discuss the issue and get back to you as soon as possible.

Expand Down
101 changes: 97 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ in [Public Health Scotland
- `format_postcode()` formats improperly recorded postcodes
- `qtr()`, `qtr_end()`, `qtr_next()` and `qtr_prev()` assign a date to
a quarter
- `age_calculate` calculate age between two dates
- `age_calculate` calculates age between two dates
- `dob_from_chi` extracts Date of Birth (DoB) from the CHI number
- `age_from_chi` extracts age from the CHI number

`phsmethods` can be used on both the
[server](https://rstudio.nhsnss.scot.nhs.uk/) and desktop versions of
Expand Down Expand Up @@ -277,23 +279,114 @@ qtr_prev(f, format = "short")
my_date <- lubridate::ymd("2020-02-29")
end_date <- lubridate::ymd("2022-02-21")

# Change the argument of date_class can make a difference.
age_calculate(my_date, end_date, round_down = FALSE, date_class = "period") * 365.25
#> [1] 723.0625
age_calculate(my_date, end_date, round_down = FALSE, date_class = "duration") * 365.25
#> [1] 723

# For a start date in leap year, age increases on 1st March every year.
leap1 <- lubridate::ymd("2020-02-29")
leap2 <- lubridate::ymd("2022-02-28")

age_calculate(leap1, leap2, date_class = "period")
#> [1] 1
```

### dob\_from\_chi

``` r
dob_from_chi("0101336489")
#> [1] "1933-01-01"

library(tibble)
#> Warning: package 'tibble' was built under R version 3.6.3
library(dplyr)
data <- tibble(chi = c(
"0101336489",
"0101405073",
"0101625707"
), adm_date = as.Date(c(
"1950-01-01",
"2000-01-01",
"2020-01-01"
)))

data %>%
mutate(chi_dob = dob_from_chi(chi))
#> # A tibble: 3 x 3
#> chi adm_date chi_dob
#> <chr> <date> <date>
#> 1 0101336489 1950-01-01 1933-01-01
#> 2 0101405073 2000-01-01 1940-01-01
#> 3 0101625707 2020-01-01 1962-01-01

data %>%
mutate(chi_dob = dob_from_chi(chi,
min_date = as.Date("1930-01-01"),
max_date = adm_date
))
#> # A tibble: 3 x 3
#> chi adm_date chi_dob
#> <chr> <date> <date>
#> 1 0101336489 1950-01-01 1933-01-01
#> 2 0101405073 2000-01-01 1940-01-01
#> 3 0101625707 2020-01-01 1962-01-01
```

### age\_from\_chi

``` r
age_from_chi("0101336489")
#> [1] 89

library(tibble)
library(dplyr)
data <- tibble(chi = c(
"0101336489",
"0101405073",
"0101625707"
), dis_date = as.Date(c(
"1950-01-01",
"2000-01-01",
"2020-01-01"
)))

data %>%
mutate(chi_age = age_from_chi(chi))
#> # A tibble: 3 x 3
#> chi dis_date chi_age
#> <chr> <date> <dbl>
#> 1 0101336489 1950-01-01 89
#> 2 0101405073 2000-01-01 82
#> 3 0101625707 2020-01-01 60

data %>%
mutate(chi_age = age_from_chi(chi, min_age = 18, max_age = 65))
#> 2 CHI numbers produced ambiguous dates and will be given NA for DoB, if possible try different values for min_date and/or max_date
#> # A tibble: 3 x 3
#> chi dis_date chi_age
#> <chr> <date> <dbl>
#> 1 0101336489 1950-01-01 NA
#> 2 0101405073 2000-01-01 NA
#> 3 0101625707 2020-01-01 60

data %>%
mutate(chi_age = age_from_chi(chi,
ref_date = dis_date
))
#> # A tibble: 3 x 3
#> chi dis_date chi_age
#> <chr> <date> <dbl>
#> 1 0101336489 1950-01-01 17
#> 2 0101405073 2000-01-01 60
#> 3 0101625707 2020-01-01 58
```

## Contributing to phsmethods

At present, the maintainers of this package are [David
Caldwell](https://github.com/davidc92) and [Lucinda
Lawrie](https://github.com/lucindalawrie).
At present, the maintainer of this package is [Tina
Fu](https://github.com/Tina815).

This package is intended to be in continuous development and
contributions may be made by anyone within PHS. If you would like to
Expand Down
10 changes: 6 additions & 4 deletions man/age_from_chi.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/chi_pad.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 5372c5b

Please sign in to comment.