Skip to content

Commit

Permalink
Refactor data loading and tidying into separate scripts
Browse files Browse the repository at this point in the history
This will improve readability and modularity and make it easier for us to make further changes later on. It's only a start; there's much more we could do.
  • Loading branch information
milanwiedemann committed Dec 2, 2024
1 parent 70e8857 commit d89d6e3
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 0 deletions.
88 changes: 88 additions & 0 deletions lib/functions/load_opensafely_outputs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Load the four measures tables from the directory that matches the
# execution environment.
#
# When the OPENSAFELY_BACKEND environment variable is non-empty the files are
# read from output/measures (produced by the generate_pf_measures action);
# otherwise they are read from released_output/measures. Both locations use
# the same file names, so only the root directory differs.
measures_root_dir <- if (nzchar(Sys.getenv("OPENSAFELY_BACKEND"))) {
  "output"
} else {
  "released_output"
}

# Every read is namespace-qualified so that none of them depends on readr
# being attached by whichever script sources this file.
df_measures <- readr::read_csv(
  here(measures_root_dir, "measures", "pf_codes_conditions_measures.csv")
)
df_descriptive_stats <- readr::read_csv(
  here(measures_root_dir, "measures", "pf_descriptive_stats_measures.csv")
)
df_pfmed <- readr::read_csv(
  here(measures_root_dir, "measures", "pf_medications_measures.csv")
)
df_condition_provider <- readr::read_csv(
  here(measures_root_dir, "measures", "pf_condition_provider_measures.csv")
)

# Replace the raw measures table with the result of tidy_measures(), handing
# over the name, name-mapping and group-by dictionaries under matching
# argument names.
# NOTE(review): the three pf_measures_* dictionaries are not created in this
# script; they must already exist when this line runs (this commit defines
# them in lib/functions/tidy_measures.R) — confirm the sourcing order.
df_measures <- tidy_measures(
data = df_measures,
pf_measures_name_dict = pf_measures_name_dict,
pf_measures_name_mapping = pf_measures_name_mapping,
pf_measures_groupby_dict = pf_measures_groupby_dict
)

# Turn the demographic breakdown columns into ordered factors whose level
# order is exactly the order written below. Any value that is not one of the
# listed levels becomes NA.

# Ethnicity
df_measures$ethnicity <- ordered(
  df_measures$ethnicity,
  levels = c(
    "White", "Mixed",
    "Asian or Asian British",
    "Black or Black British",
    "Chinese or Other Ethnic Groups",
    "Missing"
  )
)

# Age band
df_measures$age_band <- ordered(
  df_measures$age_band,
  levels = c("0-19", "20-39", "40-59", "60-79", "80+", "Missing")
)

# Region
df_measures$region <- ordered(
  df_measures$region,
  levels = c(
    "East", "East Midlands", "London",
    "North East", "North West",
    "South East", "South West",
    "West Midlands", "Yorkshire and The Humber",
    "Missing"
  )
)

# Sex: map the lower-case codes to capitalised display labels. The result is
# an unordered factor; any value other than "female"/"male" becomes NA.
df_measures <- mutate(
  df_measures,
  sex = factor(
    sex,
    levels = c("female", "male"),
    labels = c("Female", "Male")
  )
)

# Rows with no age band are assigned the value "Missing"
df_measures$age_band <- replace(
  df_measures$age_band,
  is.na(df_measures$age_band),
  "Missing"
)
42 changes: 42 additions & 0 deletions lib/functions/load_validation_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
library(readr)
library(tidyr)
library(dplyr)
library(here)

# Read the consultation validation counts and reshape them to long format.
#
# Steps:
#   1. rename the reported `count` column to `count_100pct`;
#   2. add `count_40pct`, the reported count multiplied by 0.4 and rounded to
#      a whole number;
#   3. tag every row with source = "nhs_bsa";
#   4. stack the two count columns, so each input row yields two rows:
#      `count_method` holds the originating column name and `count` the value.
#
# One pipe operator (%>%) is used for the whole chain rather than alternating
# between %>% and |>.
df_bsa_consultation_validation <- read_csv(
  here("lib", "validation", "data", "pf_consultation_validation_data.csv")
) %>%
  rename(count_100pct = count) %>%
  mutate(
    count_40pct = round(as.numeric(count_100pct * 0.4), digits = 0),
    source = "nhs_bsa"
  ) %>%
  pivot_longer(
    cols = c(count_100pct, count_40pct),
    names_to = "count_method",
    values_to = "count"
  )

# Recode the raw consultation_type codes to display names. Levels and labels
# are paired by position; the factor's level order is the order listed here,
# and any code that is not listed becomes NA.
df_bsa_consultation_validation <- mutate(
  df_bsa_consultation_validation,
  consultation_type = factor(
    consultation_type,
    levels = c(
      "sinusitis", "infected_insect_bites", "uncomplicated_uti",
      "acute_otitis_media", "acute_sore_throat", "shingles", "impetigo"
    ),
    labels = c(
      "Acute Sinusitis", "Infected Insect Bite", "UTI",
      "Acute Otitis Media", "Acute Pharyngitis", "Herpes Zoster", "Impetigo"
    )
  )
)

# Medication validation data, read unchanged from lib/validation/data
df_bsa_medication_validation <- read_csv(here(
  "lib", "validation", "data", "pf_medication_validation_data.csv"
))
6 changes: 6 additions & 0 deletions lib/functions/plot_measures.R
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,9 @@ plot_measures <- function(

plot_tmp
}

# Colour palettes ----
# Plain character vectors of colour names / hex codes (eight-digit hex codes
# carry an alpha channel).
# NOTE(review): palette lengths (6, 10, 6, 2) look like one colour per
# category of the matching breakdown — confirm against the plotting code.

# Five blues running dark to light, then grey
gradient_palette <- c(
  "#001F4D", "#0056B3", "#007BFF", "#66B3E2", "#A4D8E1",
  "grey"
)

region_palette <- c(
  "red", "navy", "#018701", "#ffa600ca", "purple",
  "brown", "#f4a5b2", "cyan", "green",
  "grey"
)

ethnicity_palette <- c(
  "#42db0188", "#0056B3", "#ff0000c2", "#a52a2a5a", "purple",
  "grey"
)

sex_palette <- c("red", "blue")
35 changes: 35 additions & 0 deletions lib/functions/tidy_measures.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,38 @@
# Lookup tables used when tidying measures: display names and mappings,
# each keyed by the raw name.

# Raw measure name -> display name
pf_measures_name_dict <- as.list(c(
  # services
  consultation_service = "Consultation Service",
  pharmacy_first_service = "Pharmacy First Consultation",
  combined_pf_service = "Pharmacy First Consultations (Combined)",
  # conditions
  acute_otitis_media = "Acute Otitis Media",
  herpes_zoster = "Herpes Zoster",
  acute_sinusitis = "Acute Sinusitis",
  impetigo = "Impetigo",
  infected_insect_bite = "Infected Insect Bite",
  acute_pharyngitis = "Acute Pharyngitis",
  uncomplicated_urinary_tract_infection = "UTI"
))

# Raw measure name -> measure category
pf_measures_name_mapping <- as.list(c(
  # services
  consultation_service = "clinical_service",
  pharmacy_first_service = "clinical_service",
  combined_pf_service = "pharmacy_first_services",
  # conditions
  acute_otitis_media = "clinical_condition",
  herpes_zoster = "clinical_condition",
  acute_sinusitis = "clinical_condition",
  impetigo = "clinical_condition",
  infected_insect_bite = "clinical_condition",
  acute_pharyngitis = "clinical_condition",
  uncomplicated_urinary_tract_infection = "clinical_condition"
))

# Raw group-by column name -> display name
pf_measures_groupby_dict <- as.list(c(
  age_band = "Age band",
  sex = "Sex",
  imd = "IMD",
  region = "Region",
  ethnicity = "Ethnicity"
))

#' Tidy measures data
#'
#' Creates a tidier dataframe of measures data.
Expand Down

0 comments on commit d89d6e3

Please sign in to comment.