Counts by Pharmacy First Service and Clinical Condition (#6)

* Two pharmacy first graphs for the reports rmd file (clinical services and conditions) * added plotting function * created codelist dictionary * addressed review comments
opensafely · Sep 2, 2024 · 325f379 · 325f379
1 parent 9fb30c9
commit 325f379
Show file tree

Hide file tree

Showing 10 changed files with 408 additions and 20 deletions.
diff --git a/.gitignore b/.gitignore
@@ -9,3 +9,4 @@ venv/
 .DS_Store
 .Rhistory
 .Rproj.user/
+reports/pf_report.html
diff --git a/analysis/dataset_definition.py b/analysis/dataset_definition.py
diff --git a/analysis/report_measures.py b/analysis/report_measures.py
@@ -0,0 +1,75 @@
+from ehrql import INTERVAL, create_measures, months, codelist_from_csv
+from ehrql.tables.tpp import clinical_events, patients, practice_registrations
+
+measures = create_measures()
+measures.configure_dummy_data(population_size=1000)
+
+# Pharmacy First service codes: measure-name suffix -> list of SNOMED CT codes
+pharmacy_first_event_codes = {
+    # Community Pharmacy (CP) Blood Pressure (BP) Check Service (procedure)
+    "blood_pressure_service": ["1659111000000107"],
+    # Community Pharmacy (CP) Contraception Service (procedure)
+    "contraception_service": ["1659121000000101"],
+    # Community Pharmacist (CP) Consultation Service for minor illness (procedure)
+    "consultation_service": ["1577041000000109"],
+    # Pharmacy First service (qualifier value)
+    "pharmacy_first_service": ["983341000000102"],
+}
+
+# Import the codelist from CSV
+pharmacy_first_codelist = codelist_from_csv(
+    "codelists/user-chriswood-pharmacy-first-clinical-pathway-conditions.csv",
+    column="code", category_column = "term"
+)
+
+pharmacy_first_conditions_codes = {}
+# Iterate through codelist, forming a dictionary 
+for codes, term in pharmacy_first_codelist.items():
+    normalised_term = term.lower().replace(" ", "_")
+    codes = [codes]
+    pharmacy_first_conditions_codes[normalised_term] = codes
+
+registration = practice_registrations.for_patient_on(INTERVAL.end_date)
+
+# Select clinical events in interval date range
+selected_events = clinical_events.where(
+    clinical_events.date.is_on_or_between(INTERVAL.start_date, INTERVAL.end_date)
+)
+
+# Loop through each CLINICAL SERVICE to create a measure
+for pharmacy_first_event, codelist in pharmacy_first_event_codes.items():
+    condition_events = selected_events.where(
+        clinical_events.snomedct_code.is_in(codelist)
+    )
+
+    # Define the numerator as the count of events for the condition
+    numerator = condition_events.count_for_patient()
+
+    # Define the denominator as the number of patients registered
+    denominator = registration.exists_for_patient()
+
+    measures.define_measure(
+        name=f"count_{pharmacy_first_event}",
+        numerator=numerator,
+        denominator=denominator,
+        intervals=months(8).starting_on("2023-11-01")
+    )
+
+# Loop through each CLINICAL CONDITION to create a measure
+for condition_name, condition_code in pharmacy_first_conditions_codes.items():
+    condition_events = selected_events.where(
+        clinical_events.snomedct_code.is_in(condition_code)
+    )
+
+    # Define the numerator as the count of events for the condition
+    numerator = condition_events.count_for_patient()
+
+    # Define the denominator as the number of patients registered
+    denominator = registration.exists_for_patient()
+
+    measures.define_measure(
+        name=f"count_{condition_name}",
+        numerator=numerator,
+        denominator=denominator,
+        intervals=months(8).starting_on("2023-11-01")
+    )
diff --git a/analysis/stata.do b/analysis/stata.do
diff --git a/codelists/codelists.json b/codelists/codelists.json
@@ -1,3 +1,10 @@
 {
-  "files": {}
+  "files": {
+    "user-chriswood-pharmacy-first-clinical-pathway-conditions.csv": {
+      "id": "user/chriswood/pharmacy-first-clinical-pathway-conditions/7ec97762",
+      "url": "https://www.opencodelists.org/codelist/user/chriswood/pharmacy-first-clinical-pathway-conditions/7ec97762/",
+      "downloaded_at": "2024-08-22 12:53:00.167017Z",
+      "sha": "bed7f74add5c2d2ac6f7120d89f5ba94e57a28cb"
+    }
+  }
 }
diff --git a/codelists/codelists.txt b/codelists/codelists.txt
@@ -0,0 +1 @@
+user/chriswood/pharmacy-first-clinical-pathway-conditions/7ec97762
diff --git a/codelists/user-chriswood-pharmacy-first-clinical-pathway-conditions.csv b/codelists/user-chriswood-pharmacy-first-clinical-pathway-conditions.csv
@@ -0,0 +1,8 @@
+code,term
+1090711000000102,Uncomplicated urinary tract infection
+15805002,Acute sinusitis
+262550002,Infected insect bite
+3110003,Acute otitis media
+363746003,Acute pharyngitis
+4740000,Herpes zoster
+48277006,Impetigo
diff --git a/lib/functions/function_plot_measures.R b/lib/functions/function_plot_measures.R
@@ -0,0 +1,103 @@
+#' Plot Measures Over Time
+#'
+#' Creates a line plot of measures over time, with customisable labels and
+#' colours. A dotted orange vertical reference line is drawn at 2024-02-01 and
+#' the x-axis has one break per month.
+#'
+#' @param data A dataframe containing the data to plot.
+#' @param measure_names Strings specifying which values of `measure_col` to
+#'   plot. Rows with any other value are dropped.
+#' @param custom_labels Strings specifying the legend labels, one per entry of
+#'   `measure_names` and in the same order. Default is NULL, which keeps the
+#'   measure names as they appear in the data.
+#' @param date_col The name of the dataframe column which contains the x-axis
+#'   dates; it is converted with `as.Date()`. Default is "interval_end".
+#' @param value_col The name of the dataframe column which contains the y-axis
+#'   values. Default is "numerator".
+#' @param measure_col The name of the dataframe column which contains the
+#'   categorical variable. Default is "measure".
+#' @param title A string specifying the title of the plot. Default is NULL.
+#' @param x_label A string specifying the label for the x-axis. Default is NULL.
+#' @param y_label A string specifying the label for the y-axis. Default is NULL.
+#' @param color_label A string specifying the label for the color legend.
+#'   Default is NULL.
+#' @param axis_x_text_size Font size of the x-axis tick labels. Default is 7.
+#'
+#' @return A ggplot object.
+plot_measures <- function(
+    data,
+    measure_names,
+    custom_labels = NULL,
+    date_col = "interval_end",
+    value_col = "numerator",
+    measure_col = "measure",
+    title = NULL,
+    x_label = NULL,
+    y_label = NULL,
+    color_label = NULL,
+    axis_x_text_size = 7) {
+
+  # Check that every column the plot relies on exists in the data
+  for (required_col in c(date_col, value_col, measure_col)) {
+    if (!required_col %in% names(data)) {
+      stop("Data does not have a column with the name '", required_col, "'")
+    }
+  }
+
+  # factor() needs exactly one label per level; fail early with a clear
+  # message instead of a cryptic factor() error or auto-numbered labels
+  if (!is.null(custom_labels) &&
+        length(custom_labels) != length(measure_names)) {
+    stop(
+      "'custom_labels' must have the same length as 'measure_names' (",
+      length(custom_labels), " vs ", length(measure_names), ")"
+    )
+  }
+
+  # Convert column names to symbols so they can be unquoted with `!!`
+  date_sym <- sym(date_col)
+  value_sym <- sym(value_col)
+  measure_sym <- sym(measure_col)
+
+  # Ensure the date column is of Date type, then keep only the
+  # user-specified measure names
+  data <- data %>%
+    mutate(!!date_sym := as.Date(!!date_sym)) %>%
+    filter(!!measure_sym %in% measure_names)
+
+  # Apply custom labels if provided; the factor levels follow the order of
+  # `measure_names`
+  if (!is.null(custom_labels)) {
+    data <- data %>%
+      mutate(
+        !!measure_sym := factor(
+          !!measure_sym,
+          levels = measure_names,
+          labels = custom_labels
+        )
+      )
+  }
+
+  # Create and return the plot
+  ggplot(
+    data,
+    aes(
+      x = !!date_sym,
+      y = !!value_sym,
+      color = !!measure_sym,
+      group = !!measure_sym
+    )
+  ) +
+    geom_line() +
+    geom_point() +
+    labs(
+      title = title,
+      x = x_label,
+      y = y_label,
+      color = color_label
+    ) +
+    # Anchor the y-axis at zero; the upper limit is taken from the data
+    scale_y_continuous(limits = c(0, NA)) +
+    theme_minimal() +
+    theme(
+      axis.text.x = element_text(size = axis_x_text_size),
+      legend.position = "bottom",
+      legend.key.size = unit(0.5, "cm"),
+      legend.text = element_text(size = 8),
+      legend.title = element_text(size = 8)
+    ) +
+    # Wrap the legend entries over two rows
+    guides(color = guide_legend(nrow = 2)) +
+    geom_vline(
+      xintercept = lubridate::as_date("2024-02-01"),
+      linetype = "dotted",
+      colour = "orange",
+      linewidth = .7
+    ) +
+    scale_x_date(
+      date_breaks = "1 month",
+      date_labels = "%b %Y"
+    )
+}
diff --git a/project.yaml b/project.yaml
@@ -5,8 +5,10 @@ expectations:
   population_size: 1000
 
 actions:
-  generate_dataset:
-    run: ehrql:v1 generate-dataset analysis/dataset_definition.py --output output/dataset.csv.gz
+  generate_pf_measures:
+    run: >
+      ehrql:v1 generate-measures analysis/report_measures.py
+      --output output/report/conditions_measures.csv
     outputs:
-      highly_sensitive:
-        dataset: output/dataset.csv.gz
+      moderately_sensitive:
+        measure: output/report/conditions_measures.csv
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		user/chriswood/pharmacy-first-clinical-pathway-conditions/7ec97762