opensafely · quan14 · Jan 7, 2025 · Jan 6, 2025 · Jan 6, 2025 · Jan 6, 2025
diff --git a/analysis/codelists.py b/analysis/codelists.py
@@ -0,0 +1,11 @@
+from ehrql import codelist_from_csv
+
+adhd_codelist = codelist_from_csv(  # matched against clinical_events.snomedct_code by the analysis scripts
+    "codelists/nhsd-primary-care-domain-refsets-adhd_cod.csv",
+    column="code",
+)
+
+methylphenidate_codelist = codelist_from_csv(  # matched against medications.dmd_code in dataset_definition_core.py
+    "codelists/opensafely-methylphenidate-dmd.csv",
+    column="code",
+)
diff --git a/analysis/dataset_definition_core.py b/analysis/dataset_definition_core.py
@@ -1,70 +1,47 @@
-from ehrql import create_dataset, codelist_from_csv
-from ehrql.tables.tpp import patients, practice_registrations, clinical_events, medications
+from ehrql import create_dataset
+from ehrql.tables.tpp import (
+    patients,
+    practice_registrations,
+    clinical_events,
+    medications,
+)
 
-# Codelists
-adhd_codelist = codelist_from_csv("codelists/nhsd-primary-care-domain-refsets-adhd_cod.csv", column="code")
-methylphenidate_codelist = codelist_from_csv("codelists/nhsd-primary-care-domain-refsets-adhd_cod.csv", column="code")
+from codelists import adhd_codelist, methylphenidate_codelist
 
 dataset = create_dataset()
-dataset.configure_dummy_data(population_size=10000)
+dataset.configure_dummy_data(population_size=10)  # NOTE(review): project.yaml expectations still say population_size: 5000 - confirm 10 is intended
 
 # Date range
-start_date = f"2016-01-01"
-end_date = f"2023-12-31"
+start_date = "2016-01-01"  # lower bound of the registration/event/medication filters; also the age and alive reference date
+end_date = "2023-12-31"  # upper bound of the registration/event/medication filters
 
 # Population variables
-was_registered = practice_registrations.for_patient_on(
-    start_date
-).exists_for_patient() & practice_registrations.for_patient_on(
-    end_date
+has_registration = practice_registrations.spanning(  # one spanning() filter replaces the separate start_date and end_date checks
+    start_date, end_date
 ).exists_for_patient()
+dataset.sex = patients.sex  # output column; also filtered to male/female in define_population
+dataset.age = patients.age_on(start_date)  # age at start_date; bounded to 18-120 in define_population
 
-is_female_or_male = patients.sex.is_in(["female", "male"])
-
-was_adult = (patients.age_on(start_date) >= 18) & (
-    patients.age_on(start_date) <= 120
+selected_events = clinical_events.where(  # clinical events dated within start_date..end_date
+    clinical_events.date.is_on_or_between(start_date, end_date)
 )
 
-was_alive = (
-    patients.date_of_death.is_after(end_date)
-    | patients.date_of_death.is_null()
+selected_medications = medications.where(  # medication records dated within start_date..end_date
+    medications.date.is_on_or_between(start_date, end_date)
 )
 
-had_adhd_event = clinical_events.where(
+dataset.has_adhd_event = selected_events.where(  # now an output column; previously a population criterion (had_adhd_event)
     clinical_events.snomedct_code.is_in(adhd_codelist)
-    & clinical_events.date.is_on_or_between(
-        start_date, end_date
-    )
 ).exists_for_patient()
 
-dataset.define_population(
-                        is_female_or_male
-                        & was_adult
-                        & was_alive
-                        & was_registered
-                        & had_adhd_event
-                    )
-
-
-# Exposure variables
-years = list(range(2016, 2023 + 1))
-# Iterate over each year and set the attribute on the dataset
-for year in years:
-    start_date = f"{year}-01-01"
-    end_date = f"{year}-12-31"
-
-    # Construct the attribute name dynamically for each year
-    attribute_name = f"num_adhd_events_{year}"
-
-    # Calculate the number of ADHD events for the given year
-    num_adhd_events_year = clinical_events.where(
-        clinical_events.snomedct_code.is_in(adhd_codelist)
-        & clinical_events.date.is_on_or_between(start_date, end_date)
-    ).count_for_patient()
-
-    # Set the attribute on the dataset
-    setattr(dataset, attribute_name, num_adhd_events_year)
-
+dataset.has_mph_med = selected_medications.where(  # True when an in-window medication has a dmd_code in the methylphenidate codelist
+    medications.dmd_code.is_in(methylphenidate_codelist)
+).exists_for_patient()
 
-dataset.sex = patients.sex
-dataset.dob = patients.date_of_birth
+dataset.define_population(  # NOTE(review): the ADHD event is no longer required for inclusion; it is only reported via has_adhd_event
+    has_registration
+    & dataset.sex.is_in(["male", "female"])
+    & (dataset.age >= 18)
+    & (dataset.age <= 120)
+    & patients.is_alive_on(start_date),  # NOTE(review): the removed was_alive check required survival past end_date - confirm start_date is intended
+)
diff --git a/analysis/measures_definition.py b/analysis/measures_definition.py
@@ -0,0 +1,54 @@
+from ehrql import INTERVAL, case, create_measures, when, years
+from ehrql.tables.tpp import (
+    patients,
+    practice_registrations,
+    clinical_events,
+    medications,
+)
+
+from codelists import adhd_codelist, methylphenidate_codelist
+
+measures = create_measures()
+measures.configure_dummy_data(population_size=10)  # small dummy population, same size as in dataset_definition_core.py
+
+# Population variables
+has_registration = practice_registrations.spanning(  # built from INTERVAL dates rather than a fixed study window
+    INTERVAL.start_date, INTERVAL.end_date
+).exists_for_patient()
+
+sex = patients.sex
+age = patients.age_on(INTERVAL.start_date)  # age at INTERVAL.start_date
+age_band = case(  # used as a group_by key of the adhd_prevalence measure
+    when((age >= 0) & (age < 20)).then("0-19"),  # NOTE(review): the denominator requires age >= 18, so presumably only ages 18-19 land here
+    when((age >= 20) & (age < 40)).then("20-39"),
+    when((age >= 40) & (age < 60)).then("40-59"),
+    when((age >= 60) & (age < 80)).then("60-79"),
+    when(age >= 80).then("80+"),
+    when(age.is_null()).then("Missing"),  # NOTE(review): null ages presumably fail the age >= 18 denominator filter - confirm this band is reachable
+)
+
+selected_events = clinical_events.where(  # clinical events dated within INTERVAL.start_date..INTERVAL.end_date
+    clinical_events.date.is_on_or_between(INTERVAL.start_date, INTERVAL.end_date)
+)
+
+selected_medications = medications.where(  # NOTE(review): not referenced again in this file, and neither is methylphenidate_codelist
+    medications.date.is_on_or_between(INTERVAL.start_date, INTERVAL.end_date)
+)
+
+has_adhd_event = selected_events.where(  # numerator of the adhd_prevalence measure
+    clinical_events.snomedct_code.is_in(adhd_codelist)
+).exists_for_patient()
+
+measures.define_measure(
+    name=f"adhd_prevalence",
+    numerator=has_adhd_event,
+    denominator=(
+        has_registration
+        & patients.sex.is_in(["male", "female"])
+        & (age >= 18)
+        & (age <= 120)
+        & patients.is_alive_on(INTERVAL.start_date)
+    ),
+    group_by={"sex": sex, "age_band": age_band},
+    intervals=years(7).starting_on("2016-01-01"),
+)
diff --git a/analysis/report.py b/analysis/report.py
diff --git a/project.yaml b/project.yaml
@@ -5,15 +5,18 @@ expectations:
   population_size: 5000
 
 actions:
-  generate_dataset_full:
-    run: ehrql:v1 generate-dataset analysis/dataset_definition_core.py --output output/full_dataset_test.csv.gz
+  generate_dataset:  # runs analysis/dataset_definition_core.py through ehrql generate-dataset
+    run: >
+      ehrql:v1 generate-dataset analysis/dataset_definition_core.py
+      --output output/adhd_dataset.csv.gz
     outputs:
       highly_sensitive:
-        full_dataset: output/full_dataset_test.csv.gz
+        adhd_dataset: output/adhd_dataset.csv.gz
 
-  generate_charts:
-    run: python:v2 python analysis/report.py
-    needs: [generate_dataset_full]
+  generate_adhd_prevalence:  # runs analysis/measures_definition.py through ehrql generate-measures
+    run: >
+      ehrql:v1 generate-measures analysis/measures_definition.py
+      --output output/adhd_prevalence.csv
     outputs:
       moderately_sensitive:
-        table1: output/results_saved.csv
+        measure: output/adhd_prevalence.csv