generated from opensafely/research-template
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0825737
commit cc589aa
Showing
26 changed files
with
1,449 additions
and
759 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,328 @@ | ||
import json, sys | ||
from pathlib import Path | ||
|
||
from datetime import date, datetime | ||
from ehrql import Dataset, case, when, maximum_of, minimum_of, years, days | ||
from ehrql.tables.tpp import ( | ||
patients, | ||
medications, | ||
ons_deaths, | ||
addresses, | ||
clinical_events, | ||
practice_registrations, | ||
household_memberships_2020, | ||
vaccinations, | ||
apcs, | ||
emergency_care_attendances | ||
) | ||
|
||
from variable_lib import ( | ||
has_a_continuous_practice_registration_spanning, | ||
most_recent_bmi, | ||
practice_registration_as_of, | ||
emergency_care_diagnosis_matches, | ||
hospitalisation_diagnosis_matches | ||
) | ||
|
||
import codelists | ||
|
||
# ehrQL Dataset object for this dataset definition
dataset = Dataset()

#######################################################################################
# Import study dates defined in "./analysis/design/study-dates.R" script and then exported
# to JSON
#######################################################################################
study_dates = json.loads(
    Path("analysis/design/study-dates.json").read_text(),
)
|
||
# Positional command line arguments read below:
#   args[1]  cohort
#   args[2]  key in study_dates giving the study start date
#   args[3]  key in study_dates giving the study end date
#   args[4]  codelist type
#   args[5]  investigation type
args = sys.argv

# define dataset definition settings from command line arguments
cohort = args[1]
codelist_type = args[4]
investigation_type = args[5]

# Change these in ./analysis/design/study-dates.R if necessary
study_start_date = study_dates[args[2]]
study_end_date = study_dates[args[3]]
index_date = study_start_date
# one year before the index date
registration_date = index_date - years(1)
|
||
# define patients age (in years at study start/end, and in months at
# index date / study start)
age_at_start = patients.age_on(study_start_date)
age_at_end = patients.age_on(study_end_date)
age_months = (index_date - patients.date_of_birth).months
age_at_start_months = (study_start_date - patients.date_of_birth).months
|
||
# events occurring on or before index date
prior_events = clinical_events.where(clinical_events.date.is_on_or_before(index_date))
|
||
# query prior_events for existence of event-in-codelist
def has_prior_event(codelist, where=True):
    """Return whether the patient has any prior event with a code in `codelist`.

    Args:
        codelist: SNOMED CT codes matched against `prior_events.snomedct_code`.
        where: optional extra row filter applied to `prior_events`; the
            default `True` applies no additional restriction.

    Returns:
        The result of `exists_for_patient()` on the filtered events.
    """
    return (
        prior_events.where(where)
        .where(prior_events.snomedct_code.is_in(codelist))
        .exists_for_patient()
    )
|
||
# query prior_events for most recent event-in-codelist
def last_prior_event(codelist, where=True):
    """Return the most recent prior event with a code in `codelist`.

    The whole event row is returned, not just its date; callers read
    attributes such as `.date` or `.numeric_value` from the result.

    Args:
        codelist: SNOMED CT codes matched against `prior_events.snomedct_code`.
        where: optional extra row filter applied to `prior_events`; the
            default `True` applies no additional restriction.
    """
    return (
        prior_events.where(where)
        .where(prior_events.snomedct_code.is_in(codelist))
        .sort_by(clinical_events.date)
        .last_for_patient()
    )
|
||
# query prior_events for earliest event-in-codelist
def first_prior_event(codelist, where=True):
    """Return the earliest prior event with a code in `codelist`.

    The whole event row is returned, not just its date.

    Args:
        codelist: SNOMED CT codes matched against `prior_events.snomedct_code`.
        where: optional extra row filter applied to `prior_events`; the
            default `True` applies no additional restriction.
    """
    return (
        prior_events.where(where)
        .where(prior_events.snomedct_code.is_in(codelist))
        .sort_by(clinical_events.date)
        .first_for_patient()
    )
|
||
# meds occurring on or before index date
prior_meds = medications.where(medications.date.is_on_or_before(index_date))
|
||
# query prior_meds for existence of med-in-codelist
def has_prior_meds(codelist, where=True):
    """Return whether the patient has any prior medication with a code in `codelist`.

    Args:
        codelist: dm+d codes matched against `prior_meds.dmd_code`.
        where: optional extra row filter applied to `prior_meds`; the
            default `True` applies no additional restriction.

    Returns:
        The result of `exists_for_patient()` on the filtered medications.
    """
    return (
        prior_meds.where(where)
        .where(prior_meds.dmd_code.is_in(codelist))
        .exists_for_patient()
    )
|
||
# query prior_meds for most recent med-in-codelist
def last_prior_meds(codelist, where=True):
    """Return the most recent prior medication row with a code in `codelist`.

    Args:
        codelist: dm+d codes matched against `prior_meds.dmd_code`.
        where: optional extra row filter applied to `prior_meds`; the
            default `True` applies no additional restriction.
    """
    return (
        prior_meds.where(where)
        .where(prior_meds.dmd_code.is_in(codelist))
        .sort_by(medications.date)
        .last_for_patient()
    )
|
||
# query prior_meds for earliest med-in-codelist
def first_prior_meds(codelist, where=True):
    """Return the earliest prior medication row with a code in `codelist`.

    Args:
        codelist: dm+d codes matched against `prior_meds.dmd_code`.
        where: optional extra row filter applied to `prior_meds`; the
            default `True` applies no additional restriction.
    """
    return (
        prior_meds.where(where)
        .where(prior_meds.dmd_code.is_in(codelist))
        .sort_by(medications.date)
        .first_for_patient()
    )
|
||
##comorbidities for secondary investigation | ||
|
||
def filter_codes_by_category(codelist, include):
    """Return the entries of a categorised codelist whose category is in `include`.

    Args:
        codelist: mapping of code -> category.
        include: container of categories to keep.

    Returns:
        A new dict with only the matching code -> category pairs; the input
        mapping is not modified.
    """
    return {
        code: category
        for code, category in codelist.items()
        if category in include
    }
|
||
#lifestyle | ||
|
||
|
||
# smoking

###############################################################################
# from https://github.com/opensafely/early-inflammatory-arthritis/blob/069e61712fcc9a0c2ec2804ff36a9b773073291c/analysis/dataset_definition.py#L136
###############################################################################

# NOTE(review): both smoking queries read clinical_events directly, so they are
# not limited to events on or before index_date like the prior_events helpers -
# confirm this is intended.

# category of the most recent smoking code recorded for the patient
most_recent_smoking_code = (
    clinical_events.where(
        clinical_events.ctv3_code.is_in(codelists.clear_smoking_codes)
    )
    .sort_by(clinical_events.date)
    .last_for_patient()
    .ctv3_code.to_category(codelists.clear_smoking_codes)
)

# whether any code categorised as "S" or "E" has ever been recorded
ever_smoked = clinical_events.where(
    clinical_events.ctv3_code.is_in(
        filter_codes_by_category(codelists.clear_smoking_codes, include=["S", "E"])
    )
).exists_for_patient()

# "S" when the latest code is "S"; "E" when the latest code is "E", or is "N"
# but an "S"/"E" code was recorded at some point; "N" when the latest code is
# "N" and no "S"/"E" code exists; "M" otherwise
smoking_status = case(
    when(most_recent_smoking_code == "S").then("S"),
    when(
        (most_recent_smoking_code == "E")
        | ((most_recent_smoking_code == "N") & (ever_smoked == True))
    ).then("E"),
    when((most_recent_smoking_code == "N") & (ever_smoked == False)).then("N"),
    otherwise="M",
)
|
||
# drinking
hazardous_drinking = has_prior_event(codelists.drinking_codelist)

# drug usage: any prior event from the usage, intervention or
# assessment-declination codelists
drug_usage = has_prior_event(
    codelists.drug_usage_codelist
    + codelists.drug_intervention_codelist
    + codelists.drug_assessment_declination_codelist
)
|
||
# medication date: one year before the index date, used as the lower bound of
# the oral asthma medication window below
medication_date = index_date - years(1)

# reactive airway disease diagnosis
has_reactive_airway = has_prior_event(codelists.reactive_airway_disease_code)

# has asthma if there is an asthma diagnosis and either an oral asthma
# medication prescribed between medication_date and index_date, or an inhaled
# asthma medication prescribed on or before index_date
# NOTE(review): the inhaled-medication arm is not restricted to the last year,
# unlike the oral arm - confirm whether it should share the same window
has_asthma = (
    has_prior_event(codelists.asthma_codelist)
    & (
        has_prior_meds(
            codelists.asthma_oral_medications,
            where=medications.date.is_on_or_between(medication_date, index_date),
        )
        | has_prior_meds(codelists.asthma_inhaled_medications)
    )
)
|
||
# copd diagnosis: a prior COPD code, a prior COPD medication, and the most
# recent COPD code is not superseded by a later "COPD resolved" code
has_copd = (
    has_prior_event(codelists.copd_codelist)
    & has_prior_meds(codelists.copd_medications)
    & (
        # No resolved code at all: without this arm the date comparison below
        # is NULL, which would make the whole expression NULL instead of True
        # for patients whose COPD was never recorded as resolved.
        last_prior_event(codelists.copd_resolved_codelist).date.is_null()
        | last_prior_event(codelists.copd_codelist).date.is_on_or_after(
            last_prior_event(codelists.copd_resolved_codelist).date
        )
    )
)
|
||
# pulmonary fibrosis diagnosis
has_pulmonary_fibrosis = has_prior_event(codelists.pulmonary_fibrosis_codelist)

# cystic fibrosis diagnosis
# NOTE(review): matched on ctv3_code across all clinical_events, i.e. not
# limited to events on or before index_date - confirm this is intended
has_cystic_fibrosis = clinical_events.where(
    clinical_events.ctv3_code.is_in(codelists.cystic_fibrosis_codelist)
).exists_for_patient()
|
||
# diabetes diagnosis
# date of the most recent diabetes code and of the most recent
# diabetes-resolved code on or before index_date
diab_date = last_prior_event(codelists.diabetes_codelist).date
dmres_date = last_prior_event(codelists.diabetes_resolved_codelist).date
# diabetic when the latest diabetes code is after the latest resolved code, or
# when there is a diabetes code and no resolved code at all
has_diabetes = case(
    when(dmres_date < diab_date).then(True),
    when(diab_date.is_not_null() & dmres_date.is_null()).then(True),
    otherwise=False,
)
|
||
# addison's disease diagnosis
# Uses the shared helper: has_prior_event applies the same SNOMED CT code
# filter to events on or before index_date.
has_addisons = has_prior_event(codelists.addisons_codelist)
|
||
# Calculate BMI

###############################################################################
# from https://github.com/opensafely/comparative-booster-spring2023/blob/main/analysis/dataset_definition.py
###############################################################################

# BMI
bmi_measurement = most_recent_bmi(
    where=clinical_events.date.is_after(index_date - years(5)),
    minimum_age_at_measurement=16,
)
bmi_value = bmi_measurement.numeric_value
bmi = case(
    # include this here to ensure this value is the 1st level in the factor
    when(bmi_value < 30).then("Not obese"),
    when((bmi_value >= 30.0) & (bmi_value < 35.0)).then("Obese I (30-34.9)"),
    when((bmi_value >= 35.0) & (bmi_value < 40.0)).then("Obese II (35-39.9)"),
    # Set maximum to avoid any impossibly extreme values being classified as obese
    when((bmi_value >= 40.0) & (bmi_value < 100.0)).then("Obese III (40+)"),
    # assume missing is non-obese (values of 100 or more also land here)
    otherwise="Not obese",
)
|
||
# Severe Obesity
# most recent BMI stage event on or before index_date
bmi_stage_event = last_prior_event(codelists.bmi_stage_codelist)
# most recent severe obesity event dated on or after the BMI stage event,
# ignoring rows whose numeric value is exactly 0
sev_obesity_event = last_prior_event(
    codelists.severe_obesity_codelist,
    where=(
        (clinical_events.date >= bmi_stage_event.date)
        & (clinical_events.numeric_value != 0.0)
    ),
)
# most recent BMI event, ignoring rows whose numeric value is exactly 0
bmi_event = last_prior_event(
    codelists.bmi_codelist,
    where=(clinical_events.numeric_value != 0.0),
)
# severely obese when the severe obesity event is later than the latest BMI
# event, or when the latest BMI value is 40 or more
severe_obesity = case(
    when(sev_obesity_event.date > bmi_event.date).then(True),
    when(bmi_event.numeric_value >= 40.0).then(True),
    otherwise=False,
)
|
||
# Chronic Heart Disease
has_chd = has_prior_event(codelists.chd_codelist)
|
||
# Chronic Kidney Disease

###############################################################################
# from https://github.com/opensafely/comparative-booster-spring2023/blob/main/analysis/dataset_definition.py
###############################################################################

# chronic kidney disease diagnostic codes
ckd = has_prior_event(codelists.ckd_codelist)
# chronic kidney disease codes - all stages
ckd15_date = last_prior_event(codelists.ckd15_codelist).date
# chronic kidney disease codes - stages 3 - 5
ckd35_date = last_prior_event(codelists.ckd35_codelist).date
# CKD when there is a diagnostic code, or when the latest stage 3-5 code is on
# or after the latest any-stage code
has_ckd = case(
    when(ckd).then(True),
    when(ckd35_date >= ckd15_date).then(True),
    otherwise=False,
)
|
||
# Chronic Liver Disease
has_cld = has_prior_event(codelists.cld_codelist)

# Chronic Neurological Disease including Significant Learning Disorder
has_cnd = has_prior_event(codelists.cnd_codelist)

# Chronic Respiratory Disease
has_crd = has_prior_event(codelists.crd_codelist)
|
||
# Cancer within 3 years: any cancer, haematological cancer or lung cancer code
# dated after (index_date - 3 years) and on or before index_date
has_cancer = has_prior_event(
    codelists.cancer_codelist
    + codelists.haemotalogical_cancer_codelist
    + codelists.lung_cancer_codelist,
    where=clinical_events.date.is_after(index_date - years(3)),
)
|
||
# Immunosuppression

###############################################################################
# from https://github.com/opensafely/comparative-booster-spring2023/blob/main/analysis/dataset_definition.py
###############################################################################

# Immunosuppression diagnosis (any time on or before index_date)
immdx = has_prior_event(codelists.immunosuppression_diagnosis_codelist)

# Immunosuppression medication in the 3 years up to index_date
immrx = has_prior_meds(
    codelists.immunosuppression_medications_codelist,
    where=medications.date.is_on_or_after(index_date - years(3)),
)

# Immunosuppression admin code in the 3 years up to index_date
immadm = has_prior_event(
    codelists.immunosuppression_admin_codelist,
    where=clinical_events.date.is_on_or_after(index_date - years(3)),
)

# Chemotherapy code in the 3 years up to index_date
dxt_chemo = has_prior_event(
    codelists.chemo_codelist,
    where=clinical_events.date.is_on_or_after(index_date - years(3)),
)

# Immunosuppression group: any of the four flags above
immunosuppressed = immdx | immrx | immadm | dxt_chemo
|
||
# Sickle Cell Disease
has_sickle_cell = has_prior_event(codelists.sickle_cell_codelist)

# Heart Failure
has_heart_failure = has_prior_event(codelists.heart_failure_codelist)

# Prior MI
has_prior_mi = has_prior_event(codelists.prior_mi_codelist)
Oops, something went wrong.