From be9d6702992210e4d8d9bf854d8002425768e201 Mon Sep 17 00:00:00 2001 From: Alyna Khan Date: Tue, 21 Nov 2023 11:20:31 -0800 Subject: [PATCH 1/3] update cancer variable names and cad ehr variable name --- PRIMED_phenotype_data_model.json | 50 ++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/PRIMED_phenotype_data_model.json b/PRIMED_phenotype_data_model.json index f11ffb2..7ff03f4 100755 --- a/PRIMED_phenotype_data_model.json +++ b/PRIMED_phenotype_data_model.json @@ -147,6 +147,12 @@ "required": true, "description": "Number of rows in file (may be > n_subjects for longitudinal data)", "data_type": "integer" + }, + { + "column": "data_model_version", + "description": "data model version for this table", + "data_type": "float", + "notes": "added automatically by validation workflow" } ] }, @@ -855,8 +861,8 @@ "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA." }, { - "column": "cad_ehr_1", - "description": "ehr-defined variable", + "column": "cad_emerge_1", + "description": "indicator of coronary artery disease event occurrence using the eMERGE EHR-based definition", "data_type": "enumeration", "enumerations": ["0", "1"], "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA." @@ -892,8 +898,22 @@ "notes": "this can be any value that is used consistently within the study" }, { - "column": "breast_cancer_status_1", - "description": "whether or not the participant had breast cancer", + "column": "breast_cancer_status_emerge_1", + "description": "whether or not the participant had breast cancer using the eMERGE EHR-based definition and instructions", + "data_type": "enumeration", + "enumerations": ["0", "1"], + "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA." 
+ }, + { + "column": "breast_cancer_status_registry_1", + "description": "whether or not the participant had breast cancer as reported in a registry", + "data_type": "enumeration", + "enumerations": ["0", "1"], + "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA." + }, + { + "column": "breast_cancer_status_survey_1", + "description": "whether or not the participant had breast cancer as reported in a survey or self-report", "data_type": "enumeration", "enumerations": ["0", "1"], "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA." @@ -907,7 +927,7 @@ { "column": "year_at_diagnosis_1", "description": "year participant was diagnosed", - "data_type": "float", + "data_type": "integer", "examples": "1999" }, { @@ -993,7 +1013,7 @@ "enumerations": ["MX", "M0", "M1"] }, { - "column": "stage_system", + "column": "stage_system_1", "description": "definition of staging system used (e.g., SEER, AJCC) and time period (e.g., year), if applicable", "data_type": "string", "examples": ["AJCC 2003", "SEER 1999"], @@ -1146,6 +1166,13 @@ "examples": ["visit_2", "baseline", "median"], "notes": "this can be any value that is used consistently within the study" }, + { + "column": "prostate_cancer_status_emerge_1", + "description": "whether or not the participant had prostate cancer using the eMERGE EHR-based definition and instructions", + "data_type": "enumeration", + "enumerations": ["0", "1"], + "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA." 
+ }, { "column": "prostate_cancer_status_1", "description": "whether or not the participant had prostate cancer", @@ -1160,9 +1187,9 @@ "notes": "may be the same as age_at_obs or not" }, { - "column": "year_at_diagnosis", + "column": "year_at_diagnosis_1", "description": "year participant was diagnosed", - "data_type": "float", + "data_type": "integer", "examples": "1999" }, { @@ -1224,7 +1251,7 @@ "enumerations": ["MX", "M0", "M1"] }, { - "column": "stage_system", + "column": "stage_system_1", "description": "definition of staging system (e.g., SEER, AJCC) and time period (e.g., year), if applicable", "data_type": "string", "examples": ["AJCC 2003", "SEER 1999"], @@ -1251,6 +1278,11 @@ "enumerations": ["2", "3", "4", "5", "6", "7", "8", "9", "10"], "notes": "missing values can either be left blank or set to NA" }, + { + "column": "psa_1", + "description": "psa score at current visit", + "data_type": "float" + }, { "column": "psa_at_diagnosis_1", "description": "psa score at diagnosis of prostate cancer", From d3f8a0868f4c33fdcf109646f550c27ec59ca7de Mon Sep 17 00:00:00 2001 From: Alyna Khan Date: Tue, 21 Nov 2023 11:57:33 -0800 Subject: [PATCH 2/3] added registry- and survey-specific designation to prostate cancer status variables --- PRIMED_phenotype_data_model.json | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/PRIMED_phenotype_data_model.json b/PRIMED_phenotype_data_model.json index 7fbfc91..0c1ceda 100755 --- a/PRIMED_phenotype_data_model.json +++ b/PRIMED_phenotype_data_model.json @@ -1224,8 +1224,15 @@ "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA." 
}, { - "column": "prostate_cancer_status_1", - "description": "whether or not the participant had prostate cancer", + "column": "prostate_cancer_status_registry_1", + "description": "whether or not the participant had prostate cancer as reported in a registry", + "data_type": "enumeration", + "enumerations": ["0", "1"], + "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA." + }, + { + "column": "prostate_cancer_status_survey_1", + "description": "whether or not the participant had prostate cancer as reported in a survey or self-report", "data_type": "enumeration", "enumerations": ["0", "1"], "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA." From aaaa5cb911b51bada46a22c9a85a210ffb0fbe5b Mon Sep 17 00:00:00 2001 From: Alyna Khan Date: Tue, 21 Nov 2023 13:42:37 -0800 Subject: [PATCH 3/3] add cancer table names to list of pheno tables --- PRIMED_phenotype_data_model.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/PRIMED_phenotype_data_model.json b/PRIMED_phenotype_data_model.json index 0c1ceda..97f1e12 100755 --- a/PRIMED_phenotype_data_model.json +++ b/PRIMED_phenotype_data_model.json @@ -117,7 +117,7 @@ "column": "domain", "required": true, "data_type": "enumeration", - "enumerations": ["pilot", "population_descriptor", "cmqt_flags", "cmqt_anthropometry", "cmqt_blood_pressure", "cmqt_lipids", "cmqt_hematology", "cmqt_glycemic", "cmqt_kidney_function", "diabetes_diabetes", "cvd_cad"] + "enumerations": ["pilot", "population_descriptor", "cmqt_flags", "cmqt_anthropometry", "cmqt_blood_pressure", "cmqt_lipids", "cmqt_hematology", "cmqt_glycemic", "cmqt_kidney_function", "diabetes_diabetes", "cvd_cad", "cancer_breast", "cancer_prostate"] }, { "column": "md5sum",