From be9d6702992210e4d8d9bf854d8002425768e201 Mon Sep 17 00:00:00 2001
From: Alyna Khan <alynak@uw.edu>
Date: Tue, 21 Nov 2023 11:20:31 -0800
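Subject: [PATCH 1/3] update cancer variable names and cad ehr variable name

Also add the data_model_version column, add psa_1 and
prostate_cancer_status_emerge_1, rename stage_system to stage_system_1 and
year_at_diagnosis to year_at_diagnosis_1, and change the year_at_diagnosis_1
data_type from float to integer.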

---
 PRIMED_phenotype_data_model.json | 50 ++++++++++++++++++++++++++------
 1 file changed, 41 insertions(+), 9 deletions(-)

diff --git a/PRIMED_phenotype_data_model.json b/PRIMED_phenotype_data_model.json
index f11ffb2..7ff03f4 100755
--- a/PRIMED_phenotype_data_model.json
+++ b/PRIMED_phenotype_data_model.json
@@ -147,6 +147,12 @@
           "required": true,
           "description": "Number of rows in file (may be > n_subjects for longitudinal data)",
           "data_type": "integer"
+        },
+        {
+          "column": "data_model_version",
+          "description": "data model version for this table",
+          "data_type": "float",
+          "notes": "added automatically by validation workflow"
         }
       ]
     },
@@ -855,8 +861,8 @@
           "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
         },
         {
-          "column": "cad_ehr_1",
-          "description": "ehr-defined variable",
+          "column": "cad_emerge_1",
+          "description": "indicator of coronary artery disease event occurrence using the eMERGE EHR-based definition",
           "data_type": "enumeration",
           "enumerations": ["0", "1"],
           "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
@@ -892,8 +898,22 @@
           "notes": "this can be any value that is used consistently within the study"
         },
         {
-          "column": "breast_cancer_status_1",
-          "description": "whether or not the participant had breast cancer",
+          "column": "breast_cancer_status_emerge_1",
+          "description": "whether or not the participant had breast cancer using the eMERGE EHR-based definition and instructions",
+          "data_type": "enumeration",
+          "enumerations": ["0", "1"],
+          "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
+        },
+        {
+          "column": "breast_cancer_status_registry_1",
+          "description": "whether or not the participant had breast cancer as reported in a registry",
+          "data_type": "enumeration",
+          "enumerations": ["0", "1"],
+          "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
+        },
+        {
+          "column": "breast_cancer_status_survey_1",
+          "description": "whether or not the participant had breast cancer as reported in a survey or self-report",
           "data_type": "enumeration",
           "enumerations": ["0", "1"],
           "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
@@ -907,7 +927,7 @@
         {
           "column": "year_at_diagnosis_1",
           "description": "year participant was diagnosed",
-          "data_type": "float",
+          "data_type": "integer",
           "examples": "1999"
         },
         {
@@ -993,7 +1013,7 @@
           "enumerations": ["MX", "M0", "M1"]
         },
         {
-          "column": "stage_system",
+          "column": "stage_system_1",
           "description": "definition of staging system used (e.g., SEER, AJCC) and time period (e.g., year), if applicable",
           "data_type": "string",
           "examples": ["AJCC 2003", "SEER 1999"],
@@ -1146,6 +1166,13 @@
           "examples": ["visit_2", "baseline", "median"],
           "notes": "this can be any value that is used consistently within the study"
         },
+        {
+          "column": "prostate_cancer_status_emerge_1",
+          "description": "whether or not the participant had prostate cancer using the eMERGE EHR-based definition and instructions",
+          "data_type": "enumeration",
+          "enumerations": ["0", "1"],
+          "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
+        },
         {
           "column": "prostate_cancer_status_1",
           "description": "whether or not the participant had prostate cancer",
@@ -1160,9 +1187,9 @@
           "notes": "may be the same as age_at_obs or not"
         },
         {
-          "column": "year_at_diagnosis",
+          "column": "year_at_diagnosis_1",
           "description": "year participant was diagnosed",
-          "data_type": "float",
+          "data_type": "integer",
           "examples": "1999"
         },
         {
@@ -1224,7 +1251,7 @@
           "enumerations": ["MX", "M0", "M1"]
         },
         {
-          "column": "stage_system",
+          "column": "stage_system_1",
           "description": "definition of staging system (e.g., SEER, AJCC) and time period (e.g., year), if applicable",
           "data_type": "string",
           "examples": ["AJCC 2003", "SEER 1999"],
@@ -1251,6 +1278,11 @@
           "enumerations": ["2", "3", "4", "5", "6", "7", "8", "9", "10"],
           "notes": "missing values can either be left blank or set to NA"
         },
+        {
+          "column": "psa_1",
+          "description": "psa score at current visit",
+          "data_type": "float"
+        },
         {
           "column": "psa_at_diagnosis_1",
           "description": "psa score at diagnosis of prostate cancer",

From d3f8a0868f4c33fdcf109646f550c27ec59ca7de Mon Sep 17 00:00:00 2001
From: Alyna Khan <alynak@uw.edu>
Date: Tue, 21 Nov 2023 11:57:33 -0800
Subject: [PATCH 2/3] add registry- and survey-specific designation to
 prostate cancer status variables

---
 PRIMED_phenotype_data_model.json | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/PRIMED_phenotype_data_model.json b/PRIMED_phenotype_data_model.json
index 7fbfc91..0c1ceda 100755
--- a/PRIMED_phenotype_data_model.json
+++ b/PRIMED_phenotype_data_model.json
@@ -1224,8 +1224,15 @@
           "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
         },
         {
-          "column": "prostate_cancer_status_1",
-          "description": "whether or not the participant had prostate cancer",
+          "column": "prostate_cancer_status_registry_1",
+          "description": "whether or not the participant had prostate cancer as reported in a registry",
+          "data_type": "enumeration",
+          "enumerations": ["0", "1"],
+          "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
+        },
+        {
+          "column": "prostate_cancer_status_survey_1",
+          "description": "whether or not the participant had prostate cancer as reported in a survey or self-report",
           "data_type": "enumeration",
           "enumerations": ["0", "1"],
           "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."

From aaaa5cb911b51bada46a22c9a85a210ffb0fbe5b Mon Sep 17 00:00:00 2001
From: Alyna Khan <alynak@uw.edu>
Date: Tue, 21 Nov 2023 13:42:37 -0800
Subject: [PATCH 3/3] add cancer table names to list of pheno tables

---
 PRIMED_phenotype_data_model.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/PRIMED_phenotype_data_model.json b/PRIMED_phenotype_data_model.json
index 0c1ceda..97f1e12 100755
--- a/PRIMED_phenotype_data_model.json
+++ b/PRIMED_phenotype_data_model.json
@@ -117,7 +117,7 @@
           "column": "domain",
           "required": true,
           "data_type": "enumeration",
-          "enumerations": ["pilot", "population_descriptor", "cmqt_flags", "cmqt_anthropometry", "cmqt_blood_pressure", "cmqt_lipids", "cmqt_hematology", "cmqt_glycemic", "cmqt_kidney_function", "diabetes_diabetes", "cvd_cad"]
+          "enumerations": ["pilot", "population_descriptor", "cmqt_flags", "cmqt_anthropometry", "cmqt_blood_pressure", "cmqt_lipids", "cmqt_hematology", "cmqt_glycemic", "cmqt_kidney_function", "diabetes_diabetes", "cvd_cad", "cancer_breast", "cancer_prostate"]
         },
         {
           "column": "md5sum",