UW-GAC · smgogarten · Nov 7, 2023 · Oct 31, 2023
diff --git a/PRIMED_phenotype_data_model.json b/PRIMED_phenotype_data_model.json
@@ -1,7 +1,7 @@
 {
   "name": "PRIMED Phenotype Data Model",
   "description": "Data model for phenotype data in the PRIMED consortium",
-  "version": "1.4",
+  "version": "1.5",
   "tables": [
     {
       "table": "subject",
@@ -296,7 +296,9 @@
           "primary_key": true,
           "required": true,
           "description": "the age at which the observation or measurement for the phenotype(s) were taken",
-          "data_type": "float"
+          "data_type": "float",
+          "min": 0,
+          "max": 89
         },
         {
           "column": "visit",
@@ -442,6 +444,8 @@
           "required": true,
           "description": "the age at which the observation or measurement for the phenotype(s) were taken",
           "data_type": "float",
+          "min": 0,
+          "max": 89,
           "examples": "56.2"
         },
         {
@@ -455,18 +459,21 @@
           "column": "height_1",
           "description": "standing body height",
           "data_type": "float",
+          "min": 0,
           "examples": "165.1"
         },
         {
           "column": "weight_1",
           "description": "body weight at baseline",
           "data_type": "float",
+          "min": 0,
           "examples": "72.574"
         },
         {
           "column": "bmi_1",
           "description": "body mass index calculated",
           "data_type": "float",
+          "min": 0,
           "examples": "26.45"
         },
         {
@@ -496,6 +503,8 @@
           "required": true,
           "description": "the age at which the observation or measurement for the phenotype(s) were taken",
           "data_type": "float",
+          "min": 0,
+          "max": 89,
           "examples": "56.2"
         },
         {
@@ -546,6 +555,8 @@
           "required": true,
           "description": "the age at which the observation or measurement for the phenotype(s) were taken",
           "data_type": "float",
+          "min": 0,
+          "max": 89,
           "examples": "56.2"
         },
         {
@@ -606,6 +617,8 @@
           "required": true,
           "description": "the age at which the observation or measurement for the phenotype(s) were taken",
           "data_type": "float",
+          "min": 0,
+          "max": 89,
           "examples": "56.2"
         },
         {
@@ -712,6 +725,8 @@
           "required": true,
           "description": "the age at which the observation or measurement for the phenotype(s) were taken",
           "data_type": "float",
+          "min": 0,
+          "max": 89,
           "examples": "56.2"
         },
         {
@@ -763,6 +778,8 @@
           "required": true,
           "description": "the age at which the observation or measurement for the phenotype(s) were taken",
           "data_type": "float",
+          "min": 0,
+          "max": 89,
           "examples": "56.2"
         },
         {
@@ -849,6 +866,8 @@
           "required": true,
           "description": "the age at which the observation or measurement for the phenotype(s) were taken",
           "data_type": "float",
+          "min": 0,
+          "max": 89,
           "examples": "56.2"
         },
         {
@@ -877,6 +896,7 @@
     {
       "table": "cancer_breast",
       "url": "https://docs.google.com/spreadsheets/d/1Gfj_EoPuYWhiNk7AaR6DOPjeTC7Nn06sF8qcoMGQ9KM/edit#gid=0",
+      "version": "1.0",
       "columns": [
         {
           "column": "subject_id",
@@ -893,6 +913,8 @@
           "required": true,
           "description": "the age at which the observation or measurement for the phenotype(s) were taken",
           "data_type": "float",
+          "min": 0,
+          "max": 89,
           "examples": "56.2"
         },
         {
@@ -913,6 +935,8 @@
           "column": "age_at_diagnosis_1",
           "description": "age at which participant was diagnosed with breast cancer",
           "data_type": "float",
+          "min": 0,
+          "max": 89,
           "notes": "may be the same as age_at_obs or not"
         },
         {
@@ -1083,6 +1107,8 @@
           "column": "age_at_natural_menopause_1",
           "description": "age at which natural menopause occurred",
           "data_type": "float",
+          "min": 0,
+          "max": 89,
           "notes": "NA if menopause has not occurred"
         },
         {
@@ -1100,12 +1126,16 @@
         {
           "column": "age_at_first_birth_1",
           "description": "age at which the first birth occurred",
-          "data_type": "float"
+          "data_type": "float",
+          "min": 0,
+          "max": 89
         },
         {
           "column": "age_at_menarche_1",
           "description": "age at which menarche occurred",
-          "data_type": "float"
+          "data_type": "float",
+          "min": 0,
+          "max": 89
         },
         {
           "column": "deceased_1",
@@ -1125,13 +1155,16 @@
           "column": "age_at_death_1",
           "description": "age at death of individual",
           "data_type": "float",
+          "min": 0,
+          "max": 89,
           "notes": "Set to missing (either blank or NA) if individual is not deceased"
         }
       ]
     },
     {
       "table": "cancer_prostate",
       "url": "https://docs.google.com/spreadsheets/d/1Gfj_EoPuYWhiNk7AaR6DOPjeTC7Nn06sF8qcoMGQ9KM/edit#gid=1811888649",
+      "version": "1.0",
       "columns": [
         {
           "column": "subject_id",

diff --git a/sheets_to_JSON_phenotype.R b/sheets_to_JSON_phenotype.R
@@ -9,11 +9,11 @@ library(jsonlite)
 url <- "https://docs.google.com/spreadsheets/d/1kpWz-6QfjMPVtm62fQwm4hoxzXhR0dnKxVt02fbx9ks"
 model_name <- "PRIMED Phenotype Data Model"
 model_description <- "Data model for phenotype data in the PRIMED consortium"
-model_version <-"1.4"
+model_version <-"1.5"
 
 # table metadata
 meta <- read_sheet(url, sheet="Description", skip=1, col_types="c") %>%
-    select(table=Table, required=Required, url=Link, version=Version) %>%
+    select(table=Table, required=Required, url=Link, version=`Table version`) %>%
     filter(!is.na(url)) # only keep tables with links
 
 #table_names <- meta$table
@@ -51,30 +51,31 @@ for (i in 1:length(tables)) {
             mutate(primary_key = ifelse(paste0(names(tables)[i], "_id") == Column, TRUE, NA))
     }
 
-    if ("Multi-value delimiter" %in% names(tmp)) {
-        tables[[i]] <- tmp %>%
-            select(column = Column, 
-                   primary_key,
-                   required = Required,
-                   description = Description, 
-                   data_type = `Data type`, 
-                   references = References, 
-                   enumerations = Enumerations, 
-                   multi_value_delimiter = `Multi-value delimiter`,
-                   examples = Examples, 
-                   notes = `Notes/comments`)
-    } else {
-        tables[[i]] <- tmp %>%
-            select(column = Column, 
-                   primary_key,
-                   required = Required,
-                   description = Description, 
-                   data_type = `Data type`, 
-                   references = References, 
-                   enumerations = Enumerations, 
-                   examples = Examples, 
-                   notes = `Notes/comments`)
-    }
+    lookup <- c(
+        data_type = "Data type", 
+        multi_value_delimiter = "Multi-value delimiter",
+        notes = "Notes/comments"
+    )
+    tmp <- tmp %>%
+        rename(any_of(lookup)) %>%
+        rename_with(tolower)
+
+    keep_cols <- c(
+        "column", 
+        "primary_key",
+        "required",
+        "description", 
+        "data_type", 
+        "min",
+        "max",
+        "references", 
+        "enumerations", 
+        "multi_value_delimiter",
+        "examples", 
+        "notes"
+    )
+    tables[[i]] <- tmp %>%
+        select(any_of(keep_cols))
 }