Merge pull request #33 from UW-GAC/phenotype/add-diabetes-variables

Update phenotype data model with new diabetes variables
UW-GAC · May 6, 2024 · 1cecccf
2 parents 8233894 + e26d444
commit 1cecccf
Show file tree

Hide file tree

Showing 3 changed files with 31 additions and 14 deletions.
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,3 @@
+.Rprofile
+renv/*
+renv.lock
diff --git a/PRIMED_phenotype_data_model.json b/PRIMED_phenotype_data_model.json
@@ -1,7 +1,7 @@
 {
   "name": "PRIMED Phenotype Data Model",
   "description": "Data model for phenotype data in the PRIMED consortium",
-  "version": "1.6",
+  "version": "1.7",
   "tables": [
     {
       "table": "subject",
@@ -835,7 +835,7 @@
     {
       "table": "diabetes_diabetes",
       "url": "https://docs.google.com/spreadsheets/d/1Zc1ALFmFI8kD_bn0n-u_HfRpZjOpSf10nM-lOuTiS6s/edit#gid=597696160",
-      "version": "1.0",
+      "version": "1.1",
       "columns": [
         {
           "column": "subject_id",
@@ -877,6 +877,20 @@
           "data_type": "enumeration",
           "enumerations": ["0", "1"],
           "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
+        },
+        {
+          "column": "t1d_dprism_1",
+          "description": "indicator of Type 1 Diabetes",
+          "data_type": "enumeration",
+          "enumerations": ["0", "1"],
+          "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
+        },
+        {
+          "column": "t2d_dprism_1",
+          "description": "indicator of Type 2 Diabetes",
+          "data_type": "enumeration",
+          "enumerations": ["0", "1"],
+          "notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
         }
       ]
     },

diff --git a/sheets_to_JSON_phenotype.R b/sheets_to_JSON_phenotype.R
@@ -9,7 +9,7 @@ library(jsonlite)
 url <- "https://docs.google.com/spreadsheets/d/1kpWz-6QfjMPVtm62fQwm4hoxzXhR0dnKxVt02fbx9ks"
 model_name <- "PRIMED Phenotype Data Model"
 model_description <- "Data model for phenotype data in the PRIMED consortium"
-model_version <-"1.5"
+model_version <-"1.7"
 
 # table metadata
 meta <- read_sheet(url, sheet="Description", skip=1, col_types="c") %>%
@@ -43,37 +43,37 @@ for (i in 1:length(tables)) {
                Description=gsub('\n', ' ', Description), # replace newline with space
                `Notes/comments`=gsub('"', "'", `Notes/comments`), # replace double with single quote
                `Notes/comments`=gsub('\n', ' ', `Notes/comments`)) # replace newline with space
-    
+
     if ("Primary key" %in% names(tmp)) {
         tmp <- tmp %>%
             rename(primary_key = `Primary key`)
     } else {
         tmp <- tmp %>%
             mutate(primary_key = ifelse(paste0(names(tables)[i], "_id") == Column, TRUE, NA))
     }
-    
+
     lookup <- c(
-        data_type = "Data type", 
+        data_type = "Data type",
         multi_value_delimiter = "Multi-value delimiter",
         notes = "Notes/comments"
     )
     tmp <- tmp %>%
         rename(any_of(lookup)) %>%
         rename_with(tolower)
-    
+
     keep_cols <- c(
-        "column", 
+        "column",
         "primary_key",
         "required",
-        "description", 
-        "data_type", 
+        "description",
+        "data_type",
         "min",
         "max",
-        "references", 
-        "enumerations", 
+        "references",
+        "enumerations",
         "is_bucket_path",
         "multi_value_delimiter",
-        "examples", 
+        "examples",
         "notes"
     )
     tables[[i]] <- tmp %>%
@@ -95,7 +95,7 @@ master <- list(
     name = model_name,
     description = model_description,
     version = model_version,
-    
+
     # Data Table Details
     tables = tab_list
 )