Skip to content

Commit

Permalink
Merge pull request #33 from UW-GAC/phenotype/add-diabetes-variables
Browse files (browse the repository at this point in the history)
Update phenotype data model with new diabetes variables
  • Loading branch information
amstilp authored May 6, 2024
2 parents 8233894 + e26d444 commit 1cecccf
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 14 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
.Rprofile
renv/*
renv.lock
18 changes: 16 additions & 2 deletions PRIMED_phenotype_data_model.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "PRIMED Phenotype Data Model",
"description": "Data model for phenotype data in the PRIMED consortium",
"version": "1.6",
"version": "1.7",
"tables": [
{
"table": "subject",
Expand Down Expand Up @@ -835,7 +835,7 @@
{
"table": "diabetes_diabetes",
"url": "https://docs.google.com/spreadsheets/d/1Zc1ALFmFI8kD_bn0n-u_HfRpZjOpSf10nM-lOuTiS6s/edit#gid=597696160",
"version": "1.0",
"version": "1.1",
"columns": [
{
"column": "subject_id",
Expand Down Expand Up @@ -877,6 +877,20 @@
"data_type": "enumeration",
"enumerations": ["0", "1"],
"notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
},
{
"column": "t1d_dprism_1",
"description": "indicator of Type 1 Diabetes",
"data_type": "enumeration",
"enumerations": ["0", "1"],
"notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
},
{
"column": "t2d_dprism_1",
"description": "indicator of Type 2 Diabetes",
"data_type": "enumeration",
"enumerations": ["0", "1"],
"notes": "0 = 'no' 1 = 'yes' Missing values can either be left blank or set to NA."
}
]
},
Expand Down
24 changes: 12 additions & 12 deletions sheets_to_JSON_phenotype.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ library(jsonlite)
url <- "https://docs.google.com/spreadsheets/d/1kpWz-6QfjMPVtm62fQwm4hoxzXhR0dnKxVt02fbx9ks"
model_name <- "PRIMED Phenotype Data Model"
model_description <- "Data model for phenotype data in the PRIMED consortium"
model_version <-"1.5"
model_version <-"1.7"

# table metadata
meta <- read_sheet(url, sheet="Description", skip=1, col_types="c") %>%
Expand Down Expand Up @@ -43,37 +43,37 @@ for (i in 1:length(tables)) {
Description=gsub('\n', ' ', Description), # replace newline with space
`Notes/comments`=gsub('"', "'", `Notes/comments`), # replace double with single quote
`Notes/comments`=gsub('\n', ' ', `Notes/comments`)) # replace newline with space

if ("Primary key" %in% names(tmp)) {
tmp <- tmp %>%
rename(primary_key = `Primary key`)
} else {
tmp <- tmp %>%
mutate(primary_key = ifelse(paste0(names(tables)[i], "_id") == Column, TRUE, NA))
}

lookup <- c(
data_type = "Data type",
data_type = "Data type",
multi_value_delimiter = "Multi-value delimiter",
notes = "Notes/comments"
)
tmp <- tmp %>%
rename(any_of(lookup)) %>%
rename_with(tolower)

keep_cols <- c(
"column",
"column",
"primary_key",
"required",
"description",
"data_type",
"description",
"data_type",
"min",
"max",
"references",
"enumerations",
"references",
"enumerations",
"is_bucket_path",
"multi_value_delimiter",
"examples",
"examples",
"notes"
)
tables[[i]] <- tmp %>%
Expand All @@ -95,7 +95,7 @@ master <- list(
name = model_name,
description = model_description,
version = model_version,

# Data Table Details
tables = tab_list
)
Expand Down

0 comments on commit 1cecccf

Please sign in to comment.