Skip to content

Commit

Permalink
Merge pull request #26 from UW-GAC/min_max
Browse files Browse the repository at this point in the history
add min and max values for some columns
  • Loading branch information
smgogarten authored Nov 7, 2023
2 parents 96e7aea + 210600f commit d00a894
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 30 deletions.
41 changes: 37 additions & 4 deletions PRIMED_phenotype_data_model.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "PRIMED Phenotype Data Model",
"description": "Data model for phenotype data in the PRIMED consortium",
"version": "1.4",
"version": "1.5",
"tables": [
{
"table": "subject",
Expand Down Expand Up @@ -296,7 +296,9 @@
"primary_key": true,
"required": true,
"description": "the age at which the observation or measurement for the phenotype(s) were taken",
"data_type": "float"
"data_type": "float",
"min": 0,
"max": 89
},
{
"column": "visit",
Expand Down Expand Up @@ -442,6 +444,8 @@
"required": true,
"description": "the age at which the observation or measurement for the phenotype(s) were taken",
"data_type": "float",
"min": 0,
"max": 89,
"examples": "56.2"
},
{
Expand All @@ -455,18 +459,21 @@
"column": "height_1",
"description": "standing body height",
"data_type": "float",
"min": 0,
"examples": "165.1"
},
{
"column": "weight_1",
"description": "body weight at baseline",
"data_type": "float",
"min": 0,
"examples": "72.574"
},
{
"column": "bmi_1",
"description": "body mass index calculated",
"data_type": "float",
"min": 0,
"examples": "26.45"
},
{
Expand Down Expand Up @@ -496,6 +503,8 @@
"required": true,
"description": "the age at which the observation or measurement for the phenotype(s) were taken",
"data_type": "float",
"min": 0,
"max": 89,
"examples": "56.2"
},
{
Expand Down Expand Up @@ -546,6 +555,8 @@
"required": true,
"description": "the age at which the observation or measurement for the phenotype(s) were taken",
"data_type": "float",
"min": 0,
"max": 89,
"examples": "56.2"
},
{
Expand Down Expand Up @@ -606,6 +617,8 @@
"required": true,
"description": "the age at which the observation or measurement for the phenotype(s) were taken",
"data_type": "float",
"min": 0,
"max": 89,
"examples": "56.2"
},
{
Expand Down Expand Up @@ -712,6 +725,8 @@
"required": true,
"description": "the age at which the observation or measurement for the phenotype(s) were taken",
"data_type": "float",
"min": 0,
"max": 89,
"examples": "56.2"
},
{
Expand Down Expand Up @@ -763,6 +778,8 @@
"required": true,
"description": "the age at which the observation or measurement for the phenotype(s) were taken",
"data_type": "float",
"min": 0,
"max": 89,
"examples": "56.2"
},
{
Expand Down Expand Up @@ -849,6 +866,8 @@
"required": true,
"description": "the age at which the observation or measurement for the phenotype(s) were taken",
"data_type": "float",
"min": 0,
"max": 89,
"examples": "56.2"
},
{
Expand Down Expand Up @@ -877,6 +896,7 @@
{
"table": "cancer_breast",
"url": "https://docs.google.com/spreadsheets/d/1Gfj_EoPuYWhiNk7AaR6DOPjeTC7Nn06sF8qcoMGQ9KM/edit#gid=0",
"version": "1.0",
"columns": [
{
"column": "subject_id",
Expand All @@ -893,6 +913,8 @@
"required": true,
"description": "the age at which the observation or measurement for the phenotype(s) were taken",
"data_type": "float",
"min": 0,
"max": 89,
"examples": "56.2"
},
{
Expand All @@ -913,6 +935,8 @@
"column": "age_at_diagnosis_1",
"description": "age at which participant was diagnosed with breast cancer",
"data_type": "float",
"min": 0,
"max": 89,
"notes": "may be the same as age_at_obs or not"
},
{
Expand Down Expand Up @@ -1083,6 +1107,8 @@
"column": "age_at_natural_menopause_1",
"description": "age at which natural menopause occurred",
"data_type": "float",
"min": 0,
"max": 89,
"notes": "NA if menopause has not occurred"
},
{
Expand All @@ -1100,12 +1126,16 @@
{
"column": "age_at_first_birth_1",
"description": "age at which the first birth occurred",
"data_type": "float"
"data_type": "float",
"min": 0,
"max": 89
},
{
"column": "age_at_menarche_1",
"description": "age at which menarche occurred",
"data_type": "float"
"data_type": "float",
"min": 0,
"max": 89
},
{
"column": "deceased_1",
Expand All @@ -1125,13 +1155,16 @@
"column": "age_at_death_1",
"description": "age at death of individual",
"data_type": "float",
"min": 0,
"max": 89,
"notes": "Set to missing (either blank or NA) if individual is not deceased"
}
]
},
{
"table": "cancer_prostate",
"url": "https://docs.google.com/spreadsheets/d/1Gfj_EoPuYWhiNk7AaR6DOPjeTC7Nn06sF8qcoMGQ9KM/edit#gid=1811888649",
"version": "1.0",
"columns": [
{
"column": "subject_id",
Expand Down
53 changes: 27 additions & 26 deletions sheets_to_JSON_phenotype.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,11 @@ library(jsonlite)
url <- "https://docs.google.com/spreadsheets/d/1kpWz-6QfjMPVtm62fQwm4hoxzXhR0dnKxVt02fbx9ks"
model_name <- "PRIMED Phenotype Data Model"
model_description <- "Data model for phenotype data in the PRIMED consortium"
model_version <-"1.4"
model_version <-"1.5"

# table metadata
meta <- read_sheet(url, sheet="Description", skip=1, col_types="c") %>%
select(table=Table, required=Required, url=Link, version=Version) %>%
select(table=Table, required=Required, url=Link, version=`Table version`) %>%
filter(!is.na(url)) # only keep tables with links

#table_names <- meta$table
Expand Down Expand Up @@ -51,30 +51,31 @@ for (i in 1:length(tables)) {
mutate(primary_key = ifelse(paste0(names(tables)[i], "_id") == Column, TRUE, NA))
}

if ("Multi-value delimiter" %in% names(tmp)) {
tables[[i]] <- tmp %>%
select(column = Column,
primary_key,
required = Required,
description = Description,
data_type = `Data type`,
references = References,
enumerations = Enumerations,
multi_value_delimiter = `Multi-value delimiter`,
examples = Examples,
notes = `Notes/comments`)
} else {
tables[[i]] <- tmp %>%
select(column = Column,
primary_key,
required = Required,
description = Description,
data_type = `Data type`,
references = References,
enumerations = Enumerations,
examples = Examples,
notes = `Notes/comments`)
}
# Map of new_name = "sheet header" for the headers that cannot be produced
# by simply lower-casing (they contain spaces or a slash).
lookup <- c(
data_type = "Data type",
multi_value_delimiter = "Multi-value delimiter",
notes = "Notes/comments"
)
# any_of() renames only the headers actually present in this sheet, so a
# table without e.g. "Multi-value delimiter" does not raise an error.
# All remaining column names are then lower-cased.
tmp <- tmp %>%
rename(any_of(lookup)) %>%
rename_with(tolower)

# Columns to carry into the output for each table, in output order.
keep_cols <- c(
"column",
"primary_key",
"required",
"description",
"data_type",
"min",
"max",
"references",
"enumerations",
"multi_value_delimiter",
"examples",
"notes"
)
# any_of() keeps whichever of keep_cols exist in tmp and silently ignores
# the rest; columns not listed in keep_cols are dropped.
tables[[i]] <- tmp %>%
select(any_of(keep_cols))
}


Expand Down

0 comments on commit d00a894

Please sign in to comment.