From 210600fd0c3e5ca782f45d86d45bccbaa4900817 Mon Sep 17 00:00:00 2001 From: "Stephanie M. Gogarten" Date: Tue, 31 Oct 2023 12:36:38 -0700 Subject: [PATCH] add min and max values for some columns --- PRIMED_phenotype_data_model.json | 41 +++++++++++++++++++++--- sheets_to_JSON_phenotype.R | 53 ++++++++++++++++---------------- 2 files changed, 64 insertions(+), 30 deletions(-) diff --git a/PRIMED_phenotype_data_model.json b/PRIMED_phenotype_data_model.json index a4046b3..ebfbc8a 100755 --- a/PRIMED_phenotype_data_model.json +++ b/PRIMED_phenotype_data_model.json @@ -1,7 +1,7 @@ { "name": "PRIMED Phenotype Data Model", "description": "Data model for phenotype data in the PRIMED consortium", - "version": "1.4", + "version": "1.5", "tables": [ { "table": "subject", @@ -296,7 +296,9 @@ "primary_key": true, "required": true, "description": "the age at which the observation or measurement for the phenotype(s) were taken", - "data_type": "float" + "data_type": "float", + "min": " 0", + "max": "89" }, { "column": "visit", @@ -442,6 +444,8 @@ "required": true, "description": "the age at which the observation or measurement for the phenotype(s) were taken", "data_type": "float", + "min": 0, + "max": 89, "examples": "56.2" }, { @@ -455,18 +459,21 @@ "column": "height_1", "description": "standing body height", "data_type": "float", + "min": 0, "examples": "165.1" }, { "column": "weight_1", "description": "body weight at baseline", "data_type": "float", + "min": 0, "examples": "72.574" }, { "column": "bmi_1", "description": "body mass index calculated", "data_type": "float", + "min": 0, "examples": "26.45" }, { @@ -496,6 +503,8 @@ "required": true, "description": "the age at which the observation or measurement for the phenotype(s) were taken", "data_type": "float", + "min": 0, + "max": 89, "examples": "56.2" }, { @@ -546,6 +555,8 @@ "required": true, "description": "the age at which the observation or measurement for the phenotype(s) were taken", "data_type": "float", + "min": 0, + "max": 89, "examples": "56.2" }, { @@ -606,6 +617,8 @@ "required": true, "description": "the age at which the observation or measurement for the phenotype(s) were taken", "data_type": "float", + "min": 0, + "max": 89, "examples": "56.2" }, { @@ -712,6 +725,8 @@ "required": true, "description": "the age at which the observation or measurement for the phenotype(s) were taken", "data_type": "float", + "min": 0, + "max": 89, "examples": "56.2" }, { @@ -763,6 +778,8 @@ "required": true, "description": "the age at which the observation or measurement for the phenotype(s) were taken", "data_type": "float", + "min": 0, + "max": 89, "examples": "56.2" }, { @@ -849,6 +866,8 @@ "required": true, "description": "the age at which the observation or measurement for the phenotype(s) were taken", "data_type": "float", + "min": 0, + "max": 89, "examples": "56.2" }, { @@ -877,6 +896,7 @@ { "table": "cancer_breast", "url": "https://docs.google.com/spreadsheets/d/1Gfj_EoPuYWhiNk7AaR6DOPjeTC7Nn06sF8qcoMGQ9KM/edit#gid=0", + "version": "1.0", "columns": [ { "column": "subject_id", @@ -893,6 +913,8 @@ "required": true, "description": "the age at which the observation or measurement for the phenotype(s) were taken", "data_type": "float", + "min": 0, + "max": 89, "examples": "56.2" }, { @@ -913,6 +935,8 @@ "column": "age_at_diagnosis_1", "description": "age at which participant was diagnosed with breast cancer", "data_type": "float", + "min": 0, + "max": 89, "notes": "may be the same as age_at_obs or not" }, { @@ -1083,6 +1107,8 @@ "column": "age_at_natural_menopause_1", "description": "age at which natural menopause occurred", "data_type": "float", + "min": 0, + "max": 89, "notes": "NA if menopause has not occurred" }, { @@ -1100,12 +1126,16 @@ { "column": "age_at_first_birth_1", "description": "age at which the first birth occurred", - "data_type": "float" + "data_type": "float", + "min": 0, + "max": 89 }, { "column": "age_at_menarche_1", "description": "age at which menarche occurred", - "data_type": "float" + "data_type": "float", + "min": 0, + "max": 89 }, { "column": "deceased_1", @@ -1125,6 +1155,8 @@ "column": "age_at_death_1", "description": "age at death of individual", "data_type": "float", + "min": 0, + "max": 89, "notes": "Set to missing (either blank or NA) if individual is not deceased" } ] @@ -1132,6 +1164,7 @@ { "table": "cancer_prostate", "url": "https://docs.google.com/spreadsheets/d/1Gfj_EoPuYWhiNk7AaR6DOPjeTC7Nn06sF8qcoMGQ9KM/edit#gid=1811888649", + "version": "1.0", "columns": [ { "column": "subject_id", diff --git a/sheets_to_JSON_phenotype.R b/sheets_to_JSON_phenotype.R index ee8458e..b9b6d9a 100644 --- a/sheets_to_JSON_phenotype.R +++ b/sheets_to_JSON_phenotype.R @@ -9,11 +9,11 @@ library(jsonlite) url <- "https://docs.google.com/spreadsheets/d/1kpWz-6QfjMPVtm62fQwm4hoxzXhR0dnKxVt02fbx9ks" model_name <- "PRIMED Phenotype Data Model" model_description <- "Data model for phenotype data in the PRIMED consortium" -model_version <-"1.4" +model_version <-"1.5" # table metadata meta <- read_sheet(url, sheet="Description", skip=1, col_types="c") %>% - select(table=Table, required=Required, url=Link, version=Version) %>% + select(table=Table, required=Required, url=Link, version=`Table version`) %>% filter(!is.na(url)) # only keep tables with links #table_names <- meta$table @@ -51,30 +51,31 @@ for (i in 1:length(tables)) { mutate(primary_key = ifelse(paste0(names(tables)[i], "_id") == Column, TRUE, NA)) } - if ("Multi-value delimiter" %in% names(tmp)) { - tables[[i]] <- tmp %>% - select(column = Column, - primary_key, - required = Required, - description = Description, - data_type = `Data type`, - references = References, - enumerations = Enumerations, - multi_value_delimiter = `Multi-value delimiter`, - examples = Examples, - notes = `Notes/comments`) - } else { - tables[[i]] <- tmp %>% - select(column = Column, - primary_key, - required = Required, - description = Description, - data_type = `Data type`, - references = References, - enumerations = Enumerations, - examples = Examples, - notes = `Notes/comments`) - } + lookup <- c( + data_type = "Data type", + multi_value_delimiter = "Multi-value delimiter", + notes = "Notes/comments" + ) + tmp <- tmp %>% + rename(any_of(lookup)) %>% + rename_with(tolower) + + keep_cols <- c( + "column", + "primary_key", + "required", + "description", + "data_type", + "min", + "max", + "references", + "enumerations", + "multi_value_delimiter", + "examples", + "notes" + ) + tables[[i]] <- tmp %>% + select(any_of(keep_cols)) }