From 436f5b4354b33e39731d4ff50b197efd95e46e77 Mon Sep 17 00:00:00 2001 From: amywatt Date: Tue, 9 Apr 2024 21:26:26 -0700 Subject: [PATCH 01/18] created R script for test data --- test_data/test_files.R | 69 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 test_data/test_files.R diff --git a/test_data/test_files.R b/test_data/test_files.R new file mode 100644 index 0000000..8851874 --- /dev/null +++ b/test_data/test_files.R @@ -0,0 +1,69 @@ +library(dplyr) +library(readr) + +n <- 20 # number of rows in test data + +set.seed(4) + +subject <- tibble( + subject_id = paste0("subject", 1:n), + age_at_obs=round(runif(n, 20, 80)) +) + +cmqt_anthropometry <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep(1, n), + height_1=rnorm(n, 500, 10), # height in cm + weight_1=rnorm(n, 400, 5), # weight in kg + bmi_1=weight_1 / (height_1 / 100)^2, # bmi in km/m^2 + # waist_hip_ratio_1 +) + +cmqt_lipids <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep(1, n), + triglycerides_1=rnorm(n, 600, 100), # mg/dL + # hdl_1 + # total_cholesterol_1 + # ldl_1 + # non_hdl_1 +) + +cmqt_blood_pressure <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep(1, n), + systolic_bp_1=rnorm(n, 100, 100), + diastolic_bp_1=rnorm(n, 100, 100), + # hypertension_1 +) + +cmqt_hematology <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep(1, n), + # rbc_1 + hemoglobin_1=rnorm(n, 10, 20), + hematocrit_1=rnorm(n, 0.4, 0.4), + # mcv_1 + # mch_1 + # mchc_1 + # rdw_1 + wbc_1=rnorm(n, 100, 100), + # basophil_count_1 + # eosinophil_count_1 + # lymphocyte_count_1 + # monocyte_count_1 + # neutrophil_count_1 + platelet_count_1=rnorm(n, 800, 200) + # mean_platelet_volume_1 +) + +# setwd("~/Downloads/primed-file-checks/pheno_qc") +write_tsv(subject, "test_data/subject.tsv") +write_tsv(cmqt_anthropometry, "test_data/cmqt_anthropometry.tsv") +write_tsv(cmqt_lipids, "test_data/cmqt_lipids.tsv") +write_tsv(cmqt_blood_pressure, "test_data/cmqt_blood_pressure.tsv") +write_tsv(cmqt_hematology, "test_data/cmqt_hematology.tsv") From 99da6d63f5a26838463c3a85db8d038363acc0f9 Mon Sep 17 00:00:00 2001 From: amywatt Date: Tue, 9 Apr 2024 22:00:49 -0700 Subject: [PATCH 02/18] creating tsv files --- test_data/cmqt_anthropometry.tsv | 21 ++++++++ test_data/cmqt_blood_pressure.tsv | 21 ++++++++ test_data/cmqt_lipids.tsv | 21 ++++++++ test_data/subject.tsv | 21 ++++++++ test_data/test_files.R | 83 +++++++++++++++++-------------- 5 files changed, 129 insertions(+), 38 deletions(-) create mode 100644 test_data/cmqt_anthropometry.tsv create mode 100644 test_data/cmqt_blood_pressure.tsv create mode 100644 test_data/cmqt_lipids.tsv create mode 100644 test_data/subject.tsv diff --git a/test_data/cmqt_anthropometry.tsv b/test_data/cmqt_anthropometry.tsv new file mode 100644 index 0000000..05552e2 --- /dev/null +++ b/test_data/cmqt_anthropometry.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit height_1 weight_1 bmi_1 waist_hip_ratio_1 +subject1 59 visit_1 168.9662314872622 80.76732089777315 28.290188511041826 0.7469005486739022 +subject2 46 visit_1 165.110036178032 85.25966289494805 31.274953024607136 0.7501018809016656 +subject3 55 visit_1 167.68140136962006 76.22894393591241 27.111285687273472 0.7936294054529471 +subject4 55 visit_1 164.68404018860684 72.58905440174385 26.765053327547104 0.8348499810263527 +subject5 62 visit_1 165.24046335177894 84.30565936248858 30.876205386787987 0.9576720775779393 +subject6 55 visit_1 166.18318741952814 77.97740084621502 28.23544170233956 0.75225930619927 +subject7 61 visit_1 173.1551878731937 78.86297291363192 26.30280885590887 0.7557994230712831 +subject8 65 visit_1 164.6905720192354 84.67048085431246 31.217243374076368 0.8556773306960885 +subject9 66 visit_1 164.29742090189868 77.67052060104524 28.773634591122615 0.7875468828313612 +subject10 51 visit_1 163.01588801788486 76.81228250714892 28.90485548023756 0.9079118556158186 +subject11 61 visit_1 175.78570486609684 86.71854313099686 28.063695767344562 0.714518154361058 +subject12 55 visit_1 166.15618313796944 80.9076769229897 29.306011596467798 0.8851560597443304 +subject13 52 visit_1 174.153356521782 86.4625616820857 28.507845741573323 0.6949822588370015 +subject14 66 visit_1 174.01779814545105 71.559757120676025 23.630968400761283 0.9650955761801525 +subject15 57 visit_1 169.15027858453618 75.89503211184523 26.52576478720391 0.8105106408530143 +subject16 57 visit_1 163.01939420974415 75.68926927950116 28.481034679063217 0.781464924086805 +subject17 67 visit_1 173.79118817919897 80.49421844568977 26.65073452190368 0.7682115581621235 +subject18 59 visit_1 171.36887405860037 78.1217242790082 26.60161071354854 0.8711545665825099 +subject19 67 visit_1 158.50380326447956 83.6195207764213 33.283447127519274 0.8420935231597383 +subject20 62 visit_1 173.681265866022 71.01308990713731 23.54140931141544 0.7862981405629905 diff --git a/test_data/cmqt_blood_pressure.tsv b/test_data/cmqt_blood_pressure.tsv new file mode 100644 index 0000000..b4737b1 --- /dev/null +++ b/test_data/cmqt_blood_pressure.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit systolic_bp_1 diastolic_bp_1 hypertension_1 +subject1 59 1 106.19744641480045 77.7453770274667 0 +subject2 46 1 119.9915439506702 77.71587669181432 0 +subject3 55 1 131.31161792889196 77.46810773155951 0 +subject4 55 1 95.82505980432364 100.68273331958252 0 +subject5 62 1 113.07657687971862 95.83189858480944 0 +subject6 55 1 106.99605911145795 69.57409254647473 0 +subject7 61 1 102.20816658429806 79.91612599622644 0 +subject8 65 1 149.54059774612347 66.65912106959908 0 +subject9 66 1 96.09049722969783 81.4653602921763 0 +subject10 51 1 155.00989669663048 72.12176576093532 0 +subject11 61 1 144.2946028755048 77.11002401473311 0 +subject12 55 1 89.04399463717873 86.66839300105147 0 +subject13 52 1 113.95507935033723 78.63368779967745 0 +subject14 66 1 140.78415432618092 82.24089307715688 0 +subject15 57 1 104.6431654882683 91.17206667122989 0 +subject16 57 1 150.4934516108755 65.61505602426482 0 +subject17 67 1 71.55825385438895 86.34365724811909 0 +subject18 59 1 131.12656635057795 75.00805196385073 0 +subject19 67 1 142.11067814242264 91.26272437490094 1 +subject20 62 1 123.32873631796721 78.99815312958145 0 diff --git a/test_data/cmqt_lipids.tsv b/test_data/cmqt_lipids.tsv new file mode 100644 index 0000000..a750983 --- /dev/null +++ b/test_data/cmqt_lipids.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit triglycerides_1 hdl_1 total_cholesterol_1 ldl_1 ldl_emerge_1 non_hdl_1 +subject1 59 visit_1 118.15800580522827 35.835101458402576 220.53316720683242 161.88670202729745 154.44843215947094 152.84255126108 +subject2 46 visit_1 109.394955100055 43.02981278997137 233.52715045499596 146.72506888583692 91.8007550347407 61.87548402533378 +subject3 55 visit_1 102.95887738001079 57.38623634418997 238.47405267759416 86.30885033094876 178.9538509861965 57.20948587971198 +subject4 55 visit_1 118.45503517265595 64.22196449715179 215.5184812122991 48.916218721009784 172.85745147754167 -9.3175286811343625 +subject5 62 visit_1 125.81558624633729 65.3192194357962 198.3250655588548 100.36336567306202 104.1197976868645 148.30428847191854 +subject6 55 visit_1 110.97424949755714 54.294234834156406 220.36974185018104 157.8632503396199 142.3629481625393 83.89162737685011 +subject7 61 visit_1 119.23052105053934 89.95482517449238 170.29390284091858 142.4348160597267 90.27747046873955 63.399036273056964 +subject8 65 visit_1 106.94345927115715 46.336510135285096 178.22791277583974 118.96024647358614 95.84242945161844 106.06293570353068 +subject9 66 visit_1 105.1634178661385 69.5271870143777 273.31543434378693 59.961590937713645 44.40813126311444 49.00815762250352 +subject10 51 visit_1 115.29692173416518 50.83696558806514 173.64612061027762 166.8664747996658 162.67915992610847 35.880559110726566 +subject11 61 visit_1 133.5022253328462 65.27202903973024 197.53733990036406 159.01849522621504 134.65526129483385 39.9993578621005 +subject12 55 visit_1 113.0875645867579 53.27329735687867 162.0096324328432 148.61517265917303 140.16069077543497 83.8428691799081 +subject13 52 visit_1 108.18346168550093 49.65287230272277 279.8241647996122 90.75852637443339 87.5160360391447 96.26844646584644 +subject14 66 visit_1 95.99811219762432 53.41342588541417 189.16707428709452 145.01345944285436 69.16799659578237 16.096467300171398 +subject15 57 visit_1 101.95475731361401 55.673241851281304 242.9040818352106 95.26329012699127 158.10330850047853 157.0217067955927 +subject16 57 visit_1 98.23126196874051 29.107401520177103 243.50094516540162 105.36929086994367 64.78537605248546 52.35283334533979 +subject17 67 visit_1 104.59976726045966 78.33680530465901 164.40847905041545 77.7662761474513 122.6550287311049 96.21838675622918 +subject18 59 visit_1 100.623109938063 66.64619037598759 217.3166093500972 136.4474774316232 93.33545615478813 98.63371389605848 +subject19 67 visit_1 121.01497518384902 38.522387368697686 178.6288340507974 102.89406422244691 113.51433537201316 91.29303433380157 +subject20 62 visit_1 113.25547892350006 29.079703695367105 105.32804391487394 155.6651521836276 111.84786213756654 73.82205851431792 diff --git a/test_data/subject.tsv b/test_data/subject.tsv new file mode 100644 index 0000000..24c9c15 --- /dev/null +++ b/test_data/subject.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs +subject1 59 +subject2 46 +subject3 55 +subject4 55 +subject5 62 +subject6 55 +subject7 61 +subject8 65 +subject9 66 +subject10 51 +subject11 61 +subject12 55 +subject13 52 +subject14 66 +subject15 57 +subject16 57 +subject17 67 +subject18 59 +subject19 67 +subject20 62 diff --git a/test_data/test_files.R b/test_data/test_files.R index 8851874..8237e4c 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -1,69 +1,76 @@ library(dplyr) library(readr) -n <- 20 # number of rows in test data +# number of rows in test data +n <- 20 + +# truncated normal distribution +rtnorm <- function(n, mean, sd, a = -Inf, b = Inf){ + qnorm(runif(n, pnorm(a, mean, sd), pnorm(b, mean, sd)), mean, sd) +} set.seed(4) subject <- tibble( - subject_id = paste0("subject", 1:n), - age_at_obs=round(runif(n, 20, 80)) + subject_id = paste0("subject", 1:n), + age_at_obs=round(rtnorm(n, 58, 5, 0, 90)) ) cmqt_anthropometry <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), - visit=rep(1, n), - height_1=rnorm(n, 500, 10), # height in cm - weight_1=rnorm(n, 400, 5), # weight in kg + visit=rep("visit_1", n), + height_1=rnorm(n, 165, 7), # height in cm + weight_1=rnorm(n, 80, 5), # weight in kg bmi_1=weight_1 / (height_1 / 100)^2, # bmi in km/m^2 - # waist_hip_ratio_1 + waist_hip_ratio_1=rnorm(n, 0.8, 0.08) ) cmqt_lipids <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), - visit=rep(1, n), - triglycerides_1=rnorm(n, 600, 100), # mg/dL - # hdl_1 - # total_cholesterol_1 - # ldl_1 - # non_hdl_1 + visit=rep("visit_1", n), + triglycerides_1=rnorm(n, 116, 13.6), # mg/dL + hdl_1=rnorm(n, 55, 15), + total_cholesterol_1=rnorm(n, 203, 41), + ldl_1=rnorm(n, 122, 37), + ldl_emerge_1=rnorm(n, 122, 37), + non_hdl_1=rnorm(n, 81, 40), ) cmqt_blood_pressure <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), visit=rep(1, n), - systolic_bp_1=rnorm(n, 100, 100), - diastolic_bp_1=rnorm(n, 100, 100), - # hypertension_1 + systolic_bp_1=rnorm(n, 120, 20), + diastolic_bp_1=rnorm(n, 80, 10), + hypertension_1=ifelse(systolic_bp_1 > 140 & diastolic_bp_1 > 90, 1, 0) ) -cmqt_hematology <- tibble( - subject_id=rep(subject$subject_id), - age_at_obs=rep(subject$age_at_obs), - visit=rep(1, n), - # rbc_1 - hemoglobin_1=rnorm(n, 10, 20), - hematocrit_1=rnorm(n, 0.4, 0.4), - # mcv_1 - # mch_1 - # mchc_1 - # rdw_1 - wbc_1=rnorm(n, 100, 100), - # basophil_count_1 - # eosinophil_count_1 - # lymphocyte_count_1 - # monocyte_count_1 - # neutrophil_count_1 - platelet_count_1=rnorm(n, 800, 200) - # mean_platelet_volume_1 -) +# cmqt_hematology <- tibble( +# subject_id=rep(subject$subject_id), +# age_at_obs=rep(subject$age_at_obs), +# visit=rep(1, n), +# rbc_1 +# hemoglobin_1=rnorm(n, 10, 20), +# hematocrit_1=rnorm(n, 0.4, 0.4), +# mcv_1 +# mch_1 +# mchc_1 +# rdw_1 +# wbc_1=rnorm(n, 100, 100), +# basophil_count_1 +# eosinophil_count_1 +# lymphocyte_count_1 +# monocyte_count_1 +# neutrophil_count_1 +# platelet_count_1=rnorm(n, 800, 200) +# mean_platelet_volume_1 +# ) -# setwd("~/Downloads/primed-file-checks/pheno_qc") +# setwd("~/Downloads/primed_data_models") write_tsv(subject, "test_data/subject.tsv") write_tsv(cmqt_anthropometry, "test_data/cmqt_anthropometry.tsv") write_tsv(cmqt_lipids, "test_data/cmqt_lipids.tsv") write_tsv(cmqt_blood_pressure, "test_data/cmqt_blood_pressure.tsv") -write_tsv(cmqt_hematology, "test_data/cmqt_hematology.tsv") +# write_tsv(cmqt_hematology, "test_data/cmqt_hematology.tsv") From b2dfc7193192eed550bcb3aaa1b9e8a00cc7832c Mon Sep 17 00:00:00 2001 From: amywatt Date: Tue, 9 Apr 2024 22:54:29 -0700 Subject: [PATCH 03/18] added phenotype_harmonized table --- test_data/phenotype_harmonized.tsv | 5 +++++ test_data/test_files.R | 17 ++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 test_data/phenotype_harmonized.tsv diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv new file mode 100644 index 0000000..ab5f80e --- /dev/null +++ b/test_data/phenotype_harmonized.tsv @@ -0,0 +1,5 @@ +phenotype_harmonized domain md5sum file_path file_readme_path n_subjects n_rows data_model_version +NA NA NA NA NA NA NA NA +NA NA NA NA NA NA NA NA +NA NA NA NA NA NA NA NA +NA NA NA NA NA NA NA NA diff --git a/test_data/test_files.R b/test_data/test_files.R index 8237e4c..5a19b7b 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -16,6 +16,18 @@ subject <- tibble( age_at_obs=round(rtnorm(n, 58, 5, 0, 90)) ) +# fill in table after uploading tsv files to anvil +phenotype_harmonized <- tibble( + phenotype_harmonized=rep(NA, 4), + domain=rep(NA, 4), + md5sum=rep(NA, 4), + file_path=rep(NA, 4), + file_readme_path=rep(NA, 4), + n_subjects=rep(NA, 4), + n_rows=rep(NA, 4), + data_model_version=rep(NA, 4), +) + cmqt_anthropometry <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), @@ -68,9 +80,12 @@ cmqt_blood_pressure <- tibble( # mean_platelet_volume_1 # ) -# setwd("~/Downloads/primed_data_models") +setwd("~/Downloads/primed_data_models") +write_tsv(phenotype_harmonized, "test_data/phenotype_harmonized.tsv") write_tsv(subject, "test_data/subject.tsv") write_tsv(cmqt_anthropometry, "test_data/cmqt_anthropometry.tsv") write_tsv(cmqt_lipids, "test_data/cmqt_lipids.tsv") write_tsv(cmqt_blood_pressure, "test_data/cmqt_blood_pressure.tsv") # write_tsv(cmqt_hematology, "test_data/cmqt_hematology.tsv") + +# md5sum("test_data/cmqt_anthropometry.tsv") From 317d7c4bc21cbaa1662fb8e9911ace6315c03c9c Mon Sep 17 00:00:00 2001 From: amywatt Date: Wed, 10 Apr 2024 20:38:29 -0700 Subject: [PATCH 04/18] added phenotype_harmonized.tsv --- test_data/phenotype_harmonized.tsv | 9 ++++---- test_data/test_files.R | 37 +++++++++++++++++------------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv index ab5f80e..71bb8f1 100644 --- a/test_data/phenotype_harmonized.tsv +++ b/test_data/phenotype_harmonized.tsv @@ -1,5 +1,4 @@ -phenotype_harmonized domain md5sum file_path file_readme_path n_subjects n_rows data_model_version -NA NA NA NA NA NA NA NA -NA NA NA NA NA NA NA NA -NA NA NA NA NA NA NA NA -NA NA NA NA NA NA NA NA +domain md5sum file_path file_readme_path n_subjects n_rows data_model_version +cmqt_anthropometry c790c18bcc0a4bda515c5872852fe7d4 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv readme 20 20 5 +cmqt_lipids 0a16006e71901758713941f8a2be9eb4 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv readme 20 20 5 +cmqt_blood_pressure 3265d4e88c18fd509c71c59cde47ff03 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv readme 20 20 5 diff --git a/test_data/test_files.R b/test_data/test_files.R index 5a19b7b..d712bf7 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -4,6 +4,10 @@ library(readr) # number of rows in test data n <- 20 +file_names <- c("cmqt_anthropometry", + "cmqt_lipids", + "cmqt_blood_pressure") + # truncated normal distribution rtnorm <- function(n, mean, sd, a = -Inf, b = Inf){ qnorm(runif(n, pnorm(a, mean, sd), pnorm(b, mean, sd)), mean, sd) @@ -16,18 +20,6 @@ subject <- tibble( age_at_obs=round(rtnorm(n, 58, 5, 0, 90)) ) -# fill in table after uploading tsv files to anvil -phenotype_harmonized <- tibble( - phenotype_harmonized=rep(NA, 4), - domain=rep(NA, 4), - md5sum=rep(NA, 4), - file_path=rep(NA, 4), - file_readme_path=rep(NA, 4), - n_subjects=rep(NA, 4), - n_rows=rep(NA, 4), - data_model_version=rep(NA, 4), -) - cmqt_anthropometry <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), @@ -59,6 +51,21 @@ cmqt_blood_pressure <- tibble( hypertension_1=ifelse(systolic_bp_1 > 140 & diastolic_bp_1 > 90, 1, 0) ) +# fill in table after uploading tsv files to anvil + +bucket <- "gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/" + +phenotype_harmonized <- tibble( + # phenotype_harmonized_id= + domain=(file_names), + md5sum=as.vector(md5sum(paste0("test_data/", file_names, ".tsv"))), + file_path=paste0(bucket, file_names, '.tsv'), + file_readme_path=rep("readme", length(file_names)), + n_subjects=rep(n, length(file_names)), + n_rows=rep(n, length(file_names)), + data_model_version=rep(5, length(file_names)), +) + # cmqt_hematology <- tibble( # subject_id=rep(subject$subject_id), # age_at_obs=rep(subject$age_at_obs), @@ -80,12 +87,10 @@ cmqt_blood_pressure <- tibble( # mean_platelet_volume_1 # ) -setwd("~/Downloads/primed_data_models") -write_tsv(phenotype_harmonized, "test_data/phenotype_harmonized.tsv") +# setwd("~/Downloads/primed_data_models") write_tsv(subject, "test_data/subject.tsv") write_tsv(cmqt_anthropometry, "test_data/cmqt_anthropometry.tsv") write_tsv(cmqt_lipids, "test_data/cmqt_lipids.tsv") write_tsv(cmqt_blood_pressure, "test_data/cmqt_blood_pressure.tsv") +write_tsv(phenotype_harmonized, "test_data/phenotype_harmonized.tsv") # write_tsv(cmqt_hematology, "test_data/cmqt_hematology.tsv") - -# md5sum("test_data/cmqt_anthropometry.tsv") From ad04abd639cc72ffee3e17dfa5c3e0aaede48f9b Mon Sep 17 00:00:00 2001 From: amywatt Date: Tue, 16 Apr 2024 09:08:21 -0700 Subject: [PATCH 05/18] adding required columns --- test_data/cmqt_anthropometry.tsv | 40 ++++++++++++++-------------- test_data/cmqt_blood_pressure.tsv | 40 ++++++++++++++-------------- test_data/cmqt_lipids.tsv | 40 ++++++++++++++-------------- test_data/phenotype_harmonized.tsv | 8 +++--- test_data/subject.tsv | 42 +++++++++++++++--------------- test_data/test_files.R | 25 +++++++++++------- 6 files changed, 101 insertions(+), 94 deletions(-) diff --git a/test_data/cmqt_anthropometry.tsv b/test_data/cmqt_anthropometry.tsv index 05552e2..4400f4b 100644 --- a/test_data/cmqt_anthropometry.tsv +++ b/test_data/cmqt_anthropometry.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit height_1 weight_1 bmi_1 waist_hip_ratio_1 -subject1 59 visit_1 168.9662314872622 80.76732089777315 28.290188511041826 0.7469005486739022 -subject2 46 visit_1 165.110036178032 85.25966289494805 31.274953024607136 0.7501018809016656 -subject3 55 visit_1 167.68140136962006 76.22894393591241 27.111285687273472 0.7936294054529471 -subject4 55 visit_1 164.68404018860684 72.58905440174385 26.765053327547104 0.8348499810263527 -subject5 62 visit_1 165.24046335177894 84.30565936248858 30.876205386787987 0.9576720775779393 -subject6 55 visit_1 166.18318741952814 77.97740084621502 28.23544170233956 0.75225930619927 -subject7 61 visit_1 173.1551878731937 78.86297291363192 26.30280885590887 0.7557994230712831 -subject8 65 visit_1 164.6905720192354 84.67048085431246 31.217243374076368 0.8556773306960885 -subject9 66 visit_1 164.29742090189868 77.67052060104524 28.773634591122615 0.7875468828313612 -subject10 51 visit_1 163.01588801788486 76.81228250714892 28.90485548023756 0.9079118556158186 -subject11 61 visit_1 175.78570486609684 86.71854313099686 28.063695767344562 0.714518154361058 -subject12 55 visit_1 166.15618313796944 80.9076769229897 29.306011596467798 0.8851560597443304 -subject13 52 visit_1 174.153356521782 86.4625616820857 28.507845741573323 0.6949822588370015 -subject14 66 visit_1 174.01779814545105 71.559757120676025 23.630968400761283 0.9650955761801525 -subject15 57 visit_1 169.15027858453618 75.89503211184523 26.52576478720391 0.8105106408530143 -subject16 57 visit_1 163.01939420974415 75.68926927950116 28.481034679063217 0.781464924086805 -subject17 67 visit_1 173.79118817919897 80.49421844568977 26.65073452190368 0.7682115581621235 -subject18 59 visit_1 171.36887405860037 78.1217242790082 26.60161071354854 0.8711545665825099 -subject19 67 visit_1 158.50380326447956 83.6195207764213 33.283447127519274 0.8420935231597383 -subject20 62 visit_1 173.681265866022 71.01308990713731 23.54140931141544 0.7862981405629905 +subject1 59 visit_1 174.4059603833956 74.65738464756613 24.544270346105098 0.9029542666638012 +subject2 51 visit_1 166.27074769218558 85.32225373402065 30.86246561414685 0.7828680269809288 +subject3 63 visit_1 174.04758635491999 73.4363911773126 24.242383464072116 0.7540203628558878 +subject4 65 visit_1 153.18365996894644 90.31847351125953 38.490336272481564 0.6823418364566137 +subject5 54 visit_1 159.25304495658332 80.65691505331338 31.802855604448204 0.7173809253742001 +subject6 60 visit_1 158.96497699130163 78.84155775542531 31.199834726735073 0.6954780115808266 +subject7 51 visit_1 165.6919058239657 78.0132223851327 28.416175768348257 0.7329398074144686 +subject8 58 visit_1 162.3704139906115 84.44716041140687 32.03105585459906 0.7095477055180177 +subject9 62 visit_1 170.06732908698982 82.63084519748364 28.569353494069684 0.8294998540226414 +subject10 68 visit_1 152.41832586999223 79.1436337851869 34.06760571571555 0.7838557583735298 +subject11 56 visit_1 160.35379800896644 80.79338448721627 31.42080445534267 0.6977872077781471 +subject12 60 visit_1 160.63391457889574 77.57167466913786 30.06274833674053 0.736159001546514 +subject13 57 visit_1 164.44257297713287 75.20546962500397 27.81127508409966 0.8127265938356799 +subject14 56 visit_1 168.04937333980587 80.90258646053528 28.647624244062968 0.8491838106514763 +subject15 63 visit_1 178.79630678806967 83.60867141409459 26.15376606124955 0.8550358369909131 +subject16 60 visit_1 160.8226892924361 78.15229760939602 30.216705379062383 0.7962359191155008 +subject17 56 visit_1 161.13244951873727 81.18769156269829 31.269733119512615 0.9864257342639594 +subject18 57 visit_1 169.87176643590774 76.67038943792542 26.56961512330341 0.7537947207215205 +subject19 55 visit_1 163.9103522477441 76.01596245078622 28.29384899414978 0.8774783307433478 +subject20 52 visit_1 174.4422873663841 79.74151534344308 26.204805402947795 0.7777971498030142 diff --git a/test_data/cmqt_blood_pressure.tsv b/test_data/cmqt_blood_pressure.tsv index b4737b1..338d53f 100644 --- a/test_data/cmqt_blood_pressure.tsv +++ b/test_data/cmqt_blood_pressure.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit systolic_bp_1 diastolic_bp_1 hypertension_1 -subject1 59 1 106.19744641480045 77.7453770274667 0 -subject2 46 1 119.9915439506702 77.71587669181432 0 -subject3 55 1 131.31161792889196 77.46810773155951 0 -subject4 55 1 95.82505980432364 100.68273331958252 0 -subject5 62 1 113.07657687971862 95.83189858480944 0 -subject6 55 1 106.99605911145795 69.57409254647473 0 -subject7 61 1 102.20816658429806 79.91612599622644 0 -subject8 65 1 149.54059774612347 66.65912106959908 0 -subject9 66 1 96.09049722969783 81.4653602921763 0 -subject10 51 1 155.00989669663048 72.12176576093532 0 -subject11 61 1 144.2946028755048 77.11002401473311 0 -subject12 55 1 89.04399463717873 86.66839300105147 0 -subject13 52 1 113.95507935033723 78.63368779967745 0 -subject14 66 1 140.78415432618092 82.24089307715688 0 -subject15 57 1 104.6431654882683 91.17206667122989 0 -subject16 57 1 150.4934516108755 65.61505602426482 0 -subject17 67 1 71.55825385438895 86.34365724811909 0 -subject18 59 1 131.12656635057795 75.00805196385073 0 -subject19 67 1 142.11067814242264 91.26272437490094 1 -subject20 62 1 123.32873631796721 78.99815312958145 0 +subject1 52 1 100.32111681998313 67.60810968514947 0 +subject2 57 1 119.54598875730798 74.95779069543926 0 +subject3 61 1 102.30899589794329 93.16781677627166 0 +subject4 64 1 129.70080877678893 87.9443019106884 0 +subject5 58 1 103.36938753648045 84.85034175324486 0 +subject6 58 1 143.46301641752973 103.71031812926483 1 +subject7 61 1 121.22030926987908 77.18294951489403 0 +subject8 62 1 123.13507242668209 81.98995477180007 0 +subject9 53 1 129.37811511205126 88.10235220861095 0 +subject10 61 1 130.90321590809089 78.777414144928 0 +subject11 56 1 127.91204326117058 90.07719442437494 0 +subject12 53 1 101.57273309314265 85.17582142418476 0 +subject13 58 1 106.46854778104007 77.70028608118474 0 +subject14 63 1 81.12209388518411 78.32366221470303 0 +subject15 56 1 122.39966313289008 55.239583636861326 0 +subject16 57 1 145.32154539560975 73.98845773539983 0 +subject17 56 1 98.04026398921327 80.31508610546757 0 +subject18 49 1 128.75812916491657 92.26124588286407 0 +subject19 60 1 132.3278608741974 73.00785190847046 0 +subject20 54 1 120.3573928456392 64.95851210559971 0 diff --git a/test_data/cmqt_lipids.tsv b/test_data/cmqt_lipids.tsv index a750983..cb86da6 100644 --- a/test_data/cmqt_lipids.tsv +++ b/test_data/cmqt_lipids.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit triglycerides_1 hdl_1 total_cholesterol_1 ldl_1 ldl_emerge_1 non_hdl_1 -subject1 59 visit_1 118.15800580522827 35.835101458402576 220.53316720683242 161.88670202729745 154.44843215947094 152.84255126108 -subject2 46 visit_1 109.394955100055 43.02981278997137 233.52715045499596 146.72506888583692 91.8007550347407 61.87548402533378 -subject3 55 visit_1 102.95887738001079 57.38623634418997 238.47405267759416 86.30885033094876 178.9538509861965 57.20948587971198 -subject4 55 visit_1 118.45503517265595 64.22196449715179 215.5184812122991 48.916218721009784 172.85745147754167 -9.3175286811343625 -subject5 62 visit_1 125.81558624633729 65.3192194357962 198.3250655588548 100.36336567306202 104.1197976868645 148.30428847191854 -subject6 55 visit_1 110.97424949755714 54.294234834156406 220.36974185018104 157.8632503396199 142.3629481625393 83.89162737685011 -subject7 61 visit_1 119.23052105053934 89.95482517449238 170.29390284091858 142.4348160597267 90.27747046873955 63.399036273056964 -subject8 65 visit_1 106.94345927115715 46.336510135285096 178.22791277583974 118.96024647358614 95.84242945161844 106.06293570353068 -subject9 66 visit_1 105.1634178661385 69.5271870143777 273.31543434378693 59.961590937713645 44.40813126311444 49.00815762250352 -subject10 51 visit_1 115.29692173416518 50.83696558806514 173.64612061027762 166.8664747996658 162.67915992610847 35.880559110726566 -subject11 61 visit_1 133.5022253328462 65.27202903973024 197.53733990036406 159.01849522621504 134.65526129483385 39.9993578621005 -subject12 55 visit_1 113.0875645867579 53.27329735687867 162.0096324328432 148.61517265917303 140.16069077543497 83.8428691799081 -subject13 52 visit_1 108.18346168550093 49.65287230272277 279.8241647996122 90.75852637443339 87.5160360391447 96.26844646584644 -subject14 66 visit_1 95.99811219762432 53.41342588541417 189.16707428709452 145.01345944285436 69.16799659578237 16.096467300171398 -subject15 57 visit_1 101.95475731361401 55.673241851281304 242.9040818352106 95.26329012699127 158.10330850047853 157.0217067955927 -subject16 57 visit_1 98.23126196874051 29.107401520177103 243.50094516540162 105.36929086994367 64.78537605248546 52.35283334533979 -subject17 67 visit_1 104.59976726045966 78.33680530465901 164.40847905041545 77.7662761474513 122.6550287311049 96.21838675622918 -subject18 59 visit_1 100.623109938063 66.64619037598759 217.3166093500972 136.4474774316232 93.33545615478813 98.63371389605848 -subject19 67 visit_1 121.01497518384902 38.522387368697686 178.6288340507974 102.89406422244691 113.51433537201316 91.29303433380157 -subject20 62 visit_1 113.25547892350006 29.079703695367105 105.32804391487394 155.6651521836276 111.84786213756654 73.82205851431792 +subject1 61 visit_1 121.81587985397368 71.17028460566112 238.95637077130564 188.454359916499 96.46527586738083 71.9815081098668 +subject2 62 visit_1 126.12607917531574 65.0236765753393 169.5359717952532 104.30982272343374 121.98435630873986 71.86350676725725 +subject3 57 visit_1 127.7670028393971 40.530614999033276 266.11102406578533 99.99377443873357 142.92649316845015 70.87243092623808 +subject4 56 visit_1 120.15247181676263 25.371440022030995 259.3555543399786 38.45628596995071 77.27636063799875 163.73093327833013 +subject5 56 visit_1 114.44929003903476 46.2283914890792 183.1868028422012 184.25646683652465 109.19166722747944 144.32759433923775 +subject6 50 visit_1 121.76167046737713 69.53915554308915 225.5643479638949 124.67475532358635 97.94270935619721 39.296370185898915 +subject7 57 visit_1 105.15114825942665 63.284384889078396 167.8480078167114 105.7191085525777 89.0851081809514 80.66450398490576 +subject8 58 visit_1 107.78291740857124 53.767667489291675 174.01458398692856 145.1832155257659 176.65010583032839 27.636484278396317 +subject9 58 visit_1 139.32414407501224 29.849293623397426 117.01982112939709 92.40754580081575 77.767419874941 86.86144116870521 +subject10 49 visit_1 106.26310342194574 73.18911140526993 248.07690694514721 80.26451717742208 186.7683088887664 49.48706304374129 +subject11 49 visit_1 114.1879956742671 70.00749806468177 217.02339765103213 84.07440602244296 166.9450153196839 69.44009605893248 +subject12 64 visit_1 102.40319514845531 65.78993486182691 223.12400869710362 124.629653991415 64.73139007878065 107.67357200420587 +subject13 66 visit_1 141.4831375920665 42.33453771936489 164.78803993526844 136.12331298090797 110.81689679812386 75.53475119870976 +subject14 60 visit_1 111.41151732449964 64.32978085521123 144.45642866019128 61.96423225265855 160.45068550343473 89.96357230862755 +subject15 62 visit_1 129.23647592582597 44.16079329472619 243.00636887890863 192.32007878592327 93.58985615329634 125.68826668491953 +subject16 61 visit_1 129.434459859743 48.257820622950135 139.60001130140282 95.5013708444393 178.41288548011966 23.46022409705926 +subject17 53 visit_1 103.19891012404025 37.06740924896674 203.72584264798112 136.07700774951198 32.38276963061956 106.37462899247632 +subject18 53 visit_1 120.74892407710541 60.85708544525265 171.23658654990035 138.3111853538541 142.5841477485692 61.03220785540295 +subject19 49 visit_1 107.91590592904498 47.254350360451454 193.59696622304162 131.52105675876646 162.9047545634819 126.05089749960378 +subject20 60 visit_1 83.60149749371428 68.64803466903822 191.75033372000615 115.36040412574407 128.15816218823934 76.99261251832581 diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv index 71bb8f1..48b8eac 100644 --- a/test_data/phenotype_harmonized.tsv +++ b/test_data/phenotype_harmonized.tsv @@ -1,4 +1,4 @@ -domain md5sum file_path file_readme_path n_subjects n_rows data_model_version -cmqt_anthropometry c790c18bcc0a4bda515c5872852fe7d4 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv readme 20 20 5 -cmqt_lipids 0a16006e71901758713941f8a2be9eb4 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv readme 20 20 5 -cmqt_blood_pressure 3265d4e88c18fd509c71c59cde47ff03 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv readme 20 20 5 +domain md5sum file_path file_readme_path n_subjects n_rows +cmqt_anthropometry 5a41957467d044f895168522dd554f1b gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_lipids 5ff37e31f7bf8bf3f5b7b9fabbed601a gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_blood_pressure b6b9748868ba729b653f30a7a77e6901 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 diff --git a/test_data/subject.tsv b/test_data/subject.tsv index 24c9c15..49e7064 100644 --- a/test_data/subject.tsv +++ b/test_data/subject.tsv @@ -1,21 +1,21 @@ -subject_id age_at_obs -subject1 59 -subject2 46 -subject3 55 -subject4 55 -subject5 62 -subject6 55 -subject7 61 -subject8 65 -subject9 66 -subject10 51 -subject11 61 -subject12 55 -subject13 52 -subject14 66 -subject15 57 -subject16 57 -subject17 67 -subject18 59 -subject19 67 -subject20 62 +subject_id consent_code study_nickname reported_sex +subject1 HMB-IRB ARIC Other +subject2 GRU ARIC Female +subject3 GRU JHS Unknown +subject4 GRU JHS Female +subject5 DS-CVD JHS Other +subject6 GRU ARIC Female +subject7 DS-CVD JHS Other +subject8 DS-CVD ARIC Other +subject9 DS-CVD JHS Unknown +subject10 GRU JHS Unknown +subject11 DS-CVD JHS Male +subject12 GRU UKBB Unknown +subject13 GRU ARIC Other +subject14 DS-CVD JHS Other +subject15 HMB-IRB JHS Other +subject16 HMB-IRB ARIC Male +subject17 DS-CVD JHS Female +subject18 HMB-IRB JHS Female +subject19 DS-CVD JHS Other +subject20 DS-CVD UKBB Unknown diff --git a/test_data/test_files.R b/test_data/test_files.R index d712bf7..320738a 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -1,5 +1,6 @@ library(dplyr) library(readr) +library(tools) # number of rows in test data n <- 20 @@ -15,14 +16,20 @@ rtnorm <- function(n, mean, sd, a = -Inf, b = Inf){ set.seed(4) +readme <- tibble( + read_me = c(NA) +) + subject <- tibble( subject_id = paste0("subject", 1:n), - age_at_obs=round(rtnorm(n, 58, 5, 0, 90)) + consent_code = sample(x = c("GRU", "HMB-IRB", "DS-CVD"), size = n, replace = TRUE), + study_nickname = sample(x = c("UKBB", "JHS", "ARIC"), size = n, replace = TRUE), + reported_sex = sample(x = c("Female", "Male", "Unknown", "Other"), size = n, replace = TRUE) ) cmqt_anthropometry <- tibble( subject_id=rep(subject$subject_id), - age_at_obs=rep(subject$age_at_obs), + age_at_obs=round(rtnorm(n, 58, 5, 0, 90)), visit=rep("visit_1", n), height_1=rnorm(n, 165, 7), # height in cm weight_1=rnorm(n, 80, 5), # weight in kg @@ -32,7 +39,7 @@ cmqt_anthropometry <- tibble( cmqt_lipids <- tibble( subject_id=rep(subject$subject_id), - age_at_obs=rep(subject$age_at_obs), + age_at_obs=round(rtnorm(n, 58, 5, 0, 90)), visit=rep("visit_1", n), triglycerides_1=rnorm(n, 116, 13.6), # mg/dL hdl_1=rnorm(n, 55, 15), @@ -44,8 +51,8 @@ cmqt_lipids <- tibble( cmqt_blood_pressure <- tibble( subject_id=rep(subject$subject_id), - age_at_obs=rep(subject$age_at_obs), - visit=rep(1, n), + age_at_obs=round(rtnorm(n, 58, 5, 0, 90)), + visit=rep("visit_1", n), systolic_bp_1=rnorm(n, 120, 20), diastolic_bp_1=rnorm(n, 80, 10), hypertension_1=ifelse(systolic_bp_1 > 140 & diastolic_bp_1 > 90, 1, 0) @@ -60,10 +67,9 @@ phenotype_harmonized <- tibble( domain=(file_names), md5sum=as.vector(md5sum(paste0("test_data/", file_names, ".tsv"))), file_path=paste0(bucket, file_names, '.tsv'), - file_readme_path=rep("readme", length(file_names)), + file_readme_path=paste0(bucket, 'readme.tsv'), n_subjects=rep(n, length(file_names)), - n_rows=rep(n, length(file_names)), - data_model_version=rep(5, length(file_names)), + n_rows=rep(n, length(file_names)), ) # cmqt_hematology <- tibble( @@ -87,7 +93,8 @@ phenotype_harmonized <- tibble( # mean_platelet_volume_1 # ) -# setwd("~/Downloads/primed_data_models") +setwd("~/Downloads/primed_data_models") +write_tsv(readme, "test_data/readme.tsv") write_tsv(subject, "test_data/subject.tsv") write_tsv(cmqt_anthropometry, "test_data/cmqt_anthropometry.tsv") write_tsv(cmqt_lipids, "test_data/cmqt_lipids.tsv") From fb9ae63e9a0868be3ce5950b1140d54a243eda56 Mon Sep 17 00:00:00 2001 From: amywatt Date: Tue, 16 Apr 2024 09:13:00 -0700 Subject: [PATCH 06/18] adding required columns --- test_data/cmqt_anthropometry.tsv | 40 ++++++++++++++--------------- test_data/cmqt_blood_pressure.tsv | 40 ++++++++++++++--------------- test_data/cmqt_lipids.tsv | 40 ++++++++++++++--------------- test_data/subject.tsv | 42 +++++++++++++++---------------- test_data/test_files.R | 10 +++++--- 5 files changed, 88 insertions(+), 84 deletions(-) diff --git a/test_data/cmqt_anthropometry.tsv b/test_data/cmqt_anthropometry.tsv index 4400f4b..0b0fe36 100644 --- a/test_data/cmqt_anthropometry.tsv +++ b/test_data/cmqt_anthropometry.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit height_1 weight_1 bmi_1 waist_hip_ratio_1 -subject1 59 visit_1 174.4059603833956 74.65738464756613 24.544270346105098 0.9029542666638012 -subject2 51 visit_1 166.27074769218558 85.32225373402065 30.86246561414685 0.7828680269809288 -subject3 63 visit_1 174.04758635491999 73.4363911773126 24.242383464072116 0.7540203628558878 -subject4 65 visit_1 153.18365996894644 90.31847351125953 38.490336272481564 0.6823418364566137 -subject5 54 visit_1 159.25304495658332 80.65691505331338 31.802855604448204 0.7173809253742001 -subject6 60 visit_1 158.96497699130163 78.84155775542531 31.199834726735073 0.6954780115808266 -subject7 51 visit_1 165.6919058239657 78.0132223851327 28.416175768348257 0.7329398074144686 -subject8 58 visit_1 162.3704139906115 84.44716041140687 32.03105585459906 0.7095477055180177 -subject9 62 visit_1 170.06732908698982 82.63084519748364 28.569353494069684 0.8294998540226414 -subject10 68 visit_1 152.41832586999223 79.1436337851869 34.06760571571555 0.7838557583735298 -subject11 56 visit_1 160.35379800896644 80.79338448721627 31.42080445534267 0.6977872077781471 -subject12 60 visit_1 160.63391457889574 77.57167466913786 30.06274833674053 0.736159001546514 -subject13 57 visit_1 164.44257297713287 75.20546962500397 27.81127508409966 0.8127265938356799 -subject14 56 visit_1 168.04937333980587 80.90258646053528 28.647624244062968 0.8491838106514763 -subject15 63 visit_1 178.79630678806967 83.60867141409459 26.15376606124955 0.8550358369909131 -subject16 60 visit_1 160.8226892924361 78.15229760939602 30.216705379062383 0.7962359191155008 -subject17 56 visit_1 161.13244951873727 81.18769156269829 31.269733119512615 0.9864257342639594 -subject18 57 visit_1 169.87176643590774 76.67038943792542 26.56961512330341 0.7537947207215205 -subject19 55 visit_1 163.9103522477441 76.01596245078622 28.29384899414978 0.8774783307433478 -subject20 52 visit_1 174.4422873663841 79.74151534344308 26.204805402947795 0.7777971498030142 +subject1 subject1 visit_1 174.4059603833956 74.65738464756613 24.544270346105098 0.9029542666638012 +subject2 subject2 visit_1 166.27074769218558 85.32225373402065 30.86246561414685 0.7828680269809288 +subject3 subject3 visit_1 174.04758635491999 73.4363911773126 24.242383464072116 0.7540203628558878 +subject4 subject4 visit_1 153.18365996894644 90.31847351125953 38.490336272481564 0.6823418364566137 +subject5 subject5 visit_1 159.25304495658332 80.65691505331338 31.802855604448204 0.7173809253742001 +subject6 subject6 visit_1 158.96497699130163 78.84155775542531 31.199834726735073 0.6954780115808266 +subject7 subject7 visit_1 165.6919058239657 78.0132223851327 28.416175768348257 0.7329398074144686 +subject8 subject8 visit_1 162.3704139906115 84.44716041140687 32.03105585459906 0.7095477055180177 +subject9 subject9 visit_1 170.06732908698982 82.63084519748364 28.569353494069684 0.8294998540226414 +subject10 subject10 visit_1 152.41832586999223 79.1436337851869 34.06760571571555 0.7838557583735298 +subject11 subject11 visit_1 160.35379800896644 80.79338448721627 31.42080445534267 0.6977872077781471 +subject12 subject12 visit_1 160.63391457889574 77.57167466913786 30.06274833674053 0.736159001546514 +subject13 subject13 visit_1 164.44257297713287 75.20546962500397 27.81127508409966 0.8127265938356799 +subject14 subject14 visit_1 168.04937333980587 80.90258646053528 28.647624244062968 0.8491838106514763 +subject15 subject15 visit_1 178.79630678806967 83.60867141409459 26.15376606124955 0.8550358369909131 +subject16 subject16 visit_1 160.8226892924361 78.15229760939602 30.216705379062383 0.7962359191155008 +subject17 subject17 visit_1 161.13244951873727 81.18769156269829 31.269733119512615 0.9864257342639594 +subject18 subject18 visit_1 169.87176643590774 76.67038943792542 26.56961512330341 0.7537947207215205 +subject19 subject19 visit_1 163.9103522477441 76.01596245078622 28.29384899414978 0.8774783307433478 +subject20 subject20 visit_1 174.4422873663841 79.74151534344308 26.204805402947795 0.7777971498030142 diff --git a/test_data/cmqt_blood_pressure.tsv b/test_data/cmqt_blood_pressure.tsv index 338d53f..d5efead 100644 --- a/test_data/cmqt_blood_pressure.tsv +++ b/test_data/cmqt_blood_pressure.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit systolic_bp_1 diastolic_bp_1 hypertension_1 -subject1 52 1 100.32111681998313 67.60810968514947 0 -subject2 57 1 119.54598875730798 74.95779069543926 0 -subject3 61 1 102.30899589794329 93.16781677627166 0 -subject4 64 1 129.70080877678893 87.9443019106884 0 -subject5 58 1 103.36938753648045 84.85034175324486 0 -subject6 58 1 143.46301641752973 103.71031812926483 1 -subject7 61 1 121.22030926987908 77.18294951489403 0 -subject8 62 1 123.13507242668209 81.98995477180007 0 -subject9 53 1 129.37811511205126 88.10235220861095 0 -subject10 61 1 130.90321590809089 78.777414144928 0 -subject11 56 1 127.91204326117058 90.07719442437494 0 -subject12 53 1 101.57273309314265 85.17582142418476 0 -subject13 58 1 106.46854778104007 77.70028608118474 0 -subject14 63 1 81.12209388518411 78.32366221470303 0 -subject15 56 1 122.39966313289008 55.239583636861326 0 -subject16 57 1 145.32154539560975 73.98845773539983 0 -subject17 56 1 98.04026398921327 80.31508610546757 0 -subject18 49 1 128.75812916491657 92.26124588286407 0 -subject19 60 1 132.3278608741974 73.00785190847046 0 -subject20 54 1 120.3573928456392 64.95851210559971 0 +subject1 59 visit_1 114.22004802946624 70.16055840999157 0 +subject2 46 visit_1 133.33678600210294 79.77299437865399 0 +subject3 55 visit_1 117.26737559935488 71.15449794897164 0 +subject4 55 visit_1 124.48178615431377 84.85040438839447 0 +subject5 62 visit_1 142.34413334245977 71.68469376824022 0 +subject6 55 visit_1 91.23011204852963 91.73150820876486 0 +subject7 61 visit_1 132.68731449623817 80.61015463493953 0 +subject8 65 visit_1 110.01610392770148 81.56753621334104 0 +subject9 66 visit_1 142.52544874980188 84.68905755602563 0 +subject10 51 visit_1 117.9963062591629 85.45160795404544 0 +subject11 61 visit_1 97.80315967850292 83.95602163058528 0 +subject12 55 visit_1 133.15848523890034 70.78636654657132 0 +subject13 52 visit_1 119.13407971480952 73.23427389052003 0 +subject14 66 visit_1 132.56778450014235 60.56104694259206 0 +subject15 57 visit_1 101.26341574932871 81.19983156644504 0 +subject16 57 visit_1 112.85233189693206 92.66077269780487 0 +subject17 67 visit_1 119.09854164136453 69.02013199460663 0 +subject18 59 visit_1 113.03892279283257 84.37906458245828 0 +subject19 67 visit_1 112.82052189333163 86.1639304370987 0 +subject20 62 visit_1 127.91230490643542 80.1786964228196 0 diff --git a/test_data/cmqt_lipids.tsv b/test_data/cmqt_lipids.tsv index cb86da6..4039299 100644 --- a/test_data/cmqt_lipids.tsv +++ b/test_data/cmqt_lipids.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit triglycerides_1 hdl_1 total_cholesterol_1 ldl_1 ldl_emerge_1 non_hdl_1 -subject1 61 visit_1 121.81587985397368 71.17028460566112 238.95637077130564 188.454359916499 96.46527586738083 71.9815081098668 -subject2 62 visit_1 126.12607917531574 65.0236765753393 169.5359717952532 104.30982272343374 121.98435630873986 71.86350676725725 -subject3 57 visit_1 127.7670028393971 40.530614999033276 266.11102406578533 99.99377443873357 142.92649316845015 70.87243092623808 -subject4 56 visit_1 120.15247181676263 25.371440022030995 259.3555543399786 38.45628596995071 77.27636063799875 163.73093327833013 -subject5 56 visit_1 114.44929003903476 46.2283914890792 183.1868028422012 184.25646683652465 109.19166722747944 144.32759433923775 -subject6 50 visit_1 121.76167046737713 69.53915554308915 225.5643479638949 124.67475532358635 97.94270935619721 39.296370185898915 -subject7 57 visit_1 105.15114825942665 63.284384889078396 167.8480078167114 105.7191085525777 89.0851081809514 80.66450398490576 -subject8 58 visit_1 107.78291740857124 53.767667489291675 174.01458398692856 145.1832155257659 176.65010583032839 27.636484278396317 -subject9 58 visit_1 139.32414407501224 29.849293623397426 117.01982112939709 92.40754580081575 77.767419874941 86.86144116870521 -subject10 49 visit_1 106.26310342194574 73.18911140526993 248.07690694514721 80.26451717742208 186.7683088887664 49.48706304374129 -subject11 49 visit_1 114.1879956742671 70.00749806468177 217.02339765103213 84.07440602244296 166.9450153196839 69.44009605893248 -subject12 64 visit_1 102.40319514845531 65.78993486182691 223.12400869710362 124.629653991415 64.73139007878065 107.67357200420587 -subject13 66 visit_1 141.4831375920665 42.33453771936489 164.78803993526844 136.12331298090797 110.81689679812386 75.53475119870976 -subject14 60 visit_1 111.41151732449964 64.32978085521123 144.45642866019128 61.96423225265855 160.45068550343473 89.96357230862755 -subject15 62 visit_1 129.23647592582597 44.16079329472619 243.00636887890863 192.32007878592327 93.58985615329634 125.68826668491953 -subject16 61 visit_1 129.434459859743 48.257820622950135 139.60001130140282 95.5013708444393 178.41288548011966 23.46022409705926 -subject17 53 visit_1 103.19891012404025 37.06740924896674 203.72584264798112 136.07700774951198 32.38276963061956 106.37462899247632 -subject18 53 visit_1 120.74892407710541 60.85708544525265 171.23658654990035 138.3111853538541 142.5841477485692 61.03220785540295 -subject19 49 visit_1 107.91590592904498 47.254350360451454 193.59696622304162 131.52105675876646 162.9047545634819 126.05089749960378 -subject20 60 visit_1 83.60149749371428 68.64803466903822 191.75033372000615 115.36040412574407 128.15816218823934 76.99261251832581 +subject1 59 visit_1 125.31330632935543 53.001465817206366 244.02049471013018 134.65526129483385 84.07440602244296 129.58920575100962 +subject2 46 visit_1 114.43445627023667 40.003524060796295 232.49248862232687 140.16069077543497 124.629653991415 19.087989274357454 +subject3 55 visit_1 111.15193755446865 83.10640175595569 168.38106976626403 87.5160360391447 136.12331298090797 68.91015870067446 +subject4 55 visit_1 114.56150613610885 49.93917351966873 228.501401004244 69.16799659578237 61.96423225265855 122.56830865236185 +subject5 62 visit_1 116.61040594516172 69.59905432995511 173.37283500558493 158.10330850047853 192.32007878592327 50.28633097653658 +subject6 55 visit_1 92.52404404496058 69.8174189629518 184.57137636939703 64.78537605248546 95.5013708444393 141.986903221751 +subject7 61 visit_1 137.15870347622416 40.88115087210321 153.98425194717578 122.6550287311049 136.07700774951198 -15.883492291222097 +subject8 65 visit_1 126.55921260756209 60.23778390857215 219.00936688369057 93.33545615478813 138.3111853538541 103.25313270115589 +subject9 66 visit_1 101.06029788095256 46.08371977468197 181.82855765190064 113.51433537201316 131.52105675876646 125.22135628484529 +subject10 51 visit_1 92.49893135046618 19.266357529831936 240.30462809537113 111.84786213756654 115.36040412574407 87.65747263593441 +subject11 61 visit_1 121.81587985397368 71.17028460566112 238.95637077130564 188.454359916499 96.46527586738083 71.9815081098668 +subject12 55 visit_1 126.12607917531574 65.0236765753393 169.5359717952532 104.30982272343374 121.98435630873986 71.86350676725725 +subject13 52 visit_1 127.7670028393971 40.530614999033276 266.11102406578533 99.99377443873357 142.92649316845015 70.87243092623808 +subject14 66 visit_1 120.15247181676263 25.371440022030995 259.3555543399786 38.45628596995071 77.27636063799875 163.73093327833013 +subject15 57 visit_1 114.44929003903476 46.2283914890792 183.1868028422012 184.25646683652465 109.19166722747944 144.32759433923775 +subject16 57 visit_1 121.76167046737713 69.53915554308915 225.5643479638949 124.67475532358635 97.94270935619721 39.296370185898915 +subject17 67 visit_1 105.15114825942665 63.284384889078396 167.8480078167114 105.7191085525777 89.0851081809514 80.66450398490576 +subject18 59 visit_1 107.78291740857124 53.767667489291675 174.01458398692856 145.1832155257659 176.65010583032839 27.636484278396317 +subject19 67 visit_1 139.32414407501224 29.849293623397426 117.01982112939709 92.40754580081575 77.767419874941 86.86144116870521 +subject20 62 visit_1 106.26310342194574 73.18911140526993 248.07690694514721 80.26451717742208 186.7683088887664 49.48706304374129 diff --git a/test_data/subject.tsv b/test_data/subject.tsv index 49e7064..3bab20a 100644 --- a/test_data/subject.tsv +++ b/test_data/subject.tsv @@ -1,21 +1,21 @@ -subject_id consent_code study_nickname reported_sex -subject1 HMB-IRB ARIC Other -subject2 GRU ARIC Female -subject3 GRU JHS Unknown -subject4 GRU JHS Female -subject5 DS-CVD JHS Other -subject6 GRU ARIC Female -subject7 DS-CVD JHS Other -subject8 DS-CVD ARIC Other -subject9 DS-CVD JHS Unknown -subject10 GRU JHS Unknown -subject11 DS-CVD JHS Male -subject12 GRU UKBB Unknown -subject13 GRU ARIC Other -subject14 DS-CVD JHS Other -subject15 HMB-IRB JHS Other -subject16 HMB-IRB ARIC Male -subject17 DS-CVD JHS Female -subject18 HMB-IRB JHS Female -subject19 DS-CVD JHS Other -subject20 DS-CVD UKBB Unknown +subject_id consent_code study_nickname dbgap_submission reported_sex +subject1 DS-CVD ARIC TRUE Unknown +subject2 DS-CVD UKBB TRUE Female +subject3 HMB-IRB JHS FALSE Other +subject4 HMB-IRB UKBB FALSE Other +subject5 HMB-IRB ARIC FALSE Female +subject6 DS-CVD UKBB FALSE Unknown +subject7 HMB-IRB ARIC FALSE Female +subject8 DS-CVD ARIC FALSE Unknown +subject9 HMB-IRB ARIC FALSE Other +subject10 HMB-IRB JHS FALSE Other +subject11 HMB-IRB JHS FALSE Male +subject12 GRU ARIC FALSE Unknown +subject13 DS-CVD ARIC FALSE Male +subject14 HMB-IRB ARIC FALSE Male +subject15 HMB-IRB ARIC FALSE Other +subject16 DS-CVD JHS FALSE Unknown +subject17 HMB-IRB UKBB FALSE Male +subject18 HMB-IRB UKBB FALSE Male +subject19 HMB-IRB ARIC FALSE Male +subject20 GRU ARIC FALSE Female diff --git a/test_data/test_files.R b/test_data/test_files.R index 320738a..3501d1d 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -22,14 +22,16 @@ readme <- tibble( subject <- tibble( subject_id = paste0("subject", 1:n), + age_at_obs=round(rtnorm(n, 58, 5, 0, 90)), consent_code = sample(x = c("GRU", "HMB-IRB", "DS-CVD"), size = n, replace = TRUE), study_nickname = sample(x = c("UKBB", "JHS", "ARIC"), size = n, replace = TRUE), + dbgap_submission = c(rep(TRUE, 2), rep(FALSE, n-2)), reported_sex = sample(x = c("Female", "Male", "Unknown", "Other"), size = n, replace = TRUE) ) cmqt_anthropometry <- tibble( subject_id=rep(subject$subject_id), - age_at_obs=round(rtnorm(n, 58, 5, 0, 90)), + age_at_obs=rep(subject$subject_id), visit=rep("visit_1", n), height_1=rnorm(n, 165, 7), # height in cm weight_1=rnorm(n, 80, 5), # weight in kg @@ -39,7 +41,7 @@ cmqt_anthropometry <- tibble( cmqt_lipids <- tibble( subject_id=rep(subject$subject_id), - age_at_obs=round(rtnorm(n, 58, 5, 0, 90)), + age_at_obs=rep(subject$age_at_obs), visit=rep("visit_1", n), triglycerides_1=rnorm(n, 116, 13.6), # mg/dL hdl_1=rnorm(n, 55, 15), @@ -51,7 +53,7 @@ cmqt_lipids <- tibble( cmqt_blood_pressure <- tibble( subject_id=rep(subject$subject_id), - age_at_obs=round(rtnorm(n, 58, 5, 0, 90)), + age_at_obs=rep(subject$age_at_obs), visit=rep("visit_1", n), systolic_bp_1=rnorm(n, 120, 20), diastolic_bp_1=rnorm(n, 80, 10), @@ -72,6 +74,8 @@ phenotype_harmonized <- tibble( n_rows=rep(n, length(file_names)), ) +subject <- subject %>% select(-age_at_obs) + # cmqt_hematology <- tibble( # subject_id=rep(subject$subject_id), # age_at_obs=rep(subject$age_at_obs), From 92f548ea6833df38ab50dfecd37f03f7e4f14af5 Mon Sep 17 00:00:00 2001 From: amywatt Date: Mon, 22 Apr 2024 11:31:44 -0700 Subject: [PATCH 07/18] fixed age column and added set seed for all tables --- test_data/cmqt_anthropometry.tsv | 40 +++++++++++++++--------------- test_data/cmqt_blood_pressure.tsv | 40 +++++++++++++++--------------- test_data/cmqt_lipids.tsv | 40 +++++++++++++++--------------- test_data/phenotype_harmonized.tsv | 6 ++--- test_data/readme.tsv | 2 ++ test_data/test_files.R | 16 ++++++++---- 6 files changed, 76 insertions(+), 68 deletions(-) create mode 100644 test_data/readme.tsv diff --git a/test_data/cmqt_anthropometry.tsv b/test_data/cmqt_anthropometry.tsv index 0b0fe36..e7c4800 100644 --- a/test_data/cmqt_anthropometry.tsv +++ b/test_data/cmqt_anthropometry.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit height_1 weight_1 bmi_1 waist_hip_ratio_1 -subject1 subject1 visit_1 174.4059603833956 74.65738464756613 24.544270346105098 0.9029542666638012 -subject2 subject2 visit_1 166.27074769218558 85.32225373402065 30.86246561414685 0.7828680269809288 -subject3 subject3 visit_1 174.04758635491999 73.4363911773126 24.242383464072116 0.7540203628558878 -subject4 subject4 visit_1 153.18365996894644 90.31847351125953 38.490336272481564 0.6823418364566137 -subject5 subject5 visit_1 159.25304495658332 80.65691505331338 31.802855604448204 0.7173809253742001 -subject6 subject6 visit_1 158.96497699130163 78.84155775542531 31.199834726735073 0.6954780115808266 -subject7 subject7 visit_1 165.6919058239657 78.0132223851327 28.416175768348257 0.7329398074144686 -subject8 subject8 visit_1 162.3704139906115 84.44716041140687 32.03105585459906 0.7095477055180177 -subject9 subject9 visit_1 170.06732908698982 82.63084519748364 28.569353494069684 0.8294998540226414 -subject10 subject10 visit_1 152.41832586999223 79.1436337851869 34.06760571571555 0.7838557583735298 -subject11 subject11 visit_1 160.35379800896644 80.79338448721627 31.42080445534267 0.6977872077781471 -subject12 subject12 visit_1 160.63391457889574 77.57167466913786 30.06274833674053 0.736159001546514 -subject13 subject13 visit_1 164.44257297713287 75.20546962500397 27.81127508409966 0.8127265938356799 -subject14 subject14 visit_1 168.04937333980587 80.90258646053528 28.647624244062968 0.8491838106514763 -subject15 subject15 visit_1 178.79630678806967 83.60867141409459 26.15376606124955 0.8550358369909131 -subject16 subject16 visit_1 160.8226892924361 78.15229760939602 30.216705379062383 0.7962359191155008 -subject17 subject17 visit_1 161.13244951873727 81.18769156269829 31.269733119512615 0.9864257342639594 -subject18 subject18 visit_1 169.87176643590774 76.67038943792542 26.56961512330341 0.7537947207215205 -subject19 subject19 visit_1 163.9103522477441 76.01596245078622 28.29384899414978 0.8774783307433478 -subject20 subject20 visit_1 174.4422873663841 79.74151534344308 26.204805402947795 0.7777971498030142 +subject1 59 visit_1 166.51728404004504 87.70407490435488 31.630141478991852 0.9074966900959498 +subject2 46 visit_1 161.202551994156 80.8258450985496 31.103297082655853 0.8145228307678352 +subject3 55 visit_1 171.23801251550628 86.53811180127285 29.51257188546359 0.9034009869133715 +subject4 55 visit_1 169.17186404031233 86.4412843896079 30.204033404764687 0.6649561139308164 +subject5 62 visit_1 176.4493260077908 82.96448470324013 26.647238715038643 0.7343205137895237 +subject6 55 visit_1 169.82492809343805 78.58528157838867 27.248232409006288 0.7310283084720187 +subject7 61 visit_1 156.0312735892919 86.27942012799926 35.439200098235276 0.8079074951310364 +subject8 65 visit_1 163.50798836505146 84.54919575614312 31.62507251261148 0.7699475884641312 +subject9 66 visit_1 178.2757791033649 75.35985947462825 23.711301552730852 0.8579123324227408 +subject10 51 visit_1 177.43804249577906 86.20090419001572 27.379048176451334 0.656209438514197 +subject11 61 visit_1 168.9662314872622 80.76732089777315 28.290188511041826 0.7469005486739022 +subject12 55 visit_1 165.110036178032 85.25966289494805 31.274953024607136 0.7501018809016656 +subject13 52 visit_1 167.68140136962006 76.22894393591241 27.111285687273472 0.7936294054529471 +subject14 66 visit_1 164.68404018860684 72.58905440174385 26.765053327547104 0.8348499810263527 +subject15 57 visit_1 165.24046335177894 84.30565936248858 30.876205386787987 0.9576720775779393 +subject16 57 visit_1 166.18318741952814 77.97740084621502 28.23544170233956 0.75225930619927 +subject17 67 visit_1 173.1551878731937 78.86297291363192 26.30280885590887 0.7557994230712831 +subject18 59 visit_1 164.6905720192354 84.67048085431246 31.217243374076368 0.8556773306960885 +subject19 67 visit_1 164.29742090189868 77.67052060104524 28.773634591122615 0.7875468828313612 +subject20 62 visit_1 163.01588801788486 76.81228250714892 28.90485548023756 0.9079118556158186 diff --git a/test_data/cmqt_blood_pressure.tsv b/test_data/cmqt_blood_pressure.tsv index d5efead..2afdd3b 100644 --- a/test_data/cmqt_blood_pressure.tsv +++ b/test_data/cmqt_blood_pressure.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit systolic_bp_1 diastolic_bp_1 hypertension_1 -subject1 59 visit_1 114.22004802946624 70.16055840999157 0 -subject2 46 visit_1 133.33678600210294 79.77299437865399 0 -subject3 55 visit_1 117.26737559935488 71.15449794897164 0 -subject4 55 visit_1 124.48178615431377 84.85040438839447 0 -subject5 62 visit_1 142.34413334245977 71.68469376824022 0 -subject6 55 visit_1 91.23011204852963 91.73150820876486 0 -subject7 61 visit_1 132.68731449623817 80.61015463493953 0 -subject8 65 visit_1 110.01610392770148 81.56753621334104 0 -subject9 66 visit_1 142.52544874980188 84.68905755602563 0 -subject10 51 visit_1 117.9963062591629 85.45160795404544 0 -subject11 61 visit_1 97.80315967850292 83.95602163058528 0 -subject12 55 visit_1 133.15848523890034 70.78636654657132 0 -subject13 52 visit_1 119.13407971480952 73.23427389052003 0 -subject14 66 visit_1 132.56778450014235 60.56104694259206 0 -subject15 57 visit_1 101.26341574932871 81.19983156644504 0 -subject16 57 visit_1 112.85233189693206 92.66077269780487 0 -subject17 67 visit_1 119.09854164136453 69.02013199460663 0 -subject18 59 visit_1 113.03892279283257 84.37906458245828 0 -subject19 67 visit_1 112.82052189333163 86.1639304370987 0 -subject20 62 visit_1 127.91230490643542 80.1786964228196 0 +subject1 59 visit_1 124.33509725727151 95.40814980870978 0 +subject2 46 visit_1 109.15014855473149 81.65169019709921 0 +subject3 55 visit_1 137.82289290144655 93.0762236025457 0 +subject4 55 visit_1 131.91961154374948 92.88256877921579 0 +subject5 62 visit_1 152.71236002225947 85.92896940648028 0 +subject6 55 visit_1 133.78550883839446 77.17056315677733 0 +subject7 61 visit_1 94.37506739797686 92.5588402559985 0 +subject8 65 visit_1 115.73710961443275 89.09839151228624 0 +subject9 66 visit_1 157.93079743818544 70.7197189492565 0 +subject10 51 visit_1 155.53726427365447 92.40180838003143 1 +subject11 61 visit_1 131.33208996360634 81.53464179554629 0 +subject12 55 visit_1 120.3143890800914 90.5193257898961 0 +subject13 52 visit_1 127.66114677034301 72.45788787182482 0 +subject14 66 visit_1 119.09725768173382 65.17810880348769 0 +subject15 57 visit_1 120.68703814793986 88.61131872497717 0 +subject16 57 visit_1 123.38053548436613 75.95480169243004 0 +subject17 67 visit_1 143.30053678055347 77.72594582726384 0 +subject18 59 visit_1 119.11592005495825 89.34096170862492 0 +subject19 67 visit_1 117.9926311482819 75.3410412020905 0 +subject20 62 visit_1 114.33110862252818 73.62456501429784 0 diff --git a/test_data/cmqt_lipids.tsv b/test_data/cmqt_lipids.tsv index 4039299..c7ae69a 100644 --- a/test_data/cmqt_lipids.tsv +++ b/test_data/cmqt_lipids.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit triglycerides_1 hdl_1 total_cholesterol_1 ldl_1 ldl_emerge_1 non_hdl_1 -subject1 59 visit_1 125.31330632935543 53.001465817206366 244.02049471013018 134.65526129483385 84.07440602244296 129.58920575100962 -subject2 46 visit_1 114.43445627023667 40.003524060796295 232.49248862232687 140.16069077543497 124.629653991415 19.087989274357454 -subject3 55 visit_1 111.15193755446865 83.10640175595569 168.38106976626403 87.5160360391447 136.12331298090797 68.91015870067446 -subject4 55 visit_1 114.56150613610885 49.93917351966873 228.501401004244 69.16799659578237 61.96423225265855 122.56830865236185 -subject5 62 visit_1 116.61040594516172 69.59905432995511 173.37283500558493 158.10330850047853 192.32007878592327 50.28633097653658 -subject6 55 visit_1 92.52404404496058 69.8174189629518 184.57137636939703 64.78537605248546 95.5013708444393 141.986903221751 -subject7 61 visit_1 137.15870347622416 40.88115087210321 153.98425194717578 122.6550287311049 136.07700774951198 -15.883492291222097 -subject8 65 visit_1 126.55921260756209 60.23778390857215 219.00936688369057 93.33545615478813 138.3111853538541 103.25313270115589 -subject9 66 visit_1 101.06029788095256 46.08371977468197 181.82855765190064 113.51433537201316 131.52105675876646 125.22135628484529 -subject10 51 visit_1 92.49893135046618 19.266357529831936 240.30462809537113 111.84786213756654 115.36040412574407 87.65747263593441 -subject11 61 visit_1 121.81587985397368 71.17028460566112 238.95637077130564 188.454359916499 96.46527586738083 71.9815081098668 -subject12 55 visit_1 126.12607917531574 65.0236765753393 169.5359717952532 104.30982272343374 121.98435630873986 71.86350676725725 -subject13 52 visit_1 127.7670028393971 40.530614999033276 266.11102406578533 99.99377443873357 142.92649316845015 70.87243092623808 -subject14 66 visit_1 120.15247181676263 25.371440022030995 259.3555543399786 38.45628596995071 77.27636063799875 163.73093327833013 -subject15 57 visit_1 114.44929003903476 46.2283914890792 183.1868028422012 184.25646683652465 109.19166722747944 144.32759433923775 -subject16 57 visit_1 121.76167046737713 69.53915554308915 225.5643479638949 124.67475532358635 97.94270935619721 39.296370185898915 -subject17 67 visit_1 105.15114825942665 63.284384889078396 167.8480078167114 105.7191085525777 89.0851081809514 80.66450398490576 -subject18 59 visit_1 107.78291740857124 53.767667489291675 174.01458398692856 145.1832155257659 176.65010583032839 27.636484278396317 -subject19 67 visit_1 139.32414407501224 29.849293623397426 117.01982112939709 92.40754580081575 77.767419874941 86.86144116870521 -subject20 62 visit_1 106.26310342194574 73.18911140526993 248.07690694514721 80.26451717742208 186.7683088887664 49.48706304374129 +subject1 59 visit_1 118.94786613494463 78.11222471306466 258.0920536741743 82.46464639198933 169.61634833200802 108.39207743928065 +subject2 46 visit_1 108.62210101721742 57.47753529564881 210.4429507685155 161.38467763175282 114.07646247867957 76.39545961834311 +subject3 55 visit_1 128.11956717298366 74.61433540381856 255.99300579310284 73.42929471211319 100.73441782084811 66.74099280726072 +subject4 55 visit_1 124.10533584974965 74.32385316882369 133.79000838954343 198.35670398332047 67.58309936118381 76.7691356944378 +subject5 62 visit_1 138.24440481513642 63.893454109720416 169.33926331713087 126.86117139451909 83.78867798556752 82.7953116034168 +subject6 55 visit_1 125.37414601010823 50.75584473516601 167.65200809190955 113.4275273901473 73.65858035613226 11.953070720472283 +subject7 61 visit_1 98.57504583062428 73.83826038399776 207.05259125465614 107.29784564998208 90.9846609291917 143.23148081242402 +subject8 65 visit_1 113.10123453781426 68.64758726842938 187.59813908786725 154.90898704441082 80.16581380208316 112.05650766930025 +subject9 66 visit_1 141.7929422579661 41.079578423884755 232.6800703666546 141.46825446137896 135.6436824854716 37.05969964986049 +subject10 51 visit_1 140.16533970608504 73.60271257004715 129.30733723852595 115.6628900103831 114.53328824775753 11.879209854312279 +subject11 61 visit_1 123.70582117525231 57.30196269331943 175.78653119537483 127.87104520540043 74.72658359739302 98.10552898227554 +subject12 55 visit_1 116.21378457446215 70.77898868484415 177.42721396210362 104.03039255162022 92.47353821526272 110.78258580975216 +subject13 52 visit_1 121.20957980383325 43.68683180773724 199.73507029463536 86.52047522502937 127.88604964900193 115.60883188057969 +subject14 66 visit_1 115.386135223579 32.76716320523153 220.86061527600572 128.67913980796104 144.74751242630774 93.21315240224303 +subject15 57 visit_1 116.4671859405991 67.91697808746576 283.8069397586939 148.70416846429998 147.4540746082973 76.43908835010222 +subject16 57 visit_1 118.29876412936896 48.93220253864506 178.5328944271258 108.32700230953047 120.25911259091913 97.94608960993273 +subject17 67 visit_1 131.84436501077636 51.588918740895764 180.34720432403256 130.78891756396732 208.22190209708123 49.09161252772545 +subject18 59 visit_1 115.39882563737162 69.01144256293738 231.53463198174538 97.36088184064815 100.63005833370325 56.832110025209516 +subject19 67 visit_1 114.6349891808317 48.01156180313574 196.6177774510726 92.518122135818 157.83372796879834 149.60042375003604 +subject20 62 visit_1 112.14515386331917 45.43684752144676 258.304826003107 120.08721354147882 111.73118178389402 52.362068888075726 diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv index 48b8eac..74758c4 100644 --- a/test_data/phenotype_harmonized.tsv +++ b/test_data/phenotype_harmonized.tsv @@ -1,4 +1,4 @@ domain md5sum file_path file_readme_path n_subjects n_rows -cmqt_anthropometry 5a41957467d044f895168522dd554f1b gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cmqt_lipids 5ff37e31f7bf8bf3f5b7b9fabbed601a gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cmqt_blood_pressure b6b9748868ba729b653f30a7a77e6901 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_anthropometry NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_lipids NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_blood_pressure NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 diff --git a/test_data/readme.tsv b/test_data/readme.tsv new file mode 100644 index 0000000..bb2250c --- /dev/null +++ b/test_data/readme.tsv @@ -0,0 +1,2 @@ +read_me +NA diff --git a/test_data/test_files.R b/test_data/test_files.R index 3501d1d..7ce5afd 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -16,10 +16,6 @@ rtnorm <- function(n, mean, sd, a = -Inf, b = Inf){ set.seed(4) -readme <- tibble( - read_me = c(NA) -) - subject <- tibble( subject_id = paste0("subject", 1:n), age_at_obs=round(rtnorm(n, 58, 5, 0, 90)), @@ -29,9 +25,11 @@ subject <- tibble( reported_sex = sample(x = c("Female", "Male", "Unknown", "Other"), size = n, replace = TRUE) ) +set.seed(4) + cmqt_anthropometry <- tibble( subject_id=rep(subject$subject_id), - age_at_obs=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), visit=rep("visit_1", n), height_1=rnorm(n, 165, 7), # height in cm weight_1=rnorm(n, 80, 5), # weight in kg @@ -39,6 +37,8 @@ cmqt_anthropometry <- tibble( waist_hip_ratio_1=rnorm(n, 0.8, 0.08) ) +set.seed(4) + cmqt_lipids <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), @@ -51,6 +51,8 @@ cmqt_lipids <- tibble( non_hdl_1=rnorm(n, 81, 40), ) +set.seed(4) + cmqt_blood_pressure <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), @@ -64,6 +66,10 @@ cmqt_blood_pressure <- tibble( bucket <- "gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/" +readme <- tibble( + read_me = c(NA) +) + phenotype_harmonized <- tibble( # phenotype_harmonized_id= domain=(file_names), From 75c79f04b84b68392d76fc071d4039d269d3388d Mon Sep 17 00:00:00 2001 From: amywatt Date: Mon, 22 Apr 2024 13:00:29 -0700 Subject: [PATCH 08/18] upated phenotype_harmonized --- test_data/phenotype_harmonized.tsv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv index 74758c4..d3144d3 100644 --- a/test_data/phenotype_harmonized.tsv +++ b/test_data/phenotype_harmonized.tsv @@ -1,4 +1,4 @@ domain md5sum file_path file_readme_path n_subjects n_rows -cmqt_anthropometry NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cmqt_lipids NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cmqt_blood_pressure NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_anthropometry d26b5af92459c0961442e2dcf7ce9235 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_lipids 17ce825be3d94425e26c08987fa78cd9 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_blood_pressure 9877023cd800d4235e1b99a650f88694 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 From 663de44dd9b450cfc8e350ec48aa2a77afcc5c78 Mon Sep 17 00:00:00 2001 From: amywatt Date: Mon, 22 Apr 2024 15:57:02 -0700 Subject: [PATCH 09/18] added phenotype harmonized tables --- test_data/cancer_breast.tsv | 21 +++ test_data/cancer_prostate.tsv | 21 +++ test_data/cmqt_flags.tsv | 21 +++ test_data/cmqt_glycemic.tsv | 21 +++ test_data/cmqt_hematology.tsv | 21 +++ test_data/cmqt_kidney_function.tsv | 21 +++ test_data/cvd_cad.tsv | 21 +++ test_data/diabetes_diabetes.tsv | 21 +++ test_data/family_history.tsv | 21 +++ test_data/phenotype_harmonized.tsv | 11 +- test_data/test_files.R | 249 +++++++++++++++++++++++++---- 11 files changed, 419 insertions(+), 30 deletions(-) create mode 100644 test_data/cancer_breast.tsv create mode 100644 test_data/cancer_prostate.tsv create mode 100644 test_data/cmqt_flags.tsv create mode 100644 test_data/cmqt_glycemic.tsv create mode 100644 test_data/cmqt_hematology.tsv create mode 100644 test_data/cmqt_kidney_function.tsv create mode 100644 test_data/cvd_cad.tsv create mode 100644 test_data/diabetes_diabetes.tsv create mode 100644 test_data/family_history.tsv diff --git a/test_data/cancer_breast.tsv b/test_data/cancer_breast.tsv new file mode 100644 index 0000000..0704e59 --- /dev/null +++ b/test_data/cancer_breast.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit breast_cancer_status_emerge_1 breast_cancer_status_registry_1 breast_cancer_status_survey_1 age_at_diagnosis_1 year_at_diagnosis_1 breast_cancer_type_1 cancer_behavior_1 her2_1 pr_1 er_1 T_stage_clinical_1 T_stage_pathological_1 T_stage_uknown_1 T_stage_clinical_2 T_stage_pathological_2 T_stage_unknown_2 nodal_involvement_1 distant_metastasis_1 stage_system_1 grade_clinical_1 grade_pathological_1 grade_unknown_1 screening_history_1 recurrence_1 surgery_1 radiotherapy_1 chemotherapy_1 hormone_therapy_1 NSAID_1 age_at_natural_menopause_1 post_menopausal_hormone_use_1 parity_1 age_at_first_birth_1 age_at_menarche_1 deceased_1 cause_of_death_breast_cancer_1 age_at_death_1 +subject1 59 visit_1 0 0 0 63 2017 unilateral benign negative unknown positive unstaged stage4 stage3 unknown unknown unstaged N2 M1 NA grade1 grade1 grade3 0 recurrance_second_primary grade1 grade1 grade2 pharmaceutical grade3 64 grade3 0 26 15 0 0 64 +subject2 46 visit_1 0 1 0 46 2013 bilateral borderline unknown unknown negative unstaged stage4 stage3 localized distant in_situ NX MX NA grade1 grade3 grade1 1 none grade2 grade2 grade2 surgical grade1 58 grade1 2 30 9 1 1 48 +subject3 55 visit_1 0 0 0 62 2011 unilateral invasive positive negative positive stage3 unstaged stage1 unstaged unstaged regional N2 MX NA grade2 grade2 grade1 0 unknown grade3 grade2 grade1 surgical grade3 62 grade3 2 32 18 0 0 72 +subject4 55 visit_1 0 0 0 64 2017 bilateral borderline unknown negative unknown stage3 stage2 stage4 localized localized regional N0 MX NA grade2 grade3 grade3 0 unknown grade3 grade3 grade1 pharmaceutical grade3 59 grade1 2 30 14 0 0 78 +subject5 62 visit_1 0 0 0 63 2016 unilateral benign positive positive negative stage3 unstaged unknown regional regional unknown NX MX NA grade2 grade3 grade2 0 recurrance_second_primary grade2 grade1 grade1 unknown grade2 63 grade2 2 30 13 0 0 77 +subject6 55 visit_1 0 0 0 59 2002 bilateral in_situ negative positive positive stage1 stage4 stage1 localized localized unknown N3 MX NA grade2 grade3 grade2 1 none grade2 grade2 grade3 both grade1 57 grade1 0 23 20 0 0 73 +subject7 61 visit_1 0 0 0 61 2002 bilateral invasive negative positive unknown stage3 stage4 stage3 localized unstaged unknown NX MX NA grade1 grade1 grade3 1 unknown grade3 grade3 grade1 none grade2 61 grade1 2 17 12 0 0 73 +subject8 65 visit_1 0 0 0 68 2021 unilateral benign unknown positive positive stage3 stage3 unstaged in_situ localized unknown NX M0 NA grade1 grade3 grade3 0 unknown grade3 grade2 grade1 none grade2 81 grade2 0 31 13 0 0 86 +subject9 66 visit_1 0 0 0 72 2006 bilateral in_situ unknown positive unknown stage4 stage3 unknown regional regional regional N3 M1 NA grade3 grade2 grade1 1 none grade1 grade1 grade2 none grade1 68 grade2 0 25 14 0 0 81 +subject10 51 visit_1 0 0 0 62 2017 bilateral borderline negative unknown unknown stage1 unknown stage2 unknown distant unknown N2 MX NA grade1 grade1 grade1 1 recurrance_primary grade3 grade1 grade1 surgical grade2 59 grade3 0 27 16 0 0 74 +subject11 61 visit_1 0 0 0 63 2006 unilateral borderline negative positive negative stage1 stage4 unknown unknown regional unstaged NX M0 NA grade1 grade1 grade3 0 recurrance_primary grade2 grade3 grade3 unknown grade1 77 grade1 1 30 17 0 0 67 +subject12 55 visit_1 0 0 0 59 2004 unilateral benign positive negative unknown unknown unknown stage1 unknown localized unstaged N1 MX NA grade1 grade1 grade2 0 none grade1 grade2 grade3 pharmaceutical grade3 74 grade3 1 26 15 0 0 59 +subject13 52 visit_1 0 0 0 55 2010 unilateral borderline negative positive unknown unknown stage2 stage2 unstaged localized regional N1 MX NA grade2 grade1 grade1 0 unknown grade2 grade2 grade1 surgical grade2 53 grade2 1 25 12 0 0 66 +subject14 66 visit_1 1 0 0 68 2007 unilateral benign unknown negative positive stage4 stage2 stage3 in_situ distant unknown N1 M0 NA grade2 grade3 grade1 1 recurrance_primary grade3 grade3 grade3 both grade2 82 grade2 2 22 11 0 0 72 +subject15 57 visit_1 0 0 0 64 2008 bilateral invasive positive positive positive unstaged stage2 stage4 distant in_situ regional N0 M1 NA grade3 grade3 grade3 0 recurrance_second_primary grade2 grade2 grade3 both grade3 66 grade1 1 22 12 0 0 80 +subject16 57 visit_1 0 1 0 62 2003 bilateral invasive unknown positive unknown unstaged stage4 stage4 unknown unknown unstaged N0 M0 NA grade2 grade1 grade1 1 recurrance_second_primary grade2 grade2 grade1 both grade1 57 grade3 1 26 19 1 0 70 +subject17 67 visit_1 1 0 0 69 2014 unilateral in_situ positive negative unknown stage1 unknown stage2 localized regional localized N1 MX NA grade2 grade1 grade3 1 none grade2 grade3 grade3 none grade1 74 grade2 1 32 14 0 0 85 +subject18 59 visit_1 0 0 0 62 2009 bilateral borderline positive positive negative stage2 stage2 unknown unknown regional distant N3 M0 NA grade3 grade2 grade2 0 recurrance_primary grade1 grade3 grade1 pharmaceutical grade1 59 grade2 1 23 12 0 0 74 +subject19 67 visit_1 1 0 0 69 2001 bilateral borderline negative negative negative stage1 stage2 stage1 unknown unstaged unknown N0 MX NA grade2 grade3 grade2 0 recurrance_second_primary grade2 grade3 grade2 both grade1 78 grade1 1 32 18 0 0 73 +subject20 62 visit_1 0 0 0 63 2013 unilateral benign unknown unknown positive stage4 stage2 unknown in_situ regional distant N0 M0 NA grade1 grade2 grade3 0 recurrance_second_primary grade1 grade1 grade2 surgical grade1 75 grade1 2 28 16 1 0 64 diff --git a/test_data/cancer_prostate.tsv b/test_data/cancer_prostate.tsv new file mode 100644 index 0000000..6380cc7 --- /dev/null +++ b/test_data/cancer_prostate.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit prostate_cancer_status_emerge_1 prostate_cancer_status_registry_1 prostate_cancer_status_survey_1 age_at_diagnosis_1 year_at_diagnosis_1 cancer_behavior_1 T_stage_clinical_1 T_stage_pathological_1 T_stage_uknown_1 T_stage_clinical_2 T_stage_pathological_2 T_stage_unknown_2 nodal_involvement_1 distant_metastasis_1 stage_system_1 +subject1 59 visit_1 1 1 1 63 2017 borderline stage1 stage4 unknown localized unstaged in_situ N1 M1 NA +subject2 46 visit_1 0 1 0 46 2013 in_situ stage3 unstaged unknown in_situ unstaged in_situ N0 MX NA +subject3 55 visit_1 0 1 1 62 2011 borderline unknown stage2 stage3 regional distant unstaged NX M1 NA +subject4 55 visit_1 0 0 0 64 2017 in_situ stage2 unknown stage3 unknown distant regional N2 MX NA +subject5 62 visit_1 1 1 1 63 2016 borderline stage1 stage2 stage2 in_situ distant unstaged N3 MX NA +subject6 55 visit_1 0 1 0 59 2002 in_situ unstaged stage3 stage1 regional localized in_situ NX MX NA +subject7 61 visit_1 1 0 1 61 2002 in_situ unknown stage4 stage1 unstaged distant in_situ N0 MX NA +subject8 65 visit_1 1 1 1 68 2021 benign stage2 unknown stage1 regional distant distant N2 M0 NA +subject9 66 visit_1 1 1 1 72 2006 invasive stage4 unstaged stage1 unstaged in_situ distant N3 MX NA +subject10 51 visit_1 0 1 1 62 2017 invasive stage3 stage3 unstaged unknown localized unknown N0 M1 NA +subject11 61 visit_1 1 1 0 63 2006 borderline stage3 stage3 stage1 distant localized in_situ N3 M1 NA +subject12 55 visit_1 0 0 1 59 2004 borderline stage2 stage1 stage4 unknown unknown unknown NX M1 NA +subject13 52 visit_1 0 1 1 55 2010 borderline stage3 stage4 stage2 unknown unknown regional NX M1 NA +subject14 66 visit_1 1 1 1 68 2007 borderline stage2 unknown stage4 regional in_situ regional N0 M0 NA +subject15 57 visit_1 0 0 1 64 2008 invasive unstaged stage2 stage1 regional unstaged regional N2 M0 NA +subject16 57 visit_1 0 1 0 62 2003 in_situ unknown unknown stage1 unstaged unstaged in_situ N2 M1 NA +subject17 67 visit_1 1 0 0 69 2014 borderline unstaged stage2 stage4 unknown localized unknown N0 MX NA +subject18 59 visit_1 1 1 0 62 2009 invasive stage2 stage2 stage2 distant regional regional N3 M1 NA +subject19 67 visit_1 1 0 1 69 2001 invasive stage3 stage3 stage3 distant localized regional NX M1 NA +subject20 62 visit_1 1 0 1 63 2013 benign stage1 unstaged unstaged localized in_situ regional N3 M0 NA diff --git a/test_data/cmqt_flags.tsv b/test_data/cmqt_flags.tsv new file mode 100644 index 0000000..8d256a2 --- /dev/null +++ b/test_data/cmqt_flags.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit flag_pregnancy_1 flag_acute_illness_1 flag_bld_1 flag_anemia_1 flag_hiv_1 flag_eskd_1 flag_splenectomy_1 flag_cirrhosis_1 flag_fasting_1 flag_lipids_med_1 flag_bp_med_1 flag_cvd_1 flag_t2d_1 flag_t1d_1 flag_diabetes_other_1 +subject1 59 visit_1 unknown unknown data not collected unknown data not collected yes no unknown data not collected no data not collected unknown yes data not collected data not collected +subject2 46 visit_1 no data not collected no no unknown unknown yes unknown data not collected unknown data not collected unknown yes no yes +subject3 55 visit_1 yes unknown unknown data not collected unknown yes data not collected yes yes no yes data not collected no unknown data not collected +subject4 55 visit_1 yes yes no data not collected data not collected unknown yes data not collected yes data not collected yes yes unknown no no +subject5 62 visit_1 data not collected unknown data not collected no data not collected yes no no yes unknown yes data not collected data not collected no no +subject6 55 visit_1 yes data not collected no unknown no unknown unknown yes no no no unknown no no no +subject7 61 visit_1 unknown yes data not collected no no unknown data not collected unknown no unknown yes unknown yes no unknown +subject8 65 visit_1 data not collected data not collected data not collected unknown data not collected no no data not collected no yes yes yes unknown unknown no +subject9 66 visit_1 data not collected unknown unknown data not collected no data not collected unknown data not collected no data not collected unknown yes data not collected yes no +subject10 51 visit_1 no unknown unknown data not collected data not collected data not collected yes yes data not collected data not collected no data not collected yes data not collected yes +subject11 61 visit_1 data not collected unknown yes yes no yes yes yes no yes no unknown data not collected data not collected yes +subject12 55 visit_1 yes no unknown unknown no yes no no unknown data not collected data not collected data not collected no data not collected no +subject13 52 visit_1 no data not collected data not collected yes unknown yes yes unknown no data not collected data not collected no no unknown no +subject14 66 visit_1 data not collected unknown data not collected yes yes yes no data not collected unknown no unknown no yes unknown yes +subject15 57 visit_1 yes yes data not collected data not collected yes data not collected data not collected yes no yes data not collected yes unknown yes unknown +subject16 57 visit_1 yes data not collected yes unknown no unknown data not collected data not collected no unknown unknown unknown unknown data not collected data not collected +subject17 67 visit_1 data not collected yes no yes data not collected yes unknown no unknown data not collected no data not collected yes no yes +subject18 59 visit_1 unknown unknown no yes yes data not collected yes yes no yes no yes data not collected data not collected yes +subject19 67 visit_1 data not collected yes data not collected yes no data not collected yes yes yes yes no no no data not collected data not collected +subject20 62 visit_1 data not collected no unknown no unknown no no data not collected unknown no unknown yes data not collected unknown no diff --git a/test_data/cmqt_glycemic.tsv b/test_data/cmqt_glycemic.tsv new file mode 100644 index 0000000..42fb3cf --- /dev/null +++ b/test_data/cmqt_glycemic.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit fasting_glucose_plasma_1 fasting_glucose_serum_1 fasting_insulin_1 hba1c_1 +subject1 59 visit_1 54.335097257271514 100.40814980870978 44.062251757196236 0.8629538590264505 +subject2 46 visit_1 39.150148554731494 86.65169019709921 37.08921230758763 5.12890149360826 +subject3 55 visit_1 67.82289290144655 98.0762236025457 43.75507401850285 0.3745564709250373 +subject4 55 visit_1 61.9196115437495 97.88256877921579 25.871708544811234 7.12738940450381 +subject5 62 visit_1 82.71236002225946 90.92896940648028 31.074038534214274 3.262766021325356 +subject6 55 visit_1 63.78550883839445 82.17056315677733 30.827123135401397 2.5366231021701244 +subject7 61 visit_1 24.37506739797687 97.5588402559985 36.59306213482773 2.2052889540530853 +subject8 65 visit_1 45.73710961443275 94.09839151228624 33.746069134809844 4.778864164562747 +subject9 66 visit_1 87.93079743818544 75.7197189492565 40.343424931705556 4.052338078993457 +subject10 51 visit_1 85.53726427365447 97.40180838003143 25.215707888564772 2.657453514074762 +subject11 61 visit_1 61.332089963606336 86.53464179554629 32.01754115054266 3.31735379488651 +subject12 55 visit_1 50.3143890800914 95.5193257898961 32.25764106762492 2.0286698676551476 +subject13 52 visit_1 57.66114677034302 77.45788787182482 35.52220540897103 1.0821878500015871 +subject14 66 visit_1 49.09725768173383 70.17810880348769 38.61374857697645 3.36103458421411 +subject15 57 visit_1 50.68703814793985 93.61131872497717 47.825405818345445 4.443468565637837 +subject16 57 visit_1 53.38053548436613 80.95480169243004 32.41944796494524 2.2609190437584035 +subject17 67 visit_1 73.30053678055346 82.72594582726384 32.68495673034623 3.475076625079314 +subject18 59 visit_1 49.11592005495825 94.34096170862492 40.17579980220664 1.6681557751701703 +subject19 67 visit_1 47.99263114828191 80.3410412020905 35.06601621235209 1.4063849803144863 +subject20 62 visit_1 44.33110862252818 78.62456501429784 44.09338917118639 2.8966061373772334 diff --git a/test_data/cmqt_hematology.tsv b/test_data/cmqt_hematology.tsv new file mode 100644 index 0000000..e119655 --- /dev/null +++ b/test_data/cmqt_hematology.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit rbc_1 hemoglobin_1 hematocrit_1 mcv_1 mch_1 mchc_1 rdw_1 wbc_1 basophil_count_1 eosinophil_count_1 lymphocyte_count_1 monocyte_count_1 neutrophil_count_1 platelet_count_1 mean_platelet_volume_1 +subject1 59 visit_1 4.325132294295363 17.622444942612933 0.937483450479749 2.1675254921479365 35.86295385902645 36.57385666659503 10.684801935982016 7.600293163441274 300.04998709787844 234.20340890495638 274.98394655251263 571.473014377524 3422.0048029466243 603.2111681998314 7.521621937029895 +subject2 46 visit_1 3.186261141604862 13.495507059129762 0.47261415383917543 1.4832875675756076 40.12890149360826 33.57170067452322 9.884886490458578 5.00070481215926 271.93289907884605 249.08294804171615 1371.0717294977026 295.21997318589365 5333.678600210294 795.4598875730798 8.99155813908785 +subject3 55 visit_1 5.336716967608491 16.92286708076371 0.917004934566857 1.7182941125277411 35.374556470925036 32.8505090713972 9.643524820181518 13.621280351191137 115.56358479576589 106.8000974030938 1681.711161646161 419.7753967516861 3726.737559935488 623.0899589794328 12.633563355254331 +subject4 55 visit_1 4.8939708657812115 16.86477063376474 -0.2752194303459178 -0.5623642598084516 42.12738940450381 31.05854591141534 9.894228392360946 6.987834703933746 262.1985390347415 57.21080161022263 -322.58831749571505 553.9207716309046 4448.178615431377 897.0080877678893 11.58886038213768 +subject5 62 visit_1 6.453427001669459 14.778690821944084 0.07160256894761835 0.11095278358164622 38.262766021325355 31.934523134355 10.04488279008542 10.919810865991021 127.73862196484127 297.5765094607528 3200.542669889818 373.21582744134145 6234.413334245976 633.6938753648045 10.97006835064897 +subject6 55 visit_1 5.033913162879584 12.1511689470332 0.055141542360093165 0.10954011437207664 37.536623102170125 31.386950289520662 8.273826768011807 10.963483792590361 155.05213748633423 45.365881222933666 583.8208336334947 602.4672580543775 1123.0112048529631 1034.6301641752973 14.742063625852968 +subject7 61 visit_1 2.0781300548482653 16.767652076799553 0.439537475655182 2.1150624073298183 37.20528895405309 32.323495185361715 11.5557870203106 5.176230174420642 80.44939499311161 201.77034792190517 1680.4596689057294 207.79126927194477 5268.731449623816 812.2030926987908 9.436589902978806 +subject8 65 visit_1 3.680283221082456 15.729517453685874 0.24973794232065624 0.6785834875154052 39.77886416456275 31.73869263795044 10.776412691732506 9.04755678171443 239.04723630168436 122.52825987780575 1740.8428474014622 505.63283175288973 3001.610392770148 831.3507242668209 10.397990954360015 +subject9 66 visit_1 6.844809807863908 10.21591568477695 0.6895616621137035 1.0074226771377572 39.05233807899346 34.73749635056603 8.901492491246513 6.216743954936394 148.36233573634303 177.06577127571126 1557.3258583450393 560.5533907121132 6252.544874980189 893.7811511205126 11.62047044172219 +subject10 51 visit_1 6.6652948205240845 16.720542514009427 -0.3189528074290151 -0.4785276810965373 37.65745351407476 33.59639395933824 8.271980246357806 0.8532715059663865 290.9868977935881 172.56178956099063 1120.5514628579476 466.64368158983604 3799.630625916291 909.0321590809087 9.7554828289856 +subject11 61 visit_1 4.8499067472704755 13.460392538663887 0.13450274336951068 0.2773305763975952 38.31735379488651 31.444680194453674 10.427638224556889 11.234056921132225 287.69846529586744 379.60637815269996 609.8723207400225 427.453770274667 1780.3159678502925 879.1204326117057 12.015438884874985 +subject12 55 visit_1 4.023579181006855 16.15579773696883 0.15050940450832806 0.3740684543224642 37.02866986765515 32.40397503866285 10.744564645243804 10.004735315067858 118.38041901281272 152.18871006333444 1299.57719753351 427.1587669181431 5315.848523890034 615.7273309314264 11.035164284836954 +subject13 52 visit_1 4.574586007775727 10.737366361547448 0.3681470272647353 0.8047657791086918 36.08218785000159 34.318164845892 10.865220797014493 5.106122999806655 353.92932698972027 140.52371469927994 1865.5808964445987 424.6810773155952 3913.407971480952 664.6854778104007 9.540057216236947 +subject14 66 visit_1 3.932294326130037 8.553432641046307 0.5742499051317631 1.4603431419562902 38.36103458421411 35.22959526628691 10.305328810056075 2.074288004406199 337.45257156092345 -25.793821702835913 91.2529902161823 656.8273331958253 5256.778450014236 411.22093885184114 9.664732442940606 +subject15 57 visit_1 4.051527861095489 15.583395617493153 1.1883603878896962 2.9331166627307272 39.443468565637836 35.37589592477283 9.885977208752555 6.245678297815839 151.67512888341756 368.26072117979635 953.8288439859309 608.3189858480944 2126.3415749328706 823.9966313289008 5.0479167273722645 +subject16 57 visit_1 4.25354016132746 11.786440507729012 0.16129653099634933 0.3792053792340621 37.260919043758406 33.90589797788752 10.423652240248318 10.907831108617831 255.03499503389 207.22906844212528 649.8029555728975 345.7409254647473 3285.2331896932055 1053.2154539560975 8.797691547079966 +subject17 67 visit_1 5.747540258541509 12.317783748179153 0.17899711535641527 0.31143255602325687 38.47507662507931 38.66064335659898 9.202290313193137 9.656876977815678 114.26343369929609 155.9975906826424 410.40832921490255 449.1612599622644 3909.8541641364536 580.4026398921327 10.063017221093515 +subject18 59 visit_1 3.933694004121869 15.802288512587474 0.6783866534804426 1.724553696270234 36.66815577517017 32.844868018038014 9.395802750630239 7.753533497858336 129.30386338275255 262.6573392588267 2777.029887306173 316.59121069599075 3303.892279283257 887.5812916491656 12.452249176572817 +subject19 67 visit_1 3.849447336121143 11.602312360627149 0.33773441415680605 0.8773581885058875 36.406384980314485 35.936958268583695 11.7150105937509 2.9698587246794848 -9.707753342933955 120.02039405625881 104.52486148489174 464.653602921763 3282.0521893331625 923.2786087419738 8.601570381694092 +subject20 62 visit_1 3.5748331466896133 11.08736950428935 0.9395592780790926 2.6282605076243866 37.89660613737723 33.44492874507535 9.284051722201893 11.637822281053985 309.9436754759688 87.2013977768164 3050.4948348315243 371.21765760935324 4791.230490643542 803.573928456392 6.9917024211199426 diff --git a/test_data/cmqt_kidney_function.tsv b/test_data/cmqt_kidney_function.tsv new file mode 100644 index 0000000..00309da --- /dev/null +++ b/test_data/cmqt_kidney_function.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit cystatin_c_1 serum_creatinine_1 +subject1 59 visit_1 3.2167548628635756 0.8852037452177444 +subject2 46 visit_1 2.4575074277365747 0.5412922549274802 +subject3 55 visit_1 3.8911446450723277 0.8269055900636425 +subject4 55 visit_1 3.5959805771874747 0.8220642194803949 +subject5 62 visit_1 4.635618001112973 0.6482242351620069 +subject6 55 visit_1 3.6892754419197225 0.4292640789194334 +subject7 61 visit_1 1.7187533698988435 0.8139710063999628 +subject8 65 visit_1 2.7868554807216372 0.7274597878071561 +subject9 66 visit_1 4.896539871909272 0.2679929737314125 +subject10 51 visit_1 4.776863213682724 0.8100452095007857 +subject11 61 visit_1 3.566604498180317 0.5383660448886572 +subject12 55 visit_1 3.01571945400457 0.7629831447474025 +subject13 52 visit_1 3.3830573385171507 0.31144719679562066 +subject14 66 visit_1 2.9548628840866913 0.12945272008719222 +subject15 57 visit_1 3.0343519073969927 0.7152829681244294 +subject16 57 visit_1 3.1690267742183065 0.39887004231075096 +subject17 67 visit_1 4.165026839027673 0.44314864568159607 +subject18 59 visit_1 2.9557960027479124 0.7335240427156229 +subject19 67 visit_1 2.899631557414095 0.3835260300522624 +subject20 62 visit_1 2.716555431126409 0.34061412535744595 diff --git a/test_data/cvd_cad.tsv b/test_data/cvd_cad.tsv new file mode 100644 index 0000000..e33e24f --- /dev/null +++ b/test_data/cvd_cad.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit cad_1 cad_emerge_1 cad_emerge_mod_1 +subject1 59 visit_1 0 0 0 +subject2 46 visit_1 0 1 0 +subject3 55 visit_1 0 0 0 +subject4 55 visit_1 0 0 0 +subject5 62 visit_1 0 0 0 +subject6 55 visit_1 0 0 0 +subject7 61 visit_1 0 0 0 +subject8 65 visit_1 0 0 0 +subject9 66 visit_1 0 0 0 +subject10 51 visit_1 0 0 0 +subject11 61 visit_1 0 0 0 +subject12 55 visit_1 0 0 0 +subject13 52 visit_1 0 0 0 +subject14 66 visit_1 1 0 0 +subject15 57 visit_1 0 0 0 +subject16 57 visit_1 0 1 0 +subject17 67 visit_1 1 0 0 +subject18 59 visit_1 0 0 0 +subject19 67 visit_1 1 0 0 +subject20 62 visit_1 0 0 0 diff --git a/test_data/diabetes_diabetes.tsv b/test_data/diabetes_diabetes.tsv new file mode 100644 index 0000000..5a1a6fb --- /dev/null +++ b/test_data/diabetes_diabetes.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit t1d_1 t2d_1 +subject1 59 visit_1 0 0 +subject2 46 visit_1 0 1 +subject3 55 visit_1 0 0 +subject4 55 visit_1 0 0 +subject5 62 visit_1 0 0 +subject6 55 visit_1 0 0 +subject7 61 visit_1 0 0 +subject8 65 visit_1 0 0 +subject9 66 visit_1 0 0 +subject10 51 visit_1 0 0 +subject11 61 visit_1 0 0 +subject12 55 visit_1 0 0 +subject13 52 visit_1 0 0 +subject14 66 visit_1 1 0 +subject15 57 visit_1 0 0 +subject16 57 visit_1 0 1 +subject17 67 visit_1 1 0 +subject18 59 visit_1 0 0 +subject19 67 visit_1 1 0 +subject20 62 visit_1 0 0 diff --git a/test_data/family_history.tsv b/test_data/family_history.tsv new file mode 100644 index 0000000..992414a --- /dev/null +++ b/test_data/family_history.tsv @@ -0,0 +1,21 @@ +subject_id age_at_obs visit family_hx_cancer_breast_1 family_hx_cancer_breast_relatedness_1 family_hx_cancer_prostate family_hx_cancer_prostate_relatedness_1 family_hx_cancer_pancreatic family_hx_cancer_pancreatic_relatedness_1 family_hx_cancer_colorectal family_hx_cancer_colorectal_relatedness_1 family_hx_cancer_lung family_hx_cancer_lung_relatedness_1 family_hx_cancer_any family_hx_type2_diabetes family_hx_type2_diabetes_relatedness_1 family_hx_myocardial_infarction family_hx_myocardial_infarction_relatedness_1 family_hx_dementia family_hx_dementia_relatedness_1 family_hx_type1_diabetes family_hx_type1_diabetes_relatedness_1 family_hx_asthma family_hx_asthma_relatedness_1 family_hx_stroke family_hx_stroke_relatedness_1 family_hx_heart_failure family_hx_heart_failure_relatedness_1 +subject1 59 1 1 unknown 1 2nd degree 1 1st degree 0 2nd degree 1 1st degree 1 1 2nd degree 1 unknown 1 2nd degree 1 1st degree 0 unknown 0 2nd degree 0 1st degree +subject2 46 1 0 unknown 0 1st degree 1 2nd degree 0 unknown 1 2nd degree 1 1 2nd degree 0 2nd degree 1 1st degree 0 1st degree 1 1st degree 1 unknown 0 2nd degree +subject3 55 1 0 2nd degree 1 unknown 1 1st degree 1 1st degree 0 1st degree 0 1 1st degree 1 unknown 0 unknown 0 2nd degree 0 1st degree 0 unknown 1 2nd degree +subject4 55 1 0 2nd degree 0 unknown 1 2nd degree 0 unknown 0 unknown 0 0 2nd degree 0 1st degree 0 2nd degree 0 2nd degree 1 unknown 0 2nd degree 1 unknown +subject5 62 1 1 2nd degree 1 1st degree 1 2nd degree 0 1st degree 0 2nd degree 0 1 unknown 0 1st degree 1 1st degree 0 2nd degree 1 2nd degree 0 2nd degree 0 1st degree +subject6 55 1 0 unknown 0 2nd degree 0 2nd degree 1 2nd degree 0 1st degree 0 1 1st degree 0 1st degree 1 unknown 0 2nd degree 1 2nd degree 1 unknown 1 2nd degree +subject7 61 1 1 2nd degree 1 1st degree 0 unknown 1 2nd degree 0 unknown 0 1 2nd degree 0 unknown 1 1st degree 0 1st degree 0 unknown 1 2nd degree 1 unknown +subject8 65 1 1 unknown 1 2nd degree 1 1st degree 0 unknown 0 1st degree 0 0 unknown 1 1st degree 1 1st degree 1 1st degree 1 unknown 0 2nd degree 1 2nd degree +subject9 66 1 1 2nd degree 1 unknown 0 unknown 1 unknown 0 unknown 1 0 unknown 0 1st degree 0 unknown 1 unknown 0 1st degree 1 unknown 0 1st degree +subject10 51 1 0 2nd degree 1 unknown 1 unknown 0 2nd degree 1 unknown 0 1 1st degree 1 2nd degree 1 unknown 0 1st degree 0 1st degree 1 1st degree 1 1st degree +subject11 61 1 1 2nd degree 0 2nd degree 0 1st degree 0 2nd degree 0 2nd degree 0 1 unknown 1 1st degree 1 1st degree 1 1st degree 0 unknown 0 1st degree 0 unknown +subject12 55 1 0 1st degree 1 2nd degree 0 2nd degree 0 1st degree 1 unknown 1 1 1st degree 1 1st degree 1 2nd degree 0 1st degree 0 2nd degree 0 unknown 0 2nd degree +subject13 52 1 0 unknown 1 2nd degree 1 1st degree 0 2nd degree 0 unknown 1 0 1st degree 1 1st degree 0 2nd degree 0 2nd degree 0 1st degree 0 unknown 0 2nd degree +subject14 66 1 1 2nd degree 1 2nd degree 0 2nd degree 0 unknown 1 1st degree 1 0 2nd degree 1 2nd degree 1 2nd degree 1 2nd degree 1 1st degree 1 1st degree 1 unknown +subject15 57 1 0 2nd degree 1 unknown 0 unknown 1 1st degree 0 1st degree 1 0 2nd degree 0 2nd degree 0 1st degree 1 unknown 1 unknown 0 1st degree 0 2nd degree +subject16 57 1 0 unknown 0 unknown 0 2nd degree 1 unknown 0 unknown 1 1 2nd degree 1 unknown 1 1st degree 1 2nd degree 0 1st degree 1 2nd degree 0 2nd degree +subject17 67 1 1 2nd degree 0 1st degree 1 2nd degree 1 1st degree 1 unknown 0 1 1st degree 0 1st degree 0 2nd degree 0 2nd degree 0 unknown 1 unknown 0 unknown +subject18 59 1 1 2nd degree 0 2nd degree 0 unknown 0 1st degree 0 2nd degree 0 0 unknown 1 1st degree 0 unknown 0 unknown 0 2nd degree 0 1st degree 0 unknown +subject19 67 1 1 2nd degree 1 1st degree 0 unknown 0 2nd degree 0 2nd degree 0 0 1st degree 1 unknown 1 2nd degree 0 2nd degree 1 2nd degree 0 2nd degree 1 unknown +subject20 62 1 1 1st degree 1 1st degree 1 1st degree 0 unknown 1 1st degree 1 0 unknown 1 1st degree 0 1st degree 0 1st degree 1 unknown 0 2nd degree 0 1st degree diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv index d3144d3..229d142 100644 --- a/test_data/phenotype_harmonized.tsv +++ b/test_data/phenotype_harmonized.tsv @@ -1,4 +1,13 @@ domain md5sum file_path file_readme_path n_subjects n_rows +cmqt_flags NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_flags.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_anthropometry d26b5af92459c0961442e2dcf7ce9235 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cmqt_lipids 17ce825be3d94425e26c08987fa78cd9 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_blood_pressure 9877023cd800d4235e1b99a650f88694 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_lipids 17ce825be3d94425e26c08987fa78cd9 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_hematology NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_hematology.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_glycemic NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_glycemic.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_kidney_function NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_kidney_function.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +diabetes_diabetes NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/diabetes_diabetes.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cvd_cad NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cvd_cad.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cancer_breast NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_breast.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cancer_prostate NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_prostate.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +family_history NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/family_history.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 diff --git a/test_data/test_files.R b/test_data/test_files.R index 7ce5afd..dd67fcf 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -5,9 +5,21 @@ library(tools) # number of rows in test data n <- 20 -file_names <- c("cmqt_anthropometry", - "cmqt_lipids", - "cmqt_blood_pressure") +file_names <- c( + # "pilot", + # "population_descriptor", + "cmqt_flags", + "cmqt_anthropometry", + "cmqt_blood_pressure", + "cmqt_lipids", + "cmqt_hematology", + "cmqt_glycemic", + "cmqt_kidney_function", + "diabetes_diabetes", + "cvd_cad", + "cancer_breast", + "cancer_prostate", + "family_history") # truncated normal distribution rtnorm <- function(n, mean, sd, a = -Inf, b = Inf){ @@ -27,6 +39,29 @@ subject <- tibble( set.seed(4) +cmqt_flagd <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + flag_pregnancy_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_acute_illness_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_bld_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_anemia_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_hiv_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_eskd_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_splenectomy_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_cirrhosis_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_fasting_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_lipids_med_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_bp_med_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_cvd_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_t2d_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_t1d_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), + flag_diabetes_other_1 = sample(x = c("no", "yes", "unknown", "data not collected"), size = n, replace = TRUE), +) + +set.seed(4) + cmqt_anthropometry <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), @@ -39,6 +74,17 @@ cmqt_anthropometry <- tibble( set.seed(4) +cmqt_blood_pressure <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + systolic_bp_1=rnorm(n, 120, 20), + diastolic_bp_1=rnorm(n, 80, 10), + hypertension_1=ifelse(systolic_bp_1 > 140 & diastolic_bp_1 > 90, 1, 0) +) + +set.seed(4) + cmqt_lipids <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), @@ -53,13 +99,169 @@ cmqt_lipids <- tibble( set.seed(4) -cmqt_blood_pressure <- tibble( +cmqt_hematology <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), visit=rep("visit_1", n), - systolic_bp_1=rnorm(n, 120, 20), - diastolic_bp_1=rnorm(n, 80, 10), - hypertension_1=ifelse(systolic_bp_1 > 140 & diastolic_bp_1 > 90, 1, 0) + rbc_1=rnorm(n, 4, 1.5), + hemoglobin_1=rnorm(n, 13, 3), + hematocrit_1=rnorm(n, 0.4, 0.4), + mcv_1=(hematocrit_1 * 10) / rbc_1, + mch_1=rnorm(n, 38, 2), + mchc_1=rnorm(n, 34, 2), + rdw_1=rnorm(n, 10, 1), + wbc_1=rnorm(n, 8, 3), + basophil_count_1=rnorm(n, 200, 100), + eosinophil_count_1=rnorm(n, 200, 100), + lymphocyte_count_1=rnorm(n, 1300, 1000), + monocyte_count_1=rnorm(n, 450, 100), + neutrophil_count_1=rnorm(n, 4000, 2000), + platelet_count_1=rnorm(n, 800, 200), + mean_platelet_volume_1=rnorm(n, 10, 2) +) + +set.seed(4) + +cmqt_glycemic <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + fasting_glucose_plasma_1=rnorm(n, 50, 20), + fasting_glucose_serum_1=rnorm(n, 85, 10), + fasting_insulin_1=rnorm(n, 36, 6), + hba1c_1=rnorm(n, 3, 2) +) + +set.seed(4) + +cmqt_kidney_function <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + cystatin_c_1=rnorm(n, 3, 1), + serum_creatinine_1=rnorm(n, .5, 0.25) +) + +set.seed(4) + +diabetes_diabetes <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + t1d_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + t2d_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), +) + +set.seed(4) + +cvd_cad <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + cad_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + cad_emerge_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + cad_emerge_mod_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)) +) + +set.seed(4) + +cancer_breast <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + breast_cancer_status_emerge_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + breast_cancer_status_registry_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + breast_cancer_status_survey_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + age_at_diagnosis_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 5, x, 90))), + year_at_diagnosis_1=round(rtnorm(n, 2010, 5, 1900, 2024)), + breast_cancer_type_1=sample(x = c("unilateral", "bilateral"), size = n, replace = TRUE), + cancer_behavior_1=sample(x = c("benign", "borderline", "in_situ", "invasive"), size = n, replace = TRUE), + her2_1=sample(x = c("positive", "negative", "unknown"), size = n, replace = TRUE), + pr_1=sample(x = c("positive", "negative", "unknown"), size = n, replace = TRUE), + er_1=sample(x = c("positive", "negative", "unknown"), size = n, replace = TRUE), + T_stage_clinical_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_pathological_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_uknown_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_clinical_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_pathological_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_unknown_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), + nodal_involvement_1=sample(x = c("NX", "N0", "N1", "N2", "N3"), size = n, replace = TRUE), + distant_metastasis_1=sample(x = c("MX", "M0", "M1"), size = n, replace = TRUE), + stage_system_1=rep(NA, n), + grade_clinical_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + grade_pathological_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + grade_unknown_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + screening_history_1=sample(x = c(0, 1), size = n, replace = TRUE), + recurrence_1=sample(x = c("recurrance_primary", "recurrance_second_primary", "unknown", "none"), size = n, replace = TRUE), + surgery_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + radiotherapy_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + chemotherapy_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + hormone_therapy_1=sample(x = c("pharmaceutical", "surgical", "both", "none", "unknown"), size = n, replace = TRUE), + NSAID_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + age_at_natural_menopause_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 10, x, 90))), + post_menopausal_hormone_use_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + parity_1=sample(x = c(0, 1, 2), size = n, replace = TRUE), + age_at_first_birth_1=round(rtnorm(n, 28, 5, 0, 90)), + age_at_menarche_1=round(rtnorm(n, 15, 3, 0, 90)), + deceased_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.9, 0.1)), + cause_of_death_breast_cancer_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.9, 0.1)), + age_at_death_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 20, x, 90))), +) + +set.seed(4) + +cancer_prostate <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep("visit_1", n), + prostate_cancer_status_emerge_1=sample(x = c(0, 1), size = n, replace = TRUE), + prostate_cancer_status_registry_1=sample(x = c(0, 1), size = n, replace = TRUE), + prostate_cancer_status_survey_1=sample(x = c(0, 1), size = n, replace = TRUE), + age_at_diagnosis_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 5, x, 90))), + year_at_diagnosis_1=round(rtnorm(n, 2010, 5, 1900, 2024)), + cancer_behavior_1=sample(x = c("benign", "borderline", "in_situ", "invasive"), size = n, replace = TRUE), + T_stage_clinical_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_pathological_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_uknown_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_clinical_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_pathological_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_unknown_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), + nodal_involvement_1=sample(x = c("NX", "N0", "N1", "N2", "N3"), size = n, replace = TRUE), + distant_metastasis_1=sample(x = c("MX", "M0", "M1"), size = n, replace = TRUE), + stage_system_1=rep(NA, n) +) + +set.seed(4) + +family_history <- tibble( + subject_id=rep(subject$subject_id), + age_at_obs=rep(subject$age_at_obs), + visit=rep(1, n), + family_hx_cancer_breast_1=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_cancer_breast_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_cancer_prostate=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_cancer_prostate_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_cancer_pancreatic=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_cancer_pancreatic_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_cancer_colorectal=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_cancer_colorectal_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_cancer_lung=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_cancer_lung_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_cancer_any=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_type2_diabetes=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_type2_diabetes_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_myocardial_infarction=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_myocardial_infarction_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_dementia=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_dementia_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_type1_diabetes=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_type1_diabetes_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_asthma=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_asthma_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_stroke=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_stroke_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE), + family_hx_heart_failure=sample(x = c(0, 1), size = n, replace = TRUE), + family_hx_heart_failure_relatedness_1=sample(x = c("1st degree", "2nd degree", "unknown"), size = n, replace = TRUE) ) # fill in table after uploading tsv files to anvil @@ -82,32 +284,21 @@ phenotype_harmonized <- tibble( subject <- subject %>% select(-age_at_obs) -# cmqt_hematology <- tibble( -# subject_id=rep(subject$subject_id), -# age_at_obs=rep(subject$age_at_obs), -# visit=rep(1, n), -# rbc_1 -# hemoglobin_1=rnorm(n, 10, 20), -# hematocrit_1=rnorm(n, 0.4, 0.4), -# mcv_1 -# mch_1 -# mchc_1 -# rdw_1 -# wbc_1=rnorm(n, 100, 100), -# basophil_count_1 -# eosinophil_count_1 -# lymphocyte_count_1 -# monocyte_count_1 -# neutrophil_count_1 -# platelet_count_1=rnorm(n, 800, 200) -# mean_platelet_volume_1 -# ) + setwd("~/Downloads/primed_data_models") write_tsv(readme, "test_data/readme.tsv") write_tsv(subject, "test_data/subject.tsv") +write_tsv(cmqt_flags, "test_data/cmqt_flags.tsv") write_tsv(cmqt_anthropometry, "test_data/cmqt_anthropometry.tsv") -write_tsv(cmqt_lipids, "test_data/cmqt_lipids.tsv") write_tsv(cmqt_blood_pressure, "test_data/cmqt_blood_pressure.tsv") +write_tsv(cmqt_lipids, "test_data/cmqt_lipids.tsv") +write_tsv(cmqt_hematology, "test_data/cmqt_hematology.tsv") +write_tsv(cmqt_glycemic, "test_data/cmqt_glycemic.tsv") +write_tsv(cmqt_kidney_function, "test_data/cmqt_kidney_function.tsv") +write_tsv(diabetes_diabetes, "test_data/diabetes_diabetes.tsv") +write_tsv(cvd_cad, "test_data/cvd_cad.tsv") +write_tsv(cancer_breast, "test_data/cancer_breast.tsv") +write_tsv(cancer_prostate, "test_data/cancer_prostate.tsv") +write_tsv(family_history, "test_data/family_history.tsv") write_tsv(phenotype_harmonized, "test_data/phenotype_harmonized.tsv") -# write_tsv(cmqt_hematology, "test_data/cmqt_hematology.tsv") From b4ffa1fd26f07dbe3428fb755fb89c57c94df253 Mon Sep 17 00:00:00 2001 From: amywatt Date: Mon, 22 Apr 2024 15:57:27 -0700 Subject: [PATCH 10/18] added phenotype harmonized tables --- test_data/test_files.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test_data/test_files.R b/test_data/test_files.R index dd67fcf..0eddef7 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -39,7 +39,7 @@ subject <- tibble( set.seed(4) -cmqt_flagd <- tibble( +cmqt_flags <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), visit=rep("visit_1", n), From 7aee591e42b4f60465c6779e06563dc5178f0e9f Mon Sep 17 00:00:00 2001 From: amywatt Date: Mon, 22 Apr 2024 16:20:52 -0700 Subject: [PATCH 11/18] added md5sum --- test_data/phenotype_harmonized.tsv | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv index 229d142..d71d655 100644 --- a/test_data/phenotype_harmonized.tsv +++ b/test_data/phenotype_harmonized.tsv @@ -1,13 +1,13 @@ domain md5sum file_path file_readme_path n_subjects n_rows -cmqt_flags NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_flags.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_flags 99dee9ebbef7e7a0681d4ae5b3b0063e gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_flags.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_anthropometry d26b5af92459c0961442e2dcf7ce9235 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_blood_pressure 9877023cd800d4235e1b99a650f88694 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_lipids 17ce825be3d94425e26c08987fa78cd9 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cmqt_hematology NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_hematology.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cmqt_glycemic NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_glycemic.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cmqt_kidney_function NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_kidney_function.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -diabetes_diabetes NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/diabetes_diabetes.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cvd_cad NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cvd_cad.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cancer_breast NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_breast.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cancer_prostate NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_prostate.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -family_history NA gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/family_history.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_hematology 01f2848bccf4ac09c8addd2434323a0b gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_hematology.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_glycemic 4af06300bac223b5462356532fa98729 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_glycemic.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_kidney_function 35962811d3e9c081de82e4f3f8e4bfb5 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_kidney_function.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +diabetes_diabetes fdf40653b5a3e9f8eb6f4c9608f07e66 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/diabetes_diabetes.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cvd_cad 26439afc298880695450a008d3f92290 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cvd_cad.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cancer_breast 4a1d43806d3839924e947331c2462ef2 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_breast.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cancer_prostate 4bbb799fb0ad21d314411d1f0331f421 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_prostate.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +family_history 53fe6db8b6d0864ca9adc9e46061a6b0 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/family_history.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 From 691b6af926089c473443f065cfa306dd86805536 Mon Sep 17 00:00:00 2001 From: amywatt Date: Tue, 23 Apr 2024 13:29:53 -0700 Subject: [PATCH 12/18] test data tables --- test_data/cancer_breast.tsv | 40 ++++++++--------- test_data/cancer_prostate.tsv | 42 +++++++++--------- test_data/phenotype_harmonized.tsv | 4 +- test_data/test_files.R | 69 ++++++++++++++++++------------ 4 files changed, 85 insertions(+), 70 deletions(-) diff --git a/test_data/cancer_breast.tsv b/test_data/cancer_breast.tsv index 0704e59..8147dbe 100644 --- a/test_data/cancer_breast.tsv +++ b/test_data/cancer_breast.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit breast_cancer_status_emerge_1 breast_cancer_status_registry_1 breast_cancer_status_survey_1 age_at_diagnosis_1 year_at_diagnosis_1 breast_cancer_type_1 cancer_behavior_1 her2_1 pr_1 er_1 T_stage_clinical_1 T_stage_pathological_1 T_stage_uknown_1 T_stage_clinical_2 T_stage_pathological_2 T_stage_unknown_2 nodal_involvement_1 distant_metastasis_1 stage_system_1 grade_clinical_1 grade_pathological_1 grade_unknown_1 screening_history_1 recurrence_1 surgery_1 radiotherapy_1 chemotherapy_1 hormone_therapy_1 NSAID_1 age_at_natural_menopause_1 post_menopausal_hormone_use_1 parity_1 age_at_first_birth_1 age_at_menarche_1 deceased_1 cause_of_death_breast_cancer_1 age_at_death_1 -subject1 59 visit_1 0 0 0 63 2017 unilateral benign negative unknown positive unstaged stage4 stage3 unknown unknown unstaged N2 M1 NA grade1 grade1 grade3 0 recurrance_second_primary grade1 grade1 grade2 pharmaceutical grade3 64 grade3 0 26 15 0 0 64 -subject2 46 visit_1 0 1 0 46 2013 bilateral borderline unknown unknown negative unstaged stage4 stage3 localized distant in_situ NX MX NA grade1 grade3 grade1 1 none grade2 grade2 grade2 surgical grade1 58 grade1 2 30 9 1 1 48 -subject3 55 visit_1 0 0 0 62 2011 unilateral invasive positive negative positive stage3 unstaged stage1 unstaged unstaged regional N2 MX NA grade2 grade2 grade1 0 unknown grade3 grade2 grade1 surgical grade3 62 grade3 2 32 18 0 0 72 -subject4 55 visit_1 0 0 0 64 2017 bilateral borderline unknown negative unknown stage3 stage2 stage4 localized localized regional N0 MX NA grade2 grade3 grade3 0 unknown grade3 grade3 grade1 pharmaceutical grade3 59 grade1 2 30 14 0 0 78 -subject5 62 visit_1 0 0 0 63 2016 unilateral benign positive positive negative stage3 unstaged unknown regional regional unknown NX MX NA grade2 grade3 grade2 0 recurrance_second_primary grade2 grade1 grade1 unknown grade2 63 grade2 2 30 13 0 0 77 -subject6 55 visit_1 0 0 0 59 2002 bilateral in_situ negative positive positive stage1 stage4 stage1 localized localized unknown N3 MX NA grade2 grade3 grade2 1 none grade2 grade2 grade3 both grade1 57 grade1 0 23 20 0 0 73 -subject7 61 visit_1 0 0 0 61 2002 bilateral invasive negative positive unknown stage3 stage4 stage3 localized unstaged unknown NX MX NA grade1 grade1 grade3 1 unknown grade3 grade3 grade1 none grade2 61 grade1 2 17 12 0 0 73 -subject8 65 visit_1 0 0 0 68 2021 unilateral benign unknown positive positive stage3 stage3 unstaged in_situ localized unknown NX M0 NA grade1 grade3 grade3 0 unknown grade3 grade2 grade1 none grade2 81 grade2 0 31 13 0 0 86 -subject9 66 visit_1 0 0 0 72 2006 bilateral in_situ unknown positive unknown stage4 stage3 unknown regional regional regional N3 M1 NA grade3 grade2 grade1 1 none grade1 grade1 grade2 none grade1 68 grade2 0 25 14 0 0 81 -subject10 51 visit_1 0 0 0 62 2017 bilateral borderline negative unknown unknown stage1 unknown stage2 unknown distant unknown N2 MX NA grade1 grade1 grade1 1 recurrance_primary grade3 grade1 grade1 surgical grade2 59 grade3 0 27 16 0 0 74 -subject11 61 visit_1 0 0 0 63 2006 unilateral borderline negative positive negative stage1 stage4 unknown unknown regional unstaged NX M0 NA grade1 grade1 grade3 0 recurrance_primary grade2 grade3 grade3 unknown grade1 77 grade1 1 30 17 0 0 67 -subject12 55 visit_1 0 0 0 59 2004 unilateral benign positive negative unknown unknown unknown stage1 unknown localized unstaged N1 MX NA grade1 grade1 grade2 0 none grade1 grade2 grade3 pharmaceutical grade3 74 grade3 1 26 15 0 0 59 -subject13 52 visit_1 0 0 0 55 2010 unilateral borderline negative positive unknown unknown stage2 stage2 unstaged localized regional N1 MX NA grade2 grade1 grade1 0 unknown grade2 grade2 grade1 surgical grade2 53 grade2 1 25 12 0 0 66 -subject14 66 visit_1 1 0 0 68 2007 unilateral benign unknown negative positive stage4 stage2 stage3 in_situ distant unknown N1 M0 NA grade2 grade3 grade1 1 recurrance_primary grade3 grade3 grade3 both grade2 82 grade2 2 22 11 0 0 72 -subject15 57 visit_1 0 0 0 64 2008 bilateral invasive positive positive positive unstaged stage2 stage4 distant in_situ regional N0 M1 NA grade3 grade3 grade3 0 recurrance_second_primary grade2 grade2 grade3 both grade3 66 grade1 1 22 12 0 0 80 -subject16 57 visit_1 0 1 0 62 2003 bilateral invasive unknown positive unknown unstaged stage4 stage4 unknown unknown unstaged N0 M0 NA grade2 grade1 grade1 1 recurrance_second_primary grade2 grade2 grade1 both grade1 57 grade3 1 26 19 1 0 70 -subject17 67 visit_1 1 0 0 69 2014 unilateral in_situ positive negative unknown stage1 unknown stage2 localized regional localized N1 MX NA grade2 grade1 grade3 1 none grade2 grade3 grade3 none grade1 74 grade2 1 32 14 0 0 85 -subject18 59 visit_1 0 0 0 62 2009 bilateral borderline positive positive negative stage2 stage2 unknown unknown regional distant N3 M0 NA grade3 grade2 grade2 0 recurrance_primary grade1 grade3 grade1 pharmaceutical grade1 59 grade2 1 23 12 0 0 74 -subject19 67 visit_1 1 0 0 69 2001 bilateral borderline negative negative negative stage1 stage2 stage1 unknown unstaged unknown N0 MX NA grade2 grade3 grade2 0 recurrance_second_primary grade2 grade3 grade2 both grade1 78 grade1 1 32 18 0 0 73 -subject20 62 visit_1 0 0 0 63 2013 unilateral benign unknown unknown positive stage4 stage2 unknown in_situ regional distant N0 M0 NA grade1 grade2 grade3 0 recurrance_second_primary grade1 grade1 grade2 surgical grade1 75 grade1 2 28 16 1 0 64 +subject1 59 visit_1 0 0 0 63 2017 unilateral benign negative unknown positive unstaged stage 4 stage 3 unknown unknown unstaged N2 M1 NA grade 1 grade 1 grade 3 0 recurrence_second_primary 0 0 1 pharmaceutical 1 64 1 0 26 15 0 0 64 +subject2 46 visit_1 0 1 0 46 2013 bilateral borderline unknown unknown negative unstaged stage 4 stage 3 localized distant in_situ NX MX NA grade 1 grade 3 grade 1 1 none 0 0 0 surgical 0 58 0 2 30 9 1 1 48 +subject3 55 visit_1 0 0 0 62 2011 unilateral invasive positive negative positive stage 3 unstaged stage 1 unstaged unstaged regional N2 MX NA grade 2 grade 2 grade 1 0 unknown 1 0 0 surgical 1 62 1 2 32 18 0 0 72 +subject4 55 visit_1 0 0 0 64 2017 bilateral borderline unknown negative unknown stage 3 stage 2 stage 4 localized localized regional N0 MX NA grade 2 grade 3 grade 3 0 unknown 1 1 0 pharmaceutical 1 59 0 2 30 14 0 0 78 +subject5 62 visit_1 0 0 0 63 2016 unilateral benign positive positive negative stage 3 unstaged unknown regional regional unknown NX MX NA grade 2 grade 3 grade 2 0 recurrence_second_primary 0 0 0 unknown 0 63 1 2 30 13 0 0 77 +subject6 55 visit_1 0 0 0 59 2002 bilateral in_situ negative positive positive stage 1 stage 4 stage 1 localized localized unknown N3 MX NA grade 2 grade 3 grade 2 1 none 1 1 1 both 0 57 0 0 23 20 0 0 73 +subject7 61 visit_1 0 0 0 61 2002 bilateral invasive negative positive unknown stage 3 stage 4 stage 3 localized unstaged unknown NX MX NA grade 1 grade 1 grade 3 1 unknown 1 1 0 none 0 61 0 2 17 12 0 0 73 +subject8 65 visit_1 0 0 0 68 2021 unilateral benign unknown positive positive stage 3 stage 3 unstaged in_situ localized unknown NX M0 NA grade 1 grade 3 grade 3 0 unknown 1 1 0 none 1 81 1 0 31 13 0 0 86 +subject9 66 visit_1 0 0 0 72 2006 bilateral in_situ unknown positive unknown stage 4 stage 3 unknown regional regional regional N3 M1 NA grade 3 grade 2 grade 1 1 none 0 0 1 none 0 68 1 0 25 14 0 0 81 +subject10 51 visit_1 0 0 0 62 2017 bilateral borderline negative unknown unknown stage 1 unknown stage 2 unknown distant unknown N2 MX NA grade 1 grade 1 grade 1 1 recurrence_primary 1 0 0 surgical 0 59 1 0 27 16 0 0 74 +subject11 61 visit_1 0 0 0 63 2006 unilateral borderline negative positive negative stage 1 stage 4 unknown unknown regional unstaged NX M0 NA grade 1 grade 1 grade 3 0 recurrence_primary 0 1 1 unknown 0 77 0 1 30 17 0 0 67 +subject12 55 visit_1 0 0 0 59 2004 unilateral benign positive negative unknown unknown unknown stage 1 unknown localized unstaged N1 MX NA grade 1 grade 1 grade 2 0 none 0 1 1 pharmaceutical 1 74 1 1 26 15 0 0 59 +subject13 52 visit_1 0 0 0 55 2010 unilateral borderline negative positive unknown unknown stage 2 stage 2 unstaged localized regional N1 MX NA grade 2 grade 1 grade 1 0 unknown 0 1 0 surgical 1 53 0 1 25 12 0 0 66 +subject14 66 visit_1 1 0 0 68 2007 unilateral benign unknown negative positive stage 4 stage 2 stage 3 in_situ distant unknown N1 M0 NA grade 2 grade 3 grade 1 1 recurrence_primary 1 1 1 both 0 82 0 2 22 11 0 0 72 +subject15 57 visit_1 0 0 0 64 2008 bilateral invasive positive positive positive unstaged stage 2 stage 4 distant in_situ regional N0 M1 NA grade 3 grade 3 grade 3 0 recurrence_second_primary 0 1 1 both 1 66 0 1 22 12 0 0 80 +subject16 57 visit_1 0 1 0 62 2003 bilateral invasive unknown positive unknown unstaged stage 4 stage 4 unknown unknown unstaged N0 M0 NA grade 2 grade 1 grade 1 1 recurrence_second_primary 0 0 0 both 0 57 1 1 26 19 1 0 70 +subject17 67 visit_1 1 0 0 69 2014 unilateral in_situ positive negative unknown stage 1 unknown stage 2 localized regional localized N1 MX NA grade 2 grade 1 grade 3 1 none 0 1 1 none 0 74 0 1 32 14 0 0 85 +subject18 59 visit_1 0 0 0 62 2009 bilateral borderline positive positive negative stage 2 stage 2 unknown unknown regional distant N3 M0 NA grade 3 grade 2 grade 2 0 recurrence_primary 0 1 0 pharmaceutical 0 59 0 1 23 12 0 0 74 +subject19 67 visit_1 1 0 0 69 2001 bilateral borderline negative negative negative stage 1 stage 2 stage 1 unknown unstaged unknown N0 MX NA grade 2 grade 3 grade 2 0 recurrence_second_primary 1 1 1 both 0 78 0 1 32 18 0 0 73 +subject20 62 visit_1 0 0 0 63 2013 unilateral benign unknown unknown positive stage 4 stage 2 unknown in_situ regional distant N0 M0 NA grade 1 grade 2 grade 3 0 recurrence_second_primary 0 0 0 surgical 0 75 0 2 28 16 1 0 64 diff --git a/test_data/cancer_prostate.tsv b/test_data/cancer_prostate.tsv index 6380cc7..b2ff2d2 100644 --- a/test_data/cancer_prostate.tsv +++ b/test_data/cancer_prostate.tsv @@ -1,21 +1,21 @@ -subject_id age_at_obs visit prostate_cancer_status_emerge_1 prostate_cancer_status_registry_1 prostate_cancer_status_survey_1 age_at_diagnosis_1 year_at_diagnosis_1 cancer_behavior_1 T_stage_clinical_1 T_stage_pathological_1 T_stage_uknown_1 T_stage_clinical_2 T_stage_pathological_2 T_stage_unknown_2 nodal_involvement_1 distant_metastasis_1 stage_system_1 -subject1 59 visit_1 1 1 1 63 2017 borderline stage1 stage4 unknown localized unstaged in_situ N1 M1 NA -subject2 46 visit_1 0 1 0 46 2013 in_situ stage3 unstaged unknown in_situ unstaged in_situ N0 MX NA -subject3 55 visit_1 0 1 1 62 2011 borderline unknown stage2 stage3 regional distant unstaged NX M1 NA -subject4 55 visit_1 0 0 0 64 2017 in_situ stage2 unknown stage3 unknown distant regional N2 MX NA -subject5 62 visit_1 1 1 1 63 2016 borderline stage1 stage2 stage2 in_situ distant unstaged N3 MX NA -subject6 55 visit_1 0 1 0 59 2002 in_situ unstaged stage3 stage1 regional localized in_situ NX MX NA -subject7 61 visit_1 1 0 1 61 2002 in_situ unknown stage4 stage1 unstaged distant in_situ N0 MX NA -subject8 65 visit_1 1 1 1 68 2021 benign stage2 unknown stage1 regional distant distant N2 M0 NA -subject9 66 visit_1 1 1 1 72 2006 invasive stage4 unstaged stage1 unstaged in_situ distant N3 MX NA -subject10 51 visit_1 0 1 1 62 2017 invasive stage3 stage3 unstaged unknown localized unknown N0 M1 NA -subject11 61 visit_1 1 1 0 63 2006 borderline stage3 stage3 stage1 distant localized in_situ N3 M1 NA -subject12 55 visit_1 0 0 1 59 2004 borderline stage2 stage1 stage4 unknown unknown unknown NX M1 NA -subject13 52 visit_1 0 1 1 55 2010 borderline stage3 stage4 stage2 unknown unknown regional NX M1 NA -subject14 66 visit_1 1 1 1 68 2007 borderline stage2 unknown stage4 regional in_situ regional N0 M0 NA -subject15 57 visit_1 0 0 1 64 2008 invasive unstaged stage2 stage1 regional unstaged regional N2 M0 NA -subject16 57 visit_1 0 1 0 62 2003 in_situ unknown unknown stage1 unstaged unstaged in_situ N2 M1 NA -subject17 67 visit_1 1 0 0 69 2014 borderline unstaged stage2 stage4 unknown localized unknown N0 MX NA -subject18 59 visit_1 1 1 0 62 2009 invasive stage2 stage2 stage2 distant regional regional N3 M1 NA -subject19 67 visit_1 1 0 1 69 2001 invasive stage3 stage3 stage3 distant localized regional NX M1 NA -subject20 62 visit_1 1 0 1 63 2013 benign stage1 unstaged unstaged localized in_situ regional N3 M0 NA +subject_id age_at_obs visit prostate_cancer_status_emerge_1 prostate_cancer_status_registry_1 prostate_cancer_status_survey_1 age_at_diagnosis_1 year_at_diagnosis_1 cancer_behavior_1 T_stage_clinical_1 T_stage_pathological_1 T_stage_uknown_1 T_stage_clinical_2 T_stage_pathological_2 T_stage_unknown_2 nodal_involvement_1 distant_metastasis_1 stage_system_1 gleason_score_clinical_1 gleason_score_pathological_1 gleason_score_unknown_1 psa_1 psa_at_diagnosis_1 screening_history_1 recurrence_1 surgery_1 radiotherapy_1 chemotherapy_1 hormone_therapy_1 NSAID_1 deceased_1 cause_of_death_prostate_cancer_1 age_at_death_1 +subject1 59 visit_1 1 1 1 63 2017 borderline stage 1 stage 4 unknown localized unstaged in_situ N1 M1 NA 8.731725374236703 8.478020103648305 7.070699093863368 3.3273850651366184 0.6910954617090586 0 none 0 0 0 pharmaceutical 1 0 0 68 +subject2 46 visit_1 0 1 0 46 2013 in_situ stage 3 unstaged unknown in_situ unstaged in_situ N0 MX NA 5.31847382709384 6.302142806351185 3.4212676770985126 0.1117048051118088 1.114277099266359 1 recurrence_second_primary 1 1 0 surgical 0 0 0 70 +subject3 55 visit_1 0 1 1 62 2011 borderline unknown stage 2 stage 3 regional distant unstaged NX M1 NA 8.112247075885534 3.6575526501983404 7.505811808630824 1.1467807022659444 1.650565194078115 0 recurrence_primary 0 1 1 both 0 0 0 67 +subject4 55 visit_1 0 0 0 64 2017 in_situ stage 2 unknown stage 3 unknown distant regional N2 MX NA 2.075820654630661 3.5997560676187277 4.670109050348401 0.6244635030052077 1.385998555747178 1 none 0 1 1 unknown 0 0 1 62 +subject5 62 visit_1 1 1 1 63 2016 borderline stage 1 stage 2 stage 2 in_situ distant unstaged N3 MX NA 3.5938638411462307 9.505069125443697 3.4053481109440327 1.0444848894466576 1.9458136918517794 1 recurrence_second_primary 0 0 0 pharmaceutical 0 1 0 63 +subject6 55 visit_1 0 1 0 59 2002 in_situ unstaged stage 3 stage 1 regional localized in_situ NX MX NA 2.81782154366374 9.789288826286793 9.076163556426764 0.959323014536773 1.2909907535132685 1 recurrence_second_primary 1 1 1 none 1 0 0 59 +subject7 61 visit_1 1 0 1 61 2002 in_situ unknown stage 4 stage 1 unstaged distant in_situ N0 MX NA 7.864194095134735 9.322885816916823 2.6132918391376734 0.08122257203704342 0.302979697472334 0 none 1 1 1 none 0 0 0 62 +subject8 65 visit_1 1 1 1 68 2021 benign stage 2 unknown stage 1 regional distant distant N2 M0 NA 2.845074152573943 9.902249811217189 2.6696471609175205 1.6165848336901831 0.3081623612021196 1 none 0 1 1 both 0 0 0 86 +subject9 66 visit_1 1 1 1 72 2006 invasive stage 4 unstaged stage 1 unstaged in_situ distant N3 MX NA 3.8796687815338373 4.515676589682698 8.683276420459151 3.215384566425846 3.4306225959208776 0 recurrence_primary 1 1 0 surgical 1 0 0 70 +subject10 51 visit_1 0 1 1 62 2017 invasive stage 3 stage 3 unstaged unknown localized unknown N0 M1 NA 5.506285002455115 8.861765194684267 7.553499078378081 0.21256352824871372 1.1323419133081138 0 recurrence_primary 1 0 1 pharmaceutical 0 0 0 66 +subject11 61 visit_1 1 1 0 63 2006 borderline stage 3 stage 3 stage 1 distant localized in_situ N3 M1 NA 4.612345730885863 7.67168252915144 2.4880887512117624 1.6517128367933704 0.9405233862074993 0 none 0 0 0 unknown 1 1 0 85 +subject12 55 visit_1 0 0 1 59 2004 borderline stage 2 stage 1 stage 4 unknown unknown unknown NX M1 NA 2.0972411278635263 7.748725865036249 6.268828645348549 0.2398326754427973 0.7250777214867976 0 unknown 0 1 0 both 1 0 0 85 +subject13 52 visit_1 0 1 1 55 2010 borderline stage 3 stage 4 stage 2 unknown unknown regional NX M1 NA 2.9275574795901775 3.5649712923914194 6.056498387828469 1.1806309466433595 1.9446144610218248 0 recurrence_primary 0 1 0 both 0 0 0 54 +subject14 66 visit_1 1 1 1 68 2007 borderline stage 2 unknown stage 4 regional in_situ regional N0 M0 NA 5.634906744584441 8.788703504949808 6.162681128829718 1.6857904625831341 1.52517841165081 1 recurrence_primary 1 0 1 none 1 0 0 86 +subject15 57 visit_1 0 0 1 64 2008 invasive unstaged stage 2 stage 1 regional unstaged regional N2 M0 NA 7.215250797569752 3.9183619394898415 3.7540266644209623 2.1816372213400363 2.002552414007943 1 unknown 0 0 0 both 1 0 0 73 +subject16 57 visit_1 0 1 0 62 2003 in_situ unknown unknown stage 1 unstaged unstaged in_situ N2 M1 NA 9.906074807047844 7.992842447012663 3.8347186259925365 1.8889410806023983 1.223594159764647 0 recurrence_primary 1 0 0 both 0 0 0 57 +subject17 67 visit_1 1 0 0 69 2014 borderline unstaged stage 2 stage 4 unknown localized unknown N0 MX NA 4.422365535050631 2.1439468264579773 5.274411527439952 0.8706631343253838 1.8269750432423142 0 none 1 1 0 none 1 0 0 78 +subject18 59 visit_1 1 1 0 62 2009 invasive stage 2 stage 2 stage 2 distant regional regional N3 M1 NA 4.340394644066691 5.121547309681773 9.450007228180766 1.5219466926525085 2.223313852420131 0 unknown 0 0 0 unknown 0 0 0 60 +subject19 67 visit_1 1 0 1 69 2001 invasive stage 3 stage 3 stage 3 distant localized regional NX M1 NA 8.548433542251587 8.913689605891705 5.135163985192776 0.61399603928097 1.4170698016606862 1 unknown 0 0 1 none 1 0 0 82 +subject20 62 visit_1 1 0 1 63 2013 benign stage 1 unstaged unstaged localized in_situ regional N3 M0 NA 3.4125859420746565 4.814642226323485 3.825298761948943 1.211037802492477 0.8747108557841603 1 none 0 0 0 pharmaceutical 0 0 0 81 diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv index d71d655..29de16d 100644 --- a/test_data/phenotype_harmonized.tsv +++ b/test_data/phenotype_harmonized.tsv @@ -8,6 +8,6 @@ cmqt_glycemic 4af06300bac223b5462356532fa98729 gs://fc-e3b6ff37-761e-4e53-89c0-f cmqt_kidney_function 35962811d3e9c081de82e4f3f8e4bfb5 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_kidney_function.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 diabetes_diabetes fdf40653b5a3e9f8eb6f4c9608f07e66 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/diabetes_diabetes.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cvd_cad 26439afc298880695450a008d3f92290 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cvd_cad.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cancer_breast 4a1d43806d3839924e947331c2462ef2 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_breast.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cancer_prostate 4bbb799fb0ad21d314411d1f0331f421 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_prostate.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cancer_breast 322959303fc4c173f503aaea46fbccbf gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_breast.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cancer_prostate 2975de4991faf46556fb592d00e9214c gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_prostate.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 family_history 53fe6db8b6d0864ca9adc9e46061a6b0 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/family_history.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 diff --git a/test_data/test_files.R b/test_data/test_files.R index 0eddef7..1aa2095 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -179,27 +179,27 @@ cancer_breast <- tibble( her2_1=sample(x = c("positive", "negative", "unknown"), size = n, replace = TRUE), pr_1=sample(x = c("positive", "negative", "unknown"), size = n, replace = TRUE), er_1=sample(x = c("positive", "negative", "unknown"), size = n, replace = TRUE), - T_stage_clinical_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), - T_stage_pathological_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), - T_stage_uknown_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_clinical_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_pathological_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_uknown_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), T_stage_clinical_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), T_stage_pathological_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), T_stage_unknown_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), nodal_involvement_1=sample(x = c("NX", "N0", "N1", "N2", "N3"), size = n, replace = TRUE), distant_metastasis_1=sample(x = c("MX", "M0", "M1"), size = n, replace = TRUE), stage_system_1=rep(NA, n), - grade_clinical_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), - grade_pathological_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), - grade_unknown_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + grade_clinical_1=sample(x = c("grade 1", "grade 2", "grade 3"), size = n, replace = TRUE), + grade_pathological_1=sample(x = c("grade 1", "grade 2", "grade 3"), size = n, replace = TRUE), + grade_unknown_1=sample(x = c("grade 1", "grade 2", "grade 3"), size = n, replace = TRUE), screening_history_1=sample(x = c(0, 1), size = n, replace = TRUE), - recurrence_1=sample(x = c("recurrance_primary", "recurrance_second_primary", "unknown", "none"), size = n, replace = TRUE), - surgery_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), - radiotherapy_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), - chemotherapy_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + recurrence_1=sample(x = c("recurrence_primary", "recurrence_second_primary", "unknown", "none"), size = n, replace = TRUE), + surgery_1=sample(x = c(0, 1), size = n, replace = TRUE), + radiotherapy_1=sample(x = c(0, 1), size = n, replace = TRUE), + chemotherapy_1=sample(x = c(0, 1), size = n, replace = TRUE), hormone_therapy_1=sample(x = c("pharmaceutical", "surgical", "both", "none", "unknown"), size = n, replace = TRUE), - NSAID_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + NSAID_1=sample(x = c(0, 1), size = n, replace = TRUE), age_at_natural_menopause_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 10, x, 90))), - post_menopausal_hormone_use_1=sample(x = c("grade1", "grade2", "grade3"), size = n, replace = TRUE), + post_menopausal_hormone_use_1=sample(x = c(0, 1), size = n, replace = TRUE), parity_1=sample(x = c(0, 1, 2), size = n, replace = TRUE), age_at_first_birth_1=round(rtnorm(n, 28, 5, 0, 90)), age_at_menarche_1=round(rtnorm(n, 15, 3, 0, 90)), @@ -220,15 +220,30 @@ cancer_prostate <- tibble( age_at_diagnosis_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 5, x, 90))), year_at_diagnosis_1=round(rtnorm(n, 2010, 5, 1900, 2024)), cancer_behavior_1=sample(x = c("benign", "borderline", "in_situ", "invasive"), size = n, replace = TRUE), - T_stage_clinical_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), - T_stage_pathological_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), - T_stage_uknown_1=sample(x = c("stage1", "stage2", "stage3", "stage4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_clinical_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_pathological_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), + T_stage_uknown_1=sample(x = c("stage 1", "stage 2", "stage 3", "stage 4", "unstaged", "unknown"), size = n, replace = TRUE), T_stage_clinical_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), T_stage_pathological_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), T_stage_unknown_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), nodal_involvement_1=sample(x = c("NX", "N0", "N1", "N2", "N3"), size = n, replace = TRUE), distant_metastasis_1=sample(x = c("MX", "M0", "M1"), size = n, replace = TRUE), - stage_system_1=rep(NA, n) + stage_system_1=rep(NA, n), + gleason_score_clinical_1=runif(n, 2, 10), + gleason_score_pathological_1=runif(n, 2, 10), + gleason_score_unknown_1=runif(n, 2, 10), + psa_1=rtnorm(n, 1.5, 1, 0, 50), + psa_at_diagnosis_1=rtnorm(n, 1.5, 1, 0, 50), + screening_history_1=sample(x = c(0, 1), size = n, replace = TRUE), + recurrence_1=sample(x = c("recurrence_primary", "recurrence_second_primary", "unknown", "none"), size = n, replace = TRUE), + surgery_1=sample(x = c(0, 1), size = n, replace = TRUE), + radiotherapy_1=sample(x = c(0, 1), size = n, replace = TRUE), + chemotherapy_1=sample(x = c(0, 1), size = n, replace = TRUE), + hormone_therapy_1=sample(x = c("pharmaceutical", "surgical", "both", "none", "unknown"), size = n, replace = TRUE), + NSAID_1=sample(x = c(0, 1), size = n, replace = TRUE), + deceased_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.9, 0.1)), + cause_of_death_prostate_cancer_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.9, 0.1)), + age_at_death_1=sapply(subject$age_at_obs, function(x) round(rtnorm(1, x, 20, x, 90))), ) set.seed(4) @@ -272,20 +287,9 @@ readme <- tibble( read_me = c(NA) ) -phenotype_harmonized <- tibble( - # phenotype_harmonized_id= - domain=(file_names), - md5sum=as.vector(md5sum(paste0("test_data/", file_names, ".tsv"))), - file_path=paste0(bucket, file_names, '.tsv'), - file_readme_path=paste0(bucket, 'readme.tsv'), - n_subjects=rep(n, length(file_names)), - n_rows=rep(n, length(file_names)), -) - subject <- subject %>% select(-age_at_obs) - setwd("~/Downloads/primed_data_models") write_tsv(readme, "test_data/readme.tsv") write_tsv(subject, "test_data/subject.tsv") @@ -301,4 +305,15 @@ write_tsv(cvd_cad, "test_data/cvd_cad.tsv") write_tsv(cancer_breast, "test_data/cancer_breast.tsv") write_tsv(cancer_prostate, "test_data/cancer_prostate.tsv") write_tsv(family_history, "test_data/family_history.tsv") + +phenotype_harmonized <- tibble( + # phenotype_harmonized_id= + domain=(file_names), + md5sum=as.vector(md5sum(paste0("test_data/", file_names, ".tsv"))), + file_path=paste0(bucket, file_names, '.tsv'), + file_readme_path=paste0(bucket, 'readme.tsv'), + n_subjects=rep(n, length(file_names)), + n_rows=rep(n, length(file_names)), +) + write_tsv(phenotype_harmonized, "test_data/phenotype_harmonized.tsv") From 125a05f9f74b276f24afb833bf79e0b517519121 Mon Sep 17 00:00:00 2001 From: amywatt Date: Wed, 24 Apr 2024 15:40:39 -0700 Subject: [PATCH 13/18] test data --- test_data/cancer_prostate.tsv | 40 +++++++++++++++--------------- test_data/phenotype_harmonized.tsv | 2 +- test_data/test_files.R | 8 +++--- 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/test_data/cancer_prostate.tsv b/test_data/cancer_prostate.tsv index b2ff2d2..112f50b 100644 --- a/test_data/cancer_prostate.tsv +++ b/test_data/cancer_prostate.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit prostate_cancer_status_emerge_1 prostate_cancer_status_registry_1 prostate_cancer_status_survey_1 age_at_diagnosis_1 year_at_diagnosis_1 cancer_behavior_1 T_stage_clinical_1 T_stage_pathological_1 T_stage_uknown_1 T_stage_clinical_2 T_stage_pathological_2 T_stage_unknown_2 nodal_involvement_1 distant_metastasis_1 stage_system_1 gleason_score_clinical_1 gleason_score_pathological_1 gleason_score_unknown_1 psa_1 psa_at_diagnosis_1 screening_history_1 recurrence_1 surgery_1 radiotherapy_1 chemotherapy_1 hormone_therapy_1 NSAID_1 deceased_1 cause_of_death_prostate_cancer_1 age_at_death_1 -subject1 59 visit_1 1 1 1 63 2017 borderline stage 1 stage 4 unknown localized unstaged in_situ N1 M1 NA 8.731725374236703 8.478020103648305 7.070699093863368 3.3273850651366184 0.6910954617090586 0 none 0 0 0 pharmaceutical 1 0 0 68 -subject2 46 visit_1 0 1 0 46 2013 in_situ stage 3 unstaged unknown in_situ unstaged in_situ N0 MX NA 5.31847382709384 6.302142806351185 3.4212676770985126 0.1117048051118088 1.114277099266359 1 recurrence_second_primary 1 1 0 surgical 0 0 0 70 -subject3 55 visit_1 0 1 1 62 2011 borderline unknown stage 2 stage 3 regional distant unstaged NX M1 NA 8.112247075885534 3.6575526501983404 7.505811808630824 1.1467807022659444 1.650565194078115 0 recurrence_primary 0 1 1 both 0 0 0 67 -subject4 55 visit_1 0 0 0 64 2017 in_situ stage 2 unknown stage 3 unknown distant regional N2 MX NA 2.075820654630661 3.5997560676187277 4.670109050348401 0.6244635030052077 1.385998555747178 1 none 0 1 1 unknown 0 0 1 62 -subject5 62 visit_1 1 1 1 63 2016 borderline stage 1 stage 2 stage 2 in_situ distant unstaged N3 MX NA 3.5938638411462307 9.505069125443697 3.4053481109440327 1.0444848894466576 1.9458136918517794 1 recurrence_second_primary 0 0 0 pharmaceutical 0 1 0 63 -subject6 55 visit_1 0 1 0 59 2002 in_situ unstaged stage 3 stage 1 regional localized in_situ NX MX NA 2.81782154366374 9.789288826286793 9.076163556426764 0.959323014536773 1.2909907535132685 1 recurrence_second_primary 1 1 1 none 1 0 0 59 -subject7 61 visit_1 1 0 1 61 2002 in_situ unknown stage 4 stage 1 unstaged distant in_situ N0 MX NA 7.864194095134735 9.322885816916823 2.6132918391376734 0.08122257203704342 0.302979697472334 0 none 1 1 1 none 0 0 0 62 -subject8 65 visit_1 1 1 1 68 2021 benign stage 2 unknown stage 1 regional distant distant N2 M0 NA 2.845074152573943 9.902249811217189 2.6696471609175205 1.6165848336901831 0.3081623612021196 1 none 0 1 1 both 0 0 0 86 -subject9 66 visit_1 1 1 1 72 2006 invasive stage 4 unstaged stage 1 unstaged in_situ distant N3 MX NA 3.8796687815338373 4.515676589682698 8.683276420459151 3.215384566425846 3.4306225959208776 0 recurrence_primary 1 1 0 surgical 1 0 0 70 -subject10 51 visit_1 0 1 1 62 2017 invasive stage 3 stage 3 unstaged unknown localized unknown N0 M1 NA 5.506285002455115 8.861765194684267 7.553499078378081 0.21256352824871372 1.1323419133081138 0 recurrence_primary 1 0 1 pharmaceutical 0 0 0 66 -subject11 61 visit_1 1 1 0 63 2006 borderline stage 3 stage 3 stage 1 distant localized in_situ N3 M1 NA 4.612345730885863 7.67168252915144 2.4880887512117624 1.6517128367933704 0.9405233862074993 0 none 0 0 0 unknown 1 1 0 85 -subject12 55 visit_1 0 0 1 59 2004 borderline stage 2 stage 1 stage 4 unknown unknown unknown NX M1 NA 2.0972411278635263 7.748725865036249 6.268828645348549 0.2398326754427973 0.7250777214867976 0 unknown 0 1 0 both 1 0 0 85 -subject13 52 visit_1 0 1 1 55 2010 borderline stage 3 stage 4 stage 2 unknown unknown regional NX M1 NA 2.9275574795901775 3.5649712923914194 6.056498387828469 1.1806309466433595 1.9446144610218248 0 recurrence_primary 0 1 0 both 0 0 0 54 -subject14 66 visit_1 1 1 1 68 2007 borderline stage 2 unknown stage 4 regional in_situ regional N0 M0 NA 5.634906744584441 8.788703504949808 6.162681128829718 1.6857904625831341 1.52517841165081 1 recurrence_primary 1 0 1 none 1 0 0 86 -subject15 57 visit_1 0 0 1 64 2008 invasive unstaged stage 2 stage 1 regional unstaged regional N2 M0 NA 7.215250797569752 3.9183619394898415 3.7540266644209623 2.1816372213400363 2.002552414007943 1 unknown 0 0 0 both 1 0 0 73 -subject16 57 visit_1 0 1 0 62 2003 in_situ unknown unknown stage 1 unstaged unstaged in_situ N2 M1 NA 9.906074807047844 7.992842447012663 3.8347186259925365 1.8889410806023983 1.223594159764647 0 recurrence_primary 1 0 0 both 0 0 0 57 -subject17 67 visit_1 1 0 0 69 2014 borderline unstaged stage 2 stage 4 unknown localized unknown N0 MX NA 4.422365535050631 2.1439468264579773 5.274411527439952 0.8706631343253838 1.8269750432423142 0 none 1 1 0 none 1 0 0 78 -subject18 59 visit_1 1 1 0 62 2009 invasive stage 2 stage 2 stage 2 distant regional regional N3 M1 NA 4.340394644066691 5.121547309681773 9.450007228180766 1.5219466926525085 2.223313852420131 0 unknown 0 0 0 unknown 0 0 0 60 -subject19 67 visit_1 1 0 1 69 2001 invasive stage 3 stage 3 stage 3 distant localized regional NX M1 NA 8.548433542251587 8.913689605891705 5.135163985192776 0.61399603928097 1.4170698016606862 1 unknown 0 0 1 none 1 0 0 82 -subject20 62 visit_1 1 0 1 63 2013 benign stage 1 unstaged unstaged localized in_situ regional N3 M0 NA 3.4125859420746565 4.814642226323485 3.825298761948943 1.211037802492477 0.8747108557841603 1 none 0 0 0 pharmaceutical 0 0 0 81 +subject1 59 visit_1 1 1 1 63 2017 borderline stage 1 stage 4 unknown localized unstaged in_situ N1 M1 NA 9 9 7 3.3273850651366184 0.6910954617090586 0 none 0 0 0 0 1 0 0 68 +subject2 46 visit_1 0 1 0 46 2013 in_situ stage 3 unstaged unknown in_situ unstaged in_situ N0 MX NA 5 6 3 0.1117048051118088 1.114277099266359 1 recurrence_second_primary 1 1 0 0 0 0 0 70 +subject3 55 visit_1 0 1 1 62 2011 borderline unknown stage 2 stage 3 regional distant unstaged NX M1 NA 8 3 8 1.1467807022659444 1.650565194078115 0 recurrence_primary 0 1 1 0 0 0 0 67 +subject4 55 visit_1 0 0 0 64 2017 in_situ stage 2 unknown stage 3 unknown distant regional N2 MX NA 2 3 5 0.6244635030052077 1.385998555747178 1 none 0 1 1 1 0 0 1 62 +subject5 62 visit_1 1 1 1 63 2016 borderline stage 1 stage 2 stage 2 in_situ distant unstaged N3 MX NA 3 10 3 1.0444848894466576 1.9458136918517794 1 recurrence_second_primary 0 0 0 0 0 1 0 63 +subject6 55 visit_1 0 1 0 59 2002 in_situ unstaged stage 3 stage 1 regional localized in_situ NX MX NA 2 10 9 0.959323014536773 1.2909907535132685 1 recurrence_second_primary 1 1 1 1 1 0 0 59 +subject7 61 visit_1 1 0 1 61 2002 in_situ unknown stage 4 stage 1 unstaged distant in_situ N0 MX NA 8 10 2 0.08122257203704342 0.302979697472334 0 none 1 1 1 1 0 0 0 62 +subject8 65 visit_1 1 1 1 68 2021 benign stage 2 unknown stage 1 regional distant distant N2 M0 NA 2 10 2 1.6165848336901831 0.3081623612021196 1 none 0 1 1 1 0 0 0 86 +subject9 66 visit_1 1 1 1 72 2006 invasive stage 4 unstaged stage 1 unstaged in_situ distant N3 MX NA 4 4 9 3.215384566425846 3.4306225959208776 0 recurrence_primary 1 1 0 0 1 0 0 70 +subject10 51 visit_1 0 1 1 62 2017 invasive stage 3 stage 3 unstaged unknown localized unknown N0 M1 NA 5 9 8 0.21256352824871372 1.1323419133081138 0 recurrence_primary 1 0 1 0 0 0 0 66 +subject11 61 visit_1 1 1 0 63 2006 borderline stage 3 stage 3 stage 1 distant localized in_situ N3 M1 NA 4 8 2 1.6517128367933704 0.9405233862074993 0 none 0 0 0 1 1 1 0 85 +subject12 55 visit_1 0 0 1 59 2004 borderline stage 2 stage 1 stage 4 unknown unknown unknown NX M1 NA 2 8 6 0.2398326754427973 0.7250777214867976 0 unknown 0 1 0 1 1 0 0 85 +subject13 52 visit_1 0 1 1 55 2010 borderline stage 3 stage 4 stage 2 unknown unknown regional NX M1 NA 3 3 6 1.1806309466433595 1.9446144610218248 0 recurrence_primary 0 1 0 1 0 0 0 54 +subject14 66 visit_1 1 1 1 68 2007 borderline stage 2 unknown stage 4 regional in_situ regional N0 M0 NA 6 9 6 1.6857904625831341 1.52517841165081 1 recurrence_primary 1 0 1 1 1 0 0 86 +subject15 57 visit_1 0 0 1 64 2008 invasive unstaged stage 2 stage 1 regional unstaged regional N2 M0 NA 7 4 3 2.1816372213400363 2.002552414007943 1 unknown 0 0 0 1 1 0 0 73 +subject16 57 visit_1 0 1 0 62 2003 in_situ unknown unknown stage 1 unstaged unstaged in_situ N2 M1 NA 10 8 4 1.8889410806023983 1.223594159764647 0 recurrence_primary 1 0 0 0 0 0 0 57 +subject17 67 visit_1 1 0 0 69 2014 borderline unstaged stage 2 stage 4 unknown localized unknown N0 MX NA 4 2 5 0.8706631343253838 1.8269750432423142 0 none 1 1 0 1 1 0 0 78 +subject18 59 visit_1 1 1 0 62 2009 invasive stage 2 stage 2 stage 2 distant regional regional N3 M1 NA 4 5 10 1.5219466926525085 2.223313852420131 0 unknown 0 0 0 1 0 0 0 60 +subject19 67 visit_1 1 0 1 69 2001 invasive stage 3 stage 3 stage 3 distant localized regional NX M1 NA 9 9 5 0.61399603928097 1.4170698016606862 1 unknown 0 0 1 1 1 0 0 82 +subject20 62 visit_1 1 0 1 63 2013 benign stage 1 unstaged unstaged localized in_situ regional N3 M0 NA 3 5 4 1.211037802492477 0.8747108557841603 1 none 0 0 0 0 0 0 0 81 diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv index 29de16d..e61720e 100644 --- a/test_data/phenotype_harmonized.tsv +++ b/test_data/phenotype_harmonized.tsv @@ -9,5 +9,5 @@ cmqt_kidney_function 35962811d3e9c081de82e4f3f8e4bfb5 gs://fc-e3b6ff37-761e-4e53 diabetes_diabetes fdf40653b5a3e9f8eb6f4c9608f07e66 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/diabetes_diabetes.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cvd_cad 26439afc298880695450a008d3f92290 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cvd_cad.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cancer_breast 322959303fc4c173f503aaea46fbccbf gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_breast.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cancer_prostate 2975de4991faf46556fb592d00e9214c gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_prostate.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cancer_prostate af65adca42868373afc81bef2dd2cd2b gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_prostate.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 family_history 53fe6db8b6d0864ca9adc9e46061a6b0 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/family_history.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 diff --git a/test_data/test_files.R b/test_data/test_files.R index 1aa2095..a6676c1 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -229,9 +229,9 @@ cancer_prostate <- tibble( nodal_involvement_1=sample(x = c("NX", "N0", "N1", "N2", "N3"), size = n, replace = TRUE), distant_metastasis_1=sample(x = c("MX", "M0", "M1"), size = n, replace = TRUE), stage_system_1=rep(NA, n), - gleason_score_clinical_1=runif(n, 2, 10), - gleason_score_pathological_1=runif(n, 2, 10), - gleason_score_unknown_1=runif(n, 2, 10), + gleason_score_clinical_1=sample(x = c(2, 3, 4, 5, 6, 7, 8, 9, 10), size = n, replace = TRUE), + gleason_score_pathological_1=sample(x = c(2, 3, 4, 5, 6, 7, 8, 9, 10), size = n, replace = TRUE), + gleason_score_unknown_1=sample(x = c(2, 3, 4, 5, 6, 7, 8, 9, 10), size = n, replace = TRUE), psa_1=rtnorm(n, 1.5, 1, 0, 50), psa_at_diagnosis_1=rtnorm(n, 1.5, 1, 0, 50), screening_history_1=sample(x = c(0, 1), size = n, replace = TRUE), @@ -239,7 +239,7 @@ cancer_prostate <- tibble( surgery_1=sample(x = c(0, 1), size = n, replace = TRUE), radiotherapy_1=sample(x = c(0, 1), size = n, replace = TRUE), chemotherapy_1=sample(x = c(0, 1), size = n, replace = TRUE), - hormone_therapy_1=sample(x = c("pharmaceutical", "surgical", "both", "none", "unknown"), size = n, replace = TRUE), + hormone_therapy_1=sample(x = c(0, 1), size = n, replace = TRUE), NSAID_1=sample(x = c(0, 1), size = n, replace = TRUE), deceased_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.9, 0.1)), cause_of_death_prostate_cancer_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.9, 0.1)), From 5df1f572e7947e02194d55fad7f653d354ebf2d0 Mon Sep 17 00:00:00 2001 From: amywatt Date: Tue, 30 Apr 2024 18:59:12 -0700 Subject: [PATCH 14/18] population descriptor table --- test_data/phenotype_harmonized.tsv | 1 + test_data/population_descriptor.tsv | 21 +++++++++++++ test_data/test_files.R | 49 ++++++++++++++++++----------- 3 files changed, 53 insertions(+), 18 deletions(-) create mode 100644 test_data/population_descriptor.tsv diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv index e61720e..761f4f8 100644 --- a/test_data/phenotype_harmonized.tsv +++ b/test_data/phenotype_harmonized.tsv @@ -1,4 +1,5 @@ domain md5sum file_path file_readme_path n_subjects n_rows +population_descriptor b3b28f8d7450cda4e44c69a4bf35683f gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/population_descriptor.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_flags 99dee9ebbef7e7a0681d4ae5b3b0063e gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_flags.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_anthropometry d26b5af92459c0961442e2dcf7ce9235 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_blood_pressure 9877023cd800d4235e1b99a650f88694 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 diff --git a/test_data/population_descriptor.tsv b/test_data/population_descriptor.tsv new file mode 100644 index 0000000..f9fd9c0 --- /dev/null +++ b/test_data/population_descriptor.tsv @@ -0,0 +1,21 @@ +subject_id population_descriptor_id population_descriptor population_label country_of_recruitment country_of_birth +subject1 022f19bfe0b628e1 population | subpopulation MSL | AFR USA Sierra Leone +subject2 01bb18a183122d64 population | subpopulation IBS | EUR Peru UK +subject3 01bb18a183122d64 population | subpopulation MXL | AMR Sierra Leone USA +subject4 01bb18a183122d64 population | subpopulation PEL | AMR Sierra Leone Sierra Leone +subject5 0224684f6cb9e980 population | subpopulation MSL | AFR Spain Sierra Leone +subject6 01bb18a183122d64 population | subpopulation PEL | AMR UK Peru +subject7 0224684f6cb9e980 population | subpopulation MSL | AFR Peru Peru +subject8 0224684f6cb9e980 population | subpopulation MSL | AFR USA Sierra Leone +subject9 0224684f6cb9e980 population | subpopulation GBR | EUR Sierra Leone Spain +subject10 01bb18a183122d64 population | subpopulation MXL | AMR Sierra Leone Sierra Leone +subject11 0224684f6cb9e980 population | subpopulation IBS | EUR Spain Peru +subject12 01bb18a183122d64 population | subpopulation GBR | EUR UK Peru +subject13 01bb18a183122d64 population | subpopulation MSL | AFR USA USA +subject14 0224684f6cb9e980 population | subpopulation MSL | AFR Spain Spain +subject15 022f19bfe0b628e1 population | subpopulation MSL | AFR Sierra Leone Spain +subject16 022f19bfe0b628e1 population | subpopulation MXL | AMR UK Peru +subject17 0224684f6cb9e980 population | subpopulation PEL | AMR Spain UK +subject18 022f19bfe0b628e1 population | subpopulation PEL | AMR USA USA +subject19 0224684f6cb9e980 population | subpopulation MSL | AFR Spain Peru +subject20 0224684f6cb9e980 population | subpopulation GBR | EUR Peru UK diff --git a/test_data/test_files.R b/test_data/test_files.R index a6676c1..4ba433a 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -7,7 +7,7 @@ n <- 20 file_names <- c( # "pilot", - # "population_descriptor", + "population_descriptor", "cmqt_flags", "cmqt_anthropometry", "cmqt_blood_pressure", @@ -39,6 +39,17 @@ subject <- tibble( set.seed(4) +population_descriptor <- tibble( + subject_id=rep(subject$subject_id), + population_descriptor_id = sample(x = c("01bb18a183122d64", "022f19bfe0b628e1", "0224684f6cb9e980"), size = n, replace = TRUE), + population_descriptor = sample(x = c("population|subpopulation"), size = n, replace = TRUE), + population_label = sample(x = c("PEL|AMR", "IBS|EUR", "MXL|AMR", "GBR|EUR", "MSL|AFR"), size = n, replace = TRUE), + country_of_recruitment = sample(x = c("Peru", "Spain", "USA", "UK", "Sierra Leone"), size = n, replace = TRUE), + country_of_birth = sample(x = c("Peru", "Spain", "USA", "UK", "Sierra Leone"), size = n, replace = TRUE) +) + +set.seed(4) + cmqt_flags <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), @@ -290,30 +301,32 @@ readme <- tibble( subject <- subject %>% select(-age_at_obs) -setwd("~/Downloads/primed_data_models") -write_tsv(readme, "test_data/readme.tsv") -write_tsv(subject, "test_data/subject.tsv") -write_tsv(cmqt_flags, "test_data/cmqt_flags.tsv") -write_tsv(cmqt_anthropometry, "test_data/cmqt_anthropometry.tsv") -write_tsv(cmqt_blood_pressure, "test_data/cmqt_blood_pressure.tsv") -write_tsv(cmqt_lipids, "test_data/cmqt_lipids.tsv") -write_tsv(cmqt_hematology, "test_data/cmqt_hematology.tsv") -write_tsv(cmqt_glycemic, "test_data/cmqt_glycemic.tsv") -write_tsv(cmqt_kidney_function, "test_data/cmqt_kidney_function.tsv") -write_tsv(diabetes_diabetes, "test_data/diabetes_diabetes.tsv") -write_tsv(cvd_cad, "test_data/cvd_cad.tsv") -write_tsv(cancer_breast, "test_data/cancer_breast.tsv") -write_tsv(cancer_prostate, "test_data/cancer_prostate.tsv") -write_tsv(family_history, "test_data/family_history.tsv") +# working in primed_data_models/test_data directory + +write_tsv(readme, "readme.tsv") +write_tsv(subject, "subject.tsv") +write_tsv(population_descriptor, "population_descriptor.tsv") +write_tsv(cmqt_flags, "cmqt_flags.tsv") +write_tsv(cmqt_anthropometry, "cmqt_anthropometry.tsv") +write_tsv(cmqt_blood_pressure, "cmqt_blood_pressure.tsv") +write_tsv(cmqt_lipids, "cmqt_lipids.tsv") +write_tsv(cmqt_hematology, "cmqt_hematology.tsv") +write_tsv(cmqt_glycemic, "cmqt_glycemic.tsv") +write_tsv(cmqt_kidney_function, "cmqt_kidney_function.tsv") +write_tsv(diabetes_diabetes, "diabetes_diabetes.tsv") +write_tsv(cvd_cad, "cvd_cad.tsv") +write_tsv(cancer_breast, "cancer_breast.tsv") +write_tsv(cancer_prostate, "cancer_prostate.tsv") +write_tsv(family_history, "family_history.tsv") phenotype_harmonized <- tibble( # phenotype_harmonized_id= domain=(file_names), - md5sum=as.vector(md5sum(paste0("test_data/", file_names, ".tsv"))), + md5sum=as.vector(md5sum(paste0(file_names, ".tsv"))), file_path=paste0(bucket, file_names, '.tsv'), file_readme_path=paste0(bucket, 'readme.tsv'), n_subjects=rep(n, length(file_names)), n_rows=rep(n, length(file_names)), ) -write_tsv(phenotype_harmonized, "test_data/phenotype_harmonized.tsv") +write_tsv(phenotype_harmonized, "phenotype_harmonized.tsv") From 581d0d661c175f52dd84ca15d52120cc065718db Mon Sep 17 00:00:00 2001 From: amywatt Date: Tue, 7 May 2024 00:33:12 -0700 Subject: [PATCH 15/18] adding new variables to diabetes table --- test_data/diabetes_diabetes.tsv | 42 ++++++++++++++--------------- test_data/population_descriptor.tsv | 42 ++++++++++++++--------------- test_data/test_files.R | 6 +++-- 3 files changed, 46 insertions(+), 44 deletions(-) diff --git a/test_data/diabetes_diabetes.tsv b/test_data/diabetes_diabetes.tsv index 5a1a6fb..c5c6440 100644 --- a/test_data/diabetes_diabetes.tsv +++ b/test_data/diabetes_diabetes.tsv @@ -1,21 +1,21 @@ -subject_id age_at_obs visit t1d_1 t2d_1 -subject1 59 visit_1 0 0 -subject2 46 visit_1 0 1 -subject3 55 visit_1 0 0 -subject4 55 visit_1 0 0 -subject5 62 visit_1 0 0 -subject6 55 visit_1 0 0 -subject7 61 visit_1 0 0 -subject8 65 visit_1 0 0 -subject9 66 visit_1 0 0 -subject10 51 visit_1 0 0 -subject11 61 visit_1 0 0 -subject12 55 visit_1 0 0 -subject13 52 visit_1 0 0 -subject14 66 visit_1 1 0 -subject15 57 visit_1 0 0 -subject16 57 visit_1 0 1 -subject17 67 visit_1 1 0 -subject18 59 visit_1 0 0 -subject19 67 visit_1 1 0 -subject20 62 visit_1 0 0 +subject_id age_at_obs visit t1d_1 t2d_1 t1d_dprism_1 t2d_dprism_1 +subject1 59 visit_1 0 0 0 0 +subject2 46 visit_1 0 1 0 0 +subject3 55 visit_1 0 0 0 0 +subject4 55 visit_1 0 0 0 0 +subject5 62 visit_1 0 0 0 0 +subject6 55 visit_1 0 0 0 0 +subject7 61 visit_1 0 0 0 0 +subject8 65 visit_1 0 0 0 0 +subject9 66 visit_1 0 0 0 0 +subject10 51 visit_1 0 0 0 1 +subject11 61 visit_1 0 0 0 0 +subject12 55 visit_1 0 0 0 0 +subject13 52 visit_1 0 0 0 0 +subject14 66 visit_1 1 0 0 0 +subject15 57 visit_1 0 0 0 0 +subject16 57 visit_1 0 1 0 0 +subject17 67 visit_1 1 0 0 0 +subject18 59 visit_1 0 0 0 0 +subject19 67 visit_1 1 0 0 0 +subject20 62 visit_1 0 0 0 0 diff --git a/test_data/population_descriptor.tsv b/test_data/population_descriptor.tsv index f9fd9c0..eb85c3a 100644 --- a/test_data/population_descriptor.tsv +++ b/test_data/population_descriptor.tsv @@ -1,21 +1,21 @@ -subject_id population_descriptor_id population_descriptor population_label country_of_recruitment country_of_birth -subject1 022f19bfe0b628e1 population | subpopulation MSL | AFR USA Sierra Leone -subject2 01bb18a183122d64 population | subpopulation IBS | EUR Peru UK -subject3 01bb18a183122d64 population | subpopulation MXL | AMR Sierra Leone USA -subject4 01bb18a183122d64 population | subpopulation PEL | AMR Sierra Leone Sierra Leone -subject5 0224684f6cb9e980 population | subpopulation MSL | AFR Spain Sierra Leone -subject6 01bb18a183122d64 population | subpopulation PEL | AMR UK Peru -subject7 0224684f6cb9e980 population | subpopulation MSL | AFR Peru Peru -subject8 0224684f6cb9e980 population | subpopulation MSL | AFR USA Sierra Leone -subject9 0224684f6cb9e980 population | subpopulation GBR | EUR Sierra Leone Spain -subject10 01bb18a183122d64 population | subpopulation MXL | AMR Sierra Leone Sierra Leone -subject11 0224684f6cb9e980 population | subpopulation IBS | EUR Spain Peru -subject12 01bb18a183122d64 population | subpopulation GBR | EUR UK Peru -subject13 01bb18a183122d64 population | subpopulation MSL | AFR USA USA -subject14 0224684f6cb9e980 population | subpopulation MSL | AFR Spain Spain -subject15 022f19bfe0b628e1 population | subpopulation MSL | AFR Sierra Leone Spain -subject16 022f19bfe0b628e1 population | subpopulation MXL | AMR UK Peru -subject17 0224684f6cb9e980 population | subpopulation PEL | AMR Spain UK -subject18 022f19bfe0b628e1 population | subpopulation PEL | AMR USA USA -subject19 0224684f6cb9e980 population | subpopulation MSL | AFR Spain Peru -subject20 0224684f6cb9e980 population | subpopulation GBR | EUR Peru UK +subject_id population_descriptor population_label country_of_recruitment country_of_birth +subject1 population|subpopulation GBR|EUR Sierra Leone USA +subject2 population|subpopulation MSL|AFR Spain Peru +subject3 population|subpopulation MXL|AMR USA Sierra Leone +subject4 population|subpopulation MXL|AMR Peru Sierra Leone +subject5 population|subpopulation GBR|EUR Sierra Leone Spain +subject6 population|subpopulation MSL|AFR Peru UK +subject7 population|subpopulation MXL|AMR Sierra Leone Peru +subject8 population|subpopulation MSL|AFR Sierra Leone USA +subject9 population|subpopulation MXL|AMR UK Sierra Leone +subject10 population|subpopulation MXL|AMR USA Sierra Leone +subject11 population|subpopulation MXL|AMR Spain Spain +subject12 population|subpopulation IBS|EUR UK UK +subject13 population|subpopulation MSL|AFR Sierra Leone USA +subject14 population|subpopulation GBR|EUR Sierra Leone Spain +subject15 population|subpopulation MXL|AMR Sierra Leone Sierra Leone +subject16 population|subpopulation MSL|AFR USA UK +subject17 population|subpopulation MXL|AMR Peru Spain +subject18 population|subpopulation GBR|EUR Peru USA +subject19 population|subpopulation IBS|EUR Sierra Leone Spain +subject20 population|subpopulation PEL|AMR UK Peru diff --git a/test_data/test_files.R b/test_data/test_files.R index 4ba433a..48d67ce 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -41,7 +41,7 @@ set.seed(4) population_descriptor <- tibble( subject_id=rep(subject$subject_id), - population_descriptor_id = sample(x = c("01bb18a183122d64", "022f19bfe0b628e1", "0224684f6cb9e980"), size = n, replace = TRUE), + # population_descriptor_id = sample(x = c("01bb18a183122d64", "022f19bfe0b628e1", "0224684f6cb9e980"), size = n, replace = TRUE), population_descriptor = sample(x = c("population|subpopulation"), size = n, replace = TRUE), population_label = sample(x = c("PEL|AMR", "IBS|EUR", "MXL|AMR", "GBR|EUR", "MSL|AFR"), size = n, replace = TRUE), country_of_recruitment = sample(x = c("Peru", "Spain", "USA", "UK", "Sierra Leone"), size = n, replace = TRUE), @@ -161,6 +161,8 @@ diabetes_diabetes <- tibble( visit=rep("visit_1", n), t1d_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), t2d_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + t1d_dprism_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), + t2d_dprism_1=sample(x = c(0, 1), size = n, replace = TRUE, prob = c(0.95, 0.05)), ) set.seed(4) @@ -301,7 +303,7 @@ readme <- tibble( subject <- subject %>% select(-age_at_obs) -# working in primed_data_models/test_data directory +# working in primed_data_models/test_data directory write_tsv(readme, "readme.tsv") write_tsv(subject, "subject.tsv") From bbde08981519f47713ad7d75a760cb2f7605fba4 Mon Sep 17 00:00:00 2001 From: amywatt Date: Tue, 7 May 2024 12:53:35 -0700 Subject: [PATCH 16/18] removing population descriptor id --- test_data/phenotype_harmonized.tsv | 4 +-- test_data/population_descriptor.tsv | 42 ++++++++++++++--------------- test_data/test_files.R | 2 +- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv index 761f4f8..97d601e 100644 --- a/test_data/phenotype_harmonized.tsv +++ b/test_data/phenotype_harmonized.tsv @@ -1,5 +1,5 @@ domain md5sum file_path file_readme_path n_subjects n_rows -population_descriptor b3b28f8d7450cda4e44c69a4bf35683f gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/population_descriptor.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +population_descriptor a3b245349c56f8805dbb11d7aa331950 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/population_descriptor.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_flags 99dee9ebbef7e7a0681d4ae5b3b0063e gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_flags.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_anthropometry d26b5af92459c0961442e2dcf7ce9235 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_blood_pressure 9877023cd800d4235e1b99a650f88694 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 @@ -7,7 +7,7 @@ cmqt_lipids 17ce825be3d94425e26c08987fa78cd9 gs://fc-e3b6ff37-761e-4e53-89c0-fb2 cmqt_hematology 01f2848bccf4ac09c8addd2434323a0b gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_hematology.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_glycemic 4af06300bac223b5462356532fa98729 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_glycemic.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_kidney_function 35962811d3e9c081de82e4f3f8e4bfb5 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_kidney_function.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -diabetes_diabetes fdf40653b5a3e9f8eb6f4c9608f07e66 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/diabetes_diabetes.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +diabetes_diabetes bf4ff29e1312614c66a08a27b4d129c5 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/diabetes_diabetes.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cvd_cad 26439afc298880695450a008d3f92290 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cvd_cad.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cancer_breast 322959303fc4c173f503aaea46fbccbf gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_breast.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cancer_prostate af65adca42868373afc81bef2dd2cd2b gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cancer_prostate.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 diff --git a/test_data/population_descriptor.tsv b/test_data/population_descriptor.tsv index eb85c3a..4d86b8b 100644 --- a/test_data/population_descriptor.tsv +++ b/test_data/population_descriptor.tsv @@ -1,21 +1,21 @@ -subject_id population_descriptor population_label country_of_recruitment country_of_birth -subject1 population|subpopulation GBR|EUR Sierra Leone USA -subject2 population|subpopulation MSL|AFR Spain Peru -subject3 population|subpopulation MXL|AMR USA Sierra Leone -subject4 population|subpopulation MXL|AMR Peru Sierra Leone -subject5 population|subpopulation GBR|EUR Sierra Leone Spain -subject6 population|subpopulation MSL|AFR Peru UK -subject7 population|subpopulation MXL|AMR Sierra Leone Peru -subject8 population|subpopulation MSL|AFR Sierra Leone USA -subject9 population|subpopulation MXL|AMR UK Sierra Leone -subject10 population|subpopulation MXL|AMR USA Sierra Leone -subject11 population|subpopulation MXL|AMR Spain Spain -subject12 population|subpopulation IBS|EUR UK UK -subject13 population|subpopulation MSL|AFR Sierra Leone USA -subject14 population|subpopulation GBR|EUR Sierra Leone Spain -subject15 population|subpopulation MXL|AMR Sierra Leone Sierra Leone -subject16 population|subpopulation MSL|AFR USA UK -subject17 population|subpopulation MXL|AMR Peru Spain -subject18 population|subpopulation GBR|EUR Peru USA -subject19 population|subpopulation IBS|EUR Sierra Leone Spain -subject20 population|subpopulation PEL|AMR UK Peru +subject_id population_descriptor_id population_descriptor population_label country_of_recruitment country_of_birth +subject1 022f19bfe0b628e1 population|superpopulation MSL|AFR USA Sierra Leone +subject2 01bb18a183122d64 population|superpopulation IBS|EUR Peru UK +subject3 01bb18a183122d64 population|superpopulation MXL|AMR Sierra Leone USA +subject4 01bb18a183122d64 population|superpopulation PEL|AMR Sierra Leone Sierra Leone +subject5 0224684f6cb9e980 population|superpopulation MSL|AFR Spain Sierra Leone +subject6 01bb18a183122d64 population|superpopulation PEL|AMR UK Peru +subject7 0224684f6cb9e980 population|superpopulation MSL|AFR Peru Peru +subject8 0224684f6cb9e980 population|superpopulation MSL|AFR USA Sierra Leone +subject9 0224684f6cb9e980 population|superpopulation GBR|EUR Sierra Leone Spain +subject10 01bb18a183122d64 population|superpopulation MXL|AMR Sierra Leone Sierra Leone +subject11 0224684f6cb9e980 population|superpopulation IBS|EUR Spain Peru +subject12 01bb18a183122d64 population|superpopulation GBR|EUR UK Peru +subject13 01bb18a183122d64 population|superpopulation MSL|AFR USA USA +subject14 0224684f6cb9e980 population|superpopulation MSL|AFR Spain Spain +subject15 022f19bfe0b628e1 population|superpopulation MSL|AFR Sierra Leone Spain +subject16 022f19bfe0b628e1 population|superpopulation MXL|AMR UK Peru +subject17 0224684f6cb9e980 population|superpopulation PEL|AMR Spain UK +subject18 022f19bfe0b628e1 population|superpopulation PEL|AMR USA USA +subject19 0224684f6cb9e980 population|superpopulation MSL|AFR Spain Peru +subject20 0224684f6cb9e980 population|superpopulation GBR|EUR Peru UK diff --git a/test_data/test_files.R b/test_data/test_files.R index 48d67ce..bea1c91 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -42,7 +42,7 @@ set.seed(4) population_descriptor <- tibble( subject_id=rep(subject$subject_id), # population_descriptor_id = sample(x = c("01bb18a183122d64", "022f19bfe0b628e1", "0224684f6cb9e980"), size = n, replace = TRUE), - population_descriptor = sample(x = c("population|subpopulation"), size = n, replace = TRUE), + population_descriptor = sample(x = c("population|superpopulation"), size = n, replace = TRUE), population_label = sample(x = c("PEL|AMR", "IBS|EUR", "MXL|AMR", "GBR|EUR", "MSL|AFR"), size = n, replace = TRUE), country_of_recruitment = sample(x = c("Peru", "Spain", "USA", "UK", "Sierra Leone"), size = n, replace = TRUE), country_of_birth = sample(x = c("Peru", "Spain", "USA", "UK", "Sierra Leone"), size = n, replace = TRUE) From 37cb2d5502cd305f3e9e682b8a5991a623ac1286 Mon Sep 17 00:00:00 2001 From: Adrienne Stilp Date: Tue, 14 May 2024 15:09:28 -0700 Subject: [PATCH 17/18] Set the random number generator type for test data This appears to generate the same random numbers across different versions of R. See stack overflow post: https://stackoverflow.com/questions/47199415/is-set-seed-consistent-over-different-versions-of-r-and-ubuntu --- test_data/test_files.R | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test_data/test_files.R b/test_data/test_files.R index bea1c91..6f4b076 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -21,6 +21,14 @@ file_names <- c( "cancer_prostate", "family_history") +# Compatibility for using set.seed between different versions of R: +# https://stackoverflow.com/questions/47199415/is-set-seed-consistent-over-different-versions-of-r-and-ubuntu +RNGkind( + kind = "Mersenne-Twister", + normal.kind = "Inversion", + sample.kind = "Rounding" +) + # truncated normal distribution rtnorm <- function(n, mean, sd, a = -Inf, b = Inf){ qnorm(runif(n, pnorm(a, mean, sd), pnorm(b, mean, sd)), mean, sd) From a9bdc3fedd2061baec02330e9eb6769a3cd570ea Mon Sep 17 00:00:00 2001 From: Adrienne Stilp Date: Tue, 14 May 2024 15:18:43 -0700 Subject: [PATCH 18/18] Update test data to match new cqmt_hematology limits For some reason, this also changes the population_descriptor table, but I'm not sure why. --- test_data/cmqt_hematology.tsv | 40 ++++++++++++------------ test_data/phenotype_harmonized.tsv | 4 +-- test_data/population_descriptor.tsv | 42 ++++++++++++------------- test_data/test_files.R | 48 ++++++++++++++--------------- 4 files changed, 67 insertions(+), 67 deletions(-) diff --git a/test_data/cmqt_hematology.tsv b/test_data/cmqt_hematology.tsv index e119655..1eaa6e2 100644 --- a/test_data/cmqt_hematology.tsv +++ b/test_data/cmqt_hematology.tsv @@ -1,21 +1,21 @@ subject_id age_at_obs visit rbc_1 hemoglobin_1 hematocrit_1 mcv_1 mch_1 mchc_1 rdw_1 wbc_1 basophil_count_1 eosinophil_count_1 lymphocyte_count_1 monocyte_count_1 neutrophil_count_1 platelet_count_1 mean_platelet_volume_1 -subject1 59 visit_1 4.325132294295363 17.622444942612933 0.937483450479749 2.1675254921479365 35.86295385902645 36.57385666659503 10.684801935982016 7.600293163441274 300.04998709787844 234.20340890495638 274.98394655251263 571.473014377524 3422.0048029466243 603.2111681998314 7.521621937029895 -subject2 46 visit_1 3.186261141604862 13.495507059129762 0.47261415383917543 1.4832875675756076 40.12890149360826 33.57170067452322 9.884886490458578 5.00070481215926 271.93289907884605 249.08294804171615 1371.0717294977026 295.21997318589365 5333.678600210294 795.4598875730798 8.99155813908785 -subject3 55 visit_1 5.336716967608491 16.92286708076371 0.917004934566857 1.7182941125277411 35.374556470925036 32.8505090713972 9.643524820181518 13.621280351191137 115.56358479576589 106.8000974030938 1681.711161646161 419.7753967516861 3726.737559935488 623.0899589794328 12.633563355254331 -subject4 55 visit_1 4.8939708657812115 16.86477063376474 -0.2752194303459178 -0.5623642598084516 42.12738940450381 31.05854591141534 9.894228392360946 6.987834703933746 262.1985390347415 57.21080161022263 -322.58831749571505 553.9207716309046 4448.178615431377 897.0080877678893 11.58886038213768 -subject5 62 visit_1 6.453427001669459 14.778690821944084 0.07160256894761835 0.11095278358164622 38.262766021325355 31.934523134355 10.04488279008542 10.919810865991021 127.73862196484127 297.5765094607528 3200.542669889818 373.21582744134145 6234.413334245976 633.6938753648045 10.97006835064897 -subject6 55 visit_1 5.033913162879584 12.1511689470332 0.055141542360093165 0.10954011437207664 37.536623102170125 31.386950289520662 8.273826768011807 10.963483792590361 155.05213748633423 45.365881222933666 583.8208336334947 602.4672580543775 1123.0112048529631 1034.6301641752973 14.742063625852968 -subject7 61 visit_1 2.0781300548482653 16.767652076799553 0.439537475655182 2.1150624073298183 37.20528895405309 32.323495185361715 11.5557870203106 5.176230174420642 80.44939499311161 201.77034792190517 1680.4596689057294 207.79126927194477 5268.731449623816 812.2030926987908 9.436589902978806 -subject8 65 visit_1 3.680283221082456 15.729517453685874 0.24973794232065624 0.6785834875154052 39.77886416456275 31.73869263795044 10.776412691732506 9.04755678171443 239.04723630168436 122.52825987780575 1740.8428474014622 505.63283175288973 3001.610392770148 831.3507242668209 10.397990954360015 -subject9 66 visit_1 6.844809807863908 10.21591568477695 0.6895616621137035 1.0074226771377572 39.05233807899346 34.73749635056603 8.901492491246513 6.216743954936394 148.36233573634303 177.06577127571126 1557.3258583450393 560.5533907121132 6252.544874980189 893.7811511205126 11.62047044172219 -subject10 51 visit_1 6.6652948205240845 16.720542514009427 -0.3189528074290151 -0.4785276810965373 37.65745351407476 33.59639395933824 8.271980246357806 0.8532715059663865 290.9868977935881 172.56178956099063 1120.5514628579476 466.64368158983604 3799.630625916291 909.0321590809087 9.7554828289856 -subject11 61 visit_1 4.8499067472704755 13.460392538663887 0.13450274336951068 0.2773305763975952 38.31735379488651 31.444680194453674 10.427638224556889 11.234056921132225 287.69846529586744 379.60637815269996 609.8723207400225 427.453770274667 1780.3159678502925 879.1204326117057 12.015438884874985 -subject12 55 visit_1 4.023579181006855 16.15579773696883 0.15050940450832806 0.3740684543224642 37.02866986765515 32.40397503866285 10.744564645243804 10.004735315067858 118.38041901281272 152.18871006333444 1299.57719753351 427.1587669181431 5315.848523890034 615.7273309314264 11.035164284836954 -subject13 52 visit_1 4.574586007775727 10.737366361547448 0.3681470272647353 0.8047657791086918 36.08218785000159 34.318164845892 10.865220797014493 5.106122999806655 353.92932698972027 140.52371469927994 1865.5808964445987 424.6810773155952 3913.407971480952 664.6854778104007 9.540057216236947 -subject14 66 visit_1 3.932294326130037 8.553432641046307 0.5742499051317631 1.4603431419562902 38.36103458421411 35.22959526628691 10.305328810056075 2.074288004406199 337.45257156092345 -25.793821702835913 91.2529902161823 656.8273331958253 5256.778450014236 411.22093885184114 9.664732442940606 -subject15 57 visit_1 4.051527861095489 15.583395617493153 1.1883603878896962 2.9331166627307272 39.443468565637836 35.37589592477283 9.885977208752555 6.245678297815839 151.67512888341756 368.26072117979635 953.8288439859309 608.3189858480944 2126.3415749328706 823.9966313289008 5.0479167273722645 -subject16 57 visit_1 4.25354016132746 11.786440507729012 0.16129653099634933 0.3792053792340621 37.260919043758406 33.90589797788752 10.423652240248318 10.907831108617831 255.03499503389 207.22906844212528 649.8029555728975 345.7409254647473 3285.2331896932055 1053.2154539560975 8.797691547079966 -subject17 67 visit_1 5.747540258541509 12.317783748179153 0.17899711535641527 0.31143255602325687 38.47507662507931 38.66064335659898 9.202290313193137 9.656876977815678 114.26343369929609 155.9975906826424 410.40832921490255 449.1612599622644 3909.8541641364536 580.4026398921327 10.063017221093515 -subject18 59 visit_1 3.933694004121869 15.802288512587474 0.6783866534804426 1.724553696270234 36.66815577517017 32.844868018038014 9.395802750630239 7.753533497858336 129.30386338275255 262.6573392588267 2777.029887306173 316.59121069599075 3303.892279283257 887.5812916491656 12.452249176572817 -subject19 67 visit_1 3.849447336121143 11.602312360627149 0.33773441415680605 0.8773581885058875 36.406384980314485 35.936958268583695 11.7150105937509 2.9698587246794848 -9.707753342933955 120.02039405625881 104.52486148489174 464.653602921763 3282.0521893331625 923.2786087419738 8.601570381694092 -subject20 62 visit_1 3.5748331466896133 11.08736950428935 0.9395592780790926 2.6282605076243866 37.89660613737723 33.44492874507535 9.284051722201893 11.637822281053985 309.9436754759688 87.2013977768164 3050.4948348315243 371.21765760935324 4791.230490643542 803.573928456392 6.9917024211199426 +subject1 59 visit_1 4.331242107540918 14.699831955867218 1.0507007173563738 2.425864662534217 38.306928369408766 36.68741726479722 9.33625684885929 4.837801587922566 71.42772404350012 94.27227724731605 10.65974915778088 92.64244538466608 67.63941344781097 736.8528701918846 12.156038334456646 +subject2 46 visit_1 0.6490170494106229 21.12323036312705 0.25918453116543316 3.9934934128587307 35.066673320966075 35.10191109304475 10.330876844482647 7.20416513742026 79.39636294584326 96.77233454209555 62.15570094970758 93.96357057999614 65.78050849353485 698.0350961379306 7.553121986292599 +subject3 55 visit_1 3.1980202589448847 13.047185612998463 0.5375344561422558 1.6808350561218874 40.10386511486943 34.36307073793644 9.3762735204598755 11.200648410483769 48.88121147558951 58.93912225932215 22.321686175271225 79.93157314915618 78.02542645121639 578.0545075181684 11.336490679094158 +subject4 55 visit_1 3.1263743668291184 12.924394473329048 0.20262728579993228 0.6481222720791566 40.72793324820852 36.76992766588175 10.081419678713079 6.134925878556956 97.00901780820928 51.03445334772283 90.0376464491842 74.33729324054906 30.01939965825568 810.6677990274707 7.639034212955718 +subject5 62 visit_1 5.340715423573847 14.149192814228957 0.961091097136232 1.7995549676621758 36.49157757418824 36.58502471622523 9.920367569109894 4.122746433104384 31.185114488057593 45.50139768568556 57.8448072929441 74.33198103732025 81.40702468371228 979.8198153207237 8.070750625543084 +subject6 55 visit_1 3.0500120361002647 15.872085292265494 0.10389887095259559 0.3406506916131398 38.65608445705722 30.867105720639945 10.18757961117527 9.79678417198308 61.22373470412549 15.56409593425434 25.968998061953926 36.07140212522006 55.974919301464524 460.46475274142426 4.829004093854326 +subject7 61 visit_1 4.898715793982734 12.864617239095438 0.9536798790737273 1.9467956892807825 35.03562180804909 30.62390278207973 10.43562475975714 14.195821163793196 72.0935297201866 15.276083235005075 74.26374632377974 80.13123679754148 63.14665804360129 700.5809197907139 6.049535145450654 +subject8 65 visit_1 5.978826777809061 16.00499616517567 0.9333799093905899 1.5611422509427955 38.07448401003646 38.5676104111825 9.200891240400765 5.514167198490328 98.56613688039826 29.772883761288625 27.885157284530123 81.65744798624922 48.47308829934582 851.8412695389802 10.276039822533358 +subject9 66 visit_1 6.456227524503385 13.103082604607906 0.691945093478521 1.0717483094459959 39.72226371777236 32.35801282228957 11.970900990481827 8.407161180831487 85.78047607122514 28.587851084480377 76.55138112768714 83.176752761290345 46.68863779671028 906.2353440351303 8.830453411428179 +subject10 51 visit_1 1.8585079840661032 13.224158007248933 0.5368324581033729 2.888513058355949 41.815538156982306 36.93759254094799 10.875624035500651 6.827592319362273 61.459014070450024 84.94724259419142 96.03383508037382 27.33603824459749 97.0884073963125 650.5337788435525 12.809173350157668 +subject11 61 visit_1 5.038398349541582 13.507104596895823 0.385568952474845 0.7652609534335965 37.19096031542644 32.27570772185876 9.40324132625517 7.322427283778194 53.24530426480112 19.77684802955605 49.685158953081555 29.513045299988676 67.49661696302564 907.9834930821021 11.93855445597262 +subject12 55 visit_1 3.164376155023718 10.870990649246837 0.7173750797582257 2.2670347790964462 38.670933254618845 31.71671126150096 9.744384026449396 5.378905951845624 8.4349891455212 75.11231898863137 91.44127828763908 97.28269461946172 84.839729514792 520.2914742217849 13.170023303923719 +subject13 52 visit_1 2.1072189633830214 16.49509377712789 0.941302211920198 4.467035596571275 37.54518918214765 34.19768738212932 9.44749277902548 6.828305332148935 73.79284419739945 35.57231620975256 99.0685898491156 98.38209897507045 22.095863207618095 589.093879395676 11.104585125000993 +subject14 66 visit_1 6.531223987188424 14.192694888090621 0.7967450472284645 1.2199015816810916 37.206056033510244 32.78847995045633 9.982435738478829 5.330558522765378 97.8167732176243 66.63150241736679 24.389152077818153 88.02449089787314 19.080751758793213 705.5060479810853 10.393691497825055 +subject15 57 visit_1 3.6888878001464 12.867416595874264 0.8100279740111506 2.1958596137811597 39.86819231089057 33.24868973829392 10.695966638651418 10.676300620280028 42.0384670478268 25.273330428640776 29.472236180941763 93.58004215951803 28.28153386847771 817.845017374823 9.835689755023369 +subject16 57 visit_1 3.8387251164439995 18.68845527121347 0.4130665035858365 1.0760512697728157 38.99113682714732 31.24404162103528 10.026817792153352 11.55648022781224 91.01508567928023 0.7303644154252709 71.66503914633745 92.50372265149338 59.27362056677657 805.4123743012162 12.300959536797052 +subject17 67 visit_1 6.8473317646864995 12.698924635972869 0.19861312771314057 0.2900591566739047 37.068208237656556 35.44780830112809 9.844336035800064 9.588419550006606 37.1110597034245 77.4808808162987 84.19512572069016 52.078220338043195 95.88002580707143 653.762105604393 6.646578015284696 +subject18 59 visit_1 4.324289957643888 13.932680937521425 0.1958217641468802 0.45284142845401315 37.70480218882887 33.72078973767995 10.809597015514536 6.244331249967679 44.95512367435171 40.63588105728326 48.12692674732034 57.738195580550155 26.119343846592074 999.2530795459547 12.79798374411447 +subject19 67 visit_1 6.667937982954537 12.149701457099965 0.9353034472497997 1.4026876819201766 36.724913033950294 30.405235842855383 11.348898207846023 7.50277481368734 64.62057896292924 59.381829864982564 40.572445263914005 29.447147916447022 24.611362824565276 311.2736854250777 12.425215195338607 +subject20 62 visit_1 5.07211015424058 5.5650216694648265 0.7143852596478991 1.4084576989137971 35.77725913248309 35.056223678308804 8.53216220346032 1.610828711104471 86.77092059988642 82.49153079927703 9.94479230358479 89.43651066161601 28.236995685596412 929.0797952542335 10.555195850440816 diff --git a/test_data/phenotype_harmonized.tsv b/test_data/phenotype_harmonized.tsv index 97d601e..36f6253 100644 --- a/test_data/phenotype_harmonized.tsv +++ b/test_data/phenotype_harmonized.tsv @@ -1,10 +1,10 @@ domain md5sum file_path file_readme_path n_subjects n_rows -population_descriptor a3b245349c56f8805dbb11d7aa331950 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/population_descriptor.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +population_descriptor 98316ca7ae3b5332e28e916155b844d3 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/population_descriptor.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_flags 99dee9ebbef7e7a0681d4ae5b3b0063e gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_flags.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_anthropometry d26b5af92459c0961442e2dcf7ce9235 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_anthropometry.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_blood_pressure 9877023cd800d4235e1b99a650f88694 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_blood_pressure.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_lipids 17ce825be3d94425e26c08987fa78cd9 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_lipids.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 -cmqt_hematology 01f2848bccf4ac09c8addd2434323a0b gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_hematology.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 +cmqt_hematology 155f8eac3c84a91fdb17eff3739e7799 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_hematology.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_glycemic 4af06300bac223b5462356532fa98729 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_glycemic.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 cmqt_kidney_function 35962811d3e9c081de82e4f3f8e4bfb5 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/cmqt_kidney_function.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 diabetes_diabetes bf4ff29e1312614c66a08a27b4d129c5 gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/diabetes_diabetes.tsv gs://fc-e3b6ff37-761e-4e53-89c0-fb243b8bd8e5/test_data/readme.tsv 20 20 diff --git a/test_data/population_descriptor.tsv b/test_data/population_descriptor.tsv index 4d86b8b..92b22ae 100644 --- a/test_data/population_descriptor.tsv +++ b/test_data/population_descriptor.tsv @@ -1,21 +1,21 @@ -subject_id population_descriptor_id population_descriptor population_label country_of_recruitment country_of_birth -subject1 022f19bfe0b628e1 population|superpopulation MSL|AFR USA Sierra Leone -subject2 01bb18a183122d64 population|superpopulation IBS|EUR Peru UK -subject3 01bb18a183122d64 population|superpopulation MXL|AMR Sierra Leone USA -subject4 01bb18a183122d64 population|superpopulation PEL|AMR Sierra Leone Sierra Leone -subject5 0224684f6cb9e980 population|superpopulation MSL|AFR Spain Sierra Leone -subject6 01bb18a183122d64 population|superpopulation PEL|AMR UK Peru -subject7 0224684f6cb9e980 population|superpopulation MSL|AFR Peru Peru -subject8 0224684f6cb9e980 population|superpopulation MSL|AFR USA Sierra Leone -subject9 0224684f6cb9e980 population|superpopulation GBR|EUR Sierra Leone Spain -subject10 01bb18a183122d64 population|superpopulation MXL|AMR Sierra Leone Sierra Leone -subject11 0224684f6cb9e980 population|superpopulation IBS|EUR Spain Peru -subject12 01bb18a183122d64 population|superpopulation GBR|EUR UK Peru -subject13 01bb18a183122d64 population|superpopulation MSL|AFR USA USA -subject14 0224684f6cb9e980 population|superpopulation MSL|AFR Spain Spain -subject15 022f19bfe0b628e1 population|superpopulation MSL|AFR Sierra Leone Spain -subject16 022f19bfe0b628e1 population|superpopulation MXL|AMR UK Peru -subject17 0224684f6cb9e980 population|superpopulation PEL|AMR Spain UK -subject18 022f19bfe0b628e1 population|superpopulation PEL|AMR USA USA -subject19 0224684f6cb9e980 population|superpopulation MSL|AFR Spain Peru -subject20 0224684f6cb9e980 population|superpopulation GBR|EUR Peru UK +subject_id population_descriptor population_label country_of_recruitment country_of_birth +subject1 population|superpopulation GBR|EUR Sierra Leone USA +subject2 population|superpopulation MSL|AFR Spain Peru +subject3 population|superpopulation MXL|AMR USA Sierra Leone +subject4 population|superpopulation MXL|AMR Peru Sierra Leone +subject5 population|superpopulation GBR|EUR Sierra Leone Spain +subject6 population|superpopulation MSL|AFR Peru UK +subject7 population|superpopulation MXL|AMR Sierra Leone Peru +subject8 population|superpopulation MSL|AFR Sierra Leone USA +subject9 population|superpopulation MXL|AMR UK Sierra Leone +subject10 population|superpopulation MXL|AMR USA Sierra Leone +subject11 population|superpopulation MXL|AMR Spain Spain +subject12 population|superpopulation IBS|EUR UK UK +subject13 population|superpopulation MSL|AFR Sierra Leone USA +subject14 population|superpopulation GBR|EUR Sierra Leone Spain +subject15 population|superpopulation MXL|AMR Sierra Leone Sierra Leone +subject16 population|superpopulation MSL|AFR USA UK +subject17 population|superpopulation MXL|AMR Peru Spain +subject18 population|superpopulation GBR|EUR Peru USA +subject19 population|superpopulation IBS|EUR Sierra Leone Spain +subject20 population|superpopulation PEL|AMR UK Peru diff --git a/test_data/test_files.R b/test_data/test_files.R index 6f4b076..2c067d1 100644 --- a/test_data/test_files.R +++ b/test_data/test_files.R @@ -40,8 +40,8 @@ subject <- tibble( subject_id = paste0("subject", 1:n), age_at_obs=round(rtnorm(n, 58, 5, 0, 90)), consent_code = sample(x = c("GRU", "HMB-IRB", "DS-CVD"), size = n, replace = TRUE), - study_nickname = sample(x = c("UKBB", "JHS", "ARIC"), size = n, replace = TRUE), - dbgap_submission = c(rep(TRUE, 2), rep(FALSE, n-2)), + study_nickname = sample(x = c("UKBB", "JHS", "ARIC"), size = n, replace = TRUE), + dbgap_submission = c(rep(TRUE, 2), rep(FALSE, n-2)), reported_sex = sample(x = c("Female", "Male", "Unknown", "Other"), size = n, replace = TRUE) ) @@ -51,7 +51,7 @@ population_descriptor <- tibble( subject_id=rep(subject$subject_id), # population_descriptor_id = sample(x = c("01bb18a183122d64", "022f19bfe0b628e1", "0224684f6cb9e980"), size = n, replace = TRUE), population_descriptor = sample(x = c("population|superpopulation"), size = n, replace = TRUE), - population_label = sample(x = c("PEL|AMR", "IBS|EUR", "MXL|AMR", "GBR|EUR", "MSL|AFR"), size = n, replace = TRUE), + population_label = sample(x = c("PEL|AMR", "IBS|EUR", "MXL|AMR", "GBR|EUR", "MSL|AFR"), size = n, replace = TRUE), country_of_recruitment = sample(x = c("Peru", "Spain", "USA", "UK", "Sierra Leone"), size = n, replace = TRUE), country_of_birth = sample(x = c("Peru", "Spain", "USA", "UK", "Sierra Leone"), size = n, replace = TRUE) ) @@ -122,21 +122,21 @@ cmqt_hematology <- tibble( subject_id=rep(subject$subject_id), age_at_obs=rep(subject$age_at_obs), visit=rep("visit_1", n), - rbc_1=rnorm(n, 4, 1.5), - hemoglobin_1=rnorm(n, 13, 3), - hematocrit_1=rnorm(n, 0.4, 0.4), + rbc_1=rtnorm(n, 4, 1.5, a=0, b=100), + hemoglobin_1=rtnorm(n, 13, 3, a=0, b=100), + hematocrit_1=rtnorm(n, 0.4, 0.4, a=0, b=100), mcv_1=(hematocrit_1 * 10) / rbc_1, - mch_1=rnorm(n, 38, 2), - mchc_1=rnorm(n, 34, 2), - rdw_1=rnorm(n, 10, 1), - wbc_1=rnorm(n, 8, 3), - basophil_count_1=rnorm(n, 200, 100), - eosinophil_count_1=rnorm(n, 200, 100), - lymphocyte_count_1=rnorm(n, 1300, 1000), - monocyte_count_1=rnorm(n, 450, 100), - neutrophil_count_1=rnorm(n, 4000, 2000), - platelet_count_1=rnorm(n, 800, 200), - mean_platelet_volume_1=rnorm(n, 10, 2) + mch_1=rtnorm(n, 38, 2, a=0, b=1000), + mchc_1=rtnorm(n, 34, 2, a=0, b=1000), + rdw_1=rtnorm(n, 10, 1, a=0, b=100), + wbc_1=rtnorm(n, 8, 3, a=0, b=10000), + basophil_count_1=rtnorm(n, 200, 100, a=0, b=100), + eosinophil_count_1=rtnorm(n, 200, 100, a=0, b=100), + lymphocyte_count_1=rtnorm(n, 1300, 1000, a=0, b=100), + monocyte_count_1=rtnorm(n, 450, 100, a=0, b=100), + neutrophil_count_1=rtnorm(n, 4000, 2000, a=0, b=100), + platelet_count_1=rtnorm(n, 800, 200, a=0, b=1000), + mean_platelet_volume_1=rtnorm(n, 10, 2, a=0, b=100) ) set.seed(4) @@ -249,7 +249,7 @@ cancer_prostate <- tibble( T_stage_unknown_2=sample(x = c("localized", "regional", "distant", "in_situ", "unstaged", "unknown"), size = n, replace = TRUE), nodal_involvement_1=sample(x = c("NX", "N0", "N1", "N2", "N3"), size = n, replace = TRUE), distant_metastasis_1=sample(x = c("MX", "M0", "M1"), size = n, replace = TRUE), - stage_system_1=rep(NA, n), + stage_system_1=rep(NA, n), gleason_score_clinical_1=sample(x = c(2, 3, 4, 5, 6, 7, 8, 9, 10), size = n, replace = TRUE), gleason_score_pathological_1=sample(x = c(2, 3, 4, 5, 6, 7, 8, 9, 10), size = n, replace = TRUE), gleason_score_unknown_1=sample(x = c(2, 3, 4, 5, 6, 7, 8, 9, 10), size = n, replace = TRUE), @@ -311,7 +311,7 @@ readme <- tibble( subject <- subject %>% select(-age_at_obs) -# working in primed_data_models/test_data directory +# working in primed_data_models/test_data directory write_tsv(readme, "readme.tsv") write_tsv(subject, "subject.tsv") @@ -331,11 +331,11 @@ write_tsv(family_history, "family_history.tsv") phenotype_harmonized <- tibble( # phenotype_harmonized_id= - domain=(file_names), - md5sum=as.vector(md5sum(paste0(file_names, ".tsv"))), - file_path=paste0(bucket, file_names, '.tsv'), - file_readme_path=paste0(bucket, 'readme.tsv'), - n_subjects=rep(n, length(file_names)), + domain=(file_names), + md5sum=as.vector(md5sum(paste0(file_names, ".tsv"))), + file_path=paste0(bucket, file_names, '.tsv'), + file_readme_path=paste0(bucket, 'readme.tsv'), + n_subjects=rep(n, length(file_names)), n_rows=rep(n, length(file_names)), )