From cb11cff20149c2a6b6bff48126b733b499475c46 Mon Sep 17 00:00:00 2001 From: Adrienne Stilp Date: Tue, 7 Jan 2025 14:28:40 -0800 Subject: [PATCH 1/6] Add request and renewal dates to the output --- parse_dars.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/parse_dars.py b/parse_dars.py index ae10a9a..e76bac9 100644 --- a/parse_dars.py +++ b/parse_dars.py @@ -7,6 +7,7 @@ def parse_phs_blocks(blocks): + #import ipdb; ipdb.set_trace() phs = blocks[0] request_date_idx = [i for i, item in enumerate(blocks) if re.search("Request Date", item)] dar_list = [] @@ -23,6 +24,10 @@ def parse_phs_blocks(blocks): this_dar["DAR"] = m.string.split(" : ")[1] else: this_dar["DAR"] = None + # Get the request and renewal dates. + date_blocks = blocks[idx].replace(":\n", ": ").split("\n") + tmp = {k: v for k, v in (xx.split(' : ') for xx in date_blocks)} + this_dar.update(tmp) # Now find the consent group. j = idx try: @@ -88,7 +93,9 @@ def parse_phs_blocks(blocks): # Convert to pandas data frame and write to tsv. df = pd.DataFrame(dars) df = df.rename(columns={ - "Abbreviation": "consent_group" + "Abbreviation": "consent_group", + "Request Date": "request_date", + "Last Renewal Date": "last_renewal_date", }) # Replace newlines in study with spaces. From c2e140452872fda25a62696f66805afe0f591720 Mon Sep 17 00:00:00 2001 From: Adrienne Stilp Date: Tue, 7 Jan 2025 14:31:46 -0800 Subject: [PATCH 2/6] Add dates to Rmd report --- dar_report.Rmd | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/dar_report.Rmd b/dar_report.Rmd index 0962751..da3264f 100644 --- a/dar_report.Rmd +++ b/dar_report.Rmd @@ -46,3 +46,19 @@ tmp <- dars %>% nrow(tmp) tmp %>% kable() ``` + +# Dates + +## Request date + +```{r} +ggplot(dars, aes(request_date)) + + geom_histogram() +``` + +## Renewal date + +```{r} +ggplot(dars, aes(last_renewal_date)) + + geom_histogram() +``` From 9b4be2b6251d3b96cfb09fd9b410b702f07e20e2 Mon Sep 17 00:00:00 2001 From: Adrienne Stilp Date: Tue, 7 Jan 2025 14:32:16 -0800 Subject: [PATCH 3/6] Ignore html files in gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 107e896..fda01b2 100644 --- a/.gitignore +++ b/.gitignore @@ -161,4 +161,5 @@ cython_debug/ ########### # Custom additions -dars.tsv \ No newline at end of file +dars.tsv +*.html From ceba1da46207dece1843f944590020e42bfbd5a3 Mon Sep 17 00:00:00 2001 From: Adrienne Stilp Date: Tue, 7 Jan 2025 14:39:02 -0800 Subject: [PATCH 4/6] Update README with --no-cache --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c28e7e1..d4881c5 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,6 @@ python parse_dars.py ## Updating the docker images ``` -docker build -t uwgac/parse-dbgap-application: . +docker build --no-cache -t uwgac/parse-dbgap-application: . docker push uwgac/parse-dbgap-application: ``` From b3d79cb14eb479debaed6d966ec52e03fc213f48 Mon Sep 17 00:00:00 2001 From: Adrienne Stilp Date: Tue, 7 Jan 2025 14:39:12 -0800 Subject: [PATCH 5/6] Update WDL to use v0.2 of docker image --- parse_dbgap_application.wdl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/parse_dbgap_application.wdl b/parse_dbgap_application.wdl index 2f84277..758176d 100644 --- a/parse_dbgap_application.wdl +++ b/parse_dbgap_application.wdl @@ -37,7 +37,7 @@ task extract_dars { } runtime { - docker: "uwgac/parse-dbgap-application:0.1" + docker: "uwgac/parse-dbgap-application:0.2" } } @@ -54,6 +54,6 @@ task render_report { File dar_report = "dar_report.html" } runtime { - docker: "uwgac/parse-dbgap-application:0.1" + docker: "uwgac/parse-dbgap-application:0.2" } } From f1b1f6e418ba7b3daa021df473cbb2fa7c499dbf Mon Sep 17 00:00:00 2001 From: Adrienne Stilp Date: Wed, 8 Jan 2025 14:14:58 -0800 Subject: [PATCH 6/6] Handle dates correctly for new DARs --- parse_dars.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/parse_dars.py b/parse_dars.py index e76bac9..2dc2bc3 100644 --- a/parse_dars.py +++ b/parse_dars.py @@ -26,7 +26,7 @@ def parse_phs_blocks(blocks): this_dar["DAR"] = None # Get the request and renewal dates. date_blocks = blocks[idx].replace(":\n", ": ").split("\n") - tmp = {k: v for k, v in (xx.split(' : ') for xx in date_blocks)} + tmp = {k.strip(): v.strip() for k, v in (xx.split(":") for xx in date_blocks)} this_dar.update(tmp) # Now find the consent group. j = idx