Skip to content

Commit

Permalink
Merge pull request #5 from UW-GAC/feature/add-dates
Browse files Browse the repository at this point in the history
Add request and renewal dates to the output
  • Loading branch information
amstilp authored Jan 8, 2025
2 parents d4432be + f1b1f6e commit 74e0cba
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 5 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,4 +161,5 @@ cython_debug/

###########
# Custom additions
-dars.tsv
+dars.tsv
+*.html
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,6 @@ python parse_dars.py <path_to_application_pdf> <path_to_output_tsv>
## Updating the docker images

```
-docker build -t uwgac/parse-dbgap-application:<tag> .
+docker build --no-cache -t uwgac/parse-dbgap-application:<tag> .
docker push uwgac/parse-dbgap-application:<tag>
```
16 changes: 16 additions & 0 deletions dar_report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,19 @@ tmp <- dars %>%
nrow(tmp)
tmp %>% kable()
```

+# Dates
+
+## Request date
+
+```{r}
+ggplot(dars, aes(request_date)) +
+geom_histogram()
+```
+
+## Renewal date
+
+```{r}
+ggplot(dars, aes(last_renewal_date)) +
+geom_histogram()
+```
9 changes: 8 additions & 1 deletion parse_dars.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@


def parse_phs_blocks(blocks):
+#import ipdb; ipdb.set_trace()
phs = blocks[0]
request_date_idx = [i for i, item in enumerate(blocks) if re.search("Request Date", item)]
dar_list = []
Expand All @@ -23,6 +24,10 @@ def parse_phs_blocks(blocks):
this_dar["DAR"] = m.string.split(" : ")[1]
else:
this_dar["DAR"] = None
+# Get the request and renewal dates.
+date_blocks = blocks[idx].replace(":\n", ": ").split("\n")
+tmp = {k.strip(): v.strip() for k, v in (xx.split(":") for xx in date_blocks)}
+this_dar.update(tmp)
# Now find the consent group.
j = idx
try:
Expand Down Expand Up @@ -88,7 +93,9 @@ def parse_phs_blocks(blocks):
# Convert to pandas data frame and write to tsv.
df = pd.DataFrame(dars)
df = df.rename(columns={
-"Abbreviation": "consent_group"
+"Abbreviation": "consent_group",
+"Request Date": "request_date",
+"Last Renewal Date": "last_renewal_date",
})

# Replace newlines in study with spaces.
Expand Down
4 changes: 2 additions & 2 deletions parse_dbgap_application.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ task extract_dars {
}

runtime {
-docker: "uwgac/parse-dbgap-application:0.1"
+docker: "uwgac/parse-dbgap-application:0.2"
}
}

Expand All @@ -54,6 +54,6 @@ task render_report {
File dar_report = "dar_report.html"
}
runtime {
-docker: "uwgac/parse-dbgap-application:0.1"
+docker: "uwgac/parse-dbgap-application:0.2"
}
}

0 comments on commit 74e0cba

Please sign in to comment.