-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgsr_data_report.wdl
90 lines (77 loc) · 2.29 KB
/
gsr_data_report.wdl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
version 1.0
# Workflow gsr_data_report
#
# Thin wrapper around the validate_data task: the four workflow inputs are
# forwarded to the task unchanged, and the task's two outputs are re-exported
# as the workflow outputs.
workflow gsr_data_report {
    input {
        # File handed to the task's data_file input.
        File data_file
        # Location of the data dictionary, passed through as a plain string.
        String dd_url
        # Table name forwarded to the task's dd_table_name input.
        String dd_table_name
        # File handed to the task's analysis_file input.
        File analysis_file
    }

    # Single call; every task input is bound to the workflow input of the same name.
    call validate_data {
        input:
            data_file = data_file,
            dd_url = dd_url,
            dd_table_name = dd_table_name,
            analysis_file = analysis_file
    }

    output {
        # Report file produced by validate_data.
        File validation_report = validate_data.validation_report
        # Boolean produced by validate_data.
        Boolean pass_checks = validate_data.pass_checks
    }

    meta {
        author: "Stephanie Gogarten"
        # NOTE(review): this value looks like an obfuscated placeholder rather
        # than a real address - confirm against the upstream file.
        email: "[email protected]"
    }
}
# Task validate_data
#
# Runs /usr/local/primed-file-checks/gsr_data_report.R (provided by the
# container image) with the task inputs as command-line options.
#
# Inputs:
#   data_file      - file passed to the script's --data_file option
#   dd_url         - string passed to the script's --dd_file option
#   dd_table_name  - string passed to the script's --dd_table_name option
#   analysis_file  - file passed to the script's --analysis_file option
#
# Outputs:
#   validation_report - the file data_dictionary_validation.html, expected in
#                       the task's working directory after the script runs
#   pass_checks       - boolean parsed from the contents of pass.txt
task validate_data {
    input {
        File data_file
        String dd_url
        String dd_table_name
        File analysis_file
    }

    # Each interpolated value is wrapped in single quotes so the shell passes it
    # to Rscript as one literal argument. Unquoted, a URL containing '&' or '?',
    # a table name with spaces, or a localized path with spaces would be split,
    # globbed, or treated as shell syntax.
    # Limitation: a value that itself contains a single quote is still not handled.
    # NOTE(review): --stop_on_fail presumably makes the script exit non-zero when
    # checks fail; if so, pass_checks may never be read as false - confirm
    # against the R script.
    command <<<
        Rscript /usr/local/primed-file-checks/gsr_data_report.R \
            --data_file '~{data_file}' \
            --dd_file '~{dd_url}' \
            --dd_table_name '~{dd_table_name}' \
            --analysis_file '~{analysis_file}' \
            --stop_on_fail
    >>>

    output {
        File validation_report = "data_dictionary_validation.html"
        Boolean pass_checks = read_boolean("pass.txt")
    }

    runtime {
        docker: "uwgac/primed-file-checks:0.5.1-1"
        disks: "local-disk 16 SSD"
        memory: "8G"
    }
}
# Task summarize_data_check
#
# Combines three parallel arrays into one table and a one-line summary.
#
# Inputs:
#   file              - strings written to the file_path column
#   data_check        - booleans; 'true' is reported as PASS, anything else as FAIL
#   validation_report - files whose written-out lines fill the validation_report column
#
# Outputs:
#   summary - contents of summary.txt: one "<count> <PASS|FAIL>" entry per
#             status present, joined with ', '
#   details - details.txt, a tab-separated table with columns file_path,
#             data_check and validation_report
#
# NOTE(review): validation_report is declared Array[File], so the values that
# write_lines() emits are whatever paths the engine resolves for those files -
# confirm that these are the paths wanted in details.txt.
task summarize_data_check {
    input {
        Array[String] file
        Array[Boolean] data_check
        Array[File] validation_report
    }

    # The R program is passed as a single double-quoted -e argument; the trailing
    # backslashes join it into one line, so statements are separated by ';' and
    # no R comments can appear inside it.
    command <<<
        Rscript -e "\
        library(dplyr); \
        details <- tibble( \
            file_path = readLines('~{write_lines(file)}'), \
            data_check = readLines('~{write_lines(data_check)}'), \
            validation_report = readLines('~{write_lines(validation_report)}') \
        ) %>% \
            mutate(data_check = ifelse(data_check == 'true', 'PASS', 'FAIL')); \
        readr::write_tsv(details, 'details.txt'); \
        tally <- details %>% \
            count(data_check) %>% \
            mutate(x = paste(n, data_check)); \
        writeLines(paste(tally[['x']], collapse = ', '), 'summary.txt'); \
        "
    >>>

    output {
        String summary = read_string("summary.txt")
        File details = "details.txt"
    }

    runtime {
        docker: "us.gcr.io/broad-dsp-gcr-public/anvil-rstudio-bioconductor:3.17.0"
        disks: "local-disk 16 SSD"
        memory: "8G"
    }
}