-
Notifications
You must be signed in to change notification settings - Fork 4
/
nextflow.config
197 lines (180 loc) · 5.6 KB
/
nextflow.config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
// Pipeline metadata reported by Nextflow for this project.
manifest {
    // Entry-point script and release identifier
    mainScript  = 'align_genomes.nf'
    version     = '0.2.0'

    // Human-readable project information
    description = 'Build a map of genes present across a set of microbial genomes'
    homePage    = 'https://github.com/FredHutch/gig-map'
}
// Execution profiles. Each profile assigns CPU and memory requests to the
// three process labels used in this config: 'io_limited', 'mem_medium'
// and 'mem_veryhigh'.
profiles {

    // Failed tasks are retried (up to 3 times) and every resource request
    // is multiplied by the attempt number, so each retry asks for more.
    standard {
        process {
            maxRetries = 3
            errorStrategy = 'retry'

            withLabel: 'io_limited' {
                memory = { 6.GB * task.attempt }
                cpus = { 1 * task.attempt }
            }
            withLabel: 'mem_medium' {
                memory = { 30.GB * task.attempt }
                cpus = { 4 * task.attempt }
            }
            withLabel: 'mem_veryhigh' {
                memory = { 120.GB * task.attempt }
                cpus = { 16 * task.attempt }
            }
        }
    }

    // Fixed resource requests: failed tasks are retried (up to 5 times)
    // with the same CPU and memory values on every attempt.
    smallmem {
        process {
            maxRetries = 5
            errorStrategy = 'retry'

            withLabel: 'io_limited' {
                memory = 1.GB
                cpus = 1
            }
            withLabel: 'mem_medium' {
                memory = 30.GB
                cpus = 4
            }
            withLabel: 'mem_veryhigh' {
                memory = 30.GB
                cpus = 8
            }
        }
    }

    // Local executor with small fixed requests; the run terminates on the
    // first task failure instead of retrying.
    testing {
        process {
            errorStrategy = 'terminate'
            executor = 'local'

            withLabel: 'io_limited' {
                memory = 2.GB
                cpus = 1
            }
            withLabel: 'mem_medium' {
                memory = 4.GB
                cpus = 2
            }
            withLabel: 'mem_veryhigh' {
                memory = 4.GB
                cpus = 2
            }
        }
    }
}
// Set default parameters
//
// Every key is declared exactly once. This block previously assigned
// `genes`, `genome_aln` and `sketch_size` more than once; the later
// assignment overrides the earlier one, so the redundant lines were removed
// and the effective values kept.
params {

    // Global
    help = false
    output = false

    // Download Genomes / Genes
    genome_tsv = ""
    ncbi_datasets_type = "genome"
    genome_csv = ""
    parse_genome_csv_suffix = "_genomic.fna.gz"
    ftp_threads = 25
    // NOTE(review): this is the string "false", not a boolean. A non-empty
    // string is truthy in Groovy, so confirm how the workflow consumes it
    // before changing the type.
    skip_missing_ftp = "false"

    // Deduplicate genes
    // (`genes` is also read by the "Align Genes and Genomes" settings below)
    genes = false
    cluster_similarity = 0.9
    cluster_coverage = 0.9
    min_gene_length = 50

    // Align Genes and Genomes
    // (`genes` is declared once, under "Deduplicate genes")
    genomes = false
    collect_results = true
    min_coverage = 90
    min_identity = 90
    aligner = 'diamond'
    query_gencode = 11
    max_evalue = 0.001
    max_overlap = 50
    aln_fmt = "qseqid sseqid pident length qstart qend qlen sstart send slen"

    // Collect
    // (`genome_aln` is also listed under "Render" below)
    genome_aln = false
    metagenome_aln = false
    gene_order = false
    marker_genes = false

    // ANI
    // NOTE(review): `sketch_size = 10000` used to be set here, but it was
    // overridden by `sketch_size = 1000` in "Sketch genomes and search".
    // The effective value (1000) is kept; if ANI needs a larger sketch,
    // introduce a separately named parameter.
    ani_thresholds = "99,95,90,80,70,60,50"

    // Align Marker Genes
    min_marker_coverage = 90
    pick_marker_genes = 10
    raxml_model = "LG+G8+F"
    raxml_starting_trees = 10
    raxml_bs_trees = 10

    // Align Metagenomes
    // Folder containing reads of interest
    reads = false
    // Optional file containing a list of files to process (R1,R2)
    samplesheet = false
    // Suffix on all reads
    reads_suffix = "fastq.gz"
    // Pattern used to join paired reads
    read_pairing_pattern = "{1,2}"
    // Boolean flag used for paired-end input
    paired = false
    // Minimum alignment score
    min_score_reads = 50
    // Minimum proportion of reads which must align
    min_coverage_reads = 50
    // Minimum percent identity for aligning reads
    min_pctiden_reads = 80
    // Resolve reads aligning to any gene with a score
    // that is no less than this percentage lower than
    // the top hit
    top_pct_reads = 5
    // Filter out any samples which have fewer than this number
    // of raw alignments (prior to FAMLI filtering)
    min_alignments = 100
    // Number of reads to resolve per batch with FAMLI
    famli_batchsize = 10000000
    // Filter out any genes for which the standard deviation
    // of sequencing depth across the length of the gene
    // exceeds this value
    famli_sd_mean_cutoff = 3.0

    // Mapping gene catalogs
    map_batchsize = 100000

    // Render
    // (`genome_aln` is declared once, under "Collect")
    genome_distmat = false
    render_options = "$projectDir/templates/render_params.json"
    render_mem_gbs = 4

    // Sketch genomes and search
    gencode = 11
    minsize = 100
    // Single authoritative definition of `sketch_size` (see the ANI note)
    sketch_size = 1000
    batchsize = 1000
    save_sketches = true
    kmer_size = 9
    search_results = "search_results"

    // Bin genes
    gene_annot = "$projectDir/templates/centroids.annot.csv.gz"
    genome_annot = "$projectDir/templates/genome_annotations.csv"
    min_genomes_per_gene = 1
    max_dist_genes = 0.05
    max_dist_genomes = 0.05
    min_bin_size = 5

    // Bin metagenomes
    read_alignments = false
    gene_bins = false
    genome_groups = false
    metadata = "$projectDir/templates/metadata.csv"
    formula = false
    incl_unaligned = false
    min_n_reads = 0
    min_n_genes = 0

    // Docker containers reused across processes
    container__pandas = "quay.io/fhcrc-microbiome/python-pandas:4fb7844"
    container__mashtree = "quay.io/hdc-workflows/mashtree:1.2.0"
    container__blast = "quay.io/biocontainers/blast:2.11.0--pl526he19e7b1_0"
    container__diamond = "quay.io/biocontainers/diamond:2.0.11--hdcc8f71_0"
    container__famli = "quay.io/fhcrc-microbiome/famli:v1.5"
    container__cdhit = "quay.io/biocontainers/cd-hit:4.8.1--h2e03b76_5"
    container__clustal = "quay.io/hdc-workflows/clustalo:main"
    container__emboss = "quay.io/biocontainers/emboss:6.6.0--hf657eab_5"
    container__raxml = "quay.io/biocontainers/raxml-ng:1.0.3--h32fcf60_0"
    container__gigmap = "quay.io/hdc-workflows/gig-map:c08eac9"
    container__mash = "staphb/mashtree:0.52.0"
    container__corncob = "quay.io/fhcrc-microbiome/corncob:84c8354"
    container__datasets = "biocontainers/ncbi-datasets-cli:15.12.0_cv23.1.0-4"
}