-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathdeduplicate.nf
55 lines (41 loc) · 1.82 KB
/
deduplicate.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
#!/usr/bin/env nextflow
// Standalone entry script for the gene deduplication sub-workflow.

// Enable the DSL-2 syntax (required for `include` and `workflow` blocks)
nextflow.enable.dsl=2
// Load the shared helper functions: helpers.gvy is read from the pipeline's
// project directory and parsed with a GroovyShell; the resulting `helpers`
// object is what the workflow below calls (help_message, require_param)
GroovyShell shell = new GroovyShell()
def helpers = shell.parse(new File("${workflow.projectDir}/helpers.gvy"))
// Import the `deduplicate` sub-workflow from the local modules folder
include { deduplicate } from './modules/deduplicate'
// Standalone entrypoint
// Standalone entrypoint.
//
// Reads:  params.help, params.genes, params.output, plus the clustering
//         parameters that are interpolated into the help text.
// Does:   shows the help text (via helpers.help_message), checks that the
//         required parameters were supplied (via helpers.require_param),
//         collects the files found under the --genes folder into a channel,
//         and passes that channel to the `deduplicate` sub-workflow.
// Fails:  with an explicit error when no files match the gene folder glob.
workflow {
    // Show help message if the user specifies the --help flag at runtime
    helpers.help_message(
        """
Dedicated gene deduplication utility
Clusters a collection of gene sequences by amino acid similarity and outputs
the centroids of each cluster.
Parameters:
--genes Folder containing the amino acid gene FASTA files to analyze
--output Folder where output files will be written
--cluster_similarity Amino acid similarity used for clustering (ranges from 0.0 to 1.0)
(default: ${params.cluster_similarity})
--cluster_coverage Alignment coverage used for clustering (ranges from 0.0 to 1.0)
(default: ${params.cluster_coverage})
--min_gene_length Minimum amino acid length threshold used to filter genes
(default: ${params.min_gene_length})
""",
        params.help
    )

    // Make sure that the required parameters were provided
    helpers.require_param(params.output, "output")
    helpers.require_param(params.genes, "genes")

    // Remove any trailing slash from the gene folder so the glob below does
    // not contain a doubled separator. Declared with `def` to keep it local
    // to this workflow instead of leaking into the script binding.
    def gene_folder = params.genes.replaceAll('/$', '')

    // Build the glob once so the pattern that is searched and the pattern
    // that is reported on failure can never drift apart. The `**` wildcard
    // also matches files located in sub-folders of the gene folder.
    def gene_glob = "${gene_folder}/**"

    // Get all of the files in the specified folder
    Channel
        .fromPath(gene_glob)
        .ifEmpty { error "Cannot find any files at ${gene_glob}" }
        .set { gene_ch }

    // Run the deduplication utility on those files
    deduplicate(gene_ch)
}