-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Input Data to the FastMatch Pipeline #2
Changes from 8 commits
346311e
1efe47c
aa4e0bd
dc91ff4
6448399
b12fb5c
fa831f7
8de39dd
a70d030
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
sample,mlst_alleles | ||
sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json | ||
sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json | ||
sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json | ||
sample,fastmatch_category,mlst_alleles | ||
sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json | ||
sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json | ||
sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,9 @@ params { | |
validationShowHiddenParams = false | ||
validate_params = true | ||
|
||
// FastMatch | ||
threshold = 1.0 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This isn't a big thing, but I was thinking about how my output script is handling this and I was assuming it was an integer (hamming distances). We'll have to remember to accommodate both integers and floats with this. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that if `scaled` is provided, it can be a float. Am I correct @apetkau? |
||
|
||
// Profile dists args | ||
pd_outfmt = "matrix" | ||
pd_distm = "hamming" | ||
|
@@ -54,11 +57,6 @@ params { | |
pd_columns = null | ||
pd_count_missing = false | ||
|
||
// GAS Cluster | ||
gm_thresholds = "10,5,0" | ||
gm_method = "average" | ||
gm_delimiter = "." | ||
|
||
// Metadata | ||
metadata_1_header = "metadata_1" | ||
metadata_2_header = "metadata_2" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -104,6 +104,20 @@ | |
}, | ||
"fa_icon": "far fa-clipboard" | ||
}, | ||
"fastmatch": { | ||
"title": "FastMatch", | ||
"type": "object", | ||
"description": "Parameters for FastMatch", | ||
"default": "", | ||
"properties": { | ||
"threshold": { | ||
"type": "number", | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Could you set a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good idea! 8de39dd |
||
"description": "Comparison score threshold value", | ||
"default": 1.0, | ||
"minimum": 0 | ||
} | ||
} | ||
}, | ||
"profile_dists": { | ||
"title": "Profile Dists", | ||
"type": "object", | ||
|
@@ -168,32 +182,6 @@ | |
} | ||
} | ||
}, | ||
"gas_cluster": { | ||
"title": "GAS Cluster", | ||
"type": "object", | ||
"description": "", | ||
"default": "Parameters for GAS mcluster", | ||
"properties": { | ||
"gm_thresholds": { | ||
"type": "string", | ||
"default": "10,5,0", | ||
"description": "Thresholds delimited by ','. Values should match units from '--pd_distm' (either 'hamming' or 'scaled').", | ||
"pattern": "^(\\d+(\\.\\d+)?,)*\\d+(\\.\\d+)?$" | ||
}, | ||
"gm_method": { | ||
"type": "string", | ||
"default": "average", | ||
"description": "Clustering linkage method.", | ||
"enum": ["single", "average", "complete"] | ||
}, | ||
"gm_delimiter": { | ||
"type": "string", | ||
"default": ".", | ||
"description": "Delimiter desired for nomenclature code. Must be alphanumeric or one of [._-].", | ||
"pattern": "^[A-Fa-f0-9\\._-]+$" | ||
} | ||
} | ||
}, | ||
"institutional_config_options": { | ||
"title": "Institutional config options", | ||
"type": "object", | ||
|
@@ -353,13 +341,13 @@ | |
"$ref": "#/definitions/input_output_options" | ||
}, | ||
{ | ||
"$ref": "#/definitions/metadata" | ||
"$ref": "#/definitions/fastmatch" | ||
}, | ||
{ | ||
"$ref": "#/definitions/profile_dists" | ||
"$ref": "#/definitions/metadata" | ||
}, | ||
{ | ||
"$ref": "#/definitions/gas_cluster" | ||
"$ref": "#/definitions/profile_dists" | ||
}, | ||
{ | ||
"$ref": "#/definitions/institutional_config_options" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
sample,sample_name,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,S 1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 | ||
sample2,S2#,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 | ||
sample3,S2_,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 | ||
sample,fastmatch_category,sample_name,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,query,S 1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 | ||
sample2,query,S2#,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 | ||
sample3,reference,S2_,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample1.mlst.subtyping.json,,,,,,,, | ||
sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample2.mlst.subtyping.json,,,,,,,, | ||
sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample3.mlst.subtyping.json,,,,,,,, | ||
sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample1.mlst.subtyping.json,,,,,,,, | ||
sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample2.mlst.subtyping.json,,,,,,,, | ||
sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hamming/sample3.mlst.subtyping.json,,,,,,,, |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,, | ||
sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,, | ||
sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3.mlst.subtyping.json,,,,,,,, | ||
sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,, | ||
sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,, | ||
sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3.mlst.subtyping.json,,,,,,,, |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,, | ||
sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,, | ||
sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3-more-missing.mlst.subtyping.json,,,,,,,, | ||
sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample1.mlst.subtyping.json,,,,,,,, | ||
sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample2.mlst.subtyping.json,,,,,,,, | ||
sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/case-hash-missing/sample3-more-missing.mlst.subtyping.json,,,,,,,, |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,1.4,,,, | ||
sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,, | ||
sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,,,,,,3.8 | ||
sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,1.4,,,, | ||
sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,, | ||
sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,,,,,,3.8 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sampleA,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 | ||
sampleB,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 | ||
sampleC,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 | ||
sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sampleA,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 | ||
sampleB,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 | ||
sampleC,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,,,,, | ||
sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,, | ||
sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,, | ||
sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,,,,,,,, | ||
sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,,,,, | ||
sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,, |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sampleA,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 | ||
sampleB,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 | ||
sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 | ||
sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sampleA,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 | ||
sampleB,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 | ||
sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,a b,,,,,,, | ||
sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,a b,,,, | ||
sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,,a b | ||
sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,a b,,,,,,, | ||
sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,,,,a b,,,, | ||
sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,,,,,,,,a b |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
sample,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 | ||
sample2,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 | ||
sample3,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 | ||
sample,fastmatch_category,mlst_alleles,metadata_1,metadata_2,metadata_3,metadata_4,metadata_5,metadata_6,metadata_7,metadata_8 | ||
sample1,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample1.mlst.json,1.1,1.2,1.3,1.4,1.5,1.6,1.7,1.8 | ||
sample2,query,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample2.mlst.json,2.1,2.2,2.3,2.4,2.5,2.6,2.7,2.8 | ||
sample3,reference,https://raw.githubusercontent.com/phac-nml/gasclustering/dev/tests/data/reports/sample3.mlst.json,3.1,3.2,3.3,3.4,3.5,3.6,3.7,3.8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the specific behaviour of this column may need a bit further discussion later on, but it is something we can leave as-is for this PR (and likely for this sprint, to get feedback from others).
Specifically, on the Nextflow side, the data in this column is being moved into the `meta` object for each sample. However, we cannot use the keyword `"meta"` in this schema JSON file, since that is used by IRIDA Next to load data from the metadata table in IRIDA Next.
I think it would make most sense to actually use the `"meta"` keyword in this JSON file, but maybe change the behaviour of IRIDA Next somehow? Or, to allow loading of a metadata column OR user-entered values to set query/reference samples.
However, as this is a more complex use case, it requires further discussion. So this is good as-is now. I just wanted to make a note here about this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on my question, the issue was sort of raised. Might be worth a formal discussion, I agree. I did not like my workaround.