Skip to content

Commit

Permalink
phabox initial commit (Helmholtz-UFZ#68)
Browse files Browse the repository at this point in the history
* phabox initial commit

* review comments and add docs

* add doc

* add data manager
  • Loading branch information
bernt-matthias authored Nov 12, 2024
1 parent 129359f commit 3c8e23e
Show file tree
Hide file tree
Showing 23 changed files with 937 additions and 0 deletions.
1 change: 1 addition & 0 deletions data_managers/data_manager_phabox/.lint_skip
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
InputsMissing
10 changes: 10 additions & 0 deletions data_managers/data_manager_phabox/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Tool Shed metadata for the PhaBOX reference-data manager repository.
name: phabox_build_database
owner: ufz
type: unrestricted
description: Data managers for PhaBOX reference data
long_description: Data managers for PhaBOX reference data
categories:
  - Data Managers
  - Metagenomics
# Upstream project and the location of this wrapper's sources.
homepage_url: https://github.com/KennthShang/PhaBOX
remote_repository_url: https://github.com/Helmholtz-UFZ/ufz-galaxy-tools/blob/main/data_managers/data_manager_phabox
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
<tool id="phabox_build_database" name="PhaBOX" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <!-- Data manager tool: fetches the PhaBOX reference database archive, unpacks it
         in the job directory and writes the JSON describing the new "phabox" data
         table entry. -->
    <description>database builder</description>
    <macros>
        <token name="@TOOL_VERSION@">2.1.5</token>
        <!-- Database release: used in the archive name, the recorded path and the
             name/version/value of the data table entry. -->
        <token name="@DB_VERSION@">2</token>
        <token name="@VERSION_SUFFIX@">0</token>
        <token name="@PROFILE@">22.01</token>
    </macros>
    <requirements>
        <requirement type="package" version="6.0">unzip</requirement>
        <requirement type="package" version="1.21.4">wget</requirement>
    </requirements>
    <!-- The archive name must agree with the "path" recorded in the dmjson config
         file below (archive name without the .zip suffix). -->
    <command detect_errors="exit_code"><![CDATA[
        wget https://github.com/KennthShang/PhaBOX/releases/download/v2/phabox_db_v@DB_VERSION@.zip &&
        unzip phabox_db_v@DB_VERSION@.zip &&
        cp '$dmjson' '$out_file'
    ]]></command>
    <configfiles>
        <!-- Data manager JSON: one new row for the "phabox" data table. The path is
             relative to the job directory; data_manager_conf.xml is expected to
             move it and translate it to its final location. -->
        <configfile name="dmjson"><![CDATA[
{
    "data_tables":{
        "phabox":[
            {
                "path":"phabox_db_v@DB_VERSION@",
                "name":"Version @DB_VERSION@",
                "version":"@DB_VERSION@",
                "value":"@DB_VERSION@"
            }
        ]
    }
}]]>
        </configfile>
    </configfiles>
    <inputs/>
    <outputs>
        <data name="out_file" format="data_manager_json" />
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <output name="out_file">
                <assert_contents>
                    <!-- Both assertions follow the DB version token, so bumping the
                         token does not require touching the test. -->
                    <has_text text='"value":"@DB_VERSION@"'/>
                    <has_text text='"name":"Version @DB_VERSION@"'/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <!-- NOTE(review): the help mentions the tool version, but the table entry is
         versioned with the DB version token; confirm the intended wording. -->
    <help><![CDATA[
Download and extract PhaBOX reference data. The current tool version is used for versioning. For the user only the major version is shown
]]></help>
    <citations>
        <citation type="doi">10.1093/bioadv/vbad101</citation>
    </citations>
</tool>
19 changes: 19 additions & 0 deletions data_managers/data_manager_phabox/data_manager_conf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<data_managers>
    <!-- Binds the phabox_build_database tool to the "phabox" data table. -->
    <data_manager tool_file="data_manager/phabox_datamanager.xml" id="phabox_build_database">
        <data_table name="phabox">
            <output>
                <!-- value, name and version are taken from the tool's JSON output. -->
                <column name="value"/>
                <column name="name"/>
                <column name="version"/>
                <!-- path: the directory named in the JSON is moved below the data
                     manager data path, then the stored value is rewritten to that
                     location and made absolute. -->
                <column name="path" output_ref="out_file">
                    <move type="directory">
                        <source>${path}</source>
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">phabox/${path}</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/phabox/${path}</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
            </output>
        </data_table>
    </data_manager>
</data_managers>
2 changes: 2 additions & 0 deletions data_managers/data_manager_phabox/tool-data/phabox.loc.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Format (one entry per line, columns separated by tabs):
# value<tab>name<tab>version<tab>path
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<tables>
    <!-- Location table for the PhaBOX reference data; lines starting with "#" are comments. -->
    <table name="phabox" comment_char="#">
        <columns>value, name, version, path</columns>
        <file path="tool-data/phabox.loc"/>
    </table>
</tables>
24 changes: 24 additions & 0 deletions tools/phabox/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Tool Shed metadata for the PhaBOX tool wrappers.
categories:
  - Metagenomics
description: Identify and analyze phage contigs in metagenomic data
long_description: |
  PhaBOX can comprehensively identify and analyze phage contigs in metagenomic
  data. It supports integrated phage analysis, including phage contig
  identification from the metagenomic assembly, lifestyle prediction, taxonomic
  classification, and host prediction.
name: phabox
owner: ufz
homepage_url: https://github.com/KennthShang/PhaBOX
remote_repository_url: https://github.com/Helmholtz-UFZ/ufz-galaxy-tools/blob/main/tools/phabox
type: unrestricted
# One Tool Shed repository is generated per tool; the templates below must stay
# nested under this key so they do not clash with the top-level name/description.
auto_tool_repositories:
  name_template: "{{ tool_id }}"
  description_template: "Wrapper for phabox task: {{ tool_name }}."
# Suite repository bundling all generated tool repositories.
suite:
  name: "suite_phabox"
  description: "A suite of tools that brings the phabox project into Galaxy."
  long_description: |
    PhaBOX can comprehensively identify and analyze phage contigs in metagenomic
    data. It supports integrated phage analysis, including phage contig
    identification from the metagenomic assembly, lifestyle prediction, taxonomic
    classification, and host prediction.
54 changes: 54 additions & 0 deletions tools/phabox/cherry.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
<tool id="phabox_cherry" name="PhaBOX cherry" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.1" license="MIT">
    <!-- Wrapper for the phabox2 "cherry" task (host prediction). Shared tokens and
         macros (inputs, command fragments, help snippets, citations) are imported
         from macros.xml. -->
    <description>Host prediction</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <xrefs>
        <xref type="bio.tools">phabox</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">phabox</requirement>
    </requirements>
    <!-- The CRISPR_PRE token is expanded before the phabox2 call; the remaining
         tokens append the option groups matching the expanded input macros. -->
    <command detect_errors="exit_code"><![CDATA[
        @CRISPR_PRE@
        phabox2 --task cherry
        @GENERAL@
        @NETWORK@
        @CRISPR@
    ]]></command>
    <inputs>
        <expand macro="general"/>
        <expand macro="network"/>
        <expand macro="crispr"/>
    </inputs>
    <outputs>
        <!-- Prediction table collected from the phabox2 output directory. -->
        <data name="out" format="tabular" from_work_dir="output/final_prediction/cherry_prediction.tsv"/>
    </outputs>
    <tests>
        <!-- The tool declares a single output; stating the expected count lets the
             test fail if outputs are added or dropped unintentionally. -->
        <test expect_num_outputs="1">
            <param name="dbdir" value="phaboxdb"/>
            <param name="contigs" value="example_contigs.fa"/>
            <output name="out">
                <assert_contents>
                    <has_line line="Accession&#9;Length&#9;Host&#9;CHERRYScore&#9;Method&#9;Host_NCBI_lineage&#9;Host_GTDB_lineage"/>
                    <has_n_lines n="11"/>
                    <has_n_columns n="7"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Predict hosts for viruses.
@COMMON_INPUT_DOC@
**Output**
@COMMON_OUTPUT_DOC@
@CHERRY_OUTPUT_DOC@
]]></help>
    <expand macro="citations">
        <citation type="doi">10.1093/bib/bbac182</citation>
    </expand>
</tool>
58 changes: 58 additions & 0 deletions tools/phabox/contamination.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
<tool id="phabox_contamination" name="PhaBOX contamination" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.1" license="MIT">
    <!-- Wrapper for the phabox2 "contamination" task (contamination / provirus
         detection). Shared tokens and macros are imported from macros.xml. -->
    <description>Contamination/provirus detection</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <xrefs>
        <xref type="bio.tools">phabox</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">phabox</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        phabox2 --task contamination
        @GENERAL@
        @CONTAMINATION@
    ]]></command>
    <inputs>
        <expand macro="general"/>
        <expand macro="contamination"/>
    </inputs>
    <outputs>
        <!-- Prediction table collected from the phabox2 output directory. -->
        <data name="out" format="tabular" from_work_dir="output/final_prediction/contamination_prediction.tsv"/>
    </outputs>
    <tests>
        <!-- The tool declares a single output; stating the expected count lets the
             test fail if outputs are added or dropped unintentionally. -->
        <test expect_num_outputs="1">
            <param name="dbdir" value="phaboxdb"/>
            <param name="contigs" value="example_contigs.fa"/>
            <output name="out">
                <assert_contents>
                    <has_line line="Accession&#9;Length&#9;Total_genes&#9;Viral_genes&#9;Prokaryotic_genes&#9;Kmer_freq&#9;Contamination&#9;Provirus&#9;Pure_viral"/>
                    <has_n_lines n="11"/>
                    <has_n_columns n="9"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <!-- NOTE(review): the "Contamination" column in the help below has no
         description yet; fill it in from the PhaBOX documentation. -->
    <help><![CDATA[
Check for contaminations / proviruses.
@COMMON_INPUT_DOC@
**Output**:
@COMMON_OUTPUT_DOC@
- Total_genes: number of genes in the contigs (predicted by prodigal-gv)
- Viral_genes: number of viral marker genes
- Prokaryotic_genes: number of prokaryotic marker genes
- Kmer_freq: average frequency of 20-mer. This is a value to estimate the copy number of the genes; usually, the Kmer_freq of 99.9% virus is less than 1.25.
- Contamination:
- Provirus: Whether the sequence is a provirus
- Pure_viral: High quality or Medium quality or Low quality
]]></help>
    <expand macro="citations"/>
</tool>
76 changes: 76 additions & 0 deletions tools/phabox/end_to_end.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
<tool id="phabox_end_to_end" name="PhaBOX end to end" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.1" license="MIT">
    <!-- Wrapper for the phabox2 "end_to_end" task. Shared tokens and macros are
         imported from macros.xml. -->
    <description/>
    <macros>
        <import>macros.xml</import>
    </macros>
    <xrefs>
        <xref type="bio.tools">phabox</xref>
    </xrefs>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">phabox</requirement>
    </requirements>
    <!-- The CRISPR_PRE token is expanded before the phabox2 call; the remaining
         tokens append the option groups matching the expanded input macros. -->
    <command detect_errors="exit_code"><![CDATA[
        @CRISPR_PRE@
        phabox2 --task end_to_end
        @GENERAL@
        @PHAMER@
        @NETWORK@
        @CRISPR@
    ]]></command>
    <inputs>
        <expand macro="general"/>
        <expand macro="phamer"/>
        <expand macro="network"/>
        <expand macro="crispr"/>
        <!-- Optional multi-select; the option values correspond to the task names
             passed to the supp_out macro in the outputs section. -->
        <param name="supplements" type="select" multiple="true" optional="true" label="Output supplementary collections">
            <option value="phamer">phamer</option>
            <option value="phagcn">phagcn</option>
            <option value="cherry">cherry</option>
        </param>
    </inputs>
    <outputs>
        <!-- Joined summary table collected from the phabox2 output directory. -->
        <data name="end_to_end_out" format="tabular" from_work_dir="output/final_prediction/final_prediction_summary.tsv"/>
        <expand macro="supp_out" task="phamer"/>
        <expand macro="supp_out" task="phagcn"/>
        <expand macro="supp_out" task="cherry"/>
    </outputs>
    <tests>
        <!-- Default run: no supplements selected, only the summary table is expected. -->
        <test expect_num_outputs="1">
            <param name="contigs" value="example_contigs.fa"/>
            <param name="dbdir" value="phaboxdb"/>
            <output name="end_to_end_out">
                <assert_contents>
                    <has_text text="Accession&#9;Length&#9;Pred&#9;Proportion&#9;PhaMerScore&#9;PhaMerConfidence&#9;Lineage&#9;PhaGCNScore&#9;Genus&#9;GenusCluster&#9;TYPE&#9;PhaTYPScore&#9;Host&#9;CHERRYScore&#9;Method&#9;Host_NCBI_lineage&#9;Host_GTDB_lineage"/>
                    <has_n_columns n="17"/>
                    <has_n_lines n="11"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
.. class:: infomark
**What it does**
Runs the phabox2 pipeline, i.e. it runs
- phamer: Virus identification
- phagcn: Taxonomic classification
- phatyp: Lifestyle prediction
- cherry: Host prediction
The outputs of the separate tools are joined into one big table.
@COMMON_INPUT_DOC@
**Output**
@COMMON_OUTPUT_DOC@
@PHAMER_OUTPUT_DOC@
@PHAGCN_OUTPUT_DOC@
@PHATYP_OUTPUT_DOC@
@CHERRY_OUTPUT_DOC@
]]></help>
    <expand macro="citations"/>
</tool>
Loading

0 comments on commit 3c8e23e

Please sign in to comment.