From 26c0430ef29087eab31031a9a83f9a4675a038a6 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Sat, 4 Nov 2023 22:50:43 +0100 Subject: [PATCH 01/49] rm prodigal dep --- binette.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/binette.yaml b/binette.yaml index fc1a784..49e80b5 100644 --- a/binette.yaml +++ b/binette.yaml @@ -8,7 +8,6 @@ dependencies: - numpy=1.19.2 - diamond=2.0.4 - pandas=1.4.0 - - prodigal=2.6.3 - checkm2 - setuptools - requests From 8adfe17e6cf034a1df256471139c7db2d3bf9169 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 6 Nov 2023 09:31:14 +0100 Subject: [PATCH 02/49] minor update --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 4ad8498..0951cac 100644 --- a/README.md +++ b/README.md @@ -163,11 +163,10 @@ The `final_bins_quality_reports.tsv` file contains the following columns: | **N50** | Displays the N50 of the bin. | | **contig_count** | The number of contigs contained within the bin. | -# Bug reporting and feature requests +# Feature requests and bug reporting -Please submit bug reports and feature requests to the issue tracker: +Please submit bug reports and feature requests by opening an [issue](https://github.com/genotoul-bioinfo/Binette/issues). 
-[binette issue tracker](https://github.com/genotoul-bioinfo/Binette/issues) # Licence From 0ca02f0aeddc42549528f1c4877ee9e5e8202cea Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 9 Nov 2023 23:49:53 +0100 Subject: [PATCH 03/49] first draft for the joss paper --- paper/paper.bib | 145 ++++++++++++++++++++++++++++++++++++++++++++++++ paper/paper.md | 41 ++++++++++++++ 2 files changed, 186 insertions(+) create mode 100644 paper/paper.bib create mode 100644 paper/paper.md diff --git a/paper/paper.bib b/paper/paper.bib new file mode 100644 index 0000000..5b2302f --- /dev/null +++ b/paper/paper.bib @@ -0,0 +1,145 @@ +@article{Nayfach_Shi_Seshadri_Pollard_Kyrpides_2019, +title={New insights from uncultivated genomes of the global human gut microbiome}, +volume={568}, +DOI={10.1038/s41586-019-1058-x}, +number={7753}, +journal={Nature}, +author={Nayfach, +Stephen and Shi, +Zhou Jason and Seshadri, +Rekha and Pollard, +Katherine S. and Kyrpides, +Nikos C.}, +year={2019}, +month={Mar}, +pages={505–510} } + + +@article{Acinas_Sánchez_et_al_2021, +title={Deep ocean metagenomes provide insight into the metabolic architecture of bathypelagic microbial communities}, +volume={4}, +DOI={10.1038/s42003-021-02112-2}, +number={1}, +journal={Communications Biology}, +author={Acinas, Silvia G. and Sánchez, Pablo and Salazar, Guillem and Cornejo-Castillo, Francisco M. and Sebastián, Marta and Logares, Ramiro and Royo-Llonch, Marta and Paoli, Lucas and Sunagawa, Shinichi and Hingamp, Pascal and Ogata, Hiroyuki and Lima-Mendez, Gipsi and Roux, Simon and González, José M. and Arrieta, Jesús M. and Alam, Intikhab S. and Kamau, Allan and Bowler, Chris and Raes, Jeroen and Pesant, Stéphane and Bork, Peer and Agustí, Susana and Gojobori, Takashi and Vaqué, Dolors and Sullivan, Matthew B. and Pedrós-Alió, Carlos and Massana, Ramon and Duarte, Carlos M. 
and Gasol, Josep M.}, +year={2021}, +month={May}, +pages={1–15} } + +@article{kang2019metabat, + title={MetaBAT 2: an adaptive binning algorithm for robust and efficient genome reconstruction from metagenome assemblies}, + author={Kang, Dongwan D and Li, Feng and Kirton, Edward and Thomas, Ashleigh and Egan, Rob and An, Hong and Wang, Zhong}, + journal={PeerJ}, + volume={7}, + pages={e7359}, + year={2019}, + publisher={PeerJ Inc.} +} + +@article{alneberg2013concoct, + title={CONCOCT: clustering contigs on coverage and composition}, + author={Alneberg, Johannes and Bjarnason, Brynjar Sm{\'a}ri and de Bruijn, Ino and Schirmer, Melanie and Quick, Joshua and Ijaz, Umer Z and Loman, Nicholas J and Andersson, Anders F and Quince, Christopher}, + journal={arXiv preprint arXiv:1312.4038}, + year={2013} +} + +@article{nissen2021improved, + title={Improved metagenome binning and assembly using deep variational autoencoders}, + author={Nissen, Jakob Nybo and Johansen, Joachim and Alles{\o}e, Rosa Lundbye and S{\o}nderby, Casper Kaae and Armenteros, Jose Juan Almagro and Gr{\o}nbech, Christopher Heje and Jensen, Lars Juhl and Nielsen, Henrik Bj{\o}rn and Petersen, Thomas Nordahl and Winther, Ole and others}, + journal={Nature biotechnology}, + volume={39}, + number={5}, + pages={555--560}, + year={2021}, + publisher={Nature Publishing Group US New York} +} + + +@article{sieber2018dastool, + title={Recovery of genomes from metagenomes via a dereplication, aggregation and scoring strategy}, + author={Sieber, Christian MK and Probst, Alexander J and Sharrar, Allison and Thomas, Brian C and Hess, Matthias and Tringe, Susannah G and Banfield, Jillian F}, + journal={Nature microbiology}, + volume={3}, + number={7}, + pages={836--843}, + year={2018}, + publisher={Nature Publishing Group UK London} +} + + +@article{ruhlemann2022magscot, + title={MAGScoT: a fast, lightweight and accurate bin-refinement tool}, + author={R{\"u}hlemann, Malte Christoph and Wacker, Eike Matthias and 
Ellinghaus, David and Franke, Andre}, + journal={Bioinformatics}, + volume={38}, + number={24}, + pages={5430--5433}, + year={2022}, + publisher={Oxford University Press} +} + +@article{uritskiy2018metawrap, + title={MetaWRAP—a flexible pipeline for genome-resolved metagenomic data analysis}, + author={Uritskiy, Gherman V and DiRuggiero, Jocelyne and Taylor, James}, + journal={Microbiome}, + volume={6}, + number={1}, + pages={1--13}, + year={2018}, + publisher={BioMed Central} +} + +@article{meyer2022critical, + title={Critical assessment of metagenome interpretation: the second round of challenges}, + author={Meyer, Fernando and Fritz, Adrian and Deng, Zhi-Luo and Koslicki, David and Lesker, Till Robin and Gurevich, Alexey and Robertson, Gary and Alser, Mohammed and Antipov, Dmitry and Beghini, Francesco and others}, + journal={Nature methods}, + volume={19}, + number={4}, + pages={429--440}, + year={2022}, + publisher={Nature Publishing Group US New York} +} + +@article{parks2015checkm, + title={CheckM: assessing the quality of microbial genomes recovered from isolates, single cells, and metagenomes}, + author={Parks, Donovan H and Imelfort, Michael and Skennerton, Connor T and Hugenholtz, Philip and Tyson, Gene W}, + journal={Genome research}, + volume={25}, + number={7}, + pages={1043--1055}, + year={2015}, + publisher={Cold Spring Harbor Lab} +} + +@article{chklovski2023checkm2, + title={CheckM2: a rapid, scalable and accurate tool for assessing microbial genome quality using machine learning}, + author={Chklovski, Alex and Parks, Donovan H and Woodcroft, Ben J and Tyson, Gene W}, + journal={Nature Methods}, + volume={20}, + number={8}, + pages={1203--1212}, + year={2023}, + publisher={Nature Publishing Group US New York} +} + +@article{buchfink2015diamond, + title={Fast and sensitive protein alignment using DIAMOND}, + author={Buchfink, Benjamin and Xie, Chao and Huson, Daniel H}, + journal={Nature methods}, + volume={12}, + number={1}, + pages={59--60}, + 
year={2015}, + publisher={Nature Publishing Group US New York} +} + +@article{larralde2022pyrodigal, + title={Pyrodigal: Python bindings and interface to Prodigal, an efficient method for gene prediction in prokaryotes}, + author={Larralde, Martin}, + journal={Journal of Open Source Software}, + volume={7}, + number={72}, + pages={4296}, + year={2022} +} + diff --git a/paper/paper.md b/paper/paper.md new file mode 100644 index 0000000..3408263 --- /dev/null +++ b/paper/paper.md @@ -0,0 +1,41 @@ +--- +title: 'Binette: a fast and accurate bin refinement tool to construct high quality Metagenome Assembled Genomes.' +tags: + - Python + - Metagenomics + - Binning + - Bin refinement + - MAGs + +authors: + - name: Jean Mainguy + orcid: 0009-0006-9160-9744 + affiliation: 1 + - name: Claire Hoede + orcid: 0000-0001-5054-7731 + affiliation: 1 +affiliations: + - name: Université de Toulouse, INRAE, BioinfOmics, GenoToul Bioinformatics facility, 31326, Castanet-Tolosan, France + index: 1 +date: 30 november 2023 +bibliography: paper.bib +--- + + +# Statement of need +Metagenomics enables the study of microbial communities and their individual members through shotgun sequencing. An essential phase of metagenomic analysis is the recovery of metagenome-assembled genomes (MAGs). MAGs serve as a gateway to additional analyses, including the exploration of organism-specific metabolic pathways, and form the basis for comprehensive large-scale metagenomic surveys [@Nayfach_Shi_Seshadri_Pollard_Kyrpides_2019] [@Acinas_Sánchez_et_al_2021]. + +In a metagenomic analysis, sequence reads are first assembled into longer sequences called contigs. These contigs are then grouped into bins based on common characteristics in a process called metagenomic binning to obtain MAGs. There are several tools that can be used to binned contigs into MAGs. 
These tools are based on various statistical and machine learning methods and use contig characteristics such as tetranucleotide frequencies, GC content and similar abundances across samples [@kang2019metabat] [@alneberg2013concoct] [@nissen2021improved]. + +The approach of applying multiple binning methods and combining them has proven useful to obtain more and better quality MAGs from metagenomic datasets.This combination process is called bin-refinement and several tools exist to perform such tasks, such as DASTool [@sieber2018dastool], MagScot [@ruhlemann2022magscot] and the bin-refinement module of the metaWRAP pipeline [@uritskiy2018metawrap]. Of these, metaWRAP's bin-refinement tool has demonstrated remarkable efficiency in benchmark analysis [@meyer2022critical]. However, it has certain limitations, most notably its inability to integrate more than three binning results. In addition, it repeatedly uses CheckM [@parks2015checkm] to assess bin quality throughout its execution, which contributes to its slower performance. Furthermore, since it is embedded in a larger framework, it may present challenges when attempting to integrate it into an independent analysis pipeline. + +We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement module, which addresses the limitations of the latter and ensures better results. + +# Summary +Binette is a Python reimplementation of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union. This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins . 
+Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: completeness - weight * contamination, with the default weight set to 3. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins. +The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. +Binette serves as the bin refinement tool within the metaGWS metagenomic analysis pipeline, providing a robust and faster alternative to the bin refinement module of the metaWRAP pipeline as well as other similar bin refinement tools. 
+ + +# References From 0ddc05838cfc439c807560995e49243e9b3b5ad1 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 9 Nov 2023 23:53:45 +0100 Subject: [PATCH 04/49] add joss github action to compile the paper --- .github/build_draft_pdf.yml | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 .github/build_draft_pdf.yml diff --git a/.github/build_draft_pdf.yml b/.github/build_draft_pdf.yml new file mode 100644 index 0000000..3383dd6 --- /dev/null +++ b/.github/build_draft_pdf.yml @@ -0,0 +1,24 @@ +name: build draft paper pdf +on: [push] + +jobs: + paper: + runs-on: ubuntu-latest + name: Paper Draft + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Build draft PDF + uses: openjournals/openjournals-draft-action@master + with: + journal: joss + # This should be the path to the paper within your repo. + paper-path: paper/paper.md + - name: Upload + uses: actions/upload-artifact@v1 + with: + name: paper + # This is the output path where Pandoc will write the compiled + # PDF. 
Note, this should be the same directory as the input + # paper.md + path: paper/paper.pdf \ No newline at end of file From 0cc4dddddd708f06b32dc5c054d12dfad718cc46 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 9 Nov 2023 23:55:51 +0100 Subject: [PATCH 05/49] relocate correctly github action wf --- .github/{ => workflows}/build_draft_pdf.yml | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/{ => workflows}/build_draft_pdf.yml (100%) diff --git a/.github/build_draft_pdf.yml b/.github/workflows/build_draft_pdf.yml similarity index 100% rename from .github/build_draft_pdf.yml rename to .github/workflows/build_draft_pdf.yml From 21f3ade5b008651fec68b2a3ac834d45f3235f36 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Fri, 10 Nov 2023 00:01:30 +0100 Subject: [PATCH 06/49] fix concoct citation --- paper/paper.bib | 15 ++++++++++----- paper/paper.md | 6 +++--- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/paper/paper.bib b/paper/paper.bib index 5b2302f..b77fbed 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -36,13 +36,18 @@ @article{kang2019metabat publisher={PeerJ Inc.} } -@article{alneberg2013concoct, - title={CONCOCT: clustering contigs on coverage and composition}, - author={Alneberg, Johannes and Bjarnason, Brynjar Sm{\'a}ri and de Bruijn, Ino and Schirmer, Melanie and Quick, Joshua and Ijaz, Umer Z and Loman, Nicholas J and Andersson, Anders F and Quince, Christopher}, - journal={arXiv preprint arXiv:1312.4038}, - year={2013} +@article{alneberg2014concoct, + title={Binning metagenomic contigs by coverage and composition}, + author={Alneberg, Johannes and Bjarnason, Brynjar Sm{\'a}ri and De Bruijn, Ino and Schirmer, Melanie and Quick, Joshua and Ijaz, Umer Z and Lahti, Leo and Loman, Nicholas J and Andersson, Anders F and Quince, Christopher}, + journal={Nature methods}, + volume={11}, + number={11}, + pages={1144--1146}, + year={2014}, + publisher={Nature Publishing Group US New York} } + @article{nissen2021improved, 
title={Improved metagenome binning and assembly using deep variational autoencoders}, author={Nissen, Jakob Nybo and Johansen, Joachim and Alles{\o}e, Rosa Lundbye and S{\o}nderby, Casper Kaae and Armenteros, Jose Juan Almagro and Gr{\o}nbech, Christopher Heje and Jensen, Lars Juhl and Nielsen, Henrik Bj{\o}rn and Petersen, Thomas Nordahl and Winther, Ole and others}, diff --git a/paper/paper.md b/paper/paper.md index 3408263..c3b34f9 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -23,9 +23,9 @@ bibliography: paper.bib # Statement of need -Metagenomics enables the study of microbial communities and their individual members through shotgun sequencing. An essential phase of metagenomic analysis is the recovery of metagenome-assembled genomes (MAGs). MAGs serve as a gateway to additional analyses, including the exploration of organism-specific metabolic pathways, and form the basis for comprehensive large-scale metagenomic surveys [@Nayfach_Shi_Seshadri_Pollard_Kyrpides_2019] [@Acinas_Sánchez_et_al_2021]. +Metagenomics enables the study of microbial communities and their individual members through shotgun sequencing. An essential phase of metagenomic analysis is the recovery of metagenome-assembled genomes (MAGs). MAGs serve as a gateway to additional analyses, including the exploration of organism-specific metabolic pathways, and form the basis for comprehensive large-scale metagenomic surveys [@Nayfach_Shi_Seshadri_Pollard_Kyrpides_2019, @Acinas_Sánchez_et_al_2021]. -In a metagenomic analysis, sequence reads are first assembled into longer sequences called contigs. These contigs are then grouped into bins based on common characteristics in a process called metagenomic binning to obtain MAGs. There are several tools that can be used to binned contigs into MAGs. 
These tools are based on various statistical and machine learning methods and use contig characteristics such as tetranucleotide frequencies, GC content and similar abundances across samples [@kang2019metabat] [@alneberg2013concoct] [@nissen2021improved]. +In a metagenomic analysis, sequence reads are first assembled into longer sequences called contigs. These contigs are then grouped into bins based on common characteristics in a process called metagenomic binning to obtain MAGs. There are several tools that can be used to binned contigs into MAGs. These tools are based on various statistical and machine learning methods and use contig characteristics such as tetranucleotide frequencies, GC content and similar abundances across samples [@kang2019metabat, @alneberg2014concoct, @nissen2021improved]. The approach of applying multiple binning methods and combining them has proven useful to obtain more and better quality MAGs from metagenomic datasets.This combination process is called bin-refinement and several tools exist to perform such tasks, such as DASTool [@sieber2018dastool], MagScot [@ruhlemann2022magscot] and the bin-refinement module of the metaWRAP pipeline [@uritskiy2018metawrap]. Of these, metaWRAP's bin-refinement tool has demonstrated remarkable efficiency in benchmark analysis [@meyer2022critical]. However, it has certain limitations, most notably its inability to integrate more than three binning results. In addition, it repeatedly uses CheckM [@parks2015checkm] to assess bin quality throughout its execution, which contributes to its slower performance. Furthermore, since it is embedded in a larger framework, it may present challenges when attempting to integrate it into an independent analysis pipeline. @@ -33,7 +33,7 @@ We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement # Summary Binette is a Python reimplementation of the bin refinement module used in metaWRAP. 
It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union. This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins . -Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: completeness - weight * contamination, with the default weight set to 3. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins. +Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 3. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins. The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. 
Binette serves as the bin refinement tool within the metaGWS metagenomic analysis pipeline, providing a robust and faster alternative to the bin refinement module of the metaWRAP pipeline as well as other similar bin refinement tools. From 0d77f2e7ebc31b7c12cace9abda00edcbefc2715 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Fri, 10 Nov 2023 00:06:03 +0100 Subject: [PATCH 07/49] fix citations --- paper/paper.bib | 2 +- paper/paper.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/paper/paper.bib b/paper/paper.bib index b77fbed..5ca9b66 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -1,4 +1,4 @@ -@article{Nayfach_Shi_Seshadri_Pollard_Kyrpides_2019, +@article{Nayfach2019global_human_gut_microbiome, title={New insights from uncultivated genomes of the global human gut microbiome}, volume={568}, DOI={10.1038/s41586-019-1058-x}, diff --git a/paper/paper.md b/paper/paper.md index c3b34f9..ae5155f 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -23,9 +23,9 @@ bibliography: paper.bib # Statement of need -Metagenomics enables the study of microbial communities and their individual members through shotgun sequencing. An essential phase of metagenomic analysis is the recovery of metagenome-assembled genomes (MAGs). MAGs serve as a gateway to additional analyses, including the exploration of organism-specific metabolic pathways, and form the basis for comprehensive large-scale metagenomic surveys [@Nayfach_Shi_Seshadri_Pollard_Kyrpides_2019, @Acinas_Sánchez_et_al_2021]. +Metagenomics enables the study of microbial communities and their individual members through shotgun sequencing. An essential phase of metagenomic analysis is the recovery of metagenome-assembled genomes (MAGs). MAGs serve as a gateway to additional analyses, including the exploration of organism-specific metabolic pathways, and form the basis for comprehensive large-scale metagenomic surveys [@Nayfach2019global_human_gut_microbiome] [@Acinas_Sánchez_et_al_2021]. 
-In a metagenomic analysis, sequence reads are first assembled into longer sequences called contigs. These contigs are then grouped into bins based on common characteristics in a process called metagenomic binning to obtain MAGs. There are several tools that can be used to binned contigs into MAGs. These tools are based on various statistical and machine learning methods and use contig characteristics such as tetranucleotide frequencies, GC content and similar abundances across samples [@kang2019metabat, @alneberg2014concoct, @nissen2021improved]. +In a metagenomic analysis, sequence reads are first assembled into longer sequences called contigs. These contigs are then grouped into bins based on common characteristics in a process called metagenomic binning to obtain MAGs. There are several tools that can be used to binned contigs into MAGs. These tools are based on various statistical and machine learning methods and use contig characteristics such as tetranucleotide frequencies, GC content and similar abundances across samples [@kang2019metabat] [@alneberg2014concoct] [@nissen2021improved]. The approach of applying multiple binning methods and combining them has proven useful to obtain more and better quality MAGs from metagenomic datasets.This combination process is called bin-refinement and several tools exist to perform such tasks, such as DASTool [@sieber2018dastool], MagScot [@ruhlemann2022magscot] and the bin-refinement module of the metaWRAP pipeline [@uritskiy2018metawrap]. Of these, metaWRAP's bin-refinement tool has demonstrated remarkable efficiency in benchmark analysis [@meyer2022critical]. However, it has certain limitations, most notably its inability to integrate more than three binning results. In addition, it repeatedly uses CheckM [@parks2015checkm] to assess bin quality throughout its execution, which contributes to its slower performance. 
Furthermore, since it is embedded in a larger framework, it may present challenges when attempting to integrate it into an independent analysis pipeline. From 57a98097989751bea4a699940c8d5b0df622ba36 Mon Sep 17 00:00:00 2001 From: Jean Mainguy Date: Thu, 9 Nov 2023 23:30:55 +0100 Subject: [PATCH 08/49] Update paper.md --- paper/paper.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paper/paper.md b/paper/paper.md index ae5155f..159fee6 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -34,7 +34,10 @@ We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement # Summary Binette is a Python reimplementation of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union. This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins . Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 3. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins. + The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. 
These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. + + Binette serves as the bin refinement tool within the metaGWS metagenomic analysis pipeline, providing a robust and faster alternative to the bin refinement module of the metaWRAP pipeline as well as other similar bin refinement tools. From c5bf759795a70d238a9baf13bbe69e41ac58fc32 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Fri, 10 Nov 2023 17:13:31 +0100 Subject: [PATCH 09/49] update paper --- paper/paper.bib | 20 ++++++++++++++++++++ paper/paper.md | 19 ++++++++++--------- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/paper/paper.bib b/paper/paper.bib index 5ca9b66..09021ce 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -148,3 +148,23 @@ @article{larralde2022pyrodigal year={2022} } +@article{hyatt2010prodigal, + title={Prodigal: prokaryotic gene recognition and translation initiation site identification}, + author={Hyatt, Doug and Chen, Gwo-Liang and LoCascio, Philip F and Land, Miriam L and Larimer, Frank W and Hauser, Loren J}, + journal={BMC bioinformatics}, + volume={11}, + pages={1--11}, + year={2010}, + publisher={Springer} +} + + + +@article{metagWGS_inprep, + title={MetagWGS, a complete workflow to analyse metagenomic data (from Illumina reads or PacBio HiFi reads)}, + author={Noirot, Céline and Mainguy, Jean and Hoede, Claire}, annote={need completion with all authors},
+ journal={Journal}, + year={in preparation} + +} + diff --git a/paper/paper.md b/paper/paper.md index 159fee6..161702c 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -10,35 +10,36 @@ tags: authors: - name: Jean Mainguy orcid: 0009-0006-9160-9744 - affiliation: 1 + affiliation: "1, 2" - name: Claire Hoede orcid: 0000-0001-5054-7731 - affiliation: 1 + affiliation: "1, 2" + corresponding: true affiliations: - name: Université de Toulouse, INRAE, BioinfOmics, GenoToul Bioinformatics facility, 31326, Castanet-Tolosan, France index: 1 + - name: Université de Toulouse, INRAE, UR 875 MIAT, 31326, Castanet-Tolosan, France + index: 2 date: 30 november 2023 bibliography: paper.bib --- # Statement of need -Metagenomics enables the study of microbial communities and their individual members through shotgun sequencing. An essential phase of metagenomic analysis is the recovery of metagenome-assembled genomes (MAGs). MAGs serve as a gateway to additional analyses, including the exploration of organism-specific metabolic pathways, and form the basis for comprehensive large-scale metagenomic surveys [@Nayfach2019global_human_gut_microbiome] [@Acinas_Sánchez_et_al_2021]. +Metagenomics enables the study of microbial communities and their individual members through shotgun sequencing. An essential phase of metagenomic analysis is the recovery of metagenome-assembled genomes (MAGs). MAGs serve as a gateway to additional analyses, including the exploration of organism-specific metabolic pathways, and form the basis for comprehensive large-scale metagenomic surveys [@Nayfach2019global_human_gut_microbiome;@Acinas_Sánchez_et_al_2021]. -In a metagenomic analysis, sequence reads are first assembled into longer sequences called contigs. These contigs are then grouped into bins based on common characteristics in a process called metagenomic binning to obtain MAGs. There are several tools that can be used to binned contigs into MAGs. 
These tools are based on various statistical and machine learning methods and use contig characteristics such as tetranucleotide frequencies, GC content and similar abundances across samples [@kang2019metabat] [@alneberg2014concoct] [@nissen2021improved]. +In a metagenomic analysis, sequence reads are first assembled into longer sequences called contigs. These contigs are then grouped into bins based on common characteristics in a process called metagenomic binning to obtain MAGs. There are several tools that can be used to bin contigs into MAGs. These tools are based on various statistical and machine learning methods and use contig characteristics such as tetranucleotide frequencies, GC content and similar abundances across samples [@kang2019metabat;@alneberg2014concoct;@nissen2021improved]. The approach of applying multiple binning methods and combining them has proven useful to obtain more and better quality MAGs from metagenomic datasets.This combination process is called bin-refinement and several tools exist to perform such tasks, such as DASTool [@sieber2018dastool], MagScot [@ruhlemann2022magscot] and the bin-refinement module of the metaWRAP pipeline [@uritskiy2018metawrap]. Of these, metaWRAP's bin-refinement tool has demonstrated remarkable efficiency in benchmark analysis [@meyer2022critical]. However, it has certain limitations, most notably its inability to integrate more than three binning results. In addition, it repeatedly uses CheckM [@parks2015checkm] to assess bin quality throughout its execution, which contributes to its slower performance. Furthermore, since it is embedded in a larger framework, it may present challenges when attempting to integrate it into an independent analysis pipeline. We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement module, which addresses the limitations of the latter and ensures better results. 
# Summary -Binette is a Python reimplementation of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union. This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins . -Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 3. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins. +Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union. This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. -The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. 
These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. +Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 3. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins. The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. - -Binette serves as the bin refinement tool within the metaGWS metagenomic analysis pipeline, providing a robust and faster alternative to the bin refinement module of the metaWRAP pipeline as well as other similar bin refinement tools. 
+Binette serves as the bin refinement tool within the [metagWGS](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs) metagenomic analysis pipeline [@metagWGS_inprep], providing a robust and faster alternative to the bin refinement module of the metaWRAP pipeline as well as other similar bin refinement tools. # References From d9383bc81d82492dcaeb382e0b848241fcb62dd8 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 11 Jan 2024 19:32:33 +0100 Subject: [PATCH 10/49] add version in __init__.py --- binette/__init__.py | 1 + binette/binette.py | 8 ++++---- setup.py | 17 ++++++++++++++++- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/binette/__init__.py b/binette/__init__.py index e69de29..63eb0cb 100644 --- a/binette/__init__.py +++ b/binette/__init__.py @@ -0,0 +1 @@ +__version__ = '0.1.6' \ No newline at end of file diff --git a/binette/binette.py b/binette/binette.py index 013bcf4..9eb7843 100755 --- a/binette/binette.py +++ b/binette/binette.py @@ -13,8 +13,8 @@ import sys import logging import os -import pkg_resources +import binette from binette import contig_manager, cds, diamond, bin_quality, bin_manager, io_manager as io from typing import List, Dict, Set, Tuple @@ -42,10 +42,10 @@ def init_logging(verbose, debug): def parse_arguments(args): """Parse script arguments.""" - program_version = pkg_resources.get_distribution("Binette").version + parser = ArgumentParser( - description=f"Binette version={program_version}", + description=f"Binette version={binette.__version__}", formatter_class=ArgumentDefaultsHelpFormatter, ) # TODO add catagory to better visualize the required and the optional args @@ -111,7 +111,7 @@ def parse_arguments(args): parser.add_argument("--low_mem", help="low mem mode", action="store_true") - parser.add_argument("--version", action="version", version=program_version) + parser.add_argument("--version", action="version", version=binette.__version__) args = parser.parse_args(args) return args diff --git a/setup.py 
b/setup.py index 36029dd..8b2961a 100644 --- a/setup.py +++ b/setup.py @@ -2,6 +2,21 @@ from os import path from setuptools import setup, find_packages +import codecs + +def read(rel_path): + here = path.abspath(path.dirname(__file__)) + with codecs.open(path.join(here, rel_path), 'r') as fp: + return fp.read() + +def get_version(rel_path): + for line in read(rel_path).splitlines(): + if line.startswith('__version__'): + delim = '"' if '"' in line else "'" + return line.split(delim)[1] + else: + raise RuntimeError("Unable to find version string.") + if __name__ == "__main__": # Get the long description from the README file @@ -11,7 +26,7 @@ setup( name="binette", - version="0.1.6", + version=get_version("binette/__init__.py"), author="Jean Mainguy", packages=find_packages(), entry_points={"console_scripts": ["binette = binette.binette:main"]}, From ce3ecfffa35b0d452078a16fb09e416ba6241f6c Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 11 Jan 2024 19:34:07 +0100 Subject: [PATCH 11/49] change default parameters --- binette/binette.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/binette/binette.py b/binette/binette.py index 9eb7843..bfcbf72 100755 --- a/binette/binette.py +++ b/binette/binette.py @@ -71,7 +71,7 @@ def parse_arguments(args): parser.add_argument( "-m", "--min_completeness", - default=10, + default=40, type=int, help="Minimum completeness required for final bin selections.", ) @@ -83,7 +83,7 @@ def parse_arguments(args): parser.add_argument( "-w", "--contamination_weight", - default=5, + default=2, type=float, help="Bin are scored as follow: completeness - weight * contamination. 
" "A low contamination_weight favor complete bins over low contaminated bins.", From 5e53ba2516a7e685a10a45960818305c3037f163 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 11 Jan 2024 19:46:41 +0100 Subject: [PATCH 12/49] organise arguments into category --- binette/binette.py | 54 +++++++++++++++++++++++++--------------------- 1 file changed, 29 insertions(+), 25 deletions(-) diff --git a/binette/binette.py b/binette/binette.py index bfcbf72..ee2f545 100755 --- a/binette/binette.py +++ b/binette/binette.py @@ -40,35 +40,48 @@ def init_logging(verbose, debug): ) + def parse_arguments(args): """Parse script arguments.""" - parser = ArgumentParser( description=f"Binette version={binette.__version__}", formatter_class=ArgumentDefaultsHelpFormatter, ) - # TODO add catagory to better visualize the required and the optional args - input_arg = parser.add_mutually_exclusive_group(required=True) + + # Input arguments category + input_group = parser.add_argument_group('Input Arguments') + input_arg = input_group.add_mutually_exclusive_group(required=True) input_arg.add_argument( "-d", "--bin_dirs", nargs="+", - help="list of bin folders containing each bin in a fasta file.", + help="List of bin folders containing each bin in a fasta file.", ) input_arg.add_argument( "-b", "--contig2bin_tables", nargs="+", - help="list of contig2bin table with two columns separated\ + help="List of contig2bin table with two columns separated\ with a tabulation: contig, bin", ) - parser.add_argument("-c", "--contigs", required=True, help="Contigs in fasta format.") + input_group.add_argument("-c", "--contigs", required=True, help="Contigs in fasta format.") - parser.add_argument( + # Other parameters category + other_group = parser.add_argument_group('Other Arguments') + + other_group.add_argument( + "-e", + "--extension", + default="fasta", + help="Extension of fasta files in bin folders " + "(necessary when --bin_dirs is used).", + ) + + other_group.add_argument( "-m", 
"--min_completeness", default=40, @@ -76,11 +89,11 @@ def parse_arguments(args): help="Minimum completeness required for final bin selections.", ) - parser.add_argument("-t", "--threads", default=1, type=int, help="Number of threads.") + other_group.add_argument("-t", "--threads", default=1, type=int, help="Number of threads to use.") - parser.add_argument("-o", "--outdir", default="results", help="Output directory.") + other_group.add_argument("-o", "--outdir", default="results", help="Output directory.") - parser.add_argument( + other_group.add_argument( "-w", "--contamination_weight", default=2, @@ -89,34 +102,25 @@ def parse_arguments(args): "A low contamination_weight favor complete bins over low contaminated bins.", ) - parser.add_argument( - "-e", - "--extension", - default="fasta", - help="Extension of fasta files in bin folders " - "(necessary when --bin_dirs is used).", - ) - - parser.add_argument( + other_group.add_argument( "--checkm2_db", help="Provide a path for the CheckM2 diamond database. 
" "By default the database set via is used.", ) - parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true") + other_group.add_argument("--low_mem", help="Use low mem mode when running diamond", action="store_true") - parser.add_argument("--debug", help="active debug mode", action="store_true") + other_group.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true") - parser.add_argument("--resume", help="active resume mode", action="store_true") + other_group.add_argument("--debug", help="Active debug mode", action="store_true") - parser.add_argument("--low_mem", help="low mem mode", action="store_true") + other_group.add_argument("--resume", help="Active resume mode", action="store_true") - parser.add_argument("--version", action="version", version=binette.__version__) + other_group.add_argument("--version", action="version", version=binette.__version__) args = parser.parse_args(args) return args - def parse_input_files(bin_dirs: List[str], contig2bin_tables: List[str], contigs_fasta: str) -> Tuple[Dict[str, List], List, Dict[str, List], Dict[str, int]]: """ Parses input files to retrieve information related to bins and contigs. From 351ae7a59015a972129c698aac24829b8a71fcc6 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 11 Jan 2024 19:58:56 +0100 Subject: [PATCH 13/49] add figure in paper --- paper/paper.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/paper/paper.md b/paper/paper.md index 161702c..6c824d9 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -37,7 +37,11 @@ We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement # Summary Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. 
Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union. This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. -Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 3. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins. The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. + +![Overview of Binette Steps. (A) Binette Workflow Overview: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins. (B) Intermediate Bin Creation Example: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. 
Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins.](./binette_overview.svg) + + +Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 2. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins. The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. Binette serves as the bin refinement tool within the [metagWGS](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs) metagenomic analysis pipeline [@metagWGS_inprep], providing a robust and faster alternative to the bin refinement module of the metaWRAP pipeline as well as other similar bin refinement tools. 
From 7ce1b64d7497684edc518eb72315f6bc13b546f1 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 11 Jan 2024 20:01:51 +0100 Subject: [PATCH 14/49] replace svg by png figures in paper.md --- paper/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paper/paper.md b/paper/paper.md index 6c824d9..edd5c47 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -38,7 +38,7 @@ We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union. This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. -![Overview of Binette Steps. (A) Binette Workflow Overview: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins. (B) Intermediate Bin Creation Example: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins.](./binette_overview.svg) +![Overview of Binette Steps. (A) Binette Workflow Overview: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. 
Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins. (B) Intermediate Bin Creation Example: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins.](./binette_overview.png) Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 2. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins. The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. 
From 1ce65d7a84b5a96900fc56df30d928cac44daef7 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 11 Jan 2024 20:39:13 +0100 Subject: [PATCH 15/49] commit figure in svg --- paper/binette_overview.svg | 683 +++++++++++++++++++++++++++++++++++++ paper/paper.md | 2 +- 2 files changed, 684 insertions(+), 1 deletion(-) create mode 100644 paper/binette_overview.svg diff --git a/paper/binette_overview.svg b/paper/binette_overview.svg new file mode 100644 index 0000000..43f7ecd --- /dev/null +++ b/paper/binette_overview.svg @@ -0,0 +1,683 @@ + + + +A-BUnion binIntersection binDifference binsshare at least a contigA∪BA∩BB-AIntermediate bins between A and BBin BBin Acontig n contig n Input bin setsAll bins sorted by their scoreIntermediate binsFinal binsCreation of intermediate binsBins are scored with CheckM2Selection of non redundant binsAB diff --git a/paper/paper.md b/paper/paper.md index edd5c47..6c824d9 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -38,7 +38,7 @@ We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union. This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. -![Overview of Binette Steps. (A) Binette Workflow Overview: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. 
Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins. (B) Intermediate Bin Creation Example: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins.](./binette_overview.png) +![Overview of Binette Steps. (A) Binette Workflow Overview: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins. (B) Intermediate Bin Creation Example: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins.](./binette_overview.svg) Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 2. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins. The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. 
Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. From b22be678425d09e8e62a1080995e9dfb497c85e1 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 11 Jan 2024 20:46:04 +0100 Subject: [PATCH 16/49] add bold in fig caption and adjust svg --- paper/binette_overview.svg | 369 ++++++++++++++++++------------------- paper/paper.md | 4 +- 2 files changed, 182 insertions(+), 191 deletions(-) diff --git a/paper/binette_overview.svg b/paper/binette_overview.svg index 43f7ecd..8228c17 100644 --- a/paper/binette_overview.svg +++ b/paper/binette_overview.svg @@ -133,10 +133,7 @@ id="path56" inkscape:connector-type="orthogonal" inkscape:connector-curvature="20" />Input bin setsAll bins sorted byInput bin setsAll bins sorted by their scoreIntermediatetheir scoreIntermediate binsFinal binsCreation ofbinsFinal binsCreation of intermediateintermediate binsBins are scoredbinsBins are scored with CheckM2Selection of nonwith CheckM2Selection of non redundant binsredundant bins Date: Thu, 11 Jan 2024 20:49:42 +0100 Subject: [PATCH 17/49] adjust svg --- paper/binette_overview.svg | 103 +++++-------------------------------- 1 file changed, 14 insertions(+), 89 deletions(-) diff --git a/paper/binette_overview.svg b/paper/binette_overview.svg index 8228c17..59ccc39 100644 --- a/paper/binette_overview.svg +++ b/paper/binette_overview.svg @@ -8,32 +8,8 @@ version="1.1" id="svg1" xml:space="preserve" - inkscape:version="1.3 (0e150ed6c4, 2023-07-21)" - sodipodi:docname="dessin.svg" - xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" - xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns="http://www.w3.org/2000/svg" - xmlns:svg="http://www.w3.org/2000/svg">contig n Date: Thu, 11 Jan 2024 20:56:27 +0100 Subject: 
[PATCH 18/49] try with pdf fig --- paper/binette_overview.pdf | Bin 0 -> 13591 bytes paper/paper.md | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 paper/binette_overview.pdf diff --git a/paper/binette_overview.pdf b/paper/binette_overview.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c31f7e8b2f0f0623a6f1a5f2c46c00293fc3ac4f GIT binary patch literal 13591 zcmd6Nby!qe`!*qngrszglpr}XFqDAQfOIL{-QA^tbc2+Hba$814bmW8N`rKY-+)Jt z=RN25d(ZdJcYO@k?7g40W?gGP&)U!Z-1lBIvcga%ATv8EP1R2EB`O<$6=0=niu&XU zfJM^4(#YNzzz#FXqXGZ`7O082y@4(4Xs%;#AZ(y#rEh@B$A@ZXZ)>1qf$E%;tSVte z-G%Y~v}8SFqPcSG8=N;>!yw|lMIXJ-)MtQjs3sSaMProC>cSdTaN%wW(LnEnZq^dV zbH=C+N)=2V;;R{ztAjjAcGa!9S}#&hqHVG7eBaqTJga_G(+UdyVzvefbdpsquN>sGWuq&MDgOQMYfE45zB6njX+7IH>ocx1xx@%|iOa!h zH4aW~L|g0>D}K5hqwb5h5Wc|Vc$_9ZvA7jai`GKXg>|^9zT4d|R*oiUe=O;@{+cwN z8NRpsbTB@T`w==}X>^xzZ@4keGf_NOIJ5^6Z~LG2#_V`wkx6b!64Zy*D`lcVP9cx$ z4ir1WLIn?W{7r(Im#Vy7BbOaJS4U1u8qweZ>~B?lDrf z^ekf%Sth@8$Qm3*Vyth*%iNDgvL%xp_NON>y&L$FekU}8NY9N9Z$KZhU^LRVW0OY+ z^HJI0$x@6T1-p&k{g&`@1OFG)&9r#;Vm1V8bWYLI%LmBh_utUxRYTx;mO82N&J#tj z&-v6b@hi-N243J`bte{K=+QlZ=QZjlm>>|&n_u0(usOT%G;-8G*t%TW{I=WHu$8K{ zg3y5P{4|$_HtapK>Rm+dzVQ#K`dN}P}oJL)X>vbM?_af{;snjceBE9X@2Y5x6#rIPe zxa=I9rztv}_|@AcvtOpFLROw|(C47jEEP;hDWi{}EG($=dNo=|cFx4v@Xhjlpks(* zMAlHA3%it&VZr3rV5OeW>fo-RQBXfTi!XC#A~|SU*Js(Z?>Ib~+%n)wE=*Qdx5Gn| zIf~)LC2hPI!4`gyI!%m2jWjwDNo7?mjtN&9QS^C;TFN$}aVSzZ!snnBkGyqX+TVhW z1^PxRQ5*PWR$Vh9UPBG1wQ>}+tr;m&?P5Z`?3A58U-pbsI5|FWV$6gnYCx_+TOt@b zmk1BvMiobgLmFihs6m=GIk7EO&(1>`f79YEIzjTdEiq!cN@;ilP9Em)tenvlPl?ydjPPZjKv=pTHsY;f6#GCD4+! 
z3J%ycwu8PwepqoE^Wi6ht;0{a-rTQf(H(HIF5OYfQ$Eg%&B-O?H;^ajM_YJ)clSdO zbs*orZguyZYKqI-Ja+pUkohpY`}MFL4QgtBwg- zYq=t&W>I(W^|ppk^6WCgbVSl8L(;Duen<=6i)= z0{IEq!-@V8buI-3)V)ep{?9jCxl0|A>mnfPnm7dI$m1j2i4nYJ7(t9G5^w|jtuoJu zN^%&nO-N%yXg~}fvWH%Lb10YhY2>ZdC5_I{5JuPoKX^T}q1&FhZrctSc&B`{F5z~W zwCIpKC1JxiqHc5UZ5_J#P12mnddoYM+hT}`NhdldWG;yE@SJ*v$FCU;fy`nx8cF2+ z7w{bKYRzGgPy`(;lH*}EKhnNZunwj#LR!}2WVQYMA| z*Jrxv-A{rV(+&3GpZKi>-A|jEo?Qq-T5oBOGfDRLaB$20FY+udIWbMZZXFg-Te+Ch8=oJM6ee3~At zKzYCYW0`0~4s9XpWaQ+N=RCR}2&r;7%G1482q{~EvIc!v$Nk?(9!58SCq$IVmvTsT+~eibt+$bAmXYi_w?Xpc{F2M#&IL% za^XQx>@vV^g7cHr0#HofK|T51S)1lWS;P7?PHk&(#u2`=JslY-PhHY}^w5WRxti zm_~JNZsv6Jmee_D*sDl;vYxxKG9I_9j+~MqeELe+Vgz_@Ar%l!ZC~*aF z%7E}d^a z!%hruK1>S;jqtsHUSOiO3dMvgG?%CfJlvT#)qEJn`yydP0FFtA?1k}QniQJ_Fj^a(iM`AS&~fVhNQ(DKNaqj(;jd@BxW-FlrWZkQei#lZWtzaFuD* zWHyx#(q_nJu2C_1NL`pP#^1`e;G%))zljGxHj?Qz{B6=^p%&Di3M6QmwC(Ylo+x+& z%xV(wQqbn0a}4DH5SCEki$TRo9tg^(oiVZ1Ku9D*6h*doeq^k9dg|<;v$k$O<%t3g z0s0y%e76tdT{U>dAX9F-v;wUaZZ2nis=j<<#QH*Urc#tBbgg##uxDWw_TA7La0>04 z%6HuN=d0k-l5yxoX$-oX-~z~`^6P4h-@QIN_|VzbFMnBa^w3KzMFKdO$x4tiMDxg%vD7g-q;IbXk5t)6m3LC1p>i7j326dtXg1-qGeBcU~%Pw;Cj^PXmH(DJl9c@5e!($Xlqth@n6jh z+vrt4e<|gdj;FrukddZXUaj;XJYz^=a9$OiCb}TOXz-Bl-VR!aY^4(^CfrwcJ~-~z zc>^6U^sSGAgr@hpHeEjClNupRfjSO(;GA)FdBw~V7^B^Hr^Qs`A4FYbwe$;?zq#|! 
zbzoolUiEaK?$+zTMU{*-Nk8Q6M`az<0*M(zelH?-UEgKMGZEp_BWJ<$7BBHG5cg0A zmkUdOu2;EhQAzy(Us81_?K|G+2om)J?N;kFQ|o~@koJF-#YkIutdmk z1lQbqE@36lqy%?}OwZ?#GJcHG;RC5}iqfsigtr2aR>z|b?Ys0OF9f0Q$e|(z%i38{ z>N4;To0|EZaGILMUp}l%Z%*Sg70s-4>V-IFS`@EuL$TAjWJZf4Mr1M`DuH(Bm8{^` z#6^_ZUOIk56OGsrm+3TTdn>*gh2JPuVY$(I-bUXhz3=RnbvM#@7eiRzfR>ny=oOQL zdEBMI-mYw1NwFnyU|na*Bz1k$nhbkNUBl&G35&f62Oovzv=F{9y6byED-%)!BA2cM z&y;gpEBEW8^Uu~C*J9zS*Ev=yl&Jb1-L`he2m4>Yp3mJ+)a2dcuUca%dU!8R_9IKV zUw!6FQqCGmZXFGR_eRt6%)M%4B#HFuI*?0%9uEM|lietxFhtp7Zk)7w!XBERY$05{G+aikX=>Wd4hCb_`LZUf@ z(mBSNOE`vM8c5RHhr*{>uG{XqjB$v0aPKBQ#Vf1E%<*({KIim2+%Nt9u}1o;PBk(t17mEg;?@ebf=&IdjMQ^o zpB&eGWoQAuN0U8+5o^Jqid>QZ*e=pKS(-|_2*AnqyfO&7O07>ojri< zkMsYXx$_PHF}+#w?kpy2*#t9GcBIo{AjV_e#r3i5e6<*?WAY3QdfjA!5qA1{jt4%L z?)7$ci0}4D6AH=KJ18wtDa|cIP|i`i^~Z-TbGSke4eD{dAKmWD(;B_9%6wo;1i#YY7=r} z=&u7=N26lX479bmujJ`4L=&PmhHwBY=E@*}u*tV`oAumNjgxVFLgiz{1`i+rQbV#^vw0-1bhOFnROuVl9KA7EV zT#>nK*V=)@U2%GRZc2y!6y4i8T1zNf8L z2)vN!qr~^J($s(bDd(WYWBC=WhIBh>aAMqYF$i7>T+T4TZj+<{Z|7mw zJk?)7^hi5cW3EqbgSsaqpv`kZ%w1c{?Qk$fKd7a!D)MXKT;6)Ww*Y^xks799Hm1XI z=*jE)Cmil5ULJG){UPSLpzNWYu|juV;3KOQ$@zNL0M*A zHvW9=o7^4r6rZN6Exw(=MAQjuH@q4wt8Q7JWJyDkmX9L%JjBqQu!QMB-#j4k#_4X) zgw({;nWs!)S-A+_CS?s;qSXQ$`&_TSAQH|h4M3;QPVC`yIftL~@=85M*GwKa z_$uYF*2WnxEuM;%&?<*A==q9sX?vc;d=Y!qSkXG5E2DQJiYO@!iX;8upah#lYfrRAQXiNrJK5dIt@fpDeGH`w2 z2_AFO-DG(7mr5ckc0)(1^ZpLPm?w*o@u$qd`*^2FT@enrV@UbHL!@!lXNrzQ<^j^v ziZr~DWO`B(mZuZhyE@(`Th8t??>%aeqX!kkOL~f&PO#>;wgx=dN{Mwo$z_;S`ZL_a zKX1gO;JQ-wa<0~M;UU<*XO~yVU&6qS#O-pO45r$}I2v%th<|0!U1Lzlf zrCOj5GT6cio3Qc8H!W_?Bht>N-{4K!=TP-g=porAMtO}uvR`;txgX?}7qij}~qx%#51*)l`{3XI!2l<97$?QKMpD z)|W313_Mnv%CqoBW0aD>Ou;ZA(sXU3ptL*UY*Q$t!9gUbObATk&HVClcqS-P@svyz zwdIh}68ce`#1TA`=V-wdk}mf`O#0a0R)iNXs}3ry44INyQaoysU>xb#{F0m2{`q** zTI}Ktm%Nd72R=#lXXZ67wYy0Jh>u(+EAqD^$OW=15PHjpi7ct)76U&c?thlnQj^W0 zJ+jxjSA<4epi2fZ-!adZn0CA8wSCq<_bj@(47<%mJQ@T8iT7pHGeDBC0UE;(>DDc* ziv!qbi=kiF1eff%Vy!*0*htK9r$=5%DoCU^s7hNmy&%|nyV;61GHQXPRWy~f265N$ zr#}Z)zC+bTb0S;m 
z9(0fj$#)vyss@2EW(oUX_5S{xUKOt9p8nykze3B=(*D_tl0Gi+FjvUlcG9(Jc(HzN zBje@R{P1P9uvF(O+u(FGS#mx>TU)+11mQcYWh@8uU4hPv*Bj?93$nA=?C}ns3fiOJ z&XVw&*W4j)r!h)>_60$2qgyXDvhc1JN(Wsc2#9iii#uwD*_vIzH zL!CA$kwXVGudZk|ioAriUcW)*i%E)Zp_Ude{ak^kEEMBIFhoFib@qrYt@~{F@aVN0 z*thd2wTG#5B1VnakKr>K^_$=&0^ePHW+#Lb!~;s=<}&Wol~)d^)G7e|bDVB#>Ga4( zmadB>2m#T_xGImoPjc|^kn)vWBk%F(RJ(C)UB_qR>RPF^o-ck(Xl3m65ojf?^Hp1Q z)^idEpCT?WH=YgpE2&wL9c1GeP7Zb&r}o8~VmkQuOBEy!s5MR`+FP*dR5-BHjJ7CE zgLI_o9L1GPLQw>#j0|~?Ev`FC0L%878lWD#JC+;fFw`mh+R zliQPpuh2A7P_$x3Pj9t%Po^Xlb%`bB2F- zP0^LJbgI8}N_?f+Rl=|r!NQ>5^!bbehfe>s9UxO(_~E0q;BR(zNwOO(Dkc7O4E{6v z_mmeZUEn~K$sT@EcQ@M+ zz{2Gh+kHtV;rQ+u`rZSO|1u$?#EUfkutsN|kw_GGH-rm{huB(AZ4T6w5f7h{p|&JG zizmrRVZPdme4&Z9(xxmJ&&uk=Z`v}Nn-o7p?Lc~Q5094pU_gXEqGiZJ%!Q&af0;Sg zmZUMk7w!02?jD>d*E>GAW6~s?`Hx33?>v!JyzXqim0iLpoX0#yee&8UO;D=><-z=j zh;;>{_|w--LE4R+2*;Ya1glPUl!;vfi>VxliCXRG%4;I--_|RRgw|7~G`gGqp}zjf zf?(w_h=U9KtHAz;*Mq=+|9(&->Z7F(D|+XiOAu*;5YlTl6&hbQ2ZH;T8D+e1)YATJ z2&hNRGathWq^Hx){0~35TP7vHU86-W=$~KFS5wZS@D47OoHQ0Vu_p57#2dkIY--BN zA!|_4SDv-(L)F8*qRtmLq;Re+w25wb#Bux;kQwty%^u6&Isoku%S`FCxu|cPyZHGE@C(0)|XS0S8BChmd+%756Z33hcHSZI_yKFe|D+ z(cvoMnt`MD6ytPqY@u{MVm_vy#w4D-Y+3bgP~3 zNOmSYD7M+5>aQz(P42Fr_`{v@Cg&vNkL(eY^)@X0Lwx^2emQ=H_lBk3(5f4H`-5rT zkXc~^J3U(yYkMo(A6ybMkf~7#{f4MVm~i#ZW{Kk+W-W(Wt8&2aq)ll(2wyPbnS21 zxuUIu!HxHBt%5ps2Dgg9zw_1&XBRTovHj@}Nu58gfuKKF{a*qC0R9pdSVSygFXT-u zjQ}i4CYA!0b|ya!xARak(YH6YgSiz1xseL~`2z;sFz6p2c8)*Kf7*UOLcm-A2s=#K z0D}M=;F}h9;Ehtj3IM}ee)QyGy%9Nne%RS=>>Oa=FU!xCA4hfw5CDOt6E;rRsIVhD zgbe^@W4k?v06765_8WbK9n5xn#sy}FwQvF;tT)G-1S<0a@n>M5)!(+bgw{v2lvtN#1gQ8)UDLh&6KAt z0hedc4|m{Vdk~HL>If6?;f@&ED(wqA)?8pQ;(^*r_t)O(W8Uc1enTY|qC$RcjSZ|R4? 
zjf4PfIt|~|-idgDe@9TkP5~DT)}sR2II^=F&_Lwm^q{FH0kxb`{B}qRIyUf3K(Zaj zPKNLHgeAtkJq>;)))fuqwkk6WJ_CrVZDS>lZ3pg8J`5-!8-5{pitX2c5tr(-ere_1 zel2~iG;sV%grwU5u^uTCu`pcZbio1{*)s4$^mpl{3%CcOaE93^ew^7D=vPmj4uv|{ zUQWT|4Qh|~hfF-x!b6Y}8SiUtuJzgX+Cq-iM26#DmJ9iZoOTndKQtjQ`=5~YyL$FV zSV1@I?SIyUV9MLCEiD=mA5GhCAj=b#Y(~gt)D9(NH>&M=$Q*NV%GS_WKZDc1@tZz} zJ?krsk<)MeMjK||rxm*2@pDbXgU_CZ*#t7E`C{ih&98fj2HDeS+A)tBLBO1evf1qs z9j|_8Mai7N!P!{Lxhx0&@><)U)Y!Z2)WJE#k3EZLP>g6Wf2%nyZf#!Dl>_Wu{58)h z-M=yU&ZV8Z#l0o@%P-hHP=5&!{-+_|d|nBMA=f#}CmIpw?Y~cjB#Q{%|4)^vj8`MEvKo@h^ZuT3S+3 zNSZ;PN`&hc6IlNnCP3J@xS*{6i3xvbm#~cY{}Uwq&~5*eXK!=n&&2VkbN-46LeGS6 zP~fKd2hjbA3;)GguwN>;*umiGnWdrC&7e2nEN@_B0$Zal06GDfYHdJ&V~72*$k^%| z*xq0O-7|d{{+Zaj{AyBgu(mcgu((0RTgZdmz|AlI3h;p2NdI^=lmKe1FuZ`kuouF~ z3DD5^4F-SDmH*Q%-`IbE>%YF&pNIuu`8j!% z;vL%d;6_1h1m7GCV7G(`Kp?t$3a$-{p6wL?b5!iHuPB}mO&Dqb-ouWFX?l?WUp~h4 zk$Ap-y))fQEm#nxdS37RA0&v zjq1WNfmb1A)Tp%o)cqzn5DmKL~ABv7T4U!hP^$hm_2$^Ccp zXVo5+R99<`gRNel<2f1oeDC;>bM`g`{X-Y;Z1N^K7oh3`lYpbDcOXI4wVgI}@M+ir+@XL7Tv;{H|I8><9JY)E5%s5Rlqu&@96ar#G zbQ|-@m!42g8sfE&x{%9o^y^)xV+J60g^;$17!7&5a1DTd8IwOu< zs(fLPNqm{rV0!I|NS96=*u=Z3hh}Of>}DS;nz2<7LRpl;Oek`4j&muj7`gf+^CP~9 z05`4Kn*ltjm8!?)vEmsyJ!875mYKmHgiDnIOw(~f(mTo-a+EdzSr3KTaq`qkm=rW{ zKPGpo46A2Y4425E*&1UZg-X~?uF{}lla2{U6ULo zIBLb)whgks?<_8!MaZamj9$Q}qYTt(6X(55z5g*ABRNuS5p5wc$&P1pHJx{WeF;-2qdV7SK-_K(we16sxYLU8 zgW*-sz64YPWe;-td=Hd(xlG`hq~G}b^9!rvmO$ag9?rott`&^VkA(aNSHAmtoj@q= zN-jx>Rq5DPNGFw4ex%LnS9OQXd!mm++e%b6zs?pilMaG3E35D#TUqJ?#Lh5H4slt2jEeF?_Aq#}7 zR4yPC-aO*%w02r==y8aHOzk*#C$6RYTe%+PzV1!e#&kj6!Q9;D8U(tJGP0lX$@L0# z3LSNxlfLL&mg@{c>7`UAZz)F+C!z_l0>x%7l7w{ZnpbgRhO-7Sl!`r#D2u?>X4LLC z>L>9}MO?F!E{^9K2Bi$o9e_{*W z7(}W|%C>voQTz|UP)^n-b7_7&k-N>MmE@-JftBQhn_UqtvBWsdivr~AQ>_3@#>S_$?7UP#6a>4A&Srm z+1h`0j4VXhMqE-5p{;Md=WD<#|0VpQUGw+{0hrAl)}1tuA5lpcuHYY@CA~FM{+MIsGX5@$0hAzz3eobOrKC_1kd5$gim8dQ2NM%(y_ZLof=|tY-_)F9 zjeM{13sa?%em?b?n4P|l0geyzH=J2W&Cu{D`IO=m=IQu8e9Ezd{)01r>JKnyg21f5 zeEHAy*#9Wp2nmQOzL0_mHy|o5Au16F6({sx2sa=RHqO5bH-9hEfvhkZ!NdlE$tN)F 
z5T>EP6oUUE+;G9z^G)6Lr)~rOAs_vw+rWmo?elMRo7=MQFLmHgcKBCm>&JjUg`5AS zNdGq;`+r=i|KK4vUi-6BXJ+N%fUrYuL>6W)FqoZ-?f28al<@z?A^%ju{~J&I-_-H9 z63c(q@wdt0_xJ(-&Ol}Uwh87|S^7ah81f6g@K?Wx)DrLsFo+%fl1%{Q#=hLotnVsb&BQ=hxRYr?Z><#6Jw$$}JxqZr?ra-B>6q*4Q$RC|U~|3y4=4cFxQeT85Xq_SjA? zACX&7k3Nb)+Eu$qdsLk!lKA&TaqEgdh&+({z)@|xS(ul6O)GT?Il|12r(z%!Nzy}Qb^-1ZZgUg1NSzA>sr3-aDb3- og%01D@r_Uz&Et9h-BWh Date: Thu, 11 Jan 2024 21:15:21 +0100 Subject: [PATCH 19/49] rm useless svg file --- paper/binette_overview.svg | 597 ------------------------------------- 1 file changed, 597 deletions(-) delete mode 100644 paper/binette_overview.svg diff --git a/paper/binette_overview.svg b/paper/binette_overview.svg deleted file mode 100644 index 59ccc39..0000000 --- a/paper/binette_overview.svg +++ /dev/null @@ -1,597 +0,0 @@ - - - -A-BUnion binIntersection binDifference binsshare at least a contigA∪BA∩BB-AIntermediate bins between A and BBin BBin Acontig n contig n Input bin setsAll bins sorted by their scoreIntermediate binsFinal binsCreation of intermediate binsBins are scored with CheckM2Selection of non redundant binsAB From 3cabcf748161080030ef1da947ed2b9cd939f9c7 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 11 Jan 2024 21:58:53 +0100 Subject: [PATCH 20/49] add basic documentation build with sphinx --- README.md | 4 +-- docs/Makefile | 20 +++++++++++ docs/api/api_ref.md | 8 +++++ docs/api/binette.md | 75 +++++++++++++++++++++++++++++++++++++++ docs/api/modules.md | 7 ++++ docs/conf.py | 57 ++++++++++++++++++++++++++++++ docs/contribution.md | 19 ++++++++++ docs/index.md | 34 ++++++++++++++++++ docs/installation.md | 55 +++++++++++++++++++++++++++++ docs/make.bat | 35 ++++++++++++++++++ docs/requirements.txt | 6 ++++ docs/usage.md | 82 +++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 400 insertions(+), 2 deletions(-) create mode 100644 docs/Makefile create mode 100644 docs/api/api_ref.md create mode 100644 docs/api/binette.md create mode 100644 docs/api/modules.md create mode 100644 
docs/conf.py create mode 100644 docs/contribution.md create mode 100644 docs/index.md create mode 100644 docs/installation.md create mode 100644 docs/make.bat create mode 100644 docs/requirements.txt create mode 100644 docs/usage.md diff --git a/README.md b/README.md index 8f9804c..46c3d8f 100644 --- a/README.md +++ b/README.md @@ -9,10 +9,10 @@ From the input bin sets, Binette constructs new hybrid bins. A bin can be seen a - Difference bin: This bin contains the contigs that are exclusively found in one bin and not present in the others. - Union bin: The union bin includes all the contigs contained within the overlapping bins -It then uses checkm2 to assess bins quality to finally select the best bins possible. +It then uses CheckM2 to assess bins quality to finally select the best bins possible. Binette is inspired from the metaWRAP bin-refinement tool but it effectively solves all the problems from that very tool. -- Enhanced Speed: Binette significantly improves the speed of the refinement process. It achieves this by launching the initial steps of checkm2, such as prodigal and diamond runs, only once on all contigs. These intermediate results are then utilized to assess the quality of any given bin, eliminating redundant computations and accelerating the refinement process. +- Enhanced Speed: Binette significantly improves the speed of the refinement process. It achieves this by launching the initial steps of CheckM2, such as Prodigal and Diamond runs, only once on all contigs. These intermediate results are then utilized to assess the quality of any given bin, eliminating redundant computations and accelerating the refinement process. - No Limit on Input Bin Sets: Unlike its predecessor, Binette is not constrained by the number of input bin sets. It can handle and process multiple bin sets simultaneously. 
diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..d4bb2cb --- /dev/null +++ b/docs/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = . +BUILDDIR = _build + +# Put it first so that "make" without argument is like "make help". +help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/docs/api/api_ref.md b/docs/api/api_ref.md new file mode 100644 index 0000000..3de18e1 --- /dev/null +++ b/docs/api/api_ref.md @@ -0,0 +1,8 @@ +# API Reference + +```{toctree} +:maxdepth: 2 +binette +indice_and_table +``` + diff --git a/docs/api/binette.md b/docs/api/binette.md new file mode 100644 index 0000000..bc3c754 --- /dev/null +++ b/docs/api/binette.md @@ -0,0 +1,75 @@ +# binette package + +## Submodules + +## binette.bin_manager module + +```{eval-rst} +.. automodule:: binette.bin_manager + :members: + :undoc-members: + :show-inheritance: +``` + +## binette.bin_quality module + +```{eval-rst} +.. automodule:: binette.bin_quality + :members: + :undoc-members: + :show-inheritance: +``` + +## binette.binette module + +```{eval-rst} +.. automodule:: binette.binette + :members: + :undoc-members: + :show-inheritance: +``` + +## binette.cds module + +```{eval-rst} +.. automodule:: binette.cds + :members: + :undoc-members: + :show-inheritance: +``` + +## binette.contig_manager module + +```{eval-rst} +.. automodule:: binette.contig_manager + :members: + :undoc-members: + :show-inheritance: +``` + +## binette.diamond module + +```{eval-rst} +.. 
automodule:: binette.diamond + :members: + :undoc-members: + :show-inheritance: +``` + +## binette.io_manager module + +```{eval-rst} +.. automodule:: binette.io_manager + :members: + :undoc-members: + :show-inheritance: +``` + +## Module contents + +```{eval-rst} +.. automodule:: binette + :members: + :undoc-members: + :show-inheritance: +``` diff --git a/docs/api/modules.md b/docs/api/modules.md new file mode 100644 index 0000000..b83d27c --- /dev/null +++ b/docs/api/modules.md @@ -0,0 +1,7 @@ +# binette + +```{toctree} +:maxdepth: 4 + +binette +``` diff --git a/docs/conf.py b/docs/conf.py new file mode 100644 index 0000000..b8e839a --- /dev/null +++ b/docs/conf.py @@ -0,0 +1,57 @@ +# Configuration file for the Sphinx documentation builder. +# +# For the full list of built-in configuration values, see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Project information ----------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information + +from binette import __version__ + +project = 'Binette' +copyright = '2024, Jean Mainguy' +author = 'Jean Mainguy' +release = __version__ + + +# -- General configuration --------------------------------------------------- +# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration + +extensions = [ + "myst_parser", + # "sphinxcontrib.jquery", + "sphinx.ext.duration", + "sphinx.ext.autosectionlabel", + "sphinx.ext.autodoc", + 'sphinx_search.extension' +] + + +source_suffix = { + '.md': 'markdown' +} + + +templates_path = ['_templates'] + + +# Prefix document path to section labels, to use: +# `path/to/file:heading` instead of just `heading` +autosectionlabel_prefix_document = True + +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] + + + +# -- Options for HTML output ------------------------------------------------- +# 
https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output + +html_theme = 'sphinx_rtd_theme' #'alabaster' # + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + + + diff --git a/docs/contribution.md b/docs/contribution.md new file mode 100644 index 0000000..eee4511 --- /dev/null +++ b/docs/contribution.md @@ -0,0 +1,19 @@ +# Contributing + +Thank you for your interest in contributing to Binette! This is an open-source project and everyone is welcome to contribute to it. + +- ## Reporting a Bug + + - Check the [Issues](https://github.com/genotoul-bioinfo/Binette/issues) page to see if the bug is already reported. + - If it's not reported, create a new [issue](https://github.com/genotoul-bioinfo/Binette/issues). + +- ## Fixing a Bug + + - Fix a bug by opening a new GitHub pull request (PR). + - Describe the issue and your solution in the PR, including the relevant issue number if applicable. + +- ## Suggesting a New Feature + + - Share your ideas for new features by opening a [new issue](https://github.com/genotoul-bioinfo/Binette/issues). + + diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 0000000..6f72455 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,34 @@ +% Binette documentation master file, created by +% sphinx-quickstart on Thu Jan 11 21:13:20 2024. +% You can adapt this file completely to your liking, but it should at least +% contain the root `toctree` directive. 
+ + +[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/binette/README.html) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/downloads.svg)](https://anaconda.org/bioconda/binette) [![Test Coverage](https://genotoul-bioinfo.github.io/Binette/coverage-badge.svg)](https://genotoul-bioinfo.github.io/Binette/) + +# Binette + + +Binette is a fast and accurate binning refinement tool to construct high quality MAGs from the output of multiple binning tools. + +From the input bin sets, Binette constructs new hybrid bins. A bin can be seen as a set of contigs. When at least two bins overlap, meaning they share at least one contig, Binette utilizes basic set operations to create new bins. +- Intersection bin: This bin consists of the contigs that are shared by the overlapping bins. +- Difference bin: This bin contains the contigs that are exclusively found in one bin and not present in the others. +- Union bin: The union bin includes all the contigs contained within the overlapping bins + +It then uses CheckM2 to assess bins quality to finally select the best bins possible. + +Binette is inspired by the metaWRAP bin-refinement tool but it effectively solves all the problems from that very tool. +- Enhanced Speed: Binette significantly improves the speed of the refinement process. It achieves this by launching the initial steps of CheckM2, such as Prodigal and Diamond runs, only once on all contigs. These intermediate results are then utilized to assess the quality of any given bin, eliminating redundant computations and accelerating the refinement process. +- No Limit on Input Bin Sets: Unlike its predecessor, Binette is not constrained by the number of input bin sets. It can handle and process multiple bin sets simultaneously. 
+ +```{toctree} +:caption: 'Documentation' +:maxdepth: 2 + +installation +usage +contribution +api/api_ref +``` + diff --git a/docs/installation.md b/docs/installation.md new file mode 100644 index 0000000..ac7fdaf --- /dev/null +++ b/docs/installation.md @@ -0,0 +1,55 @@ + +# Installation + +## With Bioconda + +Binette can be easily installed with conda + +```bash +conda create -c bioconda -c defaults -c conda-forge -n binette binette +conda activate binette +``` + +Binette should be able to run : + +``` +binette -h +``` + + +## From a conda environment + +Clone this repository: +``` +git clone https://github.com/genotoul-bioinfo/Binette +cd Binette +``` + +Then create a Conda environment using the `binette.yaml` file: +``` +conda env create -n binette -f binette.yaml +conda activate binette +``` + +Finally install Binette with pip + +``` +pip install . +``` + +Binette should be able to run : + +``` +binette -h +``` + + +## Downloading the CheckM2 database + +Before using Binette, it is necessary to download the CheckM2 database: + +```bash +checkm2 database --download --path <checkm2/database/> +``` + +Make sure to replace `<checkm2/database/>` with the desired path where you want to store the CheckM2 database. diff --git a/docs/make.bat b/docs/make.bat new file mode 100644 index 0000000..32bb245 --- /dev/null +++ b/docs/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=. +set BUILDDIR=_build + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. 
+ echo.If you don't have Sphinx installed, grab it from + echo.https://www.sphinx-doc.org/ + exit /b 1 +) + +if "%1" == "" goto help + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/docs/requirements.txt b/docs/requirements.txt new file mode 100644 index 0000000..e8024c8 --- /dev/null +++ b/docs/requirements.txt @@ -0,0 +1,6 @@ +sphinx==6.2.1 +sphinx_rtd_theme==1.2.2 +readthedocs-sphinx-search==0.3.1 +sphinx-autobuild==2021.3.14 +myst-parser==1.0.0 +docutils==0.18.1 \ No newline at end of file diff --git a/docs/usage.md b/docs/usage.md new file mode 100644 index 0000000..2108a50 --- /dev/null +++ b/docs/usage.md @@ -0,0 +1,82 @@ + +# Usage + +## Input Formats + +Binette supports two input formats for bin sets: + +1. **Contig2bin Tables:** You can provide bin sets using contig2bin tables, which establish the relationship between each contig and its corresponding bin. In this format, you need to specify the `--contig2bin_tables` argument. + +For example, consider the following two `contig2bin_tables`: + +- `bin_set1.tsv`: + + ```tsv + contig_1 binA + contig_8 binA + contig_15 binB + contig_9 binC + ``` + +- `bin_set2.tsv`: + + ```tsv + contig_1 bin.0 + contig_8 bin.0 + contig_15 bin.1 + contig_9 bin.2 + contig_10 bin.0 + ``` + + The `binette` command to process this input would be: + + ```bash + binette --contig2bin_tables bin_set1.tsv bin_set2.tsv --contigs assembly.fasta + ``` + +2. **Bin Directories:** Alternatively, you can use bin directories, where each bin is represented by a separate FASTA file. For this format, you need to provide the `--bin_dirs` argument. 
Here's an example of two bin directories: + + ``` + bin_set1/ + ├── binA.fa: contains sequences of contig_1, contig_8 + ├── binB.fa: contains sequences of contig_15 + └── binC.fa: contains sequences of contig_9 + ``` + + ``` + bin_set2/ + ├── binA.fa: contains sequences of contig_1, contig_8, contig_10 + ├── binB.fa: contains sequences of contig_15 + └── binC.fa: contains sequences of contig_9 + ``` + + The `binette` command to process this input would be: + + ```bash + binette --bin_dirs bin_set1 bin_set2 --contigs assembly.fasta + ``` + +In both formats, the `--contigs` argument should specify a FASTA file containing all the contigs found in the bins. Typically, this file would be the assembly FASTA file used to generate the bins. In these examples, the `assembly.fasta` file should contain at least the five contigs mentioned in the `contig2bin_tables` files or in the bin fasta files: `contig_1`, `contig_8`, `contig_15`, `contig_9`, and `contig_10`. + +## Outputs + +Binette results are stored in the `results` directory. You can specify a different directory using the `--outdir` option. + +In this directory you will find: +- `final_bins_quality_reports.tsv`: This is a TSV (tab-separated values) file containing quality information about the final selected bins. +- `final_bins/`: This directory stores all the selected bins in fasta format. +- `temporary_files/`: This directory contains intermediate files. If you choose to use the `--resume` option, Binette will utilize files in this directory to prevent the recomputation of time-consuming steps. + + +The `final_bins_quality_reports.tsv` file contains the following columns: +| Column Name | Description | +|---------------------|--------------------------------------------------------------------------------------------------------------| +| **bin_id** | This column displays the unique ID of the bin. 
| +| **origin** | Indicates the source or origin of the bin, specifying from which bin set it originates or the intermediate set operation that created it. | +| **name** | The name of the bin. | +| **completeness** | The completeness of the bin, determined by CheckM2. | +| **contamination** | The contamination of the bin, determined by CheckM2. | +| **score** | This column displays the computed score, which is calculated as: `completeness - contamination * weight`. You can customize the contamination weight using the `--contamination_weight` option. | +| **size** | Represents the size of the bin in nucleotides. | +| **N50** | Displays the N50 of the bin. | +| **contig_count** | The number of contigs contained within the bin. \ No newline at end of file From 05c6d406d96e845e84793c4b8c56006b0a5a5903 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 11 Jan 2024 22:04:07 +0100 Subject: [PATCH 21/49] add .readthedocs.yaml to let readthedoc build the doc --- .readthedocs.yaml | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .readthedocs.yaml diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 0000000..f940de9 --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,42 @@ +# .readthedocs.yaml +# Read the Docs configuration file +# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details + +# Required +version: 2 +python: + install: + - method: pip + path: . 
+ +# Set the OS, Python version and other tools you might need +# build: +# os: ubuntu-22.04 +# tools: +# python: "3.8" + + +build: + os: "ubuntu-22.04" + tools: + python: "mambaforge-22.9" + +conda: + environment: binette.yaml + + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: docs/conf.py + +# Optionally build your docs in additional formats such as PDF and ePub +# formats: +# - pdf +# - epub + +# Optional but recommended, declare the Python requirements required +# to build your documentation +# See https://docs.readthedocs.io/en/stable/guides/reproducible-builds.html +# python: +# install: +# - requirements: docs/requirements.txt \ No newline at end of file From 4a94fc909b3d573c822c43bcd25bd04054ff1439 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 11 Jan 2024 22:35:22 +0100 Subject: [PATCH 22/49] try install in readthedoc with requirements.txt --- .readthedocs.yaml | 10 ++-------- requirements.txt | 10 ++++++++++ 2 files changed, 12 insertions(+), 8 deletions(-) create mode 100644 requirements.txt diff --git a/.readthedocs.yaml b/.readthedocs.yaml index f940de9..1a5f5fb 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -6,6 +6,8 @@ version: 2 python: install: + - requirements: docs/requirements.txt + - requirements: requirements.txt - method: pip path: . 
@@ -16,14 +18,6 @@ python: # python: "3.8" -build: - os: "ubuntu-22.04" - tools: - python: "mambaforge-22.9" - -conda: - environment: binette.yaml - # Build documentation in the "docs/" directory with Sphinx sphinx: diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..5624289 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,10 @@ +checkm2==1.* +networkx==3.* +numpy==1.19.2 +packaging>=23.* +pandas==1.4.0 +pyfastx>=2.* +pyrodigal>=2.* +requests==2.* +tqdm==4.* +pandas==1.4.0 \ No newline at end of file From a7a54bcb9f99e3411398ddaa45493ce1925a0372 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 11 Jan 2024 22:37:54 +0100 Subject: [PATCH 23/49] fix .readthedoc.yaml --- .readthedocs.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 1a5f5fb..04c3852 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -12,10 +12,10 @@ python: path: . # Set the OS, Python version and other tools you might need -# build: -# os: ubuntu-22.04 -# tools: -# python: "3.8" +build: + os: ubuntu-22.04 + tools: + python: "3.8" From 34a740b7799c0cec0b057b8966b0b9f05200fc3c Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Fri, 12 Jan 2024 08:41:42 +0100 Subject: [PATCH 24/49] remove useless arg extension --- binette/binette.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/binette/binette.py b/binette/binette.py index ee2f545..01a64e1 100755 --- a/binette/binette.py +++ b/binette/binette.py @@ -72,14 +72,6 @@ def parse_arguments(args): # Other parameters category other_group = parser.add_argument_group('Other Arguments') - - other_group.add_argument( - "-e", - "--extension", - default="fasta", - help="Extension of fasta files in bin folders " - "(necessary when --bin_dirs is used).", - ) other_group.add_argument( "-m", From 3eee02acfb932d83212a6e3e0b77a8a2c395c9e1 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Fri, 12 Jan 2024 09:30:59 +0100 Subject: [PATCH 25/49] change 
contribution page name --- docs/{contribution.md => contributing.md} | 0 docs/index.md | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename docs/{contribution.md => contributing.md} (100%) diff --git a/docs/contribution.md b/docs/contributing.md similarity index 100% rename from docs/contribution.md rename to docs/contributing.md diff --git a/docs/index.md b/docs/index.md index 6f72455..1bf9efd 100644 --- a/docs/index.md +++ b/docs/index.md @@ -28,7 +28,7 @@ Binette is inspired from the metaWRAP bin-refinement tool but it effectively sol installation usage -contribution +contributing api/api_ref ``` From f0edd0f3be2bd94a287caa311add26b7cd8f6e74 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Fri, 12 Jan 2024 18:26:25 +0100 Subject: [PATCH 26/49] improve args help --- binette/binette.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/binette/binette.py b/binette/binette.py index 01a64e1..80d1e90 100755 --- a/binette/binette.py +++ b/binette/binette.py @@ -104,9 +104,14 @@ def parse_arguments(args): other_group.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true") - other_group.add_argument("--debug", help="Active debug mode", action="store_true") + other_group.add_argument("--debug", help="Activate debug mode", action="store_true") + + other_group.add_argument("--resume", + action="store_true", + help="Activate resume mode. Binette will examine the 'temporary_files' directory " + "within the output directory and reuse any existing files if possible." 
+ ) - other_group.add_argument("--resume", help="Active resume mode", action="store_true") other_group.add_argument("--version", action="version", version=binette.__version__) From 9d11a3cbf822d96d76ffe61c9df9ecd6ba081584 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Fri, 12 Jan 2024 18:32:42 +0100 Subject: [PATCH 27/49] update figure --- paper/binette_overview.pdf | Bin 13591 -> 13584 bytes paper/paper.md | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/paper/binette_overview.pdf b/paper/binette_overview.pdf index c31f7e8b2f0f0623a6f1a5f2c46c00293fc3ac4f..bc9978fafc8550d97c9ecf5e0ceff8b6c6f63763 100644 GIT binary patch delta 7578 zcmZ{lRa6v!x9;hVAqHs}hVBv=y1P51yKyK%V5CEOq#FdJ8>B(HmF|}APC5U3?z%5` zo%6KoTfgR5}gt?VU!AZUH3HyMU)o8w?PPBQ7cc7DQF|qAK{9(Scv=gSg?VxUh&{-XQP5YP7JegoSBwV2<>iXbWPmB>sjGs>s0Zld|-g z-Z!ZvGMb}Z9h#64n9di+u)O5@^)6(3fE~VaL*r-#3$$Ed$;BC?KAWc%#T}UfQu?Jc z-HK&WK_KTA&K1-cP7GRU<@Yl*M&OmqEv&(kMfW4juFV`FtGPiyh=@sXU5pTNXF6uR z*6dfS?w_3TlYp2!7fR#1L^7u1fn=b52Lz`E9s%G zZB4&l0mwM9o))gTP-U+N=^Q#EHGr^^hebCn%QDLV3ORbAeldvRuCaFQDKVYNg`7_d z#U6cJc*c+^Ypvd;Xen4vfDCCTa@I#Rk#w6l^j!5_8%Mi=O_Hk_TJ%Fs*^id@m_}!- zs=EmSNixYW!IET3zapg@c$Ns`y2<&r?jy6v)Q~&^wD`A`Ix^iq`bZ2^d?W)9K`O2uuL_!b-6Z#8}GdBfjfJ8OW!^T7@IF_{&28vw4oW>zyA z+N3KS}6nwJ^?G z5T6Q6zOi`06?LX%_Ge zosnHtO0cA~y!LW(HngwPO`Oz$mD2b@lu`lJKSqLwdK7>{pI&YOeCHTY5(B#YGzQ}d z0l1S#hJBN!93h%-tQy`CM&;pVcnCXnv6mP9y2aDVZEA&bDl`%v)|Z<+O&P;i-((sw z9;?az2LkvPwl7Z>Uuut4J;f$x5W49CWN(s?zVT3nw+s^X-DzV=wGTEB#yI+=25K@BKSnA=Qm|*MNn`?v(Wc;Ypcbwh zFXpQdFBw?g_7;P8A9&~P?ifpT#t-i2o9#R8jVrx|_^)N!)y%MbU0_)mIL0Vo)#{wo zam;bP7cN*ZE0yU#{$R`)z2aR$;X3cu;_&InC!FnbBvwL9BUmBggv4f#l?+B!3zh}5 z6O#SB13@h^t(Lp+C2x-nAAyp#w)Tdu2^JkTMd*$ShMZ*5LGacCt+0cz-{q?KUKLRFHg#@u8j<;oYSo5tt0b< zx7Sy{m&dJ-0m7mZovn}Fi@l_S==`W6x{?E+ll4{H^Xm0UzPQiidE&5u0P)p0iSCbk zN3fl&Jhv{Y#2AkNpNE-2@oovx0Lr{O8u96T848&Y27dIf*#+8VlwhYRa-_Kr7+elp z0B$8sjQfCxPok@0tKu~B$#La^E3(cirb)6l6UX|JxQXC-sR1fLii~RbgvV_{pKY&jUanhVoTvx+s?o-DZ}n%xL`A!4y8U2_Z~G!l 
zlbZ$XK91w`pi;3_tTKadVct=nwkP0hIUJ=uffE{1x;pt9gM1uU^+(L z8Y-Ft6qP5FRw;|yD}+=UjTV?bhY^SW7-N*J@G`S(E~Jq|21aAheHd5_k>-?B(i77K zJLtRBf+H)z?*iB?+<^I~2uWULgNHx;iz8EOST+wZ?3o3HEdKz)sAoU@N>(IPN+nK4 zidQT~wnHj3MSx-5YMMQI*K_=Qmp{gTi~%%WEK{cdYyN~ZjJ@_4Yhp%D{?tRnLw)b- z81kR3D=WeHGay@1)S`@!qdQbT3a~Ha2E@Q?dFeBaKbvYFNS)%gdMP#t+Ct?Pdq%uV z*o^l=jcqcY5r8IS<;EsDi35`po)SN4CufZ}!AGc5elYzcGR4gmcDX1!7^<3B*l%Ry z;^ZFT{(?ML@itm!)8up2X!YWnWksJ_TF(6e3F%D=Rm7+p0q-E}(|#EgqyEfi+LRA| z<#rU-wP~)Wp55Zx1Qe%4@joe&BnQo>658SrZKHRtzW+92LTozNvYRKXWqfn|BBT8^ zoKf>}#Sx~Pk#5Ki6cR>JN_5BEL+w3ubes8f6-TgL!^z73{2il2W>Z^HVs|md&@hs7 zC%iPd{=n>uT&+>$kAqQ7{+09)F{pD6oX&Kb5-Y4j*%OE=kY3<6*^7eCP~gH5PL1GF z5!ur2JfY0xQNWBEDM|IKZh@BTk!0fS_2IXtyfW+}$GdQ+BNU0((4D1dMDPQSsM35A zyV0*^(tIvJZQ|ac0A?__x9MT5TkN+ldzb6>+W@KcTe-#5T4ZV?^*uU=8&Z(IW0k!% z0-2av{~m-D$q`W6S$dxdbyb0&=

R>e^M#*b# zAePmz#mXaKY!TPI#A3RhOWFr4VBsoDMF(Y*&~moMaIRAVWM-Wj9sl3PAY&3 z-6I6%UxnsIF`!0$|3zT4y(M53LNfUuqixezIdAazIiXZ+=9v8k}8G zQx9oO>1B*f=9bm#7G!FZS7D+n#GNC2`;LwpP)HnXLf);ewRGufY#(`A0NW3!H!Sk^ zCnq+Vh9e|9hh=Hjr|&Tml-id_4V-ours)hU7%8b;j+6p&FmjqX=DTKNJ<-XR<#@!b#GyQ-)~CZSP}WnTvag%#2v>!-U#T7&qQzBP?cz6X zMwJeuR`q9qNS)R)_X$bsF!zkgq0)z#1xTBiG@z$r&nI}}3xTAcnZPV+b9mkt>Q+Oh zYv!w%)Ih5%HEM&yEmghEsS>RfEyClZMWL7fy3Zdpne%Ic(Y{7Cst0FtFFz zyLC{hEaAPa!{e$0jH8+X&9}Z3N6vp#Q!*sLeu4K3XUTsqLH#JHwy<-AqjytDp8RbX z$V-FNIPGigXbcdd<*yfp9}j$`^Yq`OjA>zl&Wu$h&WfNk ztzDJXG^@N^_}V|JX;0fI)a(kiJqA0AbkUpBx|nUJVXb#_Uq4lF>61%es*$B3PUPM= zsl4(RYtg#H1i$hZ-`OrmRD~Espig6~=(v53R5d6=45{(sBqozY4z)-%Txx(+9y7Vx zPoB0>mk;!`DiMtG{PniyA)2(lEe%r4w!NnhMLm=98ND4or(x`Cm#fRBNW%lF11e{* zbu@edL&tObQbPJs2sciS*}uyd?<_7lPR(i9Z}iqF(?0$Akh|_7o&DXu#fH83(9I_r zVFUjS{Slud9KMf7gTy1)fPs&L2>Y!MfJY#zMukR83GPU44%BK+lYrmWo6&}zug<6x z$d#gZyC%bSmpd^;YB4UPW^8&B3voRPEKgW(d;7FvM8~#~_d6^AegrgpgD69M+4D~I z)h@MfEfiI$WZF-g*X}$A@`9ypS>q>Z&)z$Hm1YhqP3^*eKA)7ivL@J);r(l|nJxUx z3$UEIeD@CmF95{A%p4h3>Z?<{2a;`T@bZ!aOybW~QUhrEC+EVzi2#g@X~`@}84R2p zG~Pz9{F&rbZPIkZ+DHcUtG8pC1v6@JrQEgBm; z*RMem=j|BaF#ZGXLV)pl=g<2bd-d*^#5-1L`i8dSp5|_tgy9Ch$XRfcsq+FywXUw{f?cB?q)lWfYmk8_QY(p`&*Hjo%}flE*MCd?*cHHcg}hvs+#5plqGact62F zewkx&^^fkmmOH@EAM2TxA-~?9h@aQr7#+PX1K0lut%wuFjyI17ppJ;+RGEX&tfb}B zYeP(!LG^pwZ1qpsjH#slUmXRK0fc3lUOLVy20$KLuKVqyr%wm>93qX?8}kx~CJD-(CR*Ic%qOHJs@-w@wZSHuTv89-`Qhw8P}GOWpQxo^}#*97D?Xagiv z{$aFgV?EI(YsrxvU`5i|m%R8_=uMfh^P=?MjE%DLeb7sRY{*@_Iz|=Y2TZNJ+q>3p zT}4D9?C}EcXxF1iJ2nDdyc`NTE&H9as!tuZDEbP-r*c)-WSa*5?8o!#H^x}i90$WR zTN+daD=SvFXIEVOJ4*{n8WODARc=DUTl5bh8bwy0iEDr5Z$wopQ%hYf0$;N%G34kH z6=JDQdMB&@)_R3}j~oEy`3fy}*Hro0`-+ZO%&#g^a&mC3PQBuBYY7zo+XvHr)&l_--(` z+sGA8KfK8K*AyMNeyag_p>1QUf~1+JRMnCyD7t?3z9U(($TCw`L6szvGV`v&v0;uj zamaGTv0tt*xx=Hs?=Q}|J5&6=a&{0Cf48+AKev8#v_2Pq)7#t}didZH`04@^O)6Vy zDxiURHbi*Vv49qoKmE{Uo+mL;slSU5}>TeX@sM*eSe9!uGEpAS-u8AAk2B{6v-xHO@Q4i({m^Q1e{ei7XrP5XoJu$)8{X z^^ZDs;oQv}6AXsShVSmt(Vg(tAZx^Tw<~Pn;k_xr2dR@d-=?#@>bplx_YR_QN*zY$ 
z30N1JJX5CS+N5rtoAJ;)9h=L^E9l;woNyD;v?d&R=v9oWdNH@$`Ai*FB=?^LP|uWF_s%v3^e2{dt=1s z^QVZWrop_MrW3M<-Oh~3y2GsVtn&u^Mr2}@u4WE$;LeeoLsm6|eX(FxR_1_&Yf60} zM>Sf-b|{Hu4CpU!!VUte<#$r$e5f8tQQ@QiSAy1cloyz_w^ne(?=Y6%yx43^rO*5f z#?m^ONL5ZjmJuu*A@!+Gsv_SU5 z^R0|V~nv4chphehqg7FYwu^5$Q?1YSL}AXJcN06U*YfE5ambLw zbdk<}=Jc!9L76>jsT_Cn*m%)f$ypmoo_Da^+8;4(G9vO>B&OA_YNj$MGkbLh%Uywj zPdj%uEiEH@cIS+E2`x;|L2ph`whA{diY*1h=9{;xu>Gybrx3o=u@&5ZXZfIvOO`nq zQ0p_7G=I`(7VPO>Wz06@svVw=sQVq6hoCDgh`Z-fNHgOK+-K;iZMCIH;gZ?x%Ra1S zKhN``^H}y4n9~J8b>V|*F6ZDmhUib znv%B!Zb6vmdgwMg{H6?Am$!+Gmq&I6egUBKu&46jT(C&8%J!K;aMsd{uiyMfeh@)l z;Nf|q0_MwC__A7QfB*X8ci#rTJj4De6b0OU_o}_H_Qy^xQos5b;=at)iEPPlt6@jO z{PKw}h$1aiwE!X-A!>u3Nn63~lMm{kI^>p>&9(l-)pHI`3e)_yib%KB88%+X93;C$ zGeF4R3%f)mF3$bZBuq?;V@&_7#g;2wyvuC-eKaxk!XZQ$?U;jEu{oD zAPRS7&rgJ!-b7EFVN1yfG@%dIqOlqTtY7XgLe9auq#=m+;rG3tOoF%!G@+Yz!>`AP z6I0e`F0Z0y5?925Tu*`=PQ!2{N?K$g9$q*Dr6Npw%q^4~(DBR`po&kxWvz`VS3Zp$ zDXzPWkMLP3i98esa&dp_U#NS8=+lv#bLk`qc|R3LVUW@BJkr=WYUwpU!)w|JTAK0m zq8lrwn(!AH=x+6DP?Y}I&cS9%E~r{qZF@z)nKtJRy|*dNr4IYX>vx^{axe+K_jXEh zs)1d<$h?DAs195*bKpvy6=#{ZV~^02oKsMo4SBCb)=LcT3&6*BAbs$08^;V0nJ!Oz zLhnp!TQM93smXusY*^oJLLbjMe#}|tmpEb+&J%ycD6F22lEL_i!b!$v0rbFApQOww z60pZno5<8d4C?heaG2L33*eii*u~vJ`~=%HcF@-%>dFjcJ21GMK;OlW-6}3hLv8*~ zSWjOJj_ii?C#)G@8UXflx9vS#m=Er+SGIR>>dGF%Qi0=yv zXpr!a1%T8P=L>-}<8#loSRXNW@Gfo?6z;`xN;4|9@3AAb+c&Um-E2I)pthE-@KY)h zEdKvfyIVP7@xZxgNZ_LMWF-1}(nk7fY+xB)u#^y3P7W+20EY0xvAJo{q=e*n{*!*9 zzWQHO4PR*HP)Kz3wDmM({x3oP|0Br%H-U$n4qi<^`@bp0cmm-7MzjBA+yGAMfjY zN0k)M&`C`%jLrd_|y>v;am7*a1MTXn6-t)!&_c-a8__|bTB+PIO!Zc z(P9(5jv5Km&zrWu4v`+G8xixL8!d?Yog(Q3dR;RDCK)220FmBi0|^sCLR&D5LH`Xe zB#J^(IviJa7ztDFbE4nWmoPK5CCljfDGFrN*aci?gr`Z7`ht6Gpcaos(;Z~MR8 zbMN`$eCK=*H_1N3K1m${9K`-{Uw2Tn(A9kW2O$)pV*(jtGt6=i`0zea)f$ zw0@4n$($HEV472CAAgAkLN|)+o2bQVK^@ds6uYL7GE~^M?F;sj*yZ2-t>0*0ZQv9= z52p{Kc5UEi>{!2yGJ+nv*aA3Ub3ndUd^^=BoM zkyZlIJn$GUzln#zO4~iWjZ5a7_tmRpVj6|Dy^E;$G84s}N_{$tnbaOFI)wQ&zKF~+ zfQe|_3b|@FIlTW!SPGZ1Vd8EpHIkawEfS|YvC%q82iV0xgpqnE*=l|dC~O?3`gEDb 
zT+zaZD7-ZQBznx2!G9Dr!X;^Ph#A)*z#D>O*I-*R5hIG&j*%{qN>{A!UOu@!JO$f& zTU{MLZyo(P>+Lu$Fx^4wAn_G1r)Nm`!ES(t96CDxv*4YmNc>v(-o@i%J!HBTCzj{ENUmpsrV7n+Xe|u*oe=Vvf%cR@T(1K8 zEJkK|Qb0^p<6MhVup#K-<-1Hh;3H<#yI2}hV8FNYk)OE?i+-zWK}8R14t-<8NCX0R zQXW{za1&h>{mI0V!HR0Ex0dj%rpAFQX3Pm(Fzpv=qSrLKe#mU_Wuv(2Jh0+8^6bC9 zSv#0AUb1VEJ8g=9LHT18xh`lOn@DWHmb5_~Gz zA}Q>uhv;3;rD~KDh(k6_CEJYa_o|UeQl_yXK~M86_lZfeOp6~Fxb0I~xZa>3Af1;P zy)XwROBvT_GE<6`UCTy9?4`?KBA`rhi#Dc&EiRm>wDStn#`u(4WQv|xj6>KBTT9iL zu9f)(?Wz@zg=bZL955SOY+U}&@_UN_*K7`5D#zOai!ADzFHRvstwO8RdQ>8D-Vy=c zhu6P&;@T5!$h_)g>-L-Y|K0drR@RjXwY}`uzpRh1e-VZXeP+P&A}Dzd#B9u;T$f+d z$g1pkmt_TY3jP}iiA0j~dUyHS6BxYInNe|c8*uTZm?m*3V%mcqv!HUw8)=GL?z0gb zN&3?OY(1#?TkZ>)bC|%T_jhpJ<7IW)^-$ zw91Phr&pqb(DBY_`C3YuIy$KfPs)|wuU71vrsaOmM$)!)kl(asRjG6qXxJv2g3xC{ z3bA)*@0?9>ihfIl(K4tz@2D6$wOw6Yt!!O@eo(sZTaGeX%( zA`aLwtolj_ka4jdH3hP|GFE&rWqKSvH_Ok`DV;xFji18cBHt}2p_N6_NasAn5i5rl zeZ>dveSG6z^==N70-kd>y~>xA+(c)L+#W++YRhsFyzO|h|=L(AgUVrZ`) zfh)q1T>vC1r(GzD%#S(z?v{bE=~Z)%x56_*SBJ=XkOs5%O=y~E7l`Y}cCeE=8h=Hx ztb25C4Z_HEV+ngbo90-=gGJ%c#~65_^?ByU$o&``*s3J~(c>q%<<#&#DnO1LzBY=+ z2RwqJABdOd z7JK^1A#ZqKbS~5Pg|V8nIDL~1a!F+j)trmTiyyUxEq=bGE8}-;V>V#oE~vK8?2~h3 zS}A0}d`*x>V=?dz>WSo153RWbb+>d5E#2wDYatt$T@!$stqggV6aK7IrY>sFoIcx+ zo=8yNs|H?2aK>ldJcd>g#f^!JOH~asr`FR;NKaKiJW`{kg163Ye9f+Ah+fvGai3Vm z-$Xd%f>Fd3Hp zd%bP~^tyCQi|`hjWH-EzNucs2t1qex>WY?o^$WJ`jSDU_)0y7*;)YtiLLVgCnBZr^ zuHgNujVF4y^fs$I5{anH)6`?S0(wnY6Ch1e^uZ^5#w<3Udh*s$GkT5;I}n245$42M zqb5~RY_<&E|3XVBLC`p@?kT>G9v50ifPzTNrmdeGYMhW)t~GUat@Ct>C{bsn^)R1~`WE99@MGrT)m@n$ zIjbcbKx<}qz4dscfS;Y%bq#@iiBAg0d8`5(?#kjKR68nti@rWxw>P0l5Yfq+kw9QG zr_!;TfT@6-xS;0cRY*TvF*OqM9Hq`s^!l>DMPstRXw}ha)^jD+c!({zfR}mccb}|SyHG=Lo1Wh z9SaLqCw$E;#%S*}2}rT_IT44#SkVj-S!3*lm|#_=J3@lKR&=A4cF673ifr}hDOjd` zo(T#GPM$-utvvY*X^`fxLOAQ+5MX%(%yRVBL!Ag;s8lN3TJ1iuw3f~+yg7BXbx55z zCL7?d(DJEWok7?;$W6wiv|SEFM>sE3xj_^TgeW^7GD?x%e6-=JVs{V|M$b2#|+ ziJas&(ximd3FJGnF?cL4X?jbR$t;tuvI{$&mG9;qsC=iZ(gl5mQg6roT&`C8=ap$U%NFer74JeKqZ6+pm{wzpE-@srXi8~f-2jUoT9suxaV?5* 
z3g}mF_MyZ@xZo8<5Q?Z6@7J+%y@`=pJ|8^t`&mh8i?qbue=UsQOK2e?@0i634LnY6mUeO#Sd`2nHj5ftz+CtbN=(y(JpU?O8knRd-ofdY3Yd~qDrmWh88cd) z3Cc$>I==cbySalBAjX?usrI{|x#%x9YB!Y-JoyDRk$q@Wk9`{@!{IBwf2Cro<`m~rTYG^Pr)F(L_BioBZCkT)e@su%Imohf z!%@jQ0oPiY*)tvy=6v9O<~PSLA0%{^El$+Xf?F0G;QJ^L ze0^E}cd}LWrTba$U<%75{6mCu$n@EBFDq{S*HukOkF`=|)x8G4F86Aknop}5$M_tG zQ_)q^U9xk`pB1<^_{)1gT#ey9DEwwog(UkI-ll`6nK>Jlm3}6a!u%uv&T+ZToX(%_ za~A=f-zf)Cf4*nwJ&*j0e87^RgR78F~k9QhAznfl#+tt=TZ=@d0#D3f1dg1$Ym-(iX1wHXQ ze4@H5GzD(p{KcjrZS!V39pb6!Q0ma$s#TKMf*|&Ue@hZ>C?0_Ajh!+x) z&-fD=ID*gJb@V+nq*@c`Z6jpYyb{ll6R+o<5xqyMJ+H$wuN?zcr<~YAq|)0(V2iVs z-jsTi-rwgBRj>x_;R5Lfs-orW3LN5WgzX6e&@=;5W>js}SQmD7AssuGf4(<+e5)%h zb@|5QQQ!H;j6qP>R4l`Bbm8>Vu5xHDXIrH55Q-4_d|dgL&8sE9H>eYioAdSwnw)Q< zZJ7$W6M?!oeY!#^bT+aDxxv*9z00C&iz`Ygfjpy83Dh~ARlK_gw2Q@n9PYK7t2-(d zekZmE%%0%-L=yM}86>_bzZ`0*$8)DN)AxpfkPsgk%qL4buv&%c_v7qI`=jbY63U4_ zb*x8>fte-^d17qt5QdpyHP@5UC@l%Z#mr3oORkXi%E(lnvWAzbu=53=mGUHIVqxBS zGX_zYr;%lW*DY5Q(Id#AYiX>CjL|ICcx_bU5I7w7r8js(9ujC~8gM<4Zxz#B{Vn+- zdadGMEL1|Q+}04+rWDufHvTT6{Ud)MIwWW?kQm#9}z`0;vjL2bu4r5I|S)ET36$e8M)Qg zUazk^RMpskxXP!7LVg72%YXB@+`V__P60lA`|E-Y_^pfT*4lZ5t2mdev>03A_gls;s!Jj|mbM+`=mDYr87Q%qp1=MRG6s4Z<{sDNe}I zBOpi=m(^%`W89F6)SVGo{#dhu@=X48ImBW-%BA|ozg;mud5bkIiHC~4V8vdI9dxrR zD~5qDOAS#185LTrf4x*^ZELwoe*gAj5q2@rTSc^;qgCcAmQ0YMEHtx?fxS@#EumA! 
z#!z6gA+afTiG9i-0;8*B(y}#uZ{(Y;V}{HUWX}+RwE?kB%6=Ge;8EV3ro`H9g9+W8 zDSIksYQ|0_)|vjJ-{r8rz1t&K`KL6&Pqt?LBor-s?E8X-Xu0FajQ)#Fl_yE>Bubl* zMjEHdoar<+qxX<6_f$;{)yo)eJWVlb01Q#ivXFe-yDiwAZsJeFQ zkRIn8^#EpOo$yR+mU8#`0*#~SmvFzAgqX52?3A4tUf7`Xe1sTs8OqKcjWn(Ss44oi z`nEsJFY5C!zkxhtF}v;Vrf!Vt z2TJ-QU(ZCjHVk2pUK?yWlDVr6V{z+K{H5pGH72#gt>c|&xx8Gn{6=VUZgqVYrP+P6 zb@`xEH!7%@;Lmq{lKX0(SZh@dXMLVqpMGwURvGy09$N@deKLw8`;ym{7K^`Y<|ON{qmENP`u9(Eo%I@$;%7YU{o8Sy>zdNlucRm3LxqVbT|*bDt5O0R$l}C+UBPI&J+4>O zm`YXNxFLKY+tZ2Dyvd7~ezse8ncG~Y&0A3y9FH!!WNRaG>-0J>9ikvs&ZE7sY;4{- zOnIt1RhAU^%pn0vKKJJ<4&$≻`u#xI+)8rtZOoE)Mxct>Alu1oPdl7VAduy>Ec; zdSf|*eau%8cH)mAJEkU_Ljs9EBd7<wcI`vvt&Kus&4~N#yDLJ;`*?bxc4{l6=X9~qkt9*shi6AT7`PNO>uni^+puV z%hvX7m9AeGbAumKt#tZh8(OLNdk(?K^r2@aCGW2aSm(stQQL1>m2_ZY3275*wLckO%snQXY~r=^Vz1HE`}1 zL)jsP5&_bu4+nXX;yf2Mh#&w}qd-WQH(e`}3011gu!pGYUxCiKc{%$GSXE=|J648z zCDfs@^~#HO5_hg-p#nrR*xsF;C1q3{`c`_Y&ZC%?gfGBKMH_10@6~S54o3dli}%H; zzYIO`qFj?Z>O4j(?jS;wVw{qC*6c?HLKjPCB{`-Y_*UsL_wl{g=4*F)cV!Q&Hc2x6 zV)gXGf_lFvBu{!!q|^4lRm1C0-rS#8v$hrA-*vHD& z3;vZKYqHChF|<)~%*5uJEC4i(k5Ybv@?W+^i?5^YYna9qr;9<$|J} zz-scPo_~X{s;aCdrOIMOCnNYjV$OeJ8WfPApe!de8hH2r;6hwMPyz;%{|6D{7HaBZ z_=rIwB_Xe+qw+5nH=UpqoeUqHfb9Rp;+6pk{EHP0q=jm8&iyB*5ZBFrfSSa(O%bW} zr+te{rM3|@UV~2Z8fP>%jG#BEC})OGFvb>`Og6NWM-`DTg^>!%43dNpPq5(hs1t>6 zwJj#6tdmnFiW^W8+4UeTVT_uId=a8l5(jnDb#J+%YdsDIX7O0K18;-07-X=ElQ7jo zU(lM!(*u^Clx>-6>mBmWINxOvEC1l6>RHu{oq&HGQ$d;U4p7sh9>+L}ZR`Gh%Fm5L hD0ThSL3D=NcAY5nUuDo^5G95n2$P;(Mokv;zW{)Af;s>I diff --git a/paper/paper.md b/paper/paper.md index 9050e91..5dccb37 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -38,7 +38,7 @@ We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. 
Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union. This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. -![**Overview of Binette Steps**. **(A) Binette Workflow Overview**: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins. **(B) Intermediate Bin Creation Example**: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins.](./binette_overview.pdf) +![**Overview of Binette Steps**. **(A) Intermediate Bin Creation Example**: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins. **(B) Binette Workflow Overview**: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. 
Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins.](./binette_overview.pdf) From 40c0b2524e44836b728f6ce77729558765ac4e9b Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Fri, 12 Jan 2024 18:32:48 +0100 Subject: [PATCH 28/49] add doi to references when missing --- paper/paper.bib | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/paper/paper.bib b/paper/paper.bib index 09021ce..ab2e2b2 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -31,6 +31,7 @@ @article{kang2019metabat author={Kang, Dongwan D and Li, Feng and Kirton, Edward and Thomas, Ashleigh and Egan, Rob and An, Hong and Wang, Zhong}, journal={PeerJ}, volume={7}, + DOI={10.7717/peerj.7359}, pages={e7359}, year={2019}, publisher={PeerJ Inc.} @@ -41,6 +42,7 @@ @article{alneberg2014concoct author={Alneberg, Johannes and Bjarnason, Brynjar Sm{\'a}ri and De Bruijn, Ino and Schirmer, Melanie and Quick, Joshua and Ijaz, Umer Z and Lahti, Leo and Loman, Nicholas J and Andersson, Anders F and Quince, Christopher}, journal={Nature methods}, volume={11}, + DOI={10.1038/nmeth.3103}, number={11}, pages={1144--1146}, year={2014}, @@ -53,6 +55,7 @@ @article{nissen2021improved author={Nissen, Jakob Nybo and Johansen, Joachim and Alles{\o}e, Rosa Lundbye and S{\o}nderby, Casper Kaae and Armenteros, Jose Juan Almagro and Gr{\o}nbech, Christopher Heje and Jensen, Lars Juhl and Nielsen, Henrik Bj{\o}rn and Petersen, Thomas Nordahl and Winther, Ole and others}, journal={Nature biotechnology}, volume={39}, + DOI={10.1038/s41587-020-00777-4}, number={5}, pages={555--560}, year={2021}, @@ -65,6 +68,7 @@ @article{sieber2018dastool author={Sieber, Christian MK and Probst, Alexander J and Sharrar, Allison and Thomas, Brian C and Hess, Matthias and Tringe, Susannah G and Banfield, Jillian F}, journal={Nature microbiology}, volume={3}, + DOI={10.1038/s41564-018-0171-1}, number={7}, pages={836--843}, year={2018}, @@ -77,6 +81,7 @@ 
@article{ruhlemann2022magscot author={R{\"u}hlemann, Malte Christoph and Wacker, Eike Matthias and Ellinghaus, David and Franke, Andre}, journal={Bioinformatics}, volume={38}, + DOI={10.1093/bioinformatics/btac694}, number={24}, pages={5430--5433}, year={2022}, @@ -88,6 +93,7 @@ @article{uritskiy2018metawrap author={Uritskiy, Gherman V and DiRuggiero, Jocelyne and Taylor, James}, journal={Microbiome}, volume={6}, + DOI={10.1186/s40168-018-0541-1}, number={1}, pages={1--13}, year={2018}, @@ -99,6 +105,7 @@ @article{meyer2022critical author={Meyer, Fernando and Fritz, Adrian and Deng, Zhi-Luo and Koslicki, David and Lesker, Till Robin and Gurevich, Alexey and Robertson, Gary and Alser, Mohammed and Antipov, Dmitry and Beghini, Francesco and others}, journal={Nature methods}, volume={19}, + DOI={10.1038/s41592-022-01431-4}, number={4}, pages={429--440}, year={2022}, @@ -110,6 +117,7 @@ @article{parks2015checkm author={Parks, Donovan H and Imelfort, Michael and Skennerton, Connor T and Hugenholtz, Philip and Tyson, Gene W}, journal={Genome research}, volume={25}, + DOI={10.1101/gr.186072.114}, number={7}, pages={1043--1055}, year={2015}, @@ -121,6 +129,7 @@ @article{chklovski2023checkm2 author={Chklovski, Alex and Parks, Donovan H and Woodcroft, Ben J and Tyson, Gene W}, journal={Nature Methods}, volume={20}, + DOI={10.1038/s41592-023-01940-w}, number={8}, pages={1203--1212}, year={2023}, @@ -132,6 +141,7 @@ @article{buchfink2015diamond author={Buchfink, Benjamin and Xie, Chao and Huson, Daniel H}, journal={Nature methods}, volume={12}, + DOI={10.1038/nmeth.3176}, number={1}, pages={59--60}, year={2015}, @@ -143,6 +153,7 @@ @article{larralde2022pyrodigal author={Larralde, Martin}, journal={Journal of Open Source Software}, volume={7}, + DOI={10.21105/joss.04296}, number={72}, pages={4296}, year={2022} @@ -153,6 +164,7 @@ @article{hyatt2010prodigal author={Hyatt, Doug and Chen, Gwo-Liang and LoCascio, Philip F and Land, Miriam L and Larimer, Frank W and Hauser, Loren 
J}, journal={BMC bioinformatics}, volume={11}, + DOI={10.1186/1471-2105-11-119}, pages={1--11}, year={2010}, publisher={Springer} @@ -162,7 +174,7 @@ @article{hyatt2010prodigal @article{metagWGS_inprep, title={MetagWGS, a complete workflow to analyse metagenomic data (from Illumina reads or PacBio HiFi reads)}, - author={Noirot, Céline and Mainguy, Jean and Hoede, Claire}, % need completion with all authors... + author={Mainguy, Jean and Noirot, Céline and Hoede, Claire}, % need completion with all authors... journal={Journal}, year={in preparation} From 2a35d1f2c9a3b082885732a86fb3ab6033a801a6 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Fri, 12 Jan 2024 18:35:22 +0100 Subject: [PATCH 29/49] add fig ref --- paper/paper.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index 5dccb37..24a011d 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -35,15 +35,15 @@ The approach of applying multiple binning methods and combining them has proven We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement module, which addresses the limitations of the latter and ensures better results. # Summary -Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union. This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. +Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. 
Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union (\autoref{fig:example}. A). This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. -![**Overview of Binette Steps**. **(A) Intermediate Bin Creation Example**: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins. **(B) Binette Workflow Overview**: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins.](./binette_overview.pdf) +![**Overview of Binette Steps**. **(A) Intermediate Bin Creation Example**: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins. **(B) Binette Workflow Overview**: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins.\label{fig:overview}](./binette_overview.pdf) -Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. 
Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 2. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins. The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. +Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 2. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins (\autoref{fig:example}. B). The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. 
Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. Binette serves as the bin refinement tool within the [metagWGS](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs) metagenomic analysis pipeline [@metagWGS_inprep], providing a robust and faster alternative to the bin refinement module of the metaWRAP pipeline as well as other similar bin refinement tools. From e17be7db52ad007ae7e985b72a68aee483a7e982 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Sat, 13 Jan 2024 09:16:05 +0100 Subject: [PATCH 30/49] fix fig ref --- paper/paper.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index 24a011d..93cf540 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -35,15 +35,13 @@ The approach of applying multiple binning methods and combining them has proven We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement module, which addresses the limitations of the latter and ensures better results. # Summary -Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union (\autoref{fig:example}. A). This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. +Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. 
It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union (\autoref{fig:overview}. A). This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. ![**Overview of Binette Steps**. **(A) Intermediate Bin Creation Example**: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins. **(B) Binette Workflow Overview**: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins.\label{fig:overview}](./binette_overview.pdf) - - -Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 2. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins (\autoref{fig:example}. B). The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. 
These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. +Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 2. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins (\autoref{fig:overview}. B). The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. Binette serves as the bin refinement tool within the [metagWGS](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs) metagenomic analysis pipeline [@metagWGS_inprep], providing a robust and faster alternative to the bin refinement module of the metaWRAP pipeline as well as other similar bin refinement tools. 
From 4740866a3224c7df66f6c0374228abe157f935a1 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Sat, 13 Jan 2024 09:30:24 +0100 Subject: [PATCH 31/49] adjust fig ref --- paper/paper.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index 93cf540..8afa174 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -35,13 +35,13 @@ The approach of applying multiple binning methods and combining them has proven We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement module, which addresses the limitations of the latter and ensures better results. # Summary -Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union (\autoref{fig:overview}. A). This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. +Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union (\autoref{fig:overview}.A). This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. ![**Overview of Binette Steps**. 
**(A) Intermediate Bin Creation Example**: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins. **(B) Binette Workflow Overview**: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins.\label{fig:overview}](./binette_overview.pdf) -Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 2. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins (\autoref{fig:overview}. B). The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. +Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. 
Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 2. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins (\autoref{fig:overview}.B). The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that provides bindings and an interface to Prodigal [@hyatt2010prodigal]. The intermediate CheckM2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. Binette serves as the bin refinement tool within the [metagWGS](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs) metagenomic analysis pipeline [@metagWGS_inprep], providing a robust and faster alternative to the bin refinement module of the metaWRAP pipeline as well as other similar bin refinement tools.
From b00eae0d3bcee4c028580c3fe72e3f3e2a52c2aa Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Sat, 13 Jan 2024 09:42:08 +0100 Subject: [PATCH 32/49] add metagwgs authors --- paper/paper.bib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paper/paper.bib b/paper/paper.bib index ab2e2b2..33e696b 100644 --- a/paper/paper.bib +++ b/paper/paper.bib @@ -174,7 +174,7 @@ @article{hyatt2010prodigal @article{metagWGS_inprep, title={MetagWGS, a complete workflow to analyse metagenomic data (from Illumina reads or PacBio HiFi reads)}, - author={Mainguy, Jean and Noirot, Céline and Hoede, Claire}, % need completion with all authors... + author={Mainguy, Jean and Vienne, Maïna and Fourquet, Joanna and Darbot, Vincent and Noirot, Céline and Castinel, Adrien and Combes, Sylvie and Gaspin, Christine and Milan, Denis and Donnadieu, Cécile and Iampietro, Carole and Bouchez, Olivier and Pascal, Géraldine and Hoede, Claire}, journal={Journal}, year={in preparation} From a62722dfbaab1df81d6b718d7471fb59ce75d9e1 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Sun, 14 Jan 2024 23:15:42 +0100 Subject: [PATCH 33/49] improve paper text --- paper/paper.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/paper/paper.md b/paper/paper.md index 8afa174..2666e38 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -28,14 +28,14 @@ bibliography: paper.bib # Statement of need Metagenomics enables the study of microbial communities and their individual members through shotgun sequencing. An essential phase of metagenomic analysis is the recovery of metagenome-assembled genomes (MAGs). MAGs serve as a gateway to additional analyses, including the exploration of organism-specific metabolic pathways, and form the basis for comprehensive large-scale metagenomic surveys [@Nayfach2019global_human_gut_microbiome;@Acinas_Sánchez_et_al_2021]. -In a metagenomic analysis, sequence reads are first assembled into longer sequences called contigs. 
These contigs are then grouped into bins based on common characteristics in a process called metagenomic binning to obtain MAGs. There are several tools that can be used to binned contigs into MAGs. These tools are based on various statistical and machine learning methods and use contig characteristics such as tetranucleotide frequencies, GC content and similar abundances across samples [@kang2019metabat;@alneberg2014concoct;@nissen2021improved]. +In a metagenomic analysis, sequence reads are first assembled into longer sequences called contigs. These contigs are then grouped into bins based on common characteristics in a process called binning to obtain MAGs. There are several tools that can be used to binned contigs into MAGs. These tools are based on various statistical and machine learning methods and use contig characteristics such as tetranucleotide frequencies, GC content and similar abundances across samples [@kang2019metabat;@alneberg2014concoct;@nissen2021improved]. The approach of applying multiple binning methods and combining them has proven useful to obtain more and better quality MAGs from metagenomic datasets.This combination process is called bin-refinement and several tools exist to perform such tasks, such as DASTool [@sieber2018dastool], MagScot [@ruhlemann2022magscot] and the bin-refinement module of the metaWRAP pipeline [@uritskiy2018metawrap]. Of these, metaWRAP's bin-refinement tool has demonstrated remarkable efficiency in benchmark analysis [@meyer2022critical]. However, it has certain limitations, most notably its inability to integrate more than three binning results. In addition, it repeatedly uses CheckM [@parks2015checkm] to assess bin quality throughout its execution, which contributes to its slower performance. Furthermore, since it is embedded in a larger framework, it may present challenges when attempting to integrate it into an independent analysis pipeline. 
We present Binette, a bin refinement tool inspired by metaWRAP's bin refinement module, which addresses the limitations of the latter and ensures better results. # Summary -Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union (\autoref{fig:overview}.A). This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. +Binette is a Python reimplementation and enhanced version of the bin refinement module used in metaWRAP. It takes as input sets of bins generated by various binning tools. Using these input bin sets, Binette constructs new hybrid bins using basic set operations. Specifically, a bin can be defined as a set of contigs, and when two or more bins share at least one contig, Binette generates new bins based on their intersection, difference, and union (\autoref{fig:overview}.A). This approach differs from metaWRAP, which exclusively generates hybrid bins based on bin intersections and allows Binette to expand the range of possible bins. ![**Overview of Binette Steps**. **(A) Intermediate Bin Creation Example**: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins. **(B) Binette Workflow Overview**: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. 
Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins.\label{fig:overview}](./binette_overview.pdf) From 64f7019d0817aa1175640564e79687c6c27cfe99 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 22 Jan 2024 19:38:25 +0100 Subject: [PATCH 34/49] fix grammatical typo --- paper/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paper/paper.md b/paper/paper.md index 2666e38..8b8bdf2 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -28,7 +28,7 @@ bibliography: paper.bib # Statement of need Metagenomics enables the study of microbial communities and their individual members through shotgun sequencing. An essential phase of metagenomic analysis is the recovery of metagenome-assembled genomes (MAGs). MAGs serve as a gateway to additional analyses, including the exploration of organism-specific metabolic pathways, and form the basis for comprehensive large-scale metagenomic surveys [@Nayfach2019global_human_gut_microbiome;@Acinas_Sánchez_et_al_2021]. -In a metagenomic analysis, sequence reads are first assembled into longer sequences called contigs. These contigs are then grouped into bins based on common characteristics in a process called binning to obtain MAGs. There are several tools that can be used to binned contigs into MAGs. These tools are based on various statistical and machine learning methods and use contig characteristics such as tetranucleotide frequencies, GC content and similar abundances across samples [@kang2019metabat;@alneberg2014concoct;@nissen2021improved]. +In a metagenomic analysis, sequence reads are first assembled into longer sequences called contigs. These contigs are then grouped into bins based on common characteristics in a process called binning to obtain MAGs. There are several tools that can be used to bin contigs into MAGs. 
These tools are based on various statistical and machine learning methods and use contig characteristics such as tetranucleotide frequencies, GC content and similar abundances across samples [@kang2019metabat;@alneberg2014concoct;@nissen2021improved]. The approach of applying multiple binning methods and combining them has proven useful to obtain more and better quality MAGs from metagenomic datasets.This combination process is called bin-refinement and several tools exist to perform such tasks, such as DASTool [@sieber2018dastool], MagScot [@ruhlemann2022magscot] and the bin-refinement module of the metaWRAP pipeline [@uritskiy2018metawrap]. Of these, metaWRAP's bin-refinement tool has demonstrated remarkable efficiency in benchmark analysis [@meyer2022critical]. However, it has certain limitations, most notably its inability to integrate more than three binning results. In addition, it repeatedly uses CheckM [@parks2015checkm] to assess bin quality throughout its execution, which contributes to its slower performance. Furthermore, since it is embedded in a larger framework, it may present challenges when attempting to integrate it into an independent analysis pipeline. 
From d82847b0a03399bedac8aa50b7f8beebcfe09ef8 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 22 Jan 2024 19:40:29 +0100 Subject: [PATCH 35/49] use a pyproject.toml to package binette --- binette/{binette.py => main.py} | 0 tests/bin_manager_test.py | 2 +- tests/main_binette_test.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) rename binette/{binette.py => main.py} (100%) diff --git a/binette/binette.py b/binette/main.py similarity index 100% rename from binette/binette.py rename to binette/main.py diff --git a/tests/bin_manager_test.py b/tests/bin_manager_test.py index 73e1d7f..4daa0bf 100644 --- a/tests/bin_manager_test.py +++ b/tests/bin_manager_test.py @@ -5,7 +5,7 @@ import pytest -from binette import bin_manager, binette +from binette import bin_manager import networkx as nx def test_get_all_possible_combinations(): diff --git a/tests/main_binette_test.py b/tests/main_binette_test.py index f4ebb13..e93dcea 100644 --- a/tests/main_binette_test.py +++ b/tests/main_binette_test.py @@ -1,7 +1,7 @@ import pytest import logging -from binette.binette import log_selected_bin_info, select_bins_and_write_them, manage_protein_alignement, parse_input_files, parse_arguments, init_logging, main +from binette.main import log_selected_bin_info, select_bins_and_write_them, manage_protein_alignement, parse_input_files, parse_arguments, init_logging, main from binette.bin_manager import Bin from binette import diamond import os From a2879e0189df9fd35c83a46b8eac2140ca6fb3b5 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 22 Jan 2024 19:56:59 +0100 Subject: [PATCH 36/49] add more detail on installation --- docs/installation.md | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/docs/installation.md b/docs/installation.md index ac7fdaf..e0743f0 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -17,29 +17,43 @@ binette -h ``` -## From a conda environnement +```{tip} +For quicker installation and 
potential resolution of conflicting dependencies, consider using [Mamba](https://github.com/mamba-org/mamba), an efficient alternative to conda. -Clone this repository: ``` + + +## Installing from Source Code within a conda environnement + +A straightforward method to install Binette from the source code is by utilizing a conda environment that includes all the necessary dependencies. + +**1. Clone the Binette Repository** + +```bash git clone https://github.com/genotoul-bioinfo/Binette cd Binette ``` -Then create a Conda environment using the `binette.yaml` file: -``` +**2. Installing Dependencies with a Conda Environment File** + +Install Binette dependencies listed in the [binette.yaml](https://github.com/genotoul-bioinfo/Binette/blob/main/binette.yaml) file located at the root of the repository, using conda: + +```bash conda env create -n binette -f binette.yaml -conda activate binette +conda activate binette ``` -Finally install Binette with pip +**3. Installing Binette** -``` +Finally, install Binette using **pip**: + +```bash pip install . ``` Binette should be able to run : -``` +```bash binette -h ``` From 63d31a7039102d9fa15511168c5f85ade18bdd43 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 22 Jan 2024 20:07:26 +0100 Subject: [PATCH 37/49] update the contribution doc --- docs/contributing.md | 67 ++++++++++++++++++++++++++++++++++++++------ pyproject.toml | 56 ++++++++++++++++++++++++++++++++++++ setup.py | 39 -------------------------- 3 files changed, 115 insertions(+), 47 deletions(-) create mode 100644 pyproject.toml delete mode 100644 setup.py diff --git a/docs/contributing.md b/docs/contributing.md index eee4511..5cd4195 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -2,18 +2,69 @@ Thank you for your interest in contributing to Binette! This is an open-source project and everyone is welcome to contribute to it. 
-- ## Reporting a Bug +## Reporting a Bug - - Check the [Issues](https://github.com/genotoul-bioinfo/Binette/issues) page to see if the bug is already reported. - - If it's not reported, create a new [issue](https://github.com/genotoul-bioinfo/Binette/issues). +If you have a question or have found a bug, please open an issue. -- ## Fixing a Bug +You can check the [Issues](https://github.com/genotoul-bioinfo/Binette/issues) page to see if the bug or question has been already reported. - - Fix a bug by opening a new GitHub pull request (PR). - - Describe the issue and your solution in the PR, including the relevant issue number if applicable. +If it's not reported, create a new [issue](https://github.com/genotoul-bioinfo/Binette/issues). -- ## Suggesting a New Feature - - Share your ideas for new features by opening a [new issue](https://github.com/genotoul-bioinfo/Binette/issues). +## Adding a New Feature to Binette +### Starting with an Issue + +If you have ideas for new features or improvements, initiating a discussion in an issue. This allows us to evaluate and discuss your suggestions together. + +For minor changes like fixing typos or making small edits, feel free to create a new Pull Request (PR) directly with your proposed changes. + +### Setting Up the Development Environment + +1. **Fork the Repository:** Start by forking the repository to your GitHub account. 🍴 + +2. **Clone the Forked Repository:** Clone your forked repository to your local machine. + +3. **Get an Environment:** Create an environment with all Binette prerequisites installed. For that, you can follow installation instructions [here](./installation.md#installing-from-source-code-within-a-conda-environnement). + +4. **Install in Editable Mode:** To enable seamless code editing and testing of new functionality, install PPanGGOLiN in editable mode using the following command: + + ```bash + pip install -e . 
+ ``` + + This allows you to modify the code and experiment with new features directly. + + ```{note} + Note: Currently, we are not utilizing any auto formatters (like autopep8 or black). Kindly refrain from using them, as it could introduce extensive changes across the project, making code review challenging for us. + ``` + +### Making Your Changes + +We encourage consistency in code formatting; when adding new code, try to follow the existing code structure as closely as possible. Functions should include descriptive docstrings explaining their purpose and detailing the parameters. Ensure that argument types are specified in the function definitions. + +### Update Documentation + +If your changes change the behavior of the tool, it's essential to update the documentation to reflect your changes. Provide clear descriptions and, if necessary, examples of commands and their respective outputs. + +### Tests + +#### Continuous Integration (CI) Workflow + +We've set up a CI workflow in the Actions tab, which executes Binette on a small dataset and tests its results. If you've introduced a new feature, consider updating the CI YAML file to test it and ensure its seamless integration. + +#### Unit Tests + +It is recommended to add unit test to any additions to the code. The test suite is located in the 'tests' directory at the root of the project. + +### Creating a Pull Request + +Once you've made your changes: + +1. **Create a Pull Request:** Submit a pull request from your forked repository to the 'dev' branch on GitHub. 🚀 + +2. **Describe Your Changes:** Clearly describe the modifications you've made and link any associated issue(s) in the PR description. 📝 + +3. **Collaborative Review:** We will review your changes, offer feedback, and engage in discussions until we collectively agree on the implementation. 
🤝 + diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..01297d0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,56 @@ +[build-system] +requires = ["setuptools>=61.0.0", "setuptools-scm"] +build-backend = "setuptools.build_meta" + +[project] +name = "Binette" +description = "Binette: accurate binning refinement tool to construct high-quality MAGs." +dynamic = ["version"] +authors = [ + {name = "Jean Mainguy"}, +] +maintainers = [ + {name = "Jean Mainguy"}, +] + +readme = "README.md" +keywords = ["Bioinformatics", "Prokaryote", "Binning", "Refinement", "Metagenomics"] +classifiers=[ + "Environment :: Console", + "Intended Audience :: Science/Research", + "Natural Language :: English", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python :: 3", + "Topic :: Scientific/Engineering :: Bio-Informatics"] +requires-python = ">=3.8" +license = {file="LICENCE"} + +[project.optional-dependencies] +doc = [ + "sphinx==6.2.1", + "sphinx_rtd_theme==1.2.2", + "readthedocs-sphinx-search==0.3.1", + "sphinx-autobuild==2021.3.14", + "myst-parser==1.0.0", + "docutils==0.18.1" +] +dev = [ + "pytest>=7.0.0", + "pytest-cov" +] +# +[project.urls] +Repository = "https://github.com/genotoul-bioinfo/Binette" +#Changelog = "https://github.com/me/spam/blob/master/CHANGELOG.md" +Documentation = "https://binette.readthedocs.io" +# +# +[project.scripts] +binette = "binette.main:main" + +[tool.setuptools] +packages = ["binette"] + + +[tool.setuptools.dynamic] +version = {attr = "binette.__version__"} diff --git a/setup.py b/setup.py deleted file mode 100644 index 8b2961a..0000000 --- a/setup.py +++ /dev/null @@ -1,39 +0,0 @@ -#!/usr/bin/env python - -from os import path -from setuptools import setup, find_packages -import codecs - -def read(rel_path): - here = path.abspath(path.dirname(__file__)) - with codecs.open(path.join(here, rel_path), 'r') as fp: - return fp.read() - -def get_version(rel_path): - for line in read(rel_path).splitlines(): - if 
line.startswith('__version__'): - delim = '"' if '"' in line else "'" - return line.split(delim)[1] - else: - raise RuntimeError("Unable to find version string.") - - -if __name__ == "__main__": - # Get the long description from the README file - setup_dir = path.abspath(path.dirname(__file__)) - with open(path.join(setup_dir, "README.md"), encoding="utf-8") as f: - long_description = f.read() - - setup( - name="binette", - version=get_version("binette/__init__.py"), - author="Jean Mainguy", - packages=find_packages(), - entry_points={"console_scripts": ["binette = binette.binette:main"]}, - url="https://github.com/genotoul-bioinfo/Binette", - license="MIT", - description="Binette: accurate binning refinement tool to constructs high quality MAGs.", - long_description=(long_description), - long_description_content_type="text/markdown", - install_requires=[],#"pyrodigal", "pyfastx", "networkx", "checkm2"], - ) From e1280bd44489c807f018f78a7b9ff71b93e1258b Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 22 Jan 2024 20:08:34 +0100 Subject: [PATCH 38/49] update the contribution doc --- docs/contributing.md | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/docs/contributing.md b/docs/contributing.md index 5cd4195..6d3c839 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -10,49 +10,52 @@ You can check the [Issues](https://github.com/genotoul-bioinfo/Binette/issues) p If it's not reported, create a new [issue](https://github.com/genotoul-bioinfo/Binette/issues). - ## Adding a New Feature to Binette - ### Starting with an Issue -If you have ideas for new features or improvements, initiating a discussion in an issue. This allows us to evaluate and discuss your suggestions together. +If you have ideas for new features or improvements, initiate a discussion in an issue. This allows us to evaluate and discuss your suggestions together. 
-For minor changes like fixing typos or making small edits, feel free to create a new Pull Request (PR) directly with your proposed changes. +For minor changes like fixing typos or making small edits, create a new Pull Request (PR) directly with your proposed changes. ### Setting Up the Development Environment -1. **Fork the Repository:** Start by forking the repository to your GitHub account. 🍴 +1. **Fork and Clone the Repository:** + - Fork the repository to your GitHub account. 🍴 + - Clone your forked repository to your local machine. -2. **Clone the Forked Repository:** Clone your forked repository to your local machine. +2. **Get an Environment:** + Create an environment with all Binette prerequisites installed by following the installation instructions [here](./installation.md#installing-from-source-code-within-a-conda-environment). -3. **Get an Environment:** Create an environment with all Binette prerequisites installed. For that, you can follow installation instructions [here](./installation.md#installing-from-source-code-within-a-conda-environnement). +3. **Install in Editable Mode:** + To enable seamless code editing and testing of new functionality, install Binette in editable mode using the following command: -4. **Install in Editable Mode:** To enable seamless code editing and testing of new functionality, install PPanGGOLiN in editable mode using the following command: + ```bash + pip install -e . + ``` - ```bash - pip install -e . - ``` + This allows you to modify the code and experiment with new features directly. - This allows you to modify the code and experiment with new features directly. - ```{note} - Note: Currently, we are not utilizing any auto formatters (like autopep8 or black). Kindly refrain from using them, as it could introduce extensive changes across the project, making code review challenging for us. - ``` +```{note} +Currently, we are not utilizing any auto formatters (like autopep8 or black). 
Kindly refrain from using them, as it could introduce extensive changes across the project, making code review challenging for us. +``` + ### Making Your Changes -We encourage consistency in code formatting; when adding new code, try to follow the existing code structure as closely as possible. Functions should include descriptive docstrings explaining their purpose and detailing the parameters. Ensure that argument types are specified in the function definitions. +Maintain consistency in code formatting. When adding new code, closely follow the existing structure. Functions should include descriptive docstrings explaining their purpose and detailing the parameters. Ensure that argument types are specified in the function definitions. ### Update Documentation -If your changes change the behavior of the tool, it's essential to update the documentation to reflect your changes. Provide clear descriptions and, if necessary, examples of commands and their respective outputs. +If your changes alter the tool's behavior, update the documentation to reflect them. Provide clear descriptions and, if necessary, examples of commands and their respective outputs. + ### Tests #### Continuous Integration (CI) Workflow -We've set up a CI workflow in the Actions tab, which executes Binette on a small dataset and tests its results. If you've introduced a new feature, consider updating the CI YAML file to test it and ensure its seamless integration. +We've configured a CI workflow in the Actions tab, executing Binette on a small dataset and testing its results. If you've introduced a new feature, consider updating the CI YAML file to test it and ensure seamless integration. 
#### Unit Tests From 624110d1dabdbbf9de8f8cffabfd72f0aa99cd44 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 22 Jan 2024 20:09:58 +0100 Subject: [PATCH 39/49] refine emojo usage --- docs/contributing.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/contributing.md b/docs/contributing.md index 6d3c839..80a8ac9 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -21,11 +21,11 @@ For minor changes like fixing typos or making small edits, create a new Pull Req ### Setting Up the Development Environment 1. **Fork and Clone the Repository:** - - Fork the repository to your GitHub account. 🍴 + - Fork the repository to your GitHub account. - Clone your forked repository to your local machine. 2. **Get an Environment:** - Create an environment with all Binette prerequisites installed by following the installation instructions [here](./installation.md#installing-from-source-code-within-a-conda-environment). + Create an environment with all Binette prerequisites installed by following the installation instructions [here](./installation.md#installing-from-source-code-within-a-conda-environnement). 3. **Install in Editable Mode:** To enable seamless code editing and testing of new functionality, install Binette in editable mode using the following command: @@ -61,13 +61,13 @@ We've configured a CI workflow in the Actions tab, executing Binette on a small It is recommended to add unit test to any additions to the code. The test suite is located in the 'tests' directory at the root of the project. -### Creating a Pull Request +### Creating a Pull Request 🚀 Once you've made your changes: -1. **Create a Pull Request:** Submit a pull request from your forked repository to the 'dev' branch on GitHub. 🚀 +1. **Create a Pull Request:** Submit a pull request from your forked repository to the 'dev' branch on GitHub. -2. 
**Describe Your Changes:** Clearly describe the modifications you've made and link any associated issue(s) in the PR description. 📝 +2. **Describe Your Changes:** Clearly describe the modifications you've made and link any associated issue(s) in the PR description. -3. **Collaborative Review:** We will review your changes, offer feedback, and engage in discussions until we collectively agree on the implementation. 🤝 +3. **Collaborative Review:** We will review your changes, offer feedback, and engage in discussions until we collectively agree on the implementation. From e75f568e42f1b297cef433a3cab5520190417cf0 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 22 Jan 2024 20:20:33 +0100 Subject: [PATCH 40/49] change WF name --- .github/workflows/binette_ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/binette_ci.yml b/.github/workflows/binette_ci.yml index cb6b54a..a9d0f19 100644 --- a/.github/workflows/binette_ci.yml +++ b/.github/workflows/binette_ci.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python -name: Test Binette +name: CI on: pull_request: From 5de57c3ea96ecb162b3e7eadac3cf370b3ccd3df Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 22 Jan 2024 20:23:25 +0100 Subject: [PATCH 41/49] add badges --- README.md | 2 +- docs/index.md | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 46c3d8f..cc357d9 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/binette/README.html) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/downloads.svg)](https://anaconda.org/bioconda/binette) [![Test 
Coverage](https://genotoul-bioinfo.github.io/Binette/coverage-badge.svg)](https://genotoul-bioinfo.github.io/Binette/) +[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/binette/README.html) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/downloads.svg)](https://anaconda.org/bioconda/binette) [![Test Coverage](https://genotoul-bioinfo.github.io/Binette/coverage-badge.svg)](https://genotoul-bioinfo.github.io/Binette/) [![Documentation Status](https://readthedocs.org/projects/binette/badge/?version=latest)](https://binette.readthedocs.io/en/latest/?badge=latest) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/license.svg)](https://anaconda.org/bioconda/binette) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/version.svg)](https://anaconda.org/bioconda/binette) [![CI Status](https://github.com/genotoul-bioinfo/Binette/actions/workflows/binette_ci.yml/badge.svg)](https://github.com/genotoul-bioinfo/Binette/actions/workflows) # Binette diff --git a/docs/index.md b/docs/index.md index 1bf9efd..9c9f1ee 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,7 +4,8 @@ % contain the root `toctree` directive. 
-[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/binette/README.html) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/downloads.svg)](https://anaconda.org/bioconda/binette) [![Test Coverage](https://genotoul-bioinfo.github.io/Binette/coverage-badge.svg)](https://genotoul-bioinfo.github.io/Binette/) +[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/binette/README.html) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/downloads.svg)](https://anaconda.org/bioconda/binette) [![Test Coverage](https://genotoul-bioinfo.github.io/Binette/coverage-badge.svg)](https://genotoul-bioinfo.github.io/Binette/) [![Documentation Status](https://readthedocs.org/projects/binette/badge/?version=latest)](https://binette.readthedocs.io/en/latest/?badge=latest) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/license.svg)](https://anaconda.org/bioconda/binette) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/version.svg)](https://anaconda.org/bioconda/binette) [![CI Status](https://github.com/genotoul-bioinfo/Binette/actions/workflows/binette_ci.yml/badge.svg)](https://github.com/genotoul-bioinfo/Binette/actions/workflows) + # Binette From 39b7271c46c66f72ae4dc60503c0cf9a3a8f269b Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 22 Jan 2024 20:25:59 +0100 Subject: [PATCH 42/49] organise badges --- README.md | 5 ++++- docs/index.md | 4 +++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index cc357d9..dfc390b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ -[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/binette/README.html) [![Anaconda-Server 
Badge](https://anaconda.org/bioconda/binette/badges/downloads.svg)](https://anaconda.org/bioconda/binette) [![Test Coverage](https://genotoul-bioinfo.github.io/Binette/coverage-badge.svg)](https://genotoul-bioinfo.github.io/Binette/) [![Documentation Status](https://readthedocs.org/projects/binette/badge/?version=latest)](https://binette.readthedocs.io/en/latest/?badge=latest) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/license.svg)](https://anaconda.org/bioconda/binette) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/version.svg)](https://anaconda.org/bioconda/binette) [![CI Status](https://github.com/genotoul-bioinfo/Binette/actions/workflows/binette_ci.yml/badge.svg)](https://github.com/genotoul-bioinfo/Binette/actions/workflows) +[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/binette/README.html) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/downloads.svg)](https://anaconda.org/bioconda/binette) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/license.svg)](https://anaconda.org/bioconda/binette) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/version.svg)](https://anaconda.org/bioconda/binette) + +[![Test Coverage](https://genotoul-bioinfo.github.io/Binette/coverage-badge.svg)](https://genotoul-bioinfo.github.io/Binette/) [![CI Status](https://github.com/genotoul-bioinfo/Binette/actions/workflows/binette_ci.yml/badge.svg)](https://github.com/genotoul-bioinfo/Binette/actions/workflows) [![Documentation Status](https://readthedocs.org/projects/binette/badge/?version=latest)](https://binette.readthedocs.io/en/latest/?badge=latest) + # Binette diff --git a/docs/index.md b/docs/index.md index 9c9f1ee..0ecd6ae 100644 --- a/docs/index.md +++ b/docs/index.md @@ -4,7 +4,9 @@ % contain the root `toctree` directive. 
-[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/binette/README.html) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/downloads.svg)](https://anaconda.org/bioconda/binette) [![Test Coverage](https://genotoul-bioinfo.github.io/Binette/coverage-badge.svg)](https://genotoul-bioinfo.github.io/Binette/) [![Documentation Status](https://readthedocs.org/projects/binette/badge/?version=latest)](https://binette.readthedocs.io/en/latest/?badge=latest) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/license.svg)](https://anaconda.org/bioconda/binette) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/version.svg)](https://anaconda.org/bioconda/binette) [![CI Status](https://github.com/genotoul-bioinfo/Binette/actions/workflows/binette_ci.yml/badge.svg)](https://github.com/genotoul-bioinfo/Binette/actions/workflows) +[![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/binette/README.html) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/downloads.svg)](https://anaconda.org/bioconda/binette) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/license.svg)](https://anaconda.org/bioconda/binette) [![Anaconda-Server Badge](https://anaconda.org/bioconda/binette/badges/version.svg)](https://anaconda.org/bioconda/binette) + +[![Test Coverage](https://genotoul-bioinfo.github.io/Binette/coverage-badge.svg)](https://genotoul-bioinfo.github.io/Binette/) [![CI Status](https://github.com/genotoul-bioinfo/Binette/actions/workflows/binette_ci.yml/badge.svg)](https://github.com/genotoul-bioinfo/Binette/actions/workflows) [![Documentation Status](https://readthedocs.org/projects/binette/badge/?version=latest)](https://binette.readthedocs.io/en/latest/?badge=latest) # Binette From 
9eabaa6d37b6b5c5c884c82e6d1025a76ab6a5b4 Mon Sep 17 00:00:00 2001 From: Jean Mainguy Date: Mon, 22 Jan 2024 23:29:42 +0100 Subject: [PATCH 43/49] Create release.yml --- .github/workflows/release.yml | 45 +++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..d9ca0a1 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,45 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v3 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + + - name: Publish package distributions to TestPyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} + repository-url: https://test.pypi.org/legacy/ + + + + From d67c0bc9f6822514be8bf914bc2eef26586a9e5b Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 22 Jan 2024 23:32:42 +0100 Subject: [PATCH 44/49] tmp publish on test pypi on push --- .github/workflows/release.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index d9ca0a1..843d630 100644 --- 
a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,9 +8,11 @@ name: Upload Python Package -on: - release: - types: [published] +# on: +# release: +# types: [published] + +on: [push] permissions: contents: read From 7f45a4b7d40c055bed51e17af095a70df62b3e3e Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Mon, 22 Jan 2024 23:36:45 +0100 Subject: [PATCH 45/49] fix release.yml --- .github/workflows/release.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 843d630..e3eb19f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -16,6 +16,7 @@ on: [push] permissions: contents: read + id-token: write jobs: deploy: From 2ebcdf8bfa3f1ec37319604501e5852b930a66a3 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 23 Jan 2024 00:08:44 +0100 Subject: [PATCH 46/49] add python deps in toml --- pyproject.toml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 01297d0..8ff3ff8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,6 +26,18 @@ requires-python = ">=3.8" license = {file="LICENCE"} [project.optional-dependencies] +main_deps = [ + "checkm2==1.*", + "networkx==3.*", + "numpy==1.19.2", + "packaging==23.*", + "pandas==1.4.0", + "pyfastx==2.*", + "pyrodigal==2.*", + "requests==2.*", + "tqdm==4.*", +] + doc = [ "sphinx==6.2.1", "sphinx_rtd_theme==1.2.2", From bf1b9b3824613280ea2d798bc9645c2c519419ec Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 23 Jan 2024 00:15:52 +0100 Subject: [PATCH 47/49] make release.yml ready to publish on pypi --- .github/workflows/release.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index e3eb19f..a9b0c1f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -8,11 +8,11 @@ name: Upload Python Package -# on: -# release: -# types: [published] +on: 
+ release: + types: [published] -on: [push] +# on: [push] permissions: contents: read @@ -41,7 +41,7 @@ jobs: with: user: __token__ password: ${{ secrets.PYPI_API_TOKEN }} - repository-url: https://test.pypi.org/legacy/ + # repository-url: https://test.pypi.org/legacy/ From e5679d1bd760a2485000438b07dca3ca4016ede6 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 23 Jan 2024 00:16:19 +0100 Subject: [PATCH 48/49] use python deps in toml to setup readthedoc --- .readthedocs.yaml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 04c3852..de6efe3 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -6,10 +6,13 @@ version: 2 python: install: - - requirements: docs/requirements.txt - - requirements: requirements.txt - method: pip path: . + extra_requirements: + - doc + - main_deps + + # Set the OS, Python version and other tools you might need build: From 7cc665356484789ca58088b19673404689c545c9 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 23 Jan 2024 00:18:50 +0100 Subject: [PATCH 49/49] rm requirements as they are manage in toml --- docs/requirements.txt | 6 ------ requirements.txt | 10 ---------- 2 files changed, 16 deletions(-) delete mode 100644 docs/requirements.txt delete mode 100644 requirements.txt diff --git a/docs/requirements.txt b/docs/requirements.txt deleted file mode 100644 index e8024c8..0000000 --- a/docs/requirements.txt +++ /dev/null @@ -1,6 +0,0 @@ -sphinx==6.2.1 -sphinx_rtd_theme==1.2.2 -readthedocs-sphinx-search==0.3.1 -sphinx-autobuild==2021.3.14 -myst-parser==1.0.0 -docutils==0.18.1 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 5624289..0000000 --- a/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -checkm2==1.* -networkx==3.* -numpy==1.19.2 -packaging>=23.* -pandas==1.4.0 -pyfastx>=2.* -pyrodigal>=2.* -requests==2.* -tqdm==4.* -pandas==1.4.0 \ No newline at end of file