Skip to content

Commit

Permalink
v1.2.5: --groupcontig
Browse files Browse the repository at this point in the history
  • Loading branch information
Echoring committed Jan 3, 2025
1 parent 2b439fa commit dbb6794
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 5 deletions.
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ Task include:
- [CentroMiner](#CentroMiner): centromere candidate prediction

## Version Change log
1.2.5
- Add a new '--groupcontig' option for AssemblyMapper. With this option, an additional folder is output containing contigs grouped by reference sequence (unassigned contigs are grouped together into one group).

1.2.4
- Add new '--teclade' and '--teminrepeattimes' options for AssemblyMapper to control the behavior of the built-in TeloExplorer.
- Add a new '-a' option for GapFiller to select unimap as the aligner. (Also fix the bug where the default aligner was not set in GapFiller after v1.2.3, thanks to a927050047, [PR #46](https://github.com/aaranyue/quarTeT/pull/46))
Expand Down Expand Up @@ -158,6 +161,7 @@ Usage: python3 quartet.py AssemblyMapper <parameters>
Specify alignment program (support minimap2, unimap and mummer), default: minimap2
--nofilter Use original sequence input, no filtering.
--keep Keep the unplaced contigs in draft genome
--groupcontig Add a folder output of contigs grouped by destination.
--extract-ref-flanks CHIMERA
Add an output of chimera contig containing reference flanks of x bp (check issue#42 for detail), default: 0 (off)
--plot Plot a colinearity graph for draft genome to reference alignments. (will cost more time)
Expand Down
2 changes: 1 addition & 1 deletion quartet.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import sys

usage = '''quarTeT: Telomere-to-telomere Toolkit
version 1.2.4
version 1.2.5
Usage: python3 quartet.py <module> <parameters>
Expand Down
20 changes: 16 additions & 4 deletions quartet_assemblymapper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/env python3
# Last modified: V1.2.4
# Last modified: V1.2.5

import argparse
import subprocess
Expand All @@ -10,7 +10,7 @@

### MAIN PROGRAM ###
def AssemblyMapper(args):
refgenomefile, qryfile, mincontiglength, minalignmentlength, minalignmentidentity, prefix, threads, aligner, nofilter, keep, chimera, plot, noplot, overwrite, nucmeroption, deltafilteroption, minimapoption, teclade, teminrepeattimes = args
refgenomefile, qryfile, mincontiglength, minalignmentlength, minalignmentidentity, prefix, threads, aligner, nofilter, keep, groupcontig, chimera, plot, noplot, overwrite, nucmeroption, deltafilteroption, minimapoption, teclade, teminrepeattimes = args

# split scaffolds to contigs and remove short contigs
print('[Info] Filtering contigs input...')
Expand Down Expand Up @@ -192,6 +192,16 @@ def AssemblyMapper(args):
w.write(f'{tigid}\t{tiglen}\t{target}\n')
print(f'[Output] Mapping result for each contigs write to: {contigmapinfofile}')

# group contig option
if groupcontig == True:
subprocess.run(f'rm -rf {prefix}.groupcontig', stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
subprocess.run(f'mkdir {prefix}.groupcontig', stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
for [tigid, tiglen, target] in contiginfo:
file_path = f"{prefix}.groupcontig/{target}.tig.fasta"
with open(file_path, 'a') as file:
file.write(f">{tigid}\n{totaldict[tigid]}\n")
print(f'[Output] grouped contigs write to: {prefix}.groupcontig/')

# chimera option
if chimera != 0:
refdict = quartet_util.readFastaAsDict(refgenomefile)
Expand Down Expand Up @@ -313,6 +323,7 @@ def AssemblyMapper(args):
parser.add_argument('-a', dest='aligner', choices=['minimap2', 'unimap', 'mummer'], default='minimap2', help='Specify alignment program (support minimap2, unimap and mummer), default: minimap2')
parser.add_argument('--nofilter', dest='nofilter', action='store_true', default=False, help='Use original sequence input, no filtering.')
parser.add_argument('--keep', dest='keep', action='store_true', default=False, help='Keep the unplaced contigs in draft genome')
parser.add_argument('--groupcontig', dest='groupcontig', action='store_true', default=False, help='Add an folder output of contigs grouped by destination.')
parser.add_argument('--extract-ref-flanks', dest='chimera', type=int, default=0, help='Add an output of chimera contig containing reference flanks of x bp (check issue#42 for detail), default: 0 (off)')
parser.add_argument('--plot', dest='plot', action='store_true', default=False, help='Plot a colinearity graph for draft genome to reference alignments. (will cost more time)')
parser.add_argument('--noplot', dest='noplot', action='store_true', default=False, help='Skip all ploting.')
Expand All @@ -337,6 +348,7 @@ def AssemblyMapper(args):
aligner = parser.parse_args().aligner
nofilter = parser.parse_args().nofilter
keep = parser.parse_args().keep
groupcontig = parser.parse_args().groupcontig
chimera = parser.parse_args().chimera
plot = parser.parse_args().plot
noplot = parser.parse_args().noplot
Expand All @@ -357,6 +369,6 @@ def AssemblyMapper(args):

# run
args = [refgenomefile, qryfile, mincontiglength, minalignmentlength, minalignmentidentity,
prefix, threads, aligner, nofilter, keep, chimera, plot, noplot, overwrite, nucmeroption, deltafilteroption, minimapoption, teclade, teminrepeattimes]
print(f'[Info] Paramater: refgenomefile={refgenomefile}, qryfile={qryfile}, mincontiglength={mincontiglength}, minalignmentlength={minalignmentlength}, minalignmentidentity={minalignmentidentity}, prefix={prefix}, threads={threads}, aligner={aligner}, nofilter={nofilter}, keep={keep}, chimera={chimera}, plot={plot}, noplot={noplot}, overwrite={overwrite}, nucmeroption={nucmeroption}, deltafilteroption={deltafilteroption}, minimapoption={minimapoption}, teclade={teclade}, teminrepeattimes={teminrepeattimes}')
prefix, threads, aligner, nofilter, keep, groupcontig, chimera, plot, noplot, overwrite, nucmeroption, deltafilteroption, minimapoption, teclade, teminrepeattimes]
print(f'[Info] Paramater: refgenomefile={refgenomefile}, qryfile={qryfile}, mincontiglength={mincontiglength}, minalignmentlength={minalignmentlength}, minalignmentidentity={minalignmentidentity}, prefix={prefix}, threads={threads}, aligner={aligner}, nofilter={nofilter}, keep={keep}, groupcontig={groupcontig}, chimera={chimera}, plot={plot}, noplot={noplot}, overwrite={overwrite}, nucmeroption={nucmeroption}, deltafilteroption={deltafilteroption}, minimapoption={minimapoption}, teclade={teclade}, teminrepeattimes={teminrepeattimes}')
quartet_util.run(AssemblyMapper, args)

0 comments on commit dbb6794

Please sign in to comment.