From 56e1ad1054f6c8fb05cee80cde0be241bca906ca Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi=20Allio?= <rem.allio@yahoo.fr>
Date: Wed, 22 Jan 2020 14:51:43 +0100
Subject: [PATCH] Update

---
 FirstBuildChecker.pyc    | Bin 11615 -> 11594 bytes
 README                   | 154 ---------------
 README.md                | 168 ++++++++--------
 circularizationCheck.pyc | Bin 4015 -> 4001 bytes
 genbankOutput.py         |  86 ++++----
 genbankOutput.pyc        | Bin 6254 -> 6488 bytes
 geneChecker.pyc          | Bin 16159 -> 16103 bytes
 geneChecker_fasta.py     |  19 +-
 mitofinder               | 160 ++++++++-------
 rename_fasta_seqID.py    |  23 +++
 runIDBA.pyc              | Bin 5303 -> 5149 bytes
 runMegahit.pyc           | Bin 4756 -> 4635 bytes
 runMetaspades.pyc        | Bin 4614 -> 4501 bytes
 sort_gff.py              | 409 ++++++++++++++++++++++++++++++++++++++-
 tRNAscanChecker.pyc      | Bin 9776 -> 9664 bytes
 15 files changed, 662 insertions(+), 357 deletions(-)
 delete mode 100644 README
 create mode 100755 rename_fasta_seqID.py

diff --git a/FirstBuildChecker.pyc b/FirstBuildChecker.pyc
index f5f2a2036198f3ccaf3689394f9f33536d37ab88..4cb5808a10d40bbfbb832205a46f49a7f9927627 100755
GIT binary patch
delta 49
wcmcZ~bt;O3`7<xqmOWErH*%~OVzipPLr7!ubD>;uD6>T?feFH#ETZcT0O73>p#T5?

delta 81
zcmX>Vbw7%O`7<w9&Y5>H8#&esF*;86)6!s%FEi9L&@<ZnKqyxnRcP`9A+gOpT3$?;
J!jnaHO#x!F8BG8H

diff --git a/README b/README
deleted file mode 100644
index 0581bc0..0000000
--- a/README
+++ /dev/null
@@ -1,154 +0,0 @@
-MitoFinder version 1.0.1		22/03/2019
-Author : Rémi ALLIO
-
-Mitofinder is a pipeline to assemble mitochondrial genomes and extract mitochondrial genes from trimmed 
-sequencing read data.
-
-# Requirements
-
-This software is suitable for all linux-like systems with gcc installed (Unfortunately, not MAC and Windows).
-
-# Installation guide for MitoFinder
-
-## Download mitofinder_vX.tar.gz at [www.github.com]
-
-$ tar -zxf mitofinder_vX.tar.gz
-$ cd mitofinder_vX
-$ ./install.sh
-
-$ PATH/TO/MITOFINDER_VX/mitofinder -h
-
-## Add mitofinder to your path -> linux
-
-$ cd mitofinder_vX/
-$ p=$(pwd)
-$ echo -e "\n#Path to mitofinder \nexport PATH=$PATH:$p" >> ~/.bashrc 
-$ source ~/.bashrc
-
-# How to use MitoFinder
-TIP: use mitofinder --example to print usual examples of use
-
-First, you can choose the assembler using the following options:
--- megahit 				(default: faster)
--- metaspades			(recommended: a bit slower but more efficient (see associated paper). WARNING: Not compatible with single-end reads)
--- idba
-
-### For mitochondrial genome assembly 
-
-## Trimmed paired-end reads
-$ mitofinder -j [jobname] -1 [left_reads.fastq.gz] -2 [right_reads.fastq.gz] -r [genbank_reference.gb] -o [genetic_code] -p [threads] -m [memory]
-
-## Trimmed single-end reads
-$ mitofinder -j [jobname] -s [SE_reads.fastq.gz] -r [genbank_reference.gb] -o [genetic_code] -p [threads] -m [memory]
-
-## MitoFinder can be used with your own assembly (one or several contig.s in fasta format)
-$ mitofinder -j [jobname] -a [assembly.fasta] -r [genbank_reference.gb] -o [genetic_code] -p [threads] -m [memory]
-
-### Restart
-Use the same command line.
-WARNING: If you want to make the assembly again (for example because it failed) you have to remove the result assembly directory. If not, MitoFinder will skip the assembly step.
-
-# OUTPUT
-
-## Result folder
-
-Mitofinder returns several files for each mitochondrial contig found:
-- [job_name]_partial_mito_1.fasta				containing a mitochondrial contig
-- [job_name]_partial_mito_1.gff				containing the final annotation for a given contig
-- [job_name]_partial_mito_1.arwen				containing the result of the tRNA annotation returned by the Arwen software.
-- [job_name]_partial_mito_1.gb 				containing the final annotation for a given contig (option --out_gb)
-- [job_name]_final_genes.fasta				containing the final genes selected from all contigs by MitoFinder 
-
-
-## UCE annotation
-MitoFinder starts by assembling both mitochondrial and nuclear reads. It is only in a second step that mitochondrial contigs are identified and extracted.
-MitoFinder thus provides UCE contigs already assembled and the annotation can be done from the following file:
-- [job_name]link.scafSeq 	containing all assembled contigs from raw reads. 
-
-To do so, we recommend the PHYLUCE pipeline, which is specifically designed to annotate ultraconserved elements (Faircloth  2015; Tutorial: https://phyluce.readthedocs.io/en/latest/tutorial-one.html#finding-uce-loci).
-You can thus use the file [job_name]link.scafSeq and start the pipeline at the "Finding UCE" step.
-
-# Help
-usage: mitofinder [-h] [--megahit] [--idba] [--metaspades] [-j PROCESSNAME]
-                  [-1 PE1] [-2 PE2] [-s SE] [-a ASSEMBLY] [-m MEM]
-                  [-l SHORTESTCONTIG] [-p PROCESSORSTOUSE] [-r REFSEQFILE]
-                  [-e BLASTEVAL] [--out_gb]
-                  [--blastidentitynucl BLASTIDENTITYNUCL]
-                  [--blastidentityprot BLASTIDENTITYPROT]
-                  [--blastsize ALIGNCUTOFF] [--circularsize CIRCULARSIZE]
-                  [--circularoffset CIRCULAROFFSET] [-cove COVECUTOFF]
-                  [-o ORGANISMTYPE] [-v] [--example]
-
-Mitofinder is a pipeline to assemble and annotate mitochondrial DNA from
-trimmed sequencing reads.
-
-optional arguments:
-  -h, --help            show this help message and exit
-  --megahit             Use Megahit for assembly. (Default)
-  --idba                Use IDBA-UD for assembly.
-  --metaspades          Use MetaSPAdes for assembly.
-  -j PROCESSNAME, --jobname PROCESSNAME
-                        Job name to be used throughout the process
-  -1 PE1, --Paired-end1 PE1
-                        File with forward paired-end reads
-  -2 PE2, --Paired-end2 PE2
-                        File with reverse paired-end reads
-  -s SE, --Single-end SE
-                        File with single-end reads
-  -a ASSEMBLY, --assembly ASSEMBLY
-                        File with your own assembly
-  -m MEM, --max-memory MEM
-                        max memory to use in Go (Megahit or MetaSPAdes)
-  -l SHORTESTCONTIG, --length SHORTESTCONTIG
-                        Shortest contig length to be used in scaffolding.
-                        Default = 100
-  -p PROCESSORSTOUSE, --processors PROCESSORSTOUSE
-                        Number of threads Mitofinder will use at most.
-  -r REFSEQFILE, --refseq REFSEQFILE
-                        Reference mitochondrial genome in GenBank format
-                        (.gb).
-  -e BLASTEVAL, --blaste BLASTEVAL
-                        e-value of blast program used for contig
-                        identification and annotation. Default = 0.000001
-  --out_gb              Create annotation output file in GenBank format.
-  --blastidentitynucl BLASTIDENTITYNUCL
-                        Nucleotide identity percentage for a hit to be
-                        retained. Default = 50
-  --blastidentityprot BLASTIDENTITYPROT
-                        Amino acid identity percentage for a hit to be
-                        retained. Default = 40
-  --blastsize ALIGNCUTOFF
-                        Percentage of overlap in blast best hit to be
-                        retained. Default = 30
-  --circularsize CIRCULARSIZE
-                        Size to consider when checking for circularization.
-                        Default = 45
-  --circularoffset CIRCULAROFFSET
-                        Offset from start and finish to consider when looking
-                        for circularization. Default = 200
-  -cove COVECUTOFF, --covecutoff COVECUTOFF
-                        Cove cutoff for tRNAscan-SE. Default = 7
-  -o ORGANISMTYPE, --organism ORGANISMTYPE
-                        Organism genetic code following NCBI table (integer):
-                        1. The Standard Code 2. The Vertebrate Mitochondrial
-                        Code 3. The Yeast Mitochondrial Code 4. The Mold,
-                        Protozoan, and Coelenterate Mitochondrial Code and the
-                        Mycoplasma/Spiroplasma Code 5. The Invertebrate
-                        Mitochondrial Code 6. The Ciliate, Dasycladacean and
-                        Hexamita Nuclear Code 9. The Echinoderm and Flatworm
-                        Mitochondrial Code 10. The Euplotid Nuclear Code 11.
-                        The Bacterial, Archaeal and Plant Plastid Code 12. The
-                        Alternative Yeast Nuclear Code 13. The Ascidian
-                        Mitochondrial Code 14. The Alternative Flatworm
-                        Mitochondrial Code 16. Chlorophycean Mitochondrial
-                        Code 21. Trematode Mitochondrial Code 22. Scenedesmus
-                        obliquus Mitochondrial Code 23. Thraustochytrium
-                        Mitochondrial Code 24. Pterobranchia Mitochondrial
-                        Code 25. Candidate Division SR1 and Gracilibacteria
-                        Code
-  -v, --version         Version 1.0.2
-  --example             Print getting started examples
-  
-# To cite Mitofinder
-
-Allio R., Schomaker-Bastos A., Romiguier J., Prosdocimi F., Nabholz B. & Delsuc F. (2019). Efficient automated extraction of mitogenomic data in target enrichment phylogenomics with MitoFinder. Molecular Ecology Resources (submitted).
diff --git a/README.md b/README.md
index e8a50e6..c645828 100755
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# MitoFinder version 1.0.2		06/01/2019
+# MitoFinder version 1.1		
 Allio, R., Schomaker-Bastos, A., Romiguier, J., Prosdocimi, F., Nabholz, B., & Delsuc, F.
 
 <p align="center">
@@ -16,14 +16,12 @@ This software is suitable for all linux-like systems with gcc installed (Unfortu
 
 1. [Installation guide for MitoFinder](#installation-guide-for-mitofinder)
 2. [How to use MitoFinder](#how-to-use-mitofinder)
-3. [INPUTS](#inputs)
-4. [OUTPUTS](#outputs)
-5. [Test case](#test-case)
-6. [Restart](#restart)
-7. [UCE annotation](#uce-annotation)
-8. [Associated publication](#associated-publication)
-9. [How to get reference mitochondrial genomes from ncbi](#how-to-get-reference-mitochondrial-genomes-from-ncbi)
-10. [Detailed options](#detailed-options)
+3. [Detailed options](#detailed-options)
+4. [INPUTS](#inputs)
+5. [OUTPUTS](#outputs)
+6. [UCE annotation](#uce-annotation)
+7. [Associated publication](#associated-publication)
+8. [How to get reference mitochondrial genomes from ncbi](#how-to-get-reference-mitochondrial-genomes-from-ncbi)
 
 
 # Installation guide for MitoFinder
@@ -71,7 +69,7 @@ First, you can choose the assembler using the following options:
 
 ## Mitochondrial genome assembly  
 
-**TIP**: use mitofinder --example to print usual examples of use  
+TIP: use mitofinder --example to print usual examples of use  
 
 ### Trimmed paired-end reads
 ```shell
@@ -88,74 +86,17 @@ mitofinder -j [jobname] -s [SE_reads.fastq.gz] -r [genbank_reference.gb] -o [gen
 mitofinder -j [jobname] -a [assembly.fasta] -r [genbank_reference.gb] -o [genetic_code] -p [threads] -m [memory]
 ```
 
-# INPUTS
-
-Mitofinder needs several files to run depending on the method you have choosen (see above):  
-- [x] **Reference_file.gb**				containing at least one mitochondrial genome of reference extracted from [NCBI](https://www.ncbi.nlm.nih.gov/)
-- [ ] **left_reads.fastq.gz**				containing the left reads of paired-end sequencing  
-- [ ] **right_reads.fastq.gz**				containing the right reads of paired-end sequencing  
-- [ ] **SE_reads.fastq.gz** 				containing the reads of single-end sequencing  
-- [ ] **assembly.fasta**				containing the assembly on which MitoFinder have to find and annotate mitochondrial contig.s   
-
-**TIP 1**: If you hesitate between two or more mitogenomes to use as references, you can put them together in the same reference file. MitoFinder will use all of them to find mitochondrial signal and will choose the best matching one for each gene during the annotation step.  
-**TIP 2**: If you want to assemble several mitogenomes for which you have different references, you can group them together in the same reference file. This allows to use MitoFinder in a loop and does not affect the result because MitoFinder will select the best matching reference for the annotation step.  
-**TIP 3**: If you have a large number of reads, from a whole genome sequencing for example, you can pre-filter mitochondrial reads using [mirabait](http://manpages.ubuntu.com/manpages/xenial/man1/mirabait.1.html). This will considerably reduce the computation time. However, this can be done only if you have a reference mitogenome for a closely related species because MIRA uses mapping to select reads and this method is rather stringent. If you don't have the mitogenome of a closely related species you can just use a random subset of your read data (~ 15/20 Millions of reads) to reduce computation time and assemble the mitogenome. Of course, in doing so, you could lose some of the information if the mitogenome coverage is unsufficient.  
-
-# OUTPUTS 
-### Result folder  
-
-Mitofinder returns several files for each mitochondrial contig found:  
-- [x] **[job_name]_partial_mito_1.fasta**				containing a mitochondrial contig  
-- [x] **[job_name]_partial_mito_1_genes.fasta** 				containing the annoted genes for a given contig    
-- [x] **[job_name]_partial_mito_1.gff**				containing the final annotation for a given contig  
-- [x] **[job_name]_partial_mito_1.arwen**				containing the result of the tRNA annotation returned by the Arwen software  
-- [x] **[job_name]_partial_mito_1.gb** 				containing the final annotation for a given contig (option --out_gb)  
-- [x] **[job_name]_final_genes.fasta**				containing the final genes selected from all contigs by MitoFinder   
-
-# Test case
-A test case is available in the testcase directory.
-
-To run it:
-```shell
-cd /PATH/TO/MITOFINDER/testcase/
-
-mitofinder -j test -s test.fastq -r test_reference.gb -o 5 -p 5 -m 5
-```
-
-# Restart
+### Restart
 Use the same command line.  
 WARNING: If you want to make the assembly again (for example because it failed) you have to remove the result assembly directory. If not, MitoFinder will skip the assembly step.  
 
-# UCE annotation
-MitoFinder starts by assembling both mitochondrial and nuclear reads. It is only in a second step that mitochondrial contigs are identified and extracted.
-MitoFinder thus provides UCE contigs already assembled and the annotation can be done from the following file:  
-- **[job_name]link.scafSeq** 	containing all assembled contigs from raw reads. 
-
-To do so, we recommend the PHYLUCE pipeline, which is specifically designed to annotate ultraconserved elements (Faircloth  2015; Tutorial: https://phyluce.readthedocs.io/en/latest/tutorial-one.html#finding-uce-loci).  
-You can thus use the file **[job_name]link.scafSeq** and start the pipeline at the **"Finding UCE"** step.  
-  
-# Associated publication
-
-Allio, R., Schomaker-Bastos, A., Romiguier, J., Prosdocimi, F., Nabholz, B., & Delsuc, F. (2019). MitoFinder: efficient automated large-scale extraction of mitogenomic data in target enrichment phylogenomics. BioRxiv, 685412. https://doi.org/10.1101/685412
-
-# HOW TO GET REFERENCE MITOCHONDRIAL GENOMES FROM NCBI
-
-1. Go to [NCBI](https://www.ncbi.nlm.nih.gov/)  
-2. Select "Nucleotide" in the search bar  
-3. Search for mitochondrion genomes:  
-- [x] RefSeq (if available)  
-- [x] Sequence length from 12000 to 20000  
-4. Download complete record in GenBank format  
-
-![](/image/NCBI.png)
-
 # Detailed options
 
 ```shell
 usage: mitofinder [-h] [--megahit] [--idba] [--metaspades] [-j PROCESSNAME]
                   [-1 PE1] [-2 PE2] [-s SE] [-a ASSEMBLY] [-m MEM]
                   [-l SHORTESTCONTIG] [-p PROCESSORSTOUSE] [-r REFSEQFILE]
-                  [-e BLASTEVAL] [--out_gb]
+                  [-e BLASTEVAL] [-n NWALK] [--out_gb]
                   [--blastidentitynucl BLASTIDENTITYNUCL]
                   [--blastidentityprot BLASTIDENTITYPROT]
                   [--blastsize ALIGNCUTOFF] [--circularsize CIRCULARSIZE]
@@ -163,7 +104,7 @@ usage: mitofinder [-h] [--megahit] [--idba] [--metaspades] [-j PROCESSNAME]
                   [-o ORGANISMTYPE] [-v] [--example]
 
 Mitofinder is a pipeline to assemble and annotate mitochondrial DNA from
-cleaned sequencing datas.
+trimmed sequencing reads.
 
 optional arguments:
   -h, --help            show this help message and exit
@@ -171,7 +112,7 @@ optional arguments:
   --idba                Use IDBA-UD for assembly.
   --metaspades          Use MetaSPAdes for assembly.
   -j PROCESSNAME, --jobname PROCESSNAME
-                        Job name to be used throughout the project
+                        Sequence ID to be used throughout the process
   -1 PE1, --Paired-end1 PE1
                         File with forward paired-end reads
   -2 PE2, --Paired-end2 PE2
@@ -181,29 +122,33 @@ optional arguments:
   -a ASSEMBLY, --assembly ASSEMBLY
                         File with your own assembly
   -m MEM, --max-memory MEM
-                        max memory to use in Go (megahit or metaspades)
+                        max memory to use in Go (Megahit or MetaSPAdes)
   -l SHORTESTCONTIG, --length SHORTESTCONTIG
                         Shortest contig length to be used in scaffolding.
                         Default = 100
   -p PROCESSORSTOUSE, --processors PROCESSORSTOUSE
                         Number of threads Mitofinder will use at most.
   -r REFSEQFILE, --refseq REFSEQFILE
-                        Reference mitochondrial genome.s in GenBank format
-                        (.gb). You can put several reference mitgenomes
-                        together.
+                        Reference mitochondrial genome in GenBank format
+                        (.gb).
   -e BLASTEVAL, --blaste BLASTEVAL
-                        e-Value for blast program for contig identification
-                        and annotation. Default = 0.000001
-  --out_gb              Create annotation output with genbank format.
+                        e-value of blast program used for contig
+                        identification and annotation. Default = 0.000001
+  -n NWALK, --nwalk NWALK
+                        Maximum number of codon steps to be tested on each
+                        size of the gene to find the start and stop codon
+                        during the annotation step. Default = 20 (60 bases)
+  --out_gb              Do not create annotation output file in GenBank
+                        format.
   --blastidentitynucl BLASTIDENTITYNUCL
-                        Nucleotide percentage of identity or a hit to be
-                        considered good. Default = 50
+                        Nucleotide identity percentage for a hit to be
+                        retained. Default = 50
   --blastidentityprot BLASTIDENTITYPROT
-                        Amino acid percentage of identity or a hit to be
-                        considered good. Default = 40
+                        Amino acid identity percentage for a hit to be
+                        retained. Default = 40
   --blastsize ALIGNCUTOFF
-                        Percentage of covered span in blast best hit to be
-                        considered good. Default = 30
+                        Percentage of overlap in blast best hit to be
+                        retained. Default = 30
   --circularsize CIRCULARSIZE
                         Size to consider when checking for circularization.
                         Default = 45
@@ -213,8 +158,7 @@ optional arguments:
   -cove COVECUTOFF, --covecutoff COVECUTOFF
                         Cove cutoff for tRNAscan-SE. Default = 7
   -o ORGANISMTYPE, --organism ORGANISMTYPE
-                        What should the genome checking and annotation
-                        consider as genetic code type. NCBI table (integer):
+                        Organism genetic code following NCBI table (integer):
                         1. The Standard Code 2. The Vertebrate Mitochondrial
                         Code 3. The Yeast Mitochondrial Code 4. The Mold,
                         Protozoan, and Coelenterate Mitochondrial Code and the
@@ -231,9 +175,57 @@ optional arguments:
                         Mitochondrial Code 24. Pterobranchia Mitochondrial
                         Code 25. Candidate Division SR1 and Gracilibacteria
                         Code
-  -v, --version         Version 1.0.2
-  --example             Print usual examples of use
+  -v, --version         Version 1.1
+  --example             Print getting started examples
 ```
 
+# INPUTS
+
+Mitofinder needs several files to run depending on the method you have choosen (see above):  
+- [x] **Reference_file.gb**				containing at least one mitochondrial genome of reference extracted from [NCBI](https://www.ncbi.nlm.nih.gov/)
+- [ ] **left_reads.fastq.gz**				containing the left reads of paired-end sequencing  
+- [ ] **right_reads.fastq.gz**				containing the right reads of paired-end sequencing  
+- [ ] **SE_reads.fastq.gz** 				containing the reads of single-end sequencing  
+- [ ] **assembly.fasta**				containing the assembly on which MitoFinder have to find and annotate mitochondrial contig.s   
+
+# OUTPUTS 
+### Result folder  
+
+Mitofinder returns several files for each mitochondrial contig found:  
+- [x] **[job_name]_final_genes.fasta**				containing the final genes selected from all contigs by MitoFinder   
+- [x] **[job_name]_mtDNA_contig.fasta**				containing a mitochondrial contig  
+- [x] **[job_name]_mtDNA_contig_final.gff**				containing the final annotation for a given contig  
+- [x] **[job_name]_mtDNA_contig_final.tbl**				containing the final annotation for a given contig (Genbank submission format)
+- [x] **[job_name]_mtDNA_contig.gb** 				containing the final annotation for a given contig 
+- [x] **[job_name]_mtDNA_contig_genes.fasta** 				containing the annoted genes for a given contig    
+- [x] **[job_name]_mtDNA_contig.png** 				schematic representation of the annotation of the mtDNA contig    
+
+
+# UCE annotation
+MitoFinder starts by assembling both mitochondrial and nuclear reads. It is only in a second step that mitochondrial contigs are identified and extracted.
+MitoFinder thus provides UCE contigs already assembled and the annotation can be done from the following file:  
+- **[job_name]link.scafSeq** 	containing all assembled contigs from raw reads. 
+
+To do so, we recommend the PHYLUCE pipeline, which is specifically designed to annotate ultraconserved elements (Faircloth  2015; Tutorial: https://phyluce.readthedocs.io/en/latest/tutorial-one.html#finding-uce-loci).  
+You can thus use the file **[job_name]link.scafSeq** and start the pipeline at the **"Finding UCE"** step.  
+  
+# Associated publication
+
+Allio, R., Schomaker-Bastos, A., Romiguier, J., Prosdocimi, F., Nabholz, B., & Delsuc, F. (2019). MitoFinder: efficient automated large-scale extraction of mitogenomic data in target enrichment phylogenomics. BioRxiv, 685412. https://doi.org/10.1101/685412
+
+
+# HOW TO GET REFERENCE MITOCHONDRIAL GENOMES FROM NCBI
+
+1. Go to [NCBI](https://www.ncbi.nlm.nih.gov/)  
+2. Select "Nucleotide" in the search bar  
+3. Search for mitochondrion genomes:  
+- [x] RefSeq (if available)  
+- [x] Sequence length from 12000 to 20000  
+4. Download complete record in GenBank format  
+
+![](/image/NCBI.png)
+
+
+
 
 
diff --git a/circularizationCheck.pyc b/circularizationCheck.pyc
index ad7fa99a4a8a68f640c2e97b0f84329d6b10e646..9658cec88c65ff7898e62152ada729fdcda01561 100755
GIT binary patch
delta 42
ucmZ24zfhio`7<xq);&{WH*!qpVYHpRfJbBVd7daH2y^mz9?8i?{I&o=mJV+K

delta 58
zcmZ1|zh0h$`7<w9&Y5>H8#$))FuF`$z@xz)UuLLhpl3AMn_qJCLmo#a4B;>UE`1T4

diff --git a/genbankOutput.py b/genbankOutput.py
index 2b18007..47f8a40 100755
--- a/genbankOutput.py
+++ b/genbankOutput.py
@@ -31,7 +31,7 @@
 from Bio.Data import CodonTable
 from decimal import Decimal
 
-def genbankOutput(resultGbFile, resultFile, listOfFeaturesToOutput, buildCloroplast = False, dLoopSize = 800):
+def genbankOutput(resultGbFile, resultFile, listOfFeaturesToOutput, buildCloroplast = False, dLoopSize = 800, nWalk = 20):
 	'''
 	Creates a genbank file based on a fasta file given (resultfile) and a list of features that the genbank
 	file should present (listoffeaturestooutput)
@@ -68,21 +68,24 @@ def genbankOutput(resultGbFile, resultFile, listOfFeaturesToOutput, buildCloropl
 				
 			main_start_pos = SeqFeature.ExactPosition(thisFeatureAlignment.startBase)
 			main_end_pos = SeqFeature.ExactPosition(thisFeatureAlignment.endBase)
-			
+
 			if main_feature_type == "gene":
-				codonDiff = ((main_end_pos - (main_start_pos + 1)) % 3)
+				codonDiff = ((main_end_pos - main_start_pos+1) % 3)
 				if codonDiff == 2:
 					main_end_pos += 1
 				elif codonDiff == 1:
 					main_end_pos -= 1
+			#print main_start_pos 
+			#print main_end_pos
+			#print thisFeatureAlignment.frame
+			
 
 			# 2. Use the locations do define a FeatureLocation
 			if thisFeatureAlignment.frame < 0:
 				strandToOutput = -1
 			else:
 				strandToOutput = 1
-			main_feature_location = SeqFeature.FeatureLocation(main_start_pos,main_end_pos,strand=strandToOutput)
-
+			main_feature_location = SeqFeature.FeatureLocation(main_start_pos-1,main_end_pos,strand=strandToOutput)
 			# 3. Create a SeqFeature
 			main_feature = SeqFeature.SeqFeature(main_feature_location,type=main_feature_type, qualifiers=main_feature_qualifiers)
 			'''
@@ -103,11 +106,10 @@ def genbankOutput(resultGbFile, resultFile, listOfFeaturesToOutput, buildCloropl
 
 			lastFeatureAlignment = thisFeatureAlignment
 			'''
-
 			# 4. Append your newly created SeqFeature to your SeqRecord
 			if main_feature_type == "gene":				
 				cds_qualifiers = dict(main_feature_qualifiers)
-				coding_dna = Seq(str(finalResults.seq[thisFeatureAlignment.startBase:thisFeatureAlignment.endBase]), IUPAC.unambiguous_dna)
+				coding_dna = Seq(str(finalResults.seq[thisFeatureAlignment.startBase-1:thisFeatureAlignment.endBase]), IUPAC.unambiguous_dna)
 				if strandToOutput == -1:
 					coding_dna = coding_dna.reverse_complement()
 				translationTable = thisFeatureAlignment.translationTable
@@ -120,8 +122,8 @@ def genbankOutput(resultGbFile, resultFile, listOfFeaturesToOutput, buildCloropl
 						listOfStartCodons.append(startCodonTranslation)
 					startCodons = tuple(listOfStartCodons) #need to make it a tuple so that startswith works with it
 				stopCodons = ('*','$','#','+')
-				nWalkStart = 20
-				nWalkStop = 20
+				nWalkStart = nWalk
+				nWalkStop = nWalk
 				'''
 				For genes in the -1 strand, we look for the stop codons at the start and the start codons at the end!
 				'''
@@ -130,8 +132,8 @@ def genbankOutput(resultGbFile, resultFile, listOfFeaturesToOutput, buildCloropl
 					tempStopCodons = stopCodons
 					startCodons = tempStopCodons
 					stopCodons = tempStartCodons
-					nWalkStart = 20
-					nWalkStop = 20			
+					nWalkStart = nWalk
+					nWalkStop = nWalk			
 
 				try:
 					'''
@@ -153,33 +155,45 @@ def genbankOutput(resultGbFile, resultFile, listOfFeaturesToOutput, buildCloropl
 						and not coding_dna_TranslationBackward.startswith(startCodons) and n < nWalkStart and startBase - (3*(n+1)) >= 0:
 							try:
 								n += 1
-								coding_dna_Backward = Seq(str(finalResults.seq[startBase - (3*n):endBase]), IUPAC.unambiguous_dna)
+								coding_dna_Backward = Seq(str(finalResults.seq[startBase - (3*n) - 1:endBase]), IUPAC.unambiguous_dna)
 								if strandToOutput == -1:
 									coding_dna_Backward = coding_dna_Backward.reverse_complement()
 								coding_dna_TranslationBackward = coding_dna_Backward.translate(table=translationTable)
-								coding_dna_Forward = Seq(str(finalResults.seq[startBase + (3*n):endBase]), IUPAC.unambiguous_dna)
+								coding_dna_Forward = Seq(str(finalResults.seq[startBase -1 - (3*n):endBase]), IUPAC.unambiguous_dna)
 								if strandToOutput == -1:
+									coding_dna_Forward = Seq(str(finalResults.seq[startBase -1 + (3*n):endBase]), IUPAC.unambiguous_dna)
 									coding_dna_Forward = coding_dna_Forward.reverse_complement()
 								coding_dna_TranslationForward = coding_dna_Forward.translate(table=translationTable)
 								if strandToOutput == -1:
 									coding_dna_TranslationBackward = coding_dna_TranslationBackward[::-1]
 									coding_dna_TranslationForward = coding_dna_TranslationForward[::-1]
+								
 							except:
 								pass
 						else:
-							if coding_dna_TranslationBackward.startswith(startCodons):
+							if coding_dna_TranslationForward.startswith(startCodons):
+								main_start_pos = SeqFeature.ExactPosition(startBase - (3*n))
+								if strandToOutput == -1:
+									SeqFeature.ExactPosition(startBase + (3*n))
+								#startBase += (3*n) #backup
+								#thisFeatureAlignment.startBase = startBase #backup
+								thisFeatureAlignment.startBase = main_start_pos  #update
+								main_feature_location = SeqFeature.FeatureLocation(main_start_pos-1,main_end_pos,strand=strandToOutput)
+							elif coding_dna_TranslationBackward.startswith(startCodons):
 								main_start_pos = SeqFeature.ExactPosition(startBase - (3*n))
-								startBase += (3*n)
-								thisFeatureAlignment.startBase = startBase
-								main_feature_location = SeqFeature.FeatureLocation(main_start_pos,main_end_pos,strand=strandToOutput)
-							elif coding_dna_TranslationForward.startswith(startCodons):
-								main_start_pos = SeqFeature.ExactPosition(startBase + (3*n))
-								startBase += (3*n)
-								thisFeatureAlignment.startBase = startBase
-								main_feature_location = SeqFeature.FeatureLocation(main_start_pos,main_end_pos,strand=strandToOutput)
+								#startBase += (3*n) #backup
+								#thisFeatureAlignment.startBase = startBase #backup
+								thisFeatureAlignment.startBase = main_start_pos  #update
+								main_feature_location = SeqFeature.FeatureLocation(main_start_pos-1,main_end_pos,strand=strandToOutput)
 					except:
 						pass
-
+					
+					'''
+					Updating coding_dna with (new) coordinates
+					'''
+					coding_dna = Seq(str(finalResults.seq[thisFeatureAlignment.startBase-1:thisFeatureAlignment.endBase]), IUPAC.unambiguous_dna)
+					if strandToOutput == -1:
+						coding_dna = coding_dna.reverse_complement()
 					'''
 					Making sure it ends with * (stop codon)
 					'''
@@ -199,16 +213,15 @@ def genbankOutput(resultGbFile, resultFile, listOfFeaturesToOutput, buildCloropl
 						and not coding_dna_TranslationBackward.endswith(stopCodons) and n < nWalkStop and endBase + (3*(n+1)) <= len(finalResults):
 							try:
 								n += 1
-								coding_dna_Backward = Seq(str(finalResults.seq[startBase:endBase - (3*n)]), IUPAC.unambiguous_dna)
+								coding_dna_Backward = Seq(str(finalResults.seq[startBase -1 :endBase - (3*n)]), IUPAC.unambiguous_dna)
 								if strandToOutput == -1:
+									coding_dna_Backward = Seq(str(finalResults.seq[startBase -1 :endBase + (3*n)]), IUPAC.unambiguous_dna)
 									coding_dna_Backward = coding_dna_Backward.reverse_complement()
 								coding_dna_TranslationBackward = coding_dna_Backward.translate(table=translationTable)
-								
-								coding_dna_Forward = Seq(str(finalResults.seq[startBase:endBase + (3*n)]), IUPAC.unambiguous_dna)
+								coding_dna_Forward = Seq(str(finalResults.seq[startBase - 1:endBase + (3*n)]), IUPAC.unambiguous_dna)
 								if strandToOutput == -1:
 									coding_dna_Forward = coding_dna_Forward.reverse_complement()
 								coding_dna_TranslationForward = coding_dna_Forward.translate(table=translationTable)
-
 								if strandToOutput == -1:
 									coding_dna_TranslationBackward = coding_dna_TranslationBackward[::-1]
 									coding_dna_TranslationForward = coding_dna_TranslationForward[::-1]
@@ -217,18 +230,23 @@ def genbankOutput(resultGbFile, resultFile, listOfFeaturesToOutput, buildCloropl
 						else:
 							if coding_dna_TranslationBackward.endswith(stopCodons):
 								main_end_pos = SeqFeature.ExactPosition(endBase - (3 * n))
-								endBase -= (3 * (n-1))
-								thisFeatureAlignment.endBase = endBase
-								main_feature_location = SeqFeature.FeatureLocation(main_start_pos,main_end_pos,strand=strandToOutput)
+								if strandToOutput == -1:
+									main_end_pos = SeqFeature.ExactPosition(endBase + (3 * n))
+								#endBase -= (3 * (n-1)) #backup
+								#thisFeatureAlignment.endBase = endBase #backup
+								thisFeatureAlignment.endBase = main_end_pos #update
+								main_feature_location = SeqFeature.FeatureLocation(main_start_pos-1,main_end_pos,strand=strandToOutput)
 							elif coding_dna_TranslationForward.endswith(stopCodons):
 								main_end_pos = SeqFeature.ExactPosition(endBase + (3 * n))
-								endBase += (3 * (n-1))
-								thisFeatureAlignment.endBase = endBase
-								main_feature_location = SeqFeature.FeatureLocation(main_start_pos,main_end_pos,strand=strandToOutput)
+								#endBase += (3 * (n-1)) #backup
+								#thisFeatureAlignment.endBase = endBase #backup
+								thisFeatureAlignment.endBase = main_end_pos #update
+								main_feature_location = SeqFeature.FeatureLocation(main_start_pos-1,main_end_pos,strand=strandToOutput)
 					except:
 						pass
+						 
+					coding_dna = Seq(str(finalResults.seq[thisFeatureAlignment.startBase -1 :thisFeatureAlignment.endBase]),IUPAC.unambiguous_dna)
 					
-					coding_dna = Seq(str(finalResults.seq[thisFeatureAlignment.startBase:thisFeatureAlignment.endBase]),IUPAC.unambiguous_dna)
 					if strandToOutput == 1:
 						coding_dna_Translation = coding_dna.translate(table=translationTable)
 					else:
diff --git a/genbankOutput.pyc b/genbankOutput.pyc
index 7bb886941f1ea5a7a5819b5f927b6e1a6445fe6c..15a6663235c7e14b3ebfc2403d232b83f11977d8 100755
GIT binary patch
delta 3011
zcmbtWO=ufO6#i!Qvyv=Fwj^sM%Z@EOiIv2d(3B=^eXvPNZ-J15Od&NPTSBBFu)Gi(
zcDe^O&_ihmf)ePp6iT5`Orbrr^w>k7JrtUAFS!*;>9Mrm%>HO4C8?ptyPA1F-@JKm
zzIprmg(ol76Ms(UKDw~>_H6`zN&ar}*?;e0-6_~vyq~~Z1=u;<Xik0g$<yTe-7?j;
z(|n!hGtK7;pZy<lfCIRIEzmdc;2X&F;Pcd?XTmknPNJPcp9Us&lUkC7orIl2!%o9e
z;`Fn{CY(&5=?mB<I!W}tjM<uK4BbtQ8i-7E;lUATXO+FnB{S$JATwxjZ`XH%hz%X_
zZ&{hv#ztW2B(yOD$U$at@Lkq5aP{WbS((FKKu)Q=xH+_^(9WyPb<)_sDeS4ZFds_u
zln#DQm%UMk-b3L-xP&`_%}L1K2f{8u=CSvwuxHeuTMAZCKC|?hRZ?df?Gsv205c`0
zW0Dyyp*9bX)*S=Ay5Vv@jjCOQWtUKq1-LAAS!@3h)KYUvp1>YWsVENJIKJ#Fx-2O-
zG^^6y0eTk=dk%62UFN!|1}dteT8GoB!t^R|8IB?3tX59LrMIO*v#a2a`ofU(;M5QF
zUe4iul^n7JrwnIKSD(j-E9=0jYBYleO`Hnama3;Lt92NS+U!q;TSmL8isVrdL|ucN
zgENnI9qt@zvVuD-tA&Hxa0#=(V(&m!)tWBf5pqq3P(ynWc1^kVAyjnpaQSbcW1_d6
z;Glg{NvINB(OFWuzDL74_Wm+jb^~$&ZVGZyziAu={BgpqL(*dbavAQj@^P2O?TLEm
zq!0bw#e$IysQtzzE1|>-BbKYN^~|KZ3UWgI&*6T8*`Ru5OthjiSB5;L(B$c$0`iO=
z@NBNBb0C{Ic-5eDn~~Sm_Vc~ZQpDGDFA_B2F6u)=<r(k@x=nRTta4QyrWELOxF&qz
zC4!EWK6Ie5I#_FC#b;<yE`LnAr-R}%WKR{J^LTPrC$Nd>2UMQK=H?^p>#6amhMv|$
zRP;PGxRSFxePkqS5&=C@2|8W@T6-HwAq3w$d|L5D_iE|j8W`qBolKG}TIr9J<M~%b
zWMc(Klr$xib_<iNjghu{4Ph-+DSLOGyX14x;&%Yc(dwGuhPnbfBiTGQy}wOI^!C|(
z_^T#-yvA`#4D&z6k~k&S%wtzXk>Ninv8^e=Fc#oxPCU-2JZeEWCnZj5N>E2G4Jl#y
zO^|jSru=c{g~ad)5uV@CTTv6k3$De5%t*d@9!6ZP3D+2pj!UCxALSujNng-$9^K0f
zwsW;%U82+ay-Lz?d2!8DytroW@M*=9+uP0tya=;et}>VGVMAWjvLP=b8^sISsIAhH
z%y>pW8)F7<y|ALa2NR11fX_qoV8`M97c=~J{WLq?2pZ_J&4%4@<Qi;va2b3F_DO8E
zy0eOuHhYjS(DdZR5jJdlSvPFfk`4Rp3wt>~0GSu|x1zc+1i!_C`B^>R`#bP{!~Yau
z;~#zsoYT$TcRx4g?!(c&<$uo+_(pT(=&*2K93^Id5#s#dN%2-T#V^!u>(;xSqsyf)
z4Dre!Uu_Obm8Q5l*sN^Lm<5qF(qh4=m-8Yo)(0O}-rRawBt%YZa9$8a{kx$@c~KJc
zMnYtag0QHqCbD9Q8jHWH^hu`Hl}~aoWzBEC7J>NAA)E%GE(Dn$2Qk)-!LIda?j1(n
O<P&@C`{BP{z4$L>$Qh6T

delta 2775
zcmbVO&1)M+6#vaiyDQ1E^{ri7b{zRjiVvxqK<$PU0xj*Og_b}hN`h&<P_hDn)}q*S
zr+ZKhJ*5~yDHKW%IrUITO(2K%+<zd49tu6?P}&j#ffP*oW_DNFl@qtA(XMCS$8X-e
z_nUe9%Y{ETYKceFna7_#zP^LtFG2sW(6jr=VXdG2Ou_p(SZQ2sX6}6RAlcYaNw-eV
zG(E2iyWi44A8-MSVW8lPd*JHuj=JD!a5dOT*eV8OpkX^HB^s<GEERQ2gCWGe;R&&h
zekyPnFjzV|NjTp}AavBn?z%(`L^`_g&}XpI!roP3r7%c<>uAw#-fIUDD>7o1=BhMS
z0?U3v8f)lhz*E@!HRURJ<Hr3IPva)QGq5sn)3B#tXT|0^8g_3mYbq-ALTNTC4TaN_
zLg$eAh%TYe;PS~8oX?q+1D`_gHnZ|#&@BZh!lyt!1tINE!=8cbQG7X2=6E&~kt8oA
zwEC3r=qPY%ic8ZhtRf84Dxu2Bc?MQlYM(OFl4_GrqerHsD30A|yyYvpRAGWIh$?gd
z=c3|S74RIoRM(;yh_H%cApuy0OR*`8=cSwi$ip&ZR3>OQ1E%moB*-t*ecG##gBQ^+
zqhFCkAV$e;igzA$QunK{4N+E)lLwKvBE$VzaZ9jGQ7n=N(awSs(j4p>+%jf)1s{>R
zioG4U71;As^ES94)^q6{B6CwNZ^B-HH7i^PU?MUa)31t-4(C-bK_mN&5Rs12lKn-Y
z6ozx?{h_q1CGZ-oGvM>`y%34_R>GYFC+`|Kxh)DGmn`G<1iJsVx_=-y7YT-+h5Hii
zH!7+_WFRV8imdlEF+5C{Jhv`RyZBXbkf4;*Wv)u#%fg+n1o`HxazHFx6BXwT?ES^a
z*`|f(#1^xTrMgQK^9I~`$qv%3g1F{3P&Nf)RFM7tLr+K~+ZypAkFDLcc$q~=qblD^
zx+`IoS7hWgjc+-|oe`|z!@e0QGHrN-n^WE85O-PP(lm-o$sHMAQ%^}{tGAY9jM2MI
zPb-=t=O;Bdx}&g6K^8hOBf+Lfyv+Uf^G{T_4Y%%%bMI8NNnqpu0RyeB4vN3a(B2<D
zi*_N$8vd~$0-Z*?TiPkP5`_C#+8s;U(JD2cc29JX)7YG7w;*XpI-;vjJHu}-l!p%U
z;5^ayA3mRaV>dH)$Hd6@j{c(L3t`_z*@)vt$#+|S0rLH>pOVm4@2)1ucUNoEBgq#g
z%DI*bI5NtRBp=<(p@7b%S>B=pPZGLsxBaX8Syy`3yG{%oSHbz=1h^#NB>3ik0*{d}
zcAQ78z?<j=K`Zb(mzJJn<XLp7>xBIwcChI_qhAr$A}+VO1;Kr5BoL=SWG!r87Tul%
zRD>t`SM$`?MxRE+Q@Y>_vOUmON-%HAdG9#pO}$zm`;GCOY<<4>xe0F{El=((|4WB}
zuQr#DKFl95^=6sA*p40)?lZPN{IT@GQKS5w!d@F5n@xTD<Co8GoWC&qyV_)zhRep4
zd?LrvipI=Dt&(L~)*S8{Z*6U`1k12>n&((?^gqi=Y>qT3CC3al!_urqdVjP?@}xTa
q)|k8WO4L~iK$N(1apWvU(phP;;k)Kn6^o*8(i3^@#)Ds-z4#A_&i#%6

diff --git a/geneChecker.pyc b/geneChecker.pyc
index 56f8fd282b1c14847fad64afacc4847af99d02c7..61b14d8d9d1995ddda1c72bbf7e20e0f7f936e2f 100755
GIT binary patch
delta 105
zcmbPV_q>*a`7<xq);&{WH*#nRGMY?Q6xNvhz*b~(nP57U$19WpWi1t|hOz>L%OI@H
a--O?ZK$#+%^4w5nvWYJzl=;Bco)G}zydNe2

delta 169
zcmaD}JHL*D`7<w9&Y5>H8#%ND8LcNb*lMuHml^6A=ow9p7OX}SnII%KSyU(pLlj+|
hiEtUJn$4Gm--)0Jz0#29MiVkJ@#RDlT48I?2ml)CF{c0k

diff --git a/geneChecker_fasta.py b/geneChecker_fasta.py
index e1105fc..3375196 100755
--- a/geneChecker_fasta.py
+++ b/geneChecker_fasta.py
@@ -145,12 +145,12 @@ def geneCheck(fastaReference, resultFile, cutoffEquality_prot, cutoffEquality_nu
 				for hsp in qhit.hsps: #hsp object checking, this contains the alignment info 
 					featureName = qhit.id
 					if float(str(hsp.ident_num)+".00")/float(str(hsp.aln_span)+".00")*100 >= float(cutoffEquality_prot):
-						if hsp.aln_span*3+3 >= alignCutOff:
+						if hsp.aln_span*3 >= alignCutOff:
 							if featureName in listOfImportantFeatures:
 								targetFeature = listOfImportantFeatures[featureName]
-								startBase = min(hsp.query_range[0],hsp.query_range[1])
+								startBase = min(hsp.query_range[0],hsp.query_range[1])+1
 								endBase = max(hsp.query_range[0],hsp.query_range[1])
-								alignLen = endBase - startBase
+								alignLen = (endBase-startBase)+1
 								if featureName in listOfPresentFeatures:
 									mainFeatureName = featureName
 									mainFeatureFound = listOfPresentFeatures[mainFeatureName]
@@ -171,7 +171,7 @@ def geneCheck(fastaReference, resultFile, cutoffEquality_prot, cutoffEquality_nu
 											alignment.frame = featureFrame
 											alignment.startBase = startBase
 											alignment.endBase = endBase
-											alignment.seqFound = refSeq.seq[startBase:endBase]
+											alignment.seqFound = refSeq.seq[startBase-1:endBase]
 											listOfPresentFeatures[featureName] = (listOfImportantFeatures[qhit.id], alignment,
 																											featureFrame <= -1)
 								else:
@@ -184,13 +184,14 @@ def geneCheck(fastaReference, resultFile, cutoffEquality_prot, cutoffEquality_nu
 										alignment.frame = featureFrame
 										alignment.startBase = startBase
 										alignment.endBase = endBase
-										alignment.seqFound = refSeq.seq[startBase:endBase]
+										alignment.seqFound = refSeq.seq[(startBase-1):endBase]
 										listOfPresentFeatures[featureName] = (listOfImportantFeatures[qhit.id], alignment, featureFrame <= -1)
 										if alignLen >= (len(targetFeature*3)+3) * 0.99:
 										#if we've already built a lot, dont even bother with finding splits
 											listOfCompleteGenes.append(featureName)
 										break
 
+		#exit()
 		#copying the blast result in order for this info to be assessed later if the user desires
 		shutil.copyfile("important_features.blast.xml", out_blast+"_ref.cds.blast.xml")
 		os.remove("important_features.blast.xml")
@@ -275,12 +276,13 @@ def geneCheck(fastaReference, resultFile, cutoffEquality_prot, cutoffEquality_nu
 					alignment.seqFound = refSeq.seq[startBase:endBase]
 					listOfPresentFeatures[featureName] = (listOfImportantFeatures[featureName], alignment, featureFrame == -1)
 					break
+				
 		shutil.copyfile("important_features.blast.xml", out_blast+"_ref.blast.xml")
 		shutil.copyfile("important_features.fasta", out_blast+"_ref.fasta")
 		os.remove("important_features.blast.xml")
 
 	os.remove("important_features.fasta")
-
+	
 	return (listOfPresentFeatures, listOfImportantFeatures, listOfSplits, listOfCompleteGenes)
 
 def createImageOfAnnotation(sequenceObject, outputFile):
@@ -308,7 +310,7 @@ def createImageOfAnnotation(sequenceObject, outputFile):
 
 	for gbkFeature in sequenceObject.features:
 		if gbkFeature.type == 'tRNA' or gbkFeature.type == 'CDS' or gbkFeature.type == 'rRNA' or gbkFeature.type == 'D-loop':
-			featureLen = gbkFeature.location.end - gbkFeature.location.start
+			featureLen = (gbkFeature.location.end - gbkFeature.location.start) + 1
 			featureRelativeSize = horizontalSize * featureLen / len(sequenceObject.seq)
 			featureRelativeStart = (horizontalSize * gbkFeature.location.start / len(sequenceObject.seq)) + 1
 		
@@ -388,6 +390,7 @@ def createImageOfAnnotation(sequenceObject, outputFile):
 	percent_equality_prot=sys.argv[8]
 	percent_equality_nucl=sys.argv[9]
 	genbank=sys.argv[10]
+	nWalk=sys.argv[11]
 	if sys.argv[1] == '-h' or sys.argv[1] == '--help':
 		print 'Usage: genbank_reference fasta_file output_file organism_type(integer, default=2) alignCutOff(float, default=45) coveCutOff(7)'
 		print 'Only the first, second, and third arguments are required.'
@@ -490,7 +493,7 @@ def createImageOfAnnotation(sequenceObject, outputFile):
 		listOfFeaturesToOutput.sort()
 		print 'Total features found after Arwen: ',len(listOfFeaturesToOutput)
 
-		finalResults = genbankOutput.genbankOutput(outputFile, resultFile, listOfFeaturesToOutput, False, 900)
+		finalResults = genbankOutput.genbankOutput(outputFile, resultFile, listOfFeaturesToOutput, False, 900, nWalk)
 
 		with open(outputFile, "w") as outputResult:
 			count = SeqIO.write(finalResults, outputResult, "genbank")
diff --git a/mitofinder b/mitofinder
index de66b11..a289171 100755
--- a/mitofinder
+++ b/mitofinder
@@ -1,5 +1,5 @@
 #!/usr/bin/python
-#Version: 1.0.2
+#Version: 1.1
 #Authors: Allio Remi & Schomaker-Bastos Alex
 #ISEM - CNRS - LAMPADA - IBQM - UFRJ
 
@@ -65,7 +65,7 @@ if __name__ == "__main__":
 	parser.add_argument('--metaspades', help='Use MetaSPAdes for assembly. ',
 						default=False, dest='metaspades', action='store_true')
 #	parser.add_argument('--annotate', help='Use Mitofinder to annotate a given mitochondrial (-c) ', dest='Annotate', action='store_true')	
-	parser.add_argument('-j', '--jobname', help = 'Job name to be used throughout the process', default="MitoFinder_run", dest='processName')
+	parser.add_argument('-j', '--jobname', help = 'Sequence ID to be used throughout the process', default="", dest='processName')
 	parser.add_argument('-1', '--Paired-end1', help='File with forward paired-end reads', default="", dest='PE1')	
 	parser.add_argument('-2', '--Paired-end2', help='File with reverse paired-end reads', default="", dest='PE2')	
 	parser.add_argument('-s', '--Single-end', help='File with single-end reads', default="", dest='SE')	
@@ -77,11 +77,13 @@ if __name__ == "__main__":
 						default=100, dest='shortestContig')
 	parser.add_argument('-p', '--processors', help='Number of threads Mitofinder will use at most.', type=int,
 						default=4, dest='processorsToUse')
-	parser.add_argument('-r', '--refseq', help='Reference mitochondrial genome.s in GenBank format (.gb). You can put several reference mitgenomes together.',
+	parser.add_argument('-r', '--refseq', help='Reference mitochondrial genome in GenBank format (.gb).',
 						default="", dest='refSeqFile')
 	parser.add_argument('-e', '--blaste', help='e-value of blast program used for contig identification and annotation. Default = 0.000001', type=float,
 						default=0.000001, dest='blasteVal')
-	parser.add_argument('--out_gb', help='Create annotation output file in GenBank format.', default=False, dest='genbk', action='store_true') 
+	parser.add_argument('-n', '--nwalk', help='Maximum number of codon steps to be tested on each size of the gene to find the start and stop codon during the annotation step. Default = 20 (60 bases)', type=int,
+						default=20, dest='nWalk')
+	parser.add_argument('--out_gb', help='Do not create annotation output file in GenBank format.', default=True, dest='genbk', action='store_false') 
 	parser.add_argument('--blastidentitynucl', help='Nucleotide identity percentage for a hit to be retained. Default = 50',
 					default=50, type=float, dest='blastIdentityNucl')
 	parser.add_argument('--blastidentityprot', help='Amino acid identity percentage for a hit to be retained. Default = 40',
@@ -114,7 +116,7 @@ if __name__ == "__main__":
 					24. Pterobranchia Mitochondrial Code\
 					25. Candidate Division SR1 and Gracilibacteria Code",
 						type=int, default=1, dest='organismType')
-	parser.add_argument('-v', '--version', help="Version 1.0.2", default=False, dest='versionCheck', action='store_true')
+	parser.add_argument('-v', '--version', help="Version 1.1", default=False, dest='versionCheck', action='store_true')
 	parser.add_argument('--example', help="Print getting started examples", default=False, dest='example', action='store_true')
 	
 	args = parser.parse_args()
@@ -128,10 +130,20 @@ if __name__ == "__main__":
 		exit()
 	
 	if args.versionCheck == True:
-		print "MitoFinder version 1.0.2"
+		print "MitoFinder version 1.1"
 		exit()
-	
-	Logfile=args.processName+".log"
+
+	if args.PE1 == "" and args.PE2 == "" and args.SE == "" and args.Assembly == "" :
+		print "\nERROR: Read or assembly files are not specified.\n Please, use -1 -2 -s or -a option to specify input data."
+		exit()
+	if args.refSeqFile == "":
+		print "\nERROR: Reference file is required (-r option)"
+		exit()
+	if args.processName == "":
+		print "\nERROR: SeqID is required (-j option)"
+		exit()
+		
+	Logfile=args.processName+"_MitoFinder.log"
 	Logfile=os.path.join(initial_path,Logfile)
 	logfile=open(Logfile,"w")
 	logfile.write('Command line: %s' % ' '.join(sys.argv)+"\n")
@@ -145,15 +157,7 @@ if __name__ == "__main__":
 	print 'Now running MitoFinder ...\n'
 	print "Start time : "+datetime.now().strftime("%Y-%m-%d %H:%M:%S")+'\n'
 
-	if args.PE1 == "" and args.PE2 == "" and args.SE == "" and args.Assembly == "" :
-		print "\nERROR: Read or assembly files are not specified.\n Please, use -1 -2 -s or -a option to specify input data."
-		logfile.write("\nERROR: Read or assembly files are not specified.\n Please, use -1 -2 -s or -a option to specify input data.\n")
-		exit()
-	if args.refSeqFile == "":
-		print "ERROR: Reference file is required (-r option)"
-		logfile.write("ERROR: Reference file is required (-r option)")
-		exit()
-
+		
 	print "Job name = "+args.processName
 	
 	args.refSeqFile=os.path.join(initial_path,args.refSeqFile)
@@ -240,10 +244,6 @@ if __name__ == "__main__":
 	logfile.write("\n")
 	if pathToMegahitFolder.lower() == 'default' and args.idba == False and args.metaspades == False:
 		pathToMegahitFolder = os.path.join(module_dir, 'megahit/')
-		command = 'make -C '+pathToMegahitFolder
-		args1 = shlex.split(command)
-		make = Popen(args1, stdout=open("log","w"))
-		make.wait()
 		logfile.write('WARNING: Megahit is still set in its default folder. Change it in the config file if you encounter problems running this script.\n')
 		print 'WARNING: Megahit is still set in its default folder. Change it in the config file if you encounter problems running this script.'
 	
@@ -795,8 +795,8 @@ if __name__ == "__main__":
 	
 			#creating some stat file:
 			print "\n"
-			print "Creating summary statistics for "+args.processName+"_best_contig.fasta"
-			logfile.write("\n\n"+"Creating summary statistics for "+args.processName+"_best_contig.fasta\n")
+			print "Creating summary statistics for the mtDNA contig"
+			logfile.write("\n\n"+"Creating summary statistics for the mtDNA contig\n")
 			
 			finalResults = SeqIO.read(open(resultFile, 'rU'), "fasta", generic_dna)
 			finalStatsFile = open(pathOfFinalResults + args.processName + '.stats', 'w')
@@ -809,19 +809,25 @@ if __name__ == "__main__":
 			else:
 				finalStatsFile.write("Circularization: No\n")
 				
-			shutil.copyfile(resultFile, pathOfFinalResults+"/"+args.processName+"_best_contig.fasta")
+			command = module_dir+"/rename_fasta_seqID.py " + args.processName + " " + resultFile + " " + pathOfFinalResults+"/"+args.processName+"_mtDNA_contig.fasta" + " " + str(1)
+			args1 = shlex.split(command)
+			rename = Popen(args1, stdout=open(os.devnull, 'wb'))
+			rename.wait()	
+			
+			os.remove(resultFile)
+			#shutil.copyfile(resultFile, pathOfFinalResults+"/"+args.processName+"_mtDNA_contig.fasta")
 			
 			
 			# Annotating with gene_checker
 			print ""		
-			print "Annotating mitochondrial contigs"		
+			print "Annotating mitochondrial contig"		
 			print ""		
 			logfile.write("\nAnnotating\n\n")
 			
 			if recordCount > 1: #if more than 1 ref
 							
-				if os.path.isfile(pathtowork+'/ref_for_best_contig.fasta') == True:
-					os.remove(pathtowork+'/ref_for_best_contig.fasta')
+				if os.path.isfile(pathtowork+'/ref_for_mtDNA_contig.fasta') == True:
+					os.remove(pathtowork+'/ref_for_mtDNA_contig.fasta')
 				
 				for line in open(pathtowork+"/genes_list"):
 					if line.rstrip() != "rrnL" and line.rstrip() != "rrnS":
@@ -831,7 +837,7 @@ if __name__ == "__main__":
 						formatDB = Popen(args1, stdout=open(os.devnull, 'wb'))
 						formatDB.wait()
 						with open(pathtowork+'/'+gene+'_blast_out.txt','w') as BlastResultGene:
-							command = blastFolder+"/blastx -db " + "ref_" + gene+ "_database.fasta" + " -query "+ resultFile + " -evalue " + str(blasteVal) + " -outfmt 6" + " -query_gencode " + str(args.organismType) + " -seg no"
+							command = blastFolder+"/blastx -db " + "ref_" + gene+ "_database.fasta" + " -query "+ pathOfFinalResults+"/"+args.processName+"_mtDNA_contig.fasta" + " -evalue " + str(blasteVal) + " -outfmt 6" + " -query_gencode " + str(args.organismType) + " -seg no"
 							args1 = shlex.split(command)
 							blast = Popen(args1, stdout=BlastResultGene)
 							blast.wait()
@@ -842,7 +848,7 @@ if __name__ == "__main__":
 						formatDB = Popen(args1, stdout=open(os.devnull, 'wb'))
 						formatDB.wait()
 						with open(pathtowork+'/'+gene+'_blast_out.txt','w') as BlastResultGene:
-							command = blastFolder+"/blastn -db " + "ref_" + gene+ "_database.fasta"+ " -query "+ resultFile + " -evalue " + str(blasteVal) + " -outfmt 6 -perc_identity " + str(args.blastIdentityNucl) + " -dust no"
+							command = blastFolder+"/blastn -db " + "ref_" + gene+ "_database.fasta"+ " -query "+ pathOfFinalResults+"/"+args.processName+"_mtDNA_contig.fasta" + " -evalue " + str(blasteVal) + " -outfmt 6 -perc_identity " + str(args.blastIdentityNucl) + " -dust no"
 							args1 = shlex.split(command)
 							blast = Popen(args1, stdout=BlastResultGene)
 							blast.wait()
@@ -861,7 +867,7 @@ if __name__ == "__main__":
 							dico_query[testedGene]=query					
 							bestScore=score
 					
-					refFile=open(pathtowork+'/ref_for_best_contig.fasta','a')
+					refFile=open(pathtowork+'/ref_for_mtDNA_contig.fasta','a')
 					for cle, valeur in dico_query.items():
 						for name, seq in read_fasta(open(pathtowork+"/ref_"+gene+ "_database.fasta")):
 							if name.replace(">","") == valeur:
@@ -869,26 +875,26 @@ if __name__ == "__main__":
 				
 					refFile.close()
 					
-				command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_best_contig.fasta" + " " + pathOfFinalResults + "/" + args.processName+"_best_contig.fasta" + " " + args.processName+"_mito.gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)
+				command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_mtDNA_contig.fasta" + " " + pathOfFinalResults + "/" + args.processName+"_mtDNA_contig.fasta" + " " + args.processName+"_mtDNA_contig.gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)+ " " + str(args.nWalk)
 				args1 = shlex.split(command)
 				fifthStep = Popen(args1, cwd=pathOfFinalResults, stdout=open('geneChecker.log', 'a'), stderr=open('geneChecker_erreur.log', 'a'))
 				fifthStep.wait()	
 			
 			else:
 
-				best_ref=open(pathtowork + "/ref_for_best_contig.fasta","w")
+				best_ref=open(pathtowork + "/ref_for_mtDNA_contig.fasta","w")
 				for line in open(pathtowork+"/genes_list"):
 					gene=line.rstrip()
 					for name, seq in read_fasta(open(pathtowork+"/ref_"+gene+ "_database.fasta")):
 						best_ref.write(name+"\n"+seq+"\n")
 				best_ref.close()
 				
-				command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_best_contig.fasta" + " " + pathOfFinalResults + "/" + args.processName+"_best_contig.fasta" + " " + args.processName+"_mito.gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)
+				command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_mtDNA_contig.fasta" + " " + pathOfFinalResults + "/" + args.processName+"_mtDNA_contig.fasta" + " " + args.processName+"_mtDNA_contig.gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)+ " " + str(args.nWalk)
 				args1 = shlex.split(command)
 				fifthStep = Popen(args1, cwd=pathOfFinalResults, stdout=open('geneChecker.log', 'w'), stderr=open('geneChecker_erreur.log', 'w'))
 				fifthStep.wait()
 
-			test_arwen=pathOfFinalResults + "/" + args.processName+"_best_contig.arwen"
+			test_arwen=pathOfFinalResults + "/" + args.processName+"_mtDNA_contig.arwen"
 			if os.path.isfile(test_arwen) == True:
 				print "tRNA annotation with Arwen run well.\n"
 				logfile.write("tRNA annotation with Arwen run well.\n\n")
@@ -896,7 +902,7 @@ if __name__ == "__main__":
 				print "ERROR: tRNA annotation failed.\nPlease check  "+ pathtowork + "/geneChecker_erreur.log or geneChecker.log to see what happened\nAborting\n"
 				logfile.write("ERROR: tRNA annotation failed.\nPlease check  "+ pathtowork + "/geneChecker_erreur.log or geneChecker.log to see what happened\nAborting\n\n")
 				exit()
-			test_gene_checker=pathOfFinalResults+args.processName+"_mito.gb"
+			test_gene_checker=pathOfFinalResults+args.processName+"_mtDNA_contig.gb"
 			if os.path.isfile(test_gene_checker) == True:
 				print "Annotation completed\n"
 				logfile.write("Annotation completed\n\n")
@@ -985,7 +991,7 @@ if __name__ == "__main__":
 			for line in open(pathtowork+"/"+'contig_list.txt','r'):
 				pathOfResult = pathtowork+"/"+args.processName+'_contig_'+str(c)+'.fasta'
 	
-				resultFile = args.processName + '_partial_mito_'+str(c)+'.fasta'
+				resultFile = args.processName + '_mtDNA_contig_'+str(c)+'.fasta'
 		
 				with open(resultFile, "w") as outputResult: #create draft file to be checked and annotated
 					finalResults = SeqIO.read(open(pathOfResult, 'rU'), "fasta", generic_dna)
@@ -999,32 +1005,38 @@ if __name__ == "__main__":
 		
 				#creating some stat file:
 				print "\n"
-				print "Creating summary statistics for Partial_mito_"+str(c)
+				print "Creating summary statistics for mtDNA contig "+str(c)
 				print ""
-				logfile.write("\n\n"+"Creating summary statistics for Partial_mito_"+str(c)+"\n\n")
+				logfile.write("\n\n"+"Creating summary statistics for mtDNA contig "+str(c)+"\n\n")
 				
 				finalResults = SeqIO.read(open(resultFile, 'rU'), "fasta", generic_dna)
-				finalStatsFile = open(pathOfFinalResults + args.processName + '_partial_mito_'+str(c)+'.stats', 'w')
+				finalStatsFile = open(pathOfFinalResults + args.processName + '_mtDNA_contig_'+str(c)+'.stats', 'w')
 		
 				finalStatsFile.write('Statistics for contig '+str(c)+':\n\n')
 				finalStatsFile.write('Length: ' + str(len(finalResults.seq)) + "\n")
 				finalStatsFile.write('GC content: ' + ("{0:.2f}".format(SeqUtils.GC(finalResults.seq))) + '%\n')
 				finalStatsFile.write("Circularization: No\n")
 					
-				shutil.copyfile(resultFile, pathOfFinalResults+"/"+args.processName+"_partial_mito_"+str(c)+".fasta")
+				command = module_dir+"/rename_fasta_seqID.py " + args.processName + " " + resultFile + " " + pathOfFinalResults+"/"+args.processName+"_mtDNA_contig_"+str(c)+".fasta" + " " + str(c)
+				args1 = shlex.split(command)
+				rename = Popen(args1, stdout=open(os.devnull, 'wb'))
+				rename.wait()	
+				
+				os.remove(resultFile)
+				#shutil.copyfile(resultFile, pathOfFinalResults+"/"+args.processName+"_mtDNA_contig_"+str(c)+".fasta")
 				
 				
 				# creating best ref file for annotation	
 						
-				command = blastFolder+"/makeblastdb -in " + pathOfFinalResults+"/"+args.processName+"_partial_mito_"+str(c)+".fasta" + " -dbtype nucl" #need to formatdb refseq first
+				command = blastFolder+"/makeblastdb -in " + pathOfFinalResults+"/"+args.processName+"_mtDNA_contig_"+str(c)+".fasta" + " -dbtype nucl" #need to formatdb refseq first
 				args1 = shlex.split(command)
 				formatDB = Popen(args1, stdout=open(os.devnull, 'wb'))
 				formatDB.wait()		
 				
 				if recordCount > 1: #if more than 1 ref
-					print "Looking for best reference genes for Partial_mito_"+str(c)
+					print "Looking for best reference genes for mtDNA contig "+str(c)
 					print ""
-					logfile.write("Looking for best reference genes for Partial_mito_"+str(c)+"\n\n")
+					logfile.write("Looking for best reference genes for mtDNA contig "+str(c)+"\n\n")
 					
 					if os.path.isfile(pathtowork+'/ref_for_contig_'+str(c)+'.fasta') == True:
 						os.remove(pathtowork+'/ref_for_contig_'+str(c)+'.fasta')
@@ -1037,7 +1049,7 @@ if __name__ == "__main__":
 							formatDB = Popen(args1, stdout=open(os.devnull, 'wb'))
 							formatDB.wait()
 							with open(pathtowork+'/'+gene+'_blast_out.txt','w') as BlastResultGene:
-								command = blastFolder+"/blastx -db " + "ref_" + gene+ "_database.fasta" + " -query "+ pathOfFinalResults+"/"+args.processName+"_partial_mito_"+str(c)+".fasta" + " -evalue " + str(blasteVal) + " -outfmt 6" + " -query_gencode " + str(args.organismType) + " -seg no"
+								command = blastFolder+"/blastx -db " + "ref_" + gene+ "_database.fasta" + " -query "+ pathOfFinalResults+"/"+args.processName+"_mtDNA_contig_"+str(c)+".fasta" + " -evalue " + str(blasteVal) + " -outfmt 6" + " -query_gencode " + str(args.organismType) + " -seg no"
 								args1 = shlex.split(command)
 								blast = Popen(args1, stdout=BlastResultGene)
 								blast.wait()
@@ -1048,7 +1060,7 @@ if __name__ == "__main__":
 							formatDB = Popen(args1, stdout=open(os.devnull, 'wb'))
 							formatDB.wait()
 							with open(pathtowork+'/'+gene+'_blast_out.txt','w') as BlastResultGene:
-								command = blastFolder+"/blastn -db " + "ref_" + gene+ "_database.fasta"+ " -query "+ pathOfFinalResults+"/"+args.processName+"_partial_mito_"+str(c)+".fasta" + " -evalue " + str(blasteVal) + " -outfmt 6 -perc_identity " + str(args.blastIdentityNucl) + " -dust no"
+								command = blastFolder+"/blastn -db " + "ref_" + gene+ "_database.fasta"+ " -query "+ pathOfFinalResults+"/"+args.processName+"_mtDNA_contig_"+str(c)+".fasta" + " -evalue " + str(blasteVal) + " -outfmt 6 -perc_identity " + str(args.blastIdentityNucl) + " -dust no"
 								args1 = shlex.split(command)
 								blast = Popen(args1, stdout=BlastResultGene)
 								blast.wait()
@@ -1075,9 +1087,9 @@ if __name__ == "__main__":
 					
 						refFile.close()	
 					
-				print "Annotating Partial_mito_"+str(c)
+				print "Annotating mtDNA contig "+str(c)
 				print ""
-				logfile.write("Annotating Partial_mito_"+str(c)+"\n\n")
+				logfile.write("Annotating mtDNA contig "+str(c)+"\n\n")
 				
 				# Annotating with gene_checker
 				"""QRY_dico={}
@@ -1102,14 +1114,14 @@ if __name__ == "__main__":
 								tmp.write("LOCUS"+x)
 								tmp.close()
 											
-						command = module_dir+"/geneChecker.py " + "../best_query.gb" + " " + resultFile + " " + args.processName+"_partial_mito_"+str(c)+".gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff)
+						command = module_dir+"/geneChecker.py " + "../best_query.gb" + " " + resultFile + " " + args.processName+"_mtDNA_contig_"+str(c)+".gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff)
 						args1 = shlex.split(command)
 						fifthStep = Popen(args1, cwd=pathOfFinalResults, stdout=open('geneChecker.log', 'a'), stderr=open('geneChecker_erreur.log', 'a'))
 						fifthStep.wait()
 				"""
 
 				if recordCount > 1:
-					command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_contig_" + str(c) + ".fasta" + " " + pathOfFinalResults+"/"+args.processName+"_partial_mito_"+str(c)+".fasta" + " " + args.processName+"_partial_mito_"+str(c)+".gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)
+					command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_contig_" + str(c) + ".fasta" + " " + pathOfFinalResults+"/"+args.processName+"_mtDNA_contig_"+str(c)+".fasta" + " " + args.processName+"_mtDNA_contig_"+str(c)+".gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)+ " " + str(args.nWalk)
 					args1 = shlex.split(command)
 					fifthStep = Popen(args1, cwd=pathOfFinalResults, stdout=open('geneChecker.log', 'a'), stderr=open('geneChecker_erreur.log', 'a'))
 					fifthStep.wait()
@@ -1123,28 +1135,28 @@ if __name__ == "__main__":
 							best_ref.write(name+"\n"+seq+"\n")
 					best_ref.close()
 					
-					command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_contigs.fasta" + " " + pathOfFinalResults+"/"+args.processName+"_partial_mito_"+str(c)+".fasta" + " " + args.processName+"_partial_mito_"+str(c)+".gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)
+					command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_contigs.fasta" + " " + pathOfFinalResults+"/"+args.processName+"_mtDNA_contig_"+str(c)+".fasta" + " " + args.processName+"_mtDNA_contig_"+str(c)+".gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)+ " " + str(args.nWalk)
 					args1 = shlex.split(command)
 					fifthStep = Popen(args1, cwd=pathOfFinalResults, stdout=open('geneChecker.log', 'w'), stderr=open('geneChecker_erreur.log', 'w'))
 					fifthStep.wait()
 	
 			
-				test_arwen=pathOfFinalResults+"/"+args.processName+"_partial_mito_"+str(c)+".arwen"
+				test_arwen=pathOfFinalResults+"/"+args.processName+"_mtDNA_contig_"+str(c)+".arwen"
 				
 				if os.path.isfile(test_arwen) == True:
-					print "tRNA annotation with Arwen run well for partial mito "+str(c)+".\n"
-					logfile.write("tRNA annotation with Arwen run well for partial mito "+str(c)+".\n"+"\n")
+					print "tRNA annotation with Arwen run well for mtDNA contig "+str(c)+".\n"
+					logfile.write("tRNA annotation with Arwen run well for mtDNA contig "+str(c)+".\n"+"\n")
 				else:
 					print "ERROR: tRNA annotation with Arwen failed\nPlease check  "+ pathtowork + "/geneChecker_erreur.log or geneChecker.log to see what happened\nAborting\n"
 					logfile.write("ERROR: tRNA annotation with Arwen failed\nPlease check  "+ pathtowork + "/geneChecker_erreur.log or geneChecker.log to see what happened\nAborting\n\n")
 					exit()
-				test_gene_checker=pathOfFinalResults+args.processName+"_partial_mito_"+str(c)+".gb"
+				test_gene_checker=pathOfFinalResults+args.processName+"_mtDNA_contig_"+str(c)+".gb"
 				if os.path.isfile(test_gene_checker) == True:
 					print "Annotation completed\n"
 					logfile.write("Annotation completed\n"+"\n")
 				else:
-					print "ERROR: Gene annotation failed for partial mito "+str(c)+".\nPlease check  "+ pathtowork + "/geneChecker_error.log to see what happened\nAborting\n"
-					logfile.write("ERROR: Gene annotation failed for partial mito "+str(c)+".\nPlease check  "+ pathtowork + "/geneChecker_error.log to see what happened\nAborting\n"+"\n")
+					print "ERROR: Gene annotation failed for mtDNA contig "+str(c)+".\nPlease check  "+ pathtowork + "/geneChecker_error.log to see what happened\nAborting\n"
+					logfile.write("ERROR: Gene annotation failed for mtDNA contig "+str(c)+".\nPlease check  "+ pathtowork + "/geneChecker_error.log to see what happened\nAborting\n"+"\n")
 					exit()
 				c+=1		
 			
@@ -1153,8 +1165,8 @@ if __name__ == "__main__":
 		
 	else:	
 		if recordCount > 1: #if more than 1 ref
-			if os.path.isfile(pathtowork+'/ref_for_best_contig.fasta') == True:
-				os.remove(pathtowork+'/ref_for_best_contig.fasta')
+			if os.path.isfile(pathtowork+'/ref_for_mtDNA_contig.fasta') == True:
+				os.remove(pathtowork+'/ref_for_mtDNA_contig.fasta')
 			
 			for line in open(pathtowork+"/genes_list"):
 				if line.rstrip() != "rrnL" and line.rstrip() != "rrnS":
@@ -1194,7 +1206,7 @@ if __name__ == "__main__":
 						dico_query[testedGene]=query					
 						bestScore=score
 				
-				refFile=open(pathtowork+'/ref_for_best_contig.fasta','a')
+				refFile=open(pathtowork+'/ref_for_mtDNA_contig.fasta','a')
 				for cle, valeur in dico_query.items():
 					for name, seq in read_fasta(open(pathtowork+"/ref_"+gene+ "_database.fasta")):
 						if name.replace(">","") == valeur:
@@ -1202,7 +1214,7 @@ if __name__ == "__main__":
 			
 				refFile.close()
 				
-			command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_best_contig.fasta" + " " + args.CONTIG + " " + args.processName+"_mito.gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)
+			command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_mtDNA_contig.fasta" + " " + args.CONTIG + " " + args.processName+"_mtDNA_contig.gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)+ " " + str(args.nWalk)
 			args1 = shlex.split(command)
 			fifthStep = Popen(args1, cwd=pathOfFinalResults, stdout=open('geneChecker.log', 'a'), stderr=open('geneChecker_erreur.log', 'a'))
 			fifthStep.wait()	
@@ -1211,14 +1223,14 @@ if __name__ == "__main__":
 			print "Running Annotation\n"
 			logfile.write("Running Annotation\n"+"\n")
 
-			best_ref=open(pathtowork + "/ref_for_best_contig.fasta","w")
+			best_ref=open(pathtowork + "/ref_for_mtDNA_contig.fasta","w")
 			for line in open(pathtowork+"/genes_list"):
 				gene=line.rstrip()
 				for name, seq in read_fasta(open(pathtowork+"/ref_"+gene+ "_database.fasta")):
 					best_ref.write(name+"\n"+seq+"\n")
 			best_ref.close()
 			
-			command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_best_contig.fasta" + " " + args.CONTIG + " " + args.processName+"_mito.gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)
+			command = module_dir+"/geneChecker_fasta.py " + pathtowork + "/ref_for_mtDNA_contig.fasta" + " " + args.CONTIG + " " + args.processName+"_mtDNA_contig.gb" + " " + str(args.organismType) + " " + str(args.aligncutoff) + " " + str(args.coveCutOff) + " " + str(blasteVal) + " " + str(args.blastIdentityProt) + " " + str(args.blastIdentityNucl) + " " + str(args.genbk)+ " " + str(args.nWalk)
 			args1 = shlex.split(command)
 			fifthStep = Popen(args1, cwd=pathOfFinalResults,stdout=open('geneChecker.log', 'w'), stderr=open('geneChecker_erreur.log', 'w'))
 			fifthStep.wait()
@@ -1232,7 +1244,7 @@ if __name__ == "__main__":
 			print "ERROR: tRNA annotation with Arwen failed\nPlease check  "+ pathtowork + "/geneChecker_erreur.log or geneChecker.log to see what happened\nAborting\n"
 			logfile.write( "ERROR: tRNA annotation with Arwen failed\nPlease check  "+ pathtowork + "/geneChecker_erreur.log or geneChecker.log to see what happened\nAborting\n"+"\n")
 			exit()
-		test_gene_checker=pathOfFinalResults+args.processName+"_mito.gb"
+		test_gene_checker=pathOfFinalResults+args.processName+"_mtDNA_contig.gb"
 		if os.path.isfile(test_gene_checker) == True:
 			print "Annotation is done\n"
 			logfile.write("Annotation is done\n"+"\n")
@@ -1275,11 +1287,19 @@ if __name__ == "__main__":
 	logfile.write("\n")
 	
 	#sort gff
-	for f in glob.glob(pathOfFinalResults+"/*_raw.gff"):
-		command = module_dir+"/sort_gff.py " + f
-		args1 = shlex.split(command)
-		sort_gff = Popen(args1, cwd=pathOfFinalResults, stdout=open('geneChecker.log', 'a'), stderr=open('geneChecker_erreur.log', 'a'))
-		sort_gff.wait()	
+	pathlist = glob.glob(pathOfFinalResults+"/*.gb")
+	if len(pathlist) == 1 :
+		for f in glob.glob(pathOfFinalResults+"/*_raw.gff"):
+			command = module_dir+"/sort_gff.py " + f + " " + args.processName +".1 " + str(args.organismType)
+			args1 = shlex.split(command)
+			sort_gff = Popen(args1, cwd=pathOfFinalResults, stdout=open('geneChecker.log', 'a'), stderr=open('geneChecker_erreur.log', 'a'))
+			sort_gff.wait()	
+	else:
+		for f in glob.glob(pathOfFinalResults+"/*_raw.gff"):
+			command = module_dir+"/sort_gff.py " + f + " " + args.processName +"."+ f.split("_raw")[0][-1] + " " + str(args.organismType)
+			args1 = shlex.split(command)
+			sort_gff = Popen(args1, cwd=pathOfFinalResults, stdout=open('geneChecker.log', 'a'), stderr=open('geneChecker_erreur.log', 'a'))
+			sort_gff.wait()	
 	
 	#check genes (doublon ?)
 	
@@ -1339,7 +1359,9 @@ if __name__ == "__main__":
 		os.remove(f)
 	shutil.copy(pathOfFinalResults+"Arwen.log", tmpfiles+"/")
 	os.remove(pathOfFinalResults+"Arwen.log")
-	
+	for f in glob.glob(pathOfFinalResults+"*_raw.gff"):
+		shutil.copy(f, tmpfiles+"/")
+		os.remove(f)
 	for f in glob.glob(pathtowork+"/*blast*"):
 		shutil.copy(f, tmpfiles+"/")
 		os.remove(f)
diff --git a/rename_fasta_seqID.py b/rename_fasta_seqID.py
new file mode 100755
index 0000000..2ecb989
--- /dev/null
+++ b/rename_fasta_seqID.py
@@ -0,0 +1,23 @@
+#! /usr/bin/env python
+
+import sys
+import os.path
+
+def read_fasta(fp):
+    name, seq = None, []
+    for line in fp:
+        line = line.rstrip()
+        if line.startswith(">"):
+            if name: yield (name, ''.join(seq))
+            name, seq = line, []
+        else:
+            seq.append(line)
+    if name: yield (name, ''.join(seq))
+
+seqID=sys.argv[1]
+fasta = open(sys.argv[2])
+fout = open(sys.argv[3], 'w')
+c=sys.argv[4]
+
+for name, seq in read_fasta(fasta):
+	fout.write(">"+seqID+"."+str(c)+"\n"+seq+"\n")
diff --git a/runIDBA.pyc b/runIDBA.pyc
index cb78398a3d8272b95964ad5fb94ecf605825556d..9c95e443c1d4b9c3556af4487792c45d2aa16df2 100755
GIT binary patch
delta 294
zcmdn4Iah;&`7<xq);&{WH*!p1WlY*UlXWYzp)^|wBSQ^0Ly8E4cnT9kGZRC<KU1v$
zLya&)iZVks6GPEUmJ}5RcaItlhAbwA6yweHoXm{6c?@6``d}48Y$*l^6($VLj0}u5
z3=Ao%4C1vC3@L__uW;qE>}O3en(WFQ%Vf$h`8$__8y`arA48T1Lo)*dBha+)6b6RC
z2q2?GfFTR$BA`Bbpo<j2E>dKum1d}s1~~vI0W!XXnITII$eHZNBgJU8Ig=-adGkyD
zWJWGF1_p+r)ZF~C)Xm|7rHqUQlg|oiO#UMz!?<g*q;NkIFZ1Lz!YUkM%#w_}jKYkQ
N?+8~w)wGBh0{{<lL^l8c

delta 459
zcma)1y-vbF0RHYup|+xg603qyBPMmQQ1I^!&`}w5z!<`T5hFyYKSPm$#lZxVn;3is
zqm6NOck&&aTuq$4784T_6Nk&?yZe4`&8G!L`iQDm=bxo}<whniUSEEc>}uE{a5b<5
z3`WtV==Etk23H50!y(1Vh|JO`th);86l~e+3`u0mQ38<+!;K1IM%==~hY&CeLl}_;
z0vkK9DTP_?$7Tgx0wTz49!7f{EYI)MqtKhomiUaeOIG=%w%GY^$~FTwcSM{MLtV^C
zfK7`Uv%#DM+DW)czr8aS#181+T;p%<q~CEs_SYNe0z^a=&9#SNCZ$>`fJ~*?a1QpZ
zTDheNXtgTkTCsZUvC#_ABI3pWuQnX-HFiqGu0q_5?48+G*2?i_JVl=Qb-Wi+iAnjC
Wo*q_-O(%#>#G!mczxbti*YjT>gJARk

diff --git a/runMegahit.pyc b/runMegahit.pyc
index 409f2bcbd63e5b23dc8d2053a9ec4370ab4706d7..8af34e8f495bb747b7dd4c4d576a38304b0e456a 100755
GIT binary patch
delta 204
zcmbQDI$MQ<`7<xq);&{WH*)l_GF@TWJdJfPlkzc^5=Mp^9)=Vd2JsYGhGr&){*R2c
zVhlBc3@LI9*-Q*YhbNbDEM`1BS&GwIq3|$s9s^iT9W1w(rG|qcg&WLfVoA}Me3Dao
z@?K8v$qPAUCO_efVcE-)qBU8T(}KBhFU#aLT+)o%n@@0=GH>SMyUoOCJlRl4W3s5A
t5@X$D3&BhYUREAP9!7O0bw)WxE=C<jRz_Y%MMg2^$$JHhp^6iPOaPU5Gl~EJ

delta 321
zcmbQOGDVex`7<xqoAPfl8##Jdne<pUPh;K7q^!VN!pKm=!;m7wAf6)2(9Fcp@6A*z
z#!w^3kRr#B&BRdjVR9MAV#WiLr8uow^B5RPI3|DOl+XidVPO!jVE}4VXAlSKV+2Zq
zbZLNfiL=&lFr;vU+4?M#XK~6-p1{e?s5N;GXF1DrmK5#DvYZwO^YpQs7oNhv5Eub6
zn3Ew(grS*%fe|PYtf8|xkIR%<teAs=fkCf0IWa9bwXlQ<$O(2Wwx9fg*J<-R-U&>M
zR+H-mG}z<I4D}53j3zG^P-1MFd`ciwhKtdFg@=)cQJqPhQI3&|QHPP0k(W^s$g*Ub
N>@8S~rt!I;2>@MFNOAxG

diff --git a/runMetaspades.pyc b/runMetaspades.pyc
index 56d017e5036e938f03e66414ef0420eb9a6d1c4c..e810c5ef378d7e81e430296b00a99e84f44f9ce2 100755
GIT binary patch
delta 232
zcmZounX1ge{F#?)>z=8x8##Jd8JBFH%DRt9c^XRzBSQ@jLy8Q8c#14TGZRDqLB?7!
zh8jVJ6gh@$CWfNTlgl_JvutKgQJpNnB|ce&Q&FLCGjkpTScy7VNiRzc2SW-snEiw~
zMPqUwrv#(s<mH^HEWIo#S|ELlT9Y-n6yytgS%5mjQy3TmBS1!QGGvJ`G&3+T0!4x~
zv^P)S;$hyrl6NT+quJyIf*O<S1T+}yCNCGrl;CCMVdP<yWRhf*W8`AgU}R<FWmIGo
PW1g%dSPWHsUeFu>1mHIF

delta 360
zcmb7-KTE?<6vfYddBG$mm}+SZ)S|Rtv?Zt@4*mf*agz=@h@!q0R48alad>sG;3POa
z`Vrif><)eo!PQX^y6Q_tH>Z2<;hf(YR_|+;@$ESqH>1NB>%pW^Iy7Hd^^<oA(uF7^
z5HlDsJWjEG39<lD!6n1>yXskro|R*-pLT$*mB%>(S=1GMMNMl>$Sml09)ZS?&SsD(
zhkS5pL5%0x<6;5Qft-X!NCRz-Qa?Z^gjiI!c8jN6)RkukR98Rt?9M+|_XV(bkj>k;
zC}IGRcA8Nm-Ev&RODq6!a(2>gwyx4$?}^c-`kIKwj(f}TQoN$3{fO?=wtrrBsKH%w
aDP$p4$f1Z#@@P)0Ri@tj<WKRau=Wd0a!P*y

diff --git a/sort_gff.py b/sort_gff.py
index ed76419..cb99c02 100755
--- a/sort_gff.py
+++ b/sort_gff.py
@@ -5,13 +5,29 @@
 import operator
 import collections
 
+def read_fasta(fp):
+    name, seq = None, []
+    for line in fp:
+        line = line.rstrip()
+        if line.startswith(">"):
+            if name: yield (name, ''.join(seq))
+            name, seq = line, []
+        else:
+            seq.append(line)
+    if name: yield (name, ''.join(seq))
+
 file1=open(sys.argv[1])
+fasta=open(sys.argv[1].split("_raw.gff")[0]+".fasta")
+seqID=sys.argv[2]
 
 dico={}
 dicotrna={}
 dicof={}
 c=0
 
+for name, seq in read_fasta(fasta):
+	length=len(seq)
+
 for line in open(sys.argv[1]):
 	dico[line.rstrip().split("\t")[8]]=line.rstrip()
 	
@@ -45,14 +61,399 @@
 	
 sorted_x = sorted(dicof.items(), key=operator.itemgetter(0))
 sorted_dict = collections.OrderedDict(sorted_x)
-fout=sys.argv[1].split("_raw.gff")[0]+"_filtered.gff"
-fout=open(fout, "w")
+gout=sys.argv[1].split("_raw.gff")[0]+"_final.gff"
+gout=open(gout, "w")
+gout.write(seqID+"\t"+"mitofinder\tsource\t1\t"+str(length)+"\t.\t+\t.\tName=source "+seqID.replace("_"," ")+"\n")
+tout=sys.argv[1].split("_raw.gff")[0]+"_final.tbl"
+tout=open(tout, "w")
+tout.write(">Feature "+seqID+"\n")
+#tout.write("1\t"+str(length)+"\tREFERENCE\n")
+#tout.write("\t\t\tMitoFinder\txxxxxx\n")
 
 for k, v in sorted_dict.items():
 	if not dicotrna.has_key(v.split("\t")[8]):
-		fout.write(v+"\n")
-	
+		col1=seqID
+		col2=v.split("\t")[1]
+		col3=v.split("\t")[2]
+		col4=v.split("\t")[3]
+		col5=v.split("\t")[4]
+		col6=v.split("\t")[5]
+		col7=v.split("\t")[6]
+		col8=v.split("\t")[7]
+		col9=v.split("\t")[8].rstrip()
+		if "COX1" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tcytochrome c oxidase subunit I\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tcytochrome c oxidase subunit I\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tcytochrome c oxidase subunit I\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
+		
+		if "COX2" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tcytochrome c oxidase subunit II\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tcytochrome c oxidase subunit II\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tcytochrome c oxidase subunit II\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
+		
+		if "COX3" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tcytochrome c oxidase subunit III\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tcytochrome c oxidase subunit III\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tcytochrome c oxidase subunit III\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
 	
+		if "ND1" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 1\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 1\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 1\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
+
+		if "ND2" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 2\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 2\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 2\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
+		
+		if "ND3" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 3\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 3\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 3\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
+
+		if "ND4" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 4\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 4\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 4\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
+
+		if "ND4L" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 4L\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 4L\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 4L\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
+
+		if "ND5" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 5\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 5\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tNADH dehydrogenase subunit 5\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
+
+		if "CYTB" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tcytochrome b\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tcytochrome b\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tcytochrome b\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
+
+		if "ATP8" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tATP synthase F0 subunit 8\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tATP synthase F0 subunit 8\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tATP synthase F0 subunit 8\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
+
+		if "ATP6" == col9:
+			size=int(col5)-int(col4)+1
+			if size%3 == 0:
+				ext=""
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"CDS\n")
+				tout.write("\t\t\tproduct\tATP synthase F0 subunit 6\n")
+				tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+			else:
+				ext=" Note: Not a multiple of 3"
+				if col7 == "+":
+					tout.write(col4+"\t"+col5+">"+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write(col4+"\t"+col5+">"+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tATP synthase F0 subunit 6\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+				if col7 == "-":
+					tout.write("<"+col4+"\t"+col5+"\t"+"gene\n")
+					tout.write("\t\t\tgene\t"+col9+"\n")
+					tout.write("<"+col4+"\t"+col5+"\t"+"CDS\n")
+					tout.write("\t\t\tproduct\tATP synthase F0 subunit 6\n")
+					tout.write("\t\t\ttransl_table\t"+sys.argv[3]+"\n")
+					tout.write("\t\t\tNote\tPartial sequence\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+ext+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"CDS"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" CDS"+ext+"\n")
+
+		if "tRNA" in col9:
+			size=int(col5)-int(col4)+1
+			if size <= 120:
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"tRNA\n")
+				tout.write("\t\t\ttRNA\t"+col9+"\n")
+				gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+"\n")
+				gout.write(col1+"\t"+col2+"\t"+"tRNA"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" "+"\n")
+		if "trn" in col9:
+			size=int(col5)-int(col4)+1
+			if size <= 120:
+				tout.write(col4+"\t"+col5+"\t"+"gene\n")
+				tout.write("\t\t\tgene\t"+col9+"\n")
+				tout.write(col4+"\t"+col5+"\t"+"tRNA\n")
+				tout.write("\t\t\ttRNA\t"+col9+"\n")
+				gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+"\n")
+				gout.write(col1+"\t"+col2+"\t"+"tRNA"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" tRNA"+"\n")
+		if "rrnL" in col9:
+			tout.write(col4+"\t"+col5+"\t"+"gene\n")
+			tout.write("\t\t\tgene\t"+col9+"\n")
+			tout.write(col4+"\t"+col5+"\t"+"rRNA\n")
+			tout.write("\t\t\trRNA\t"+col9+"\n")
+			tout.write("\t\t\tproduct\tlarge subunit ribosomal RNA\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"rRNA"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" rRNA"+"\n")
+		if "rrnS" in col9:
+			tout.write(col4+"\t"+col5+"\t"+"gene\n")
+			tout.write("\t\t\tgene\t"+col9+"\n")
+			tout.write(col4+"\t"+col5+"\t"+"rRNA\n")
+			tout.write("\t\t\trRNA\t"+col9+"\n")
+			tout.write("\t\t\tproduct\tsmall subunit ribosomal RNA\n")
+			gout.write(col1+"\t"+col2+"\t"+"gene"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" gene"+"\n")
+			gout.write(col1+"\t"+col2+"\t"+"rRNA"+"\t"+col4+"\t"+col5+"\t"+col6+"\t"+col7+"\t"+col8+"\t"+"Name="+col9+" rRNA"+"\n")
+gout.close()
+tout.close()
+
 	
 	
 	
diff --git a/tRNAscanChecker.pyc b/tRNAscanChecker.pyc
index d47678a75ac1b38838e0cf9f2b96f4bc393eb442..00e3f296a5ba8eb9f9651d00ee1a8c04f55206c4 100755
GIT binary patch
delta 237
zcmdnsbHJN}`7<xq);&{WH*&n?Vzij-FQ75`p^(UAOYVh8JUN~S1aI>$p3O{9?nM!i
z$s76SBJm~(1S0X&1z|jpp?d_w5yngo7m9-OvfwO<$&-Z(kW6qFfvVbkQ6!xMu1sWe
ZjqEB`ByN^+0V9GtSx$915|2|&9RNZxOY8su

delta 361
zcmX@$y}^fr`7<xqii&SB8#&%`G1^Z)D4@X}UuLLhpl39BB9F*qBkqM*L<M*vP(?RS
z<T=iSDm1x~e<Oy7$mD#1KrEtvF+@R@cMFE2sRfD33q@fO-6j-<MbuNc1WmM&Uu5zx
vVRRFKqMIj*1aqJ%UZ^6mSygr!E1EFS6`MJf3mDOa7pm+=69GAQrm8vs!T)Nr