From b0da69869076a7993518feb9dd94ceb033075a48 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 31 Aug 2023 11:46:03 +0200 Subject: [PATCH 01/11] check python version out of install_requires option from setup --- setup.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 35ee50d..7b2bde4 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,11 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- +# Check python version - it is not possible to specify which Python version to use in the setup.py file +import sys +if sys.version_info < (3,9): + sys.exit('Sorry, Python >= 3.9 is required') + from setuptools import setup, find_packages # access the version wihtout importing the EMBLmyGFF3 package @@ -19,7 +24,7 @@ license='GPL-3.0', packages=find_packages(), - install_requires=['biopython>=1.78', 'bcbio-gff>=0.6.4','numpy>=1.22', 'python_version>="3.8.0"' ], + install_requires=['biopython>=1.78', 'bcbio-gff>=0.6.4','numpy>=1.22'], include_package_data=True, entry_points={ From b9880ceed7dec547e8d1281e0c6be51b7e18dd50 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 31 Aug 2023 11:55:23 +0200 Subject: [PATCH 02/11] update python version to use --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f6efc26..0585592 100644 --- a/README.md +++ b/README.md @@ -62,7 +62,7 @@ __You don't know how to submit to ENA ? Please visit the [ENA: Guidelines and Ti ## Prerequisites -**Python >=3.8**, **biopython >=1.78**, **numpy >=1.22** and the **bcbio-gff >=0.6.4** python packages. +**Python >=3.9**, **biopython >=1.78**, **numpy >=1.22** and the **bcbio-gff >=0.6.4** python packages. 
In order to install pip please use the following steps: From 14a141309701acbb2fcde4f7c9fc1d209288dc13 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 31 Aug 2023 11:57:18 +0200 Subject: [PATCH 03/11] Minimum python version required is now 3.9 --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a0001e0..9d06b7c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: # Run in all these versions of Python - python-version: [3.8, 3.9] + python-version: [3.9, 3.10, 3.11] # Steps represent a sequence of tasks that will be executed as part of the job steps: From c0de9117aef13cf3d5461f809822f71d88a9277d Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 31 Aug 2023 11:59:00 +0200 Subject: [PATCH 04/11] fix 3.10 version check --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 9d06b7c..a54d469 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -16,7 +16,7 @@ jobs: strategy: matrix: # Run in all these versions of Python - python-version: [3.9, 3.10, 3.11] + python-version: [3.9, '3.10', 3.11] # Steps represent a sequence of tasks that will be executed as part of the job steps: From 7697e51c73211806470f9bec8c66f4019fcff84f Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 31 Aug 2023 15:19:03 +0200 Subject: [PATCH 05/11] --no_progress to have clear output --- examples/aa_example.py | 2 +- examples/augustus_example.py | 2 +- examples/dbxref_test_example.py | 2 +- examples/maker_example.py | 2 +- examples/prokka_disorder_example.py | 2 +- examples/prokka_example.py | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/examples/aa_example.py b/examples/aa_example.py index 4b511d6..93ea66a 100755 --- a/examples/aa_example.py +++ 
b/examples/aa_example.py @@ -42,7 +42,7 @@ def main(): MOLECULE="genomic DNA" #Create the command - command = "EMBLmyGFF3 --translate --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t linear -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-aa-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) + command = "EMBLmyGFF3 --no_progress --translate --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t linear -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-aa-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) print("Running the following command: "+command) #Execute the command diff --git a/examples/augustus_example.py b/examples/augustus_example.py index 78d91e7..3bedbbf 100755 --- a/examples/augustus_example.py +++ b/examples/augustus_example.py @@ -42,7 +42,7 @@ def main(): MOLECULE="genomic DNA" #Create the command - command = "EMBLmyGFF3 --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t linear -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-augustus-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) + command = "EMBLmyGFF3 --no_progress --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t linear -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-augustus-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) print("Running the following command: "+command) #Execute the command diff --git a/examples/dbxref_test_example.py b/examples/dbxref_test_example.py index 2e957d5..8539ab5 100755 --- a/examples/dbxref_test_example.py +++ b/examples/dbxref_test_example.py @@ -42,7 +42,7 @@ def main(): MOLECULE="genomic DNA" #Create the command - command = "EMBLmyGFF3 --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t linear -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-dbxref_test-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) + command = "EMBLmyGFF3 
--no_progress --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t linear -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-dbxref_test-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) print("Running the following command: "+command) #Execute the command diff --git a/examples/maker_example.py b/examples/maker_example.py index 18f9ad2..99a20af 100755 --- a/examples/maker_example.py +++ b/examples/maker_example.py @@ -42,7 +42,7 @@ def main(): MOLECULE="genomic DNA" #Create the command - command = "EMBLmyGFF3 --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t linear -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-maker-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) + command = "EMBLmyGFF3 --no_progress --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t linear -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-maker-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) print("Running the following command: "+command) #Execute the command diff --git a/examples/prokka_disorder_example.py b/examples/prokka_disorder_example.py index b2e9951..192e435 100755 --- a/examples/prokka_disorder_example.py +++ b/examples/prokka_disorder_example.py @@ -48,7 +48,7 @@ def main(): STRAIN="K-12" #Create the command - command = "EMBLmyGFF3 --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t "+TOPOLOGY+" --strain \""+STRAIN+"\" -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-prokka_disorder-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) + command = "EMBLmyGFF3 --no_progress --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t "+TOPOLOGY+" --strain \""+STRAIN+"\" -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-prokka_disorder-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) print("Running the following command: "+command) #Execute the command diff --git 
a/examples/prokka_example.py b/examples/prokka_example.py index 385a6b3..6b70885 100755 --- a/examples/prokka_example.py +++ b/examples/prokka_example.py @@ -48,7 +48,7 @@ def main(): STRAIN="K-12" #Create the command - command = "EMBLmyGFF3 --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t "+TOPOLOGY+" --strain \""+STRAIN+"\" -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-prokka-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) + command = "EMBLmyGFF3 --no_progress --rg REFERENCE_GROUP -i "+LOCUS_TAG+" -p "+PROJECT+" -m \""+MOLECULE+"\" -r "+TABLE+" -t "+TOPOLOGY+" --strain \""+STRAIN+"\" -s \""+SPECIES+"\" -x "+TAXONOMY+" -o EMBLmyGFF3-prokka-example.embl "+fill_path(ANNOTATION)+" "+fill_path(GENOME) print("Running the following command: "+command) #Execute the command From 9571e7dd10a68d8c97fb3c01e0819060ec2ea47e Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 31 Aug 2023 17:19:19 +0200 Subject: [PATCH 06/11] Move Proteobacteria to Pseudomonadota in test due to changes in NCBI taxonomy database move to minimum python 3.9 in conda yaml --- conda_environment_EMBLmyGFF3.yml | 2 +- t/EMBLmyGFF3-aa-test.embl | 2 - t/EMBLmyGFF3-augustus-test.embl | 2 - t/EMBLmyGFF3-dbxref_test-test.embl | 2 - t/EMBLmyGFF3-maker-test.embl | 2 - t/EMBLmyGFF3-prokka-test.embl | 132 +++++++------------------ t/EMBLmyGFF3-prokka_disorder-test.embl | 132 +++++++------------------ 7 files changed, 67 insertions(+), 207 deletions(-) diff --git a/conda_environment_EMBLmyGFF3.yml b/conda_environment_EMBLmyGFF3.yml index e737060..d156727 100644 --- a/conda_environment_EMBLmyGFF3.yml +++ b/conda_environment_EMBLmyGFF3.yml @@ -6,7 +6,7 @@ channels: - defaults dependencies: - - python>=3.8.0 + - python>=3.9.0 - biopython>=1.78 - bcbio-gff>=0.6.4 - numpy>=1.22 diff --git a/t/EMBLmyGFF3-aa-test.embl b/t/EMBLmyGFF3-aa-test.embl index f4a1101..bdf9def 100644 --- a/t/EMBLmyGFF3-aa-test.embl +++ b/t/EMBLmyGFF3-aa-test.embl @@ -6,7 +6,6 @@ AC * _4 
XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -26,7 +25,6 @@ RN [1] RP 1-1351857 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH diff --git a/t/EMBLmyGFF3-augustus-test.embl b/t/EMBLmyGFF3-augustus-test.embl index 59ee1fe..942137f 100644 --- a/t/EMBLmyGFF3-augustus-test.embl +++ b/t/EMBLmyGFF3-augustus-test.embl @@ -6,7 +6,6 @@ AC * _4 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -26,7 +25,6 @@ RN [1] RP 1-1351857 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH diff --git a/t/EMBLmyGFF3-dbxref_test-test.embl b/t/EMBLmyGFF3-dbxref_test-test.embl index 2adffb0..2345636 100644 --- a/t/EMBLmyGFF3-dbxref_test-test.embl +++ b/t/EMBLmyGFF3-dbxref_test-test.embl @@ -6,7 +6,6 @@ AC * _4 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -26,7 +25,6 @@ RN [1] RP 1-1351857 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH diff --git a/t/EMBLmyGFF3-maker-test.embl b/t/EMBLmyGFF3-maker-test.embl index 822cda5..b527ae1 100644 --- a/t/EMBLmyGFF3-maker-test.embl +++ b/t/EMBLmyGFF3-maker-test.embl @@ -6,7 +6,6 @@ AC * _4 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -26,7 +25,6 @@ RN [1] RP 1-1351857 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH diff --git a/t/EMBLmyGFF3-prokka-test.embl b/t/EMBLmyGFF3-prokka-test.embl index 960c7c3..79d545b 100644 --- a/t/EMBLmyGFF3-prokka-test.embl +++ b/t/EMBLmyGFF3-prokka-test.embl @@ -6,7 +6,6 @@ AC * _ERS324955|SC|contig000001 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -14,14 +13,13 @@ KW . 
XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-317941 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -10870,7 +10868,6 @@ AC * _ERS324955|SC|contig000002 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -10878,14 +10875,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-267336 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -19755,7 +19751,6 @@ AC * _ERS324955|SC|contig000003 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -19763,14 +19758,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-161432 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -24941,7 +24935,6 @@ AC * _ERS324955|SC|contig000004 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -24949,14 +24942,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-132407 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -29463,7 +29455,6 @@ AC * _ERS324955|SC|contig000005 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 
133, Created) XX DE XXX XX @@ -29471,14 +29462,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-131196 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -33639,7 +33629,6 @@ AC * _ERS324955|SC|contig000006 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -33647,14 +33636,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-102838 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -36949,7 +36937,6 @@ AC * _ERS324955|SC|contig000007 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -36957,14 +36944,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-85930 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -39868,7 +39854,6 @@ AC * _ERS324955|SC|contig000008 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -39876,14 +39861,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-78315 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. 
XX FH Key Location/Qualifiers FH @@ -42180,7 +42164,6 @@ AC * _ERS324955|SC|contig000009 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -42188,14 +42171,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-67683 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -44462,7 +44444,6 @@ AC * _ERS324955|SC|contig000010 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -44470,14 +44451,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-64624 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -46700,7 +46680,6 @@ AC * _ERS324955|SC|contig000011 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -46708,14 +46687,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-57335 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -48438,7 +48416,6 @@ AC * _ERS324955|SC|contig000012 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -48446,14 +48423,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. 
XX RN [1] RP 1-57237 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -50353,7 +50329,6 @@ AC * _ERS324955|SC|contig000013 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -50361,14 +50336,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-55719 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -52272,7 +52246,6 @@ AC * _ERS324955|SC|contig000014 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -52280,14 +52253,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-53798 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -53952,7 +53924,6 @@ AC * _ERS324955|SC|contig000015 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -53960,14 +53931,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-46744 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -55375,7 +55345,6 @@ AC * _ERS324955|SC|contig000016 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -55383,14 +55352,13 @@ KW . 
XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-42399 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -56769,7 +56737,6 @@ AC * _ERS324955|SC|contig000017 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -56777,14 +56744,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-40680 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -58224,7 +58190,6 @@ AC * _ERS324955|SC|contig000018 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -58232,14 +58197,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-39787 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -59536,7 +59500,6 @@ AC * _ERS324955|SC|contig000019 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -59544,14 +59507,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-37849 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -60860,7 +60822,6 @@ AC * _ERS324955|SC|contig000020 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 
133, Created) XX DE XXX XX @@ -60868,14 +60829,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-35574 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -62037,7 +61997,6 @@ AC * _ERS324955|SC|contig000021 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -62045,14 +62004,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-33199 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -63118,7 +63076,6 @@ AC * _ERS324955|SC|contig000022 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -63126,14 +63083,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-30628 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -64115,7 +64071,6 @@ AC * _ERS324955|SC|contig000023 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -64123,14 +64078,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-27067 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. 
XX FH Key Location/Qualifiers FH @@ -65020,7 +64974,6 @@ AC * _ERS324955|SC|contig000024 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -65028,14 +64981,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-23145 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -65822,7 +65774,6 @@ AC * _ERS324955|SC|contig000025 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -65830,14 +65781,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-21703 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -66518,7 +66468,6 @@ AC * _ERS324955|SC|contig000026 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -66526,14 +66475,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-21044 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -67205,7 +67153,6 @@ AC * _ERS324955|SC|contig000027 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -67213,14 +67160,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. 
XX RN [1] RP 1-19971 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -67920,7 +67866,6 @@ AC * _ERS324955|SC|contig000028 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -67928,14 +67873,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-15942 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -68527,7 +68471,6 @@ AC * _ERS324955|SC|contig000029 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -68535,14 +68478,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-2486 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -68718,7 +68660,6 @@ AC * _ERS324955|SC|contig000030 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -68726,14 +68667,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-1475 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -68805,7 +68745,6 @@ AC * _ERS324955|SC|contig000031 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -68813,14 +68752,13 @@ KW . 
XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-1228 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -68860,7 +68798,6 @@ AC * _ERS324955|SC|contig000032 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -68868,14 +68805,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-412 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH @@ -68901,7 +68837,6 @@ AC * _ERS324955|SC|contig000033 XX PR Project:17285; XX -DT 04-MAR-2021 (Rel. 133, Created) XX DE XXX XX @@ -68909,14 +68844,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-321 RG REFERENCE_GROUP RT ; -RL Submitted (04-MAR-2021) to the INSDC. XX FH Key Location/Qualifiers FH diff --git a/t/EMBLmyGFF3-prokka_disorder-test.embl b/t/EMBLmyGFF3-prokka_disorder-test.embl index 34b12ec..950b72d 100644 --- a/t/EMBLmyGFF3-prokka_disorder-test.embl +++ b/t/EMBLmyGFF3-prokka_disorder-test.embl @@ -6,7 +6,6 @@ AC * _ERS324955|SC|contig000001 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -14,14 +13,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. 
XX RN [1] RP 1-317941 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -5354,7 +5352,6 @@ AC * _ERS324955|SC|contig000002 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -5362,14 +5359,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-57335 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -6344,7 +6340,6 @@ AC * _ERS324955|SC|contig000003 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -6352,14 +6347,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-57237 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -7332,7 +7326,6 @@ AC * _ERS324955|SC|contig000004 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -7340,14 +7333,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-132407 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -9588,7 +9580,6 @@ AC * _ERS324955|SC|contig000005 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -9596,14 +9587,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. 
XX RN [1] RP 1-131196 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -11809,7 +11799,6 @@ AC * _ERS324955|SC|contig000006 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -11817,14 +11806,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-102838 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -13559,7 +13547,6 @@ AC * _ERS324955|SC|contig000007 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -13567,14 +13554,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-85930 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -15026,7 +15012,6 @@ AC * _ERS324955|SC|contig000008 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -15034,14 +15019,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-78315 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -16375,7 +16359,6 @@ AC * _ERS324955|SC|contig000009 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -16383,14 +16366,13 @@ KW . 
XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-67683 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -17538,7 +17520,6 @@ AC * _ERS324955|SC|contig000010 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -17546,14 +17527,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-64624 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -18666,7 +18646,6 @@ AC * _ERS324955|SC|contig000011 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -18674,14 +18653,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-161432 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -21404,7 +21382,6 @@ AC * _ERS324955|SC|contig000012 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -21412,14 +21389,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-267336 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -25915,7 +25891,6 @@ AC * _ERS324955|SC|contig000013 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 
133, Created) XX DE XXX XX @@ -25923,14 +25898,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-55719 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -26878,7 +26852,6 @@ AC * _ERS324955|SC|contig000014 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -26886,14 +26859,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-53798 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -27809,7 +27781,6 @@ AC * _ERS324955|SC|contig000015 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -27817,14 +27788,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-46744 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -28623,7 +28593,6 @@ AC * _ERS324955|SC|contig000016 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -28631,14 +28600,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-42399 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. 
XX FH Key Location/Qualifiers FH @@ -29364,7 +29332,6 @@ AC * _ERS324955|SC|contig000017 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -29372,14 +29339,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-40680 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -30076,7 +30042,6 @@ AC * _ERS324955|SC|contig000018 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -30084,14 +30049,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-39787 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -30774,7 +30738,6 @@ AC * _ERS324955|SC|contig000019 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -30782,14 +30745,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-37849 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -31439,7 +31401,6 @@ AC * _ERS324955|SC|contig000020 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -31447,14 +31408,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. 
XX RN [1] RP 1-35574 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -32066,7 +32026,6 @@ AC * _ERS324955|SC|contig000021 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -32074,14 +32033,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-33199 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -32654,7 +32612,6 @@ AC * _ERS324955|SC|contig000022 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -32662,14 +32619,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-30628 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -33199,7 +33155,6 @@ AC * _ERS324955|SC|contig000023 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -33207,14 +33162,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-27067 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -33685,7 +33639,6 @@ AC * _ERS324955|SC|contig000024 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -33693,14 +33646,13 @@ KW . 
XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-23145 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -34105,7 +34057,6 @@ AC * _ERS324955|SC|contig000025 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -34113,14 +34064,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-21703 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -34501,7 +34451,6 @@ AC * _ERS324955|SC|contig000026 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -34509,14 +34458,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-21044 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -34886,7 +34834,6 @@ AC * _ERS324955|SC|contig000027 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -34894,14 +34841,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-19971 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -35253,7 +35199,6 @@ AC * _ERS324955|SC|contig000028 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 
133, Created) XX DE XXX XX @@ -35261,14 +35206,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-15942 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -35553,7 +35497,6 @@ AC * _ERS324955|SC|contig000029 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -35561,14 +35504,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-2486 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -35629,7 +35571,6 @@ AC * _ERS324955|SC|contig000030 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -35637,14 +35578,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-1475 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -35688,7 +35628,6 @@ AC * _ERS324955|SC|contig000031 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -35696,14 +35635,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-1228 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. 
XX FH Key Location/Qualifiers FH @@ -35743,7 +35681,6 @@ AC * _ERS324955|SC|contig000032 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -35751,14 +35688,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-412 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH @@ -35784,7 +35720,6 @@ AC * _ERS324955|SC|contig000033 XX PR Project:17285; XX -DT 02-SEP-2022 (Rel. 133, Created) XX DE XXX XX @@ -35792,14 +35727,13 @@ KW . XX OS Escherichia coli XX -OC cellular organisms; Bacteria; Proteobacteria; Gammaproteobacteria; +OC cellular organisms; Bacteria; Pseudomonadota; Gammaproteobacteria; OC Enterobacterales; Enterobacteriaceae; Escherichia. XX RN [1] RP 1-321 RG REFERENCE_GROUP RT ; -RL Submitted (02-SEP-2022) to the INSDC. XX FH Key Location/Qualifiers FH From 200c9a6b5379d89335596354e6d246153eee16c6 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 31 Aug 2023 17:21:46 +0200 Subject: [PATCH 07/11] remove date when doing test to avoid problem with date --- t/test.sh | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/t/test.sh b/t/test.sh index e03aba7..637b472 100755 --- a/t/test.sh +++ b/t/test.sh @@ -16,10 +16,17 @@ python setup.py install ## RUN TESTS cd t -thedate=$(LC_TIME=en_US.UTF-8 date +%d-%^b-%Y) -sed -Ei -e 's/^(DT )[0-9]{2}-[A-Za-z]{3}-[0-9]{4}/\1'"$thedate"'/' \ - -e 's/^(RL Submitted \()[0-9]{2}-[A-Za-z]{3}-[0-9]{4}/\1'"$thedate"'/' \ - *.embl + +# Remove line related to dates +if [ "$(uname)" == "Darwin" ]; then + sed -Ei '' -e '/^DT .*/d' \ + -e '/^RL .*/d' \ + *.embl +else + sed -Ei -e '/^DT .*/d' \ + -e '/^RL .*/d' \ + *.embl +fi SUCCESS=0 FAIL=0 @@ -27,16 +34,31 @@ FAIL=0 for NAME in augustus maker prokka prokka_disorder 
dbxref_test aa; do RESULT_FILE="EMBLmyGFF3-${NAME}-example.embl" EXPECTED_FILE="EMBLmyGFF3-${NAME}-test.embl" + cp $EXPECTED_FILE $EXPECTED_FILE.copy [ -f "$RESULT_FILE" ] && rm $RESULT_FILE ../examples/${NAME}_example.py - if diff -q "$RESULT_FILE" "$EXPECTED_FILE"; then + # Remove line related to dates + if [ "$(uname)" == "Darwin" ]; then + sed -Ei '' -e '/^DT .*/d' \ + -e '/^RL .*/d' \ + $RESULT_FILE $EXPECTED_FILE.copy + else + sed -Ei -e '/^DT .*/d' \ + -e '/^RL .*/d' \ + $RESULT_FILE $EXPECTED_FILE.copy + fi + + if diff -q "$RESULT_FILE" "$EXPECTED_FILE.copy"; then SUCCESS=$(( $SUCCESS + 1 )) else - diff "$RESULT_FILE" "$EXPECTED_FILE" + diff "$RESULT_FILE" "$EXPECTED_FILE.copy" FAIL=$(( $FAIL + 1 )) fi - [ -f "$RESULT_FILE" ] && rm $RESULT_FILE + + #[ -f "$RESULT_FILE" ] && rm $RESULT_FILE + #[ -f "$EXPECTED_FILE.copy" ] && rm $EXPECTED_FILE.copy + done if [ $FAIL -eq 0 ]; then From c217492958e0019d6c118d463ed962b4a34be4f4 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Thu, 31 Aug 2023 17:22:02 +0200 Subject: [PATCH 08/11] remove date when doing test to avoid problem with date --- t/test.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/test.sh b/t/test.sh index 637b472..d9f1e78 100755 --- a/t/test.sh +++ b/t/test.sh @@ -56,8 +56,8 @@ for NAME in augustus maker prokka prokka_disorder dbxref_test aa; do FAIL=$(( $FAIL + 1 )) fi - #[ -f "$RESULT_FILE" ] && rm $RESULT_FILE - #[ -f "$EXPECTED_FILE.copy" ] && rm $EXPECTED_FILE.copy + [ -f "$RESULT_FILE" ] && rm $RESULT_FILE + [ -f "$EXPECTED_FILE.copy" ] && rm $EXPECTED_FILE.copy done From 1bf69907e39085e2e85605fd4da760ce400b027f Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Fri, 1 Sep 2023 10:21:02 +0200 Subject: [PATCH 09/11] add parameter to keep short sequences (< 100bp) --keep_short_sequences. 
Fix #78 --- EMBLmyGFF3/EMBLmyGFF3.py | 9 ++++++--- EMBLmyGFF3/modules/help.py | 7 +++++++ README.md | 1 + 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/EMBLmyGFF3/EMBLmyGFF3.py b/EMBLmyGFF3/EMBLmyGFF3.py index 894d9c0..476d637 100755 --- a/EMBLmyGFF3/EMBLmyGFF3.py +++ b/EMBLmyGFF3/EMBLmyGFF3.py @@ -1299,6 +1299,7 @@ def main(): parser.add_argument("--force_uncomplete_features", action="store_true", help="Force to keep features whithout all the mandatory qualifiers. /!\ Option not suitable for submission purpose.") parser.add_argument("--interleave_genes", action="store_false", help="Print gene features with interleaved mRNA and CDS features.") parser.add_argument("--keep_duplicates", action="store_true", help="Do not remove duplicate features during the process. /!\ Option not suitable for submission purpose.") + parser.add_argument("--keep_short_sequences", action="store_true", help="Do not skip short sequences (<100bp). /!\ Option not suitable for submission purpose.") parser.add_argument("--locus_numbering_start", default=1, type=int, help="Start locus numbering with the provided value.") parser.add_argument("--no_progress", action="store_false", help="Hide conversion progress counter.") parser.add_argument("--no_wrap_qualifier", action="store_true", help="By default there is a line wrapping at 80 characters. The cut is at the world level. Activating this option will avoid the line-wrapping for the qualifiers.") @@ -1472,9 +1473,11 @@ def main(): "For you information, if you use the --translate option the tool will raise an error due to ??? codons that do not exist." % (record.id)) # Check sequence size and skip if < 100 bp - if len(record.seq)<100: - logging.warning("Sequence %s too short (%s bp)! Minimum accpeted by ENA is 100, we skip it !" % (record.name, len(record.seq) ) ) - continue + if not args.keep_short_sequences: + if len(record.seq)<100: + logging.warning("Sequence %s too short (%s bp)! Minimum accpeted by ENA is 100, we skip it !" 
% (record.name, len(record.seq) ) ) + continue + writer = EMBL( record, True ) # qualifiers / features json information diff --git a/EMBLmyGFF3/modules/help.py b/EMBLmyGFF3/modules/help.py index cd05f68..205d074 100755 --- a/EMBLmyGFF3/modules/help.py +++ b/EMBLmyGFF3/modules/help.py @@ -569,6 +569,13 @@ def Help(string): Bolean - Doesnt expect any value Do not remove duplicate features during the process. /!\ Option not suitable for submission purpose. Features that have the same key (feature type) and location as another feature are considered as duplicates and aren't allowed by the EMBL database. So they are remove during the process. If you don't plan to submit the file to ENA and you wish to keep these features, use the --keep_duplicates option. +""" + if(string == "keep_short_sequences" or string == "all"): + output += string+""": +EMBLmyGFF3 tool specific +Bolean - Doesnt expect any value +Do not remove short sequences (< 100bp) during the process. +/!\ Option not suitable for submission purpose. """ if(string == "force_unknown_features" or string == "all"): output += string+""": diff --git a/README.md b/README.md index 0585592..c7615d2 100644 --- a/README.md +++ b/README.md @@ -321,6 +321,7 @@ You can also find a comprehensive help about the different parameters using the | --isolate| Individual isolate from which the sequence was obtained. May be needed when organism belongs to Bacteria.| | --isolation_source| Describes the physical, environmental and/or local geographical source of the biological sample from which the sequence was derived. Mandatory when environmental_sample option used.| | --keep_duplicates| Do not remove duplicate features during the process. /!\ Option not suitable for submission purpose.| +| --keep_short_sequences| Do not remove short sequences (< 100bp) during the process. 
/!\ Option not suitable for submission purpose.| | --locus_numbering_start| Start locus numbering with the provided value.| | --no_progress| Hide conversion progress counter.| | --no_wrap_qualifier| By default there is a line wrapping at 80 characters. The cut is at the world level. Activating this option will avoid the line-wrapping for the qualifiers.| From 91c5c3afcbf710cbdd8c04fcf1460d9160b31169 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Fri, 1 Sep 2023 10:21:58 +0200 Subject: [PATCH 10/11] increment to version 2.3 --- EMBLmyGFF3/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/EMBLmyGFF3/version.py b/EMBLmyGFF3/version.py index 2b9ccf1..93bbb90 100644 --- a/EMBLmyGFF3/version.py +++ b/EMBLmyGFF3/version.py @@ -1 +1 @@ -__version__ = '2.2' +__version__ = '2.3' From d9f951f16c1c97ae886d698c98648e8f9a51d170 Mon Sep 17 00:00:00 2001 From: Jacques Dainat Date: Fri, 1 Sep 2023 10:43:08 +0200 Subject: [PATCH 11/11] Change travis badge by GitHub actions for CI --- README.md | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c7615d2..ce05733 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,5 @@ - - -[![Build Status](https://travis-ci.org/NBISweden/EMBLmyGFF3.svg?branch=master)](https://travis-ci.org/NBISweden/EMBLmyGFF3) [![DOI](EMBLmyGFF3.svg)](https://doi.org/10.1186/s13104-018-3686-x) +![GitHub CI](https://github.com/NBISweden/EMBLmyGFF3/actions/workflows/main.yml/badge.svg) +[![DOI](EMBLmyGFF3.svg)](https://doi.org/10.1186/s13104-018-3686-x) [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/emblmygff3/README.html) [![Anaconda-Server Badge](https://img.shields.io/conda/dn/bioconda/emblmygff3.svg?style=flat)](https://anaconda.org/bioconda/emblmygff3) [docker_emblmygff3](https://quay.io/repository/biocontainers/emblmygff3)