From 172561f59dac05d10df9e26a7ab98c039523c4e1 Mon Sep 17 00:00:00 2001 From: Jover Date: Fri, 13 Oct 2023 13:56:04 -0700 Subject: [PATCH] Remove `reverse` column, associated script, and references The `reverse` column has effectively been replaced by `is_reverse_complement` from the Nextclade output. The script `ingest/bin/reverse_reversed_sequences.py` is no longer used because its function is now built into Nextclade. Resolves https://github.com/nextstrain/monkeypox/issues/209. --- ingest/bin/reverse_reversed_sequences.py | 29 ------- ingest/config/config.yaml | 1 - phylogenetic/example_data/metadata.tsv | 78 +++++++++---------- .../scripts/reverse_reversed_sequences.py | 2 +- 4 files changed, 40 insertions(+), 70 deletions(-) delete mode 100644 ingest/bin/reverse_reversed_sequences.py diff --git a/ingest/bin/reverse_reversed_sequences.py b/ingest/bin/reverse_reversed_sequences.py deleted file mode 100644 index 6ca5ed24..00000000 --- a/ingest/bin/reverse_reversed_sequences.py +++ /dev/null @@ -1,29 +0,0 @@ -import pandas as pd -import argparse -from Bio import SeqIO - -if __name__=="__main__": - parser = argparse.ArgumentParser( - description="Reverse-complement reverse-complemented sequence", - formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument('--metadata', type=str, required=True, help="input metadata") - parser.add_argument('--sequences', type=str, required=True, help="input sequences") - parser.add_argument('--output', type=str, required=True, help="output sequences") - args = parser.parse_args() - - metadata = pd.read_csv(args.metadata, sep='\t') - - # Read in fasta file - with open(args.sequences, 'r') as f_in: - with open(args.output, 'w') as f_out: - for seq in SeqIO.parse(f_in, 'fasta'): - # Check if metadata['reverse'] is True - if metadata.loc[metadata['accession'] == seq.id, 'reverse'].values[0] == True: - # Reverse-complement sequence - seq.seq = seq.seq.reverse_complement() - print("Reverse-complementing sequence:", 
seq.id) - - # Write sequences to file - SeqIO.write(seq, f_out, 'fasta') diff --git a/ingest/config/config.yaml b/ingest/config/config.yaml index a8e26f70..65c4b2ab 100644 --- a/ingest/config/config.yaml +++ b/ingest/config/config.yaml @@ -62,7 +62,6 @@ transform: 'date_submitted', 'sra_accession', 'abbr_authors', - 'reverse', 'authors', 'institution' ] diff --git a/phylogenetic/example_data/metadata.tsv b/phylogenetic/example_data/metadata.tsv index cd4fd04a..8edd5dcc 100644 --- a/phylogenetic/example_data/metadata.tsv +++ b/phylogenetic/example_data/metadata.tsv @@ -1,39 +1,39 @@ -accession genbank_accession_rev strain date region country division location host date_submitted sra_accession abbr_authors reverse clade outbreak lineage coverage missing_data divergence nonACGTN QC_missing_data QC_mixed_sites QC_rare_mutations QC_frame_shifts QC_stop_codons frame_shifts is_reverse_complement authors institution -MK783032 MK783032.1 MK783032 2017-11-XX Africa Nigeria Rivers Homo sapiens 2019-07-24 Yinka-Ogunleye et al IIb hMPXV-1 A 0.997946341191325 5 551 0 good good good good good false Yinka-Ogunleye,A.,Aruna,O.,Dalhat,M.,Ogoina,D.,McCollum,A.,Disu,Y.,Mamadu,I.,Akinpelu,A.,Ahmad,A.,Burga,J.,Ndoreraho,A.,Nkunzimana,E.,Manneh,L.,Mohammed,A.,Adeoye,O.,Tom-Aba,D.,Silenou,B.,Ipadeola,O.,Saleh,M.,Adeyemo,A.,Nwadiutor,I.,Aworabhi,N.,Uke,P.,John,D.,Wakama,P.,Reynolds,M.,Mauldin,M.,Doty,J.,Wilkins,K.,Musa,J.,Khalakdina,A.,Adedeji,A.,Mba,N.,Ojo,O.,Krause,G.,Ihekweazu,C.,Mauldin,M.R.,Li,Y.,Gao,J. 
NCEZID/DHCPP/PRB, Centers for Disease Control & Prevention (US CDC), USA -MK783030 MK783030.1 MK783030 2017-11-30 Africa Nigeria Rivers Homo sapiens 2019-07-24 Yinka-Ogunleye et al IIb hMPXV-1 A 0.9979412704288344 6 552 0 good good good good good false Yinka-Ogunleye,A.,Aruna,O.,Dalhat,M.,Ogoina,D.,McCollum,A.,Disu,Y.,Mamadu,I.,Akinpelu,A.,Ahmad,A.,Burga,J.,Ndoreraho,A.,Nkunzimana,E.,Manneh,L.,Mohammed,A.,Adeoye,O.,Tom-Aba,D.,Silenou,B.,Ipadeola,O.,Saleh,M.,Adeyemo,A.,Nwadiutor,I.,Aworabhi,N.,Uke,P.,John,D.,Wakama,P.,Reynolds,M.,Mauldin,M.,Doty,J.,Wilkins,K.,Musa,J.,Khalakdina,A.,Adedeji,A.,Mba,N.,Ojo,O.,Krause,G.,Ihekweazu,C.,Mauldin,M.R.,Li,Y.,Gao,J. NCEZID/DHCPP/PRB, Centers for Disease Control & Prevention (US CDC), USA -GN354498 GN354498.1 GN354498 2009-05-12 Baptista et al Baptista,C.S.,Wu,X.,Munroe,D.J. -AY299036 AY299036.1 AY299036 2004-06-30 Meyer et al I 0.0018001206841472752 0 1 0 good good good good good true Meyer,H.,Pfeffer,M.,Babkin,I.V.,Shchelkunov,S.N.,Damon,I.K.,Esposito,J.J. Institute of Microbiology of the Bundeswehr -OK573051 OK573051.1 USA-2003 North America USA Maryland 2022-07-30 SRR10017689 Shashan et al II 0.003473472306030658 0 5 0 good good good good good OPG037:empty range false Shashan,N.,Makarenko,J.,Tallon,L.,Sadzewicz,L.,Vavikolanu,K.,Mehta,A.,Aluvathingal,J.,Nadendla,S.,Myers,T.,Yan,Y.,Sichtig,H. US Food and Drug Administration, Center for Devices and Radiological Health -AF375110 AF375110.1 mpv-pch Europe France Ile de France Paris FR Pan troglodytes 2002-10-19 Esposito et al IIa 0.004776658266103474 0 5 0 good good good good good false Esposito,J.J.,Ropp,S.L.,Jin,Q.,Cai,B.,Knight,J.C.,Yu,L.,Taubenberger,J.K.,Tsai,M.M.,Nowotny,N.,Meyer,H.,Cavallaro,K.F. Centers for Disease Control and Prevention, Poxvirus Section, VEHB, DVRD, NCID -L20439 L20439.1 L20439 1993-07-26 Douglass et al IIb 0.004695526066254583 0 4 0 good good good good good false Douglass,N.J.,Richardson,M.,Dumbell,K.R. 
-OP612680 OP612680.1 MPXV/021/19 2019-04-01 Africa Nigeria Homo sapiens 2022-10-11 Ndodo et al IIb hMPXV-1 A 0.9959586022950271 395 552 0 good good good mediocre good OPG097:267-345,OPG138:7-191 false Ndodo,N.,Ashcroft,J.,Lewandowski,K.,Yinka-Ogunleye,A.,Chukwu,C.,Ahmad,A.,King,D.,Akinpelu,A.,Maluquer de Motes,C.,Ribeca,P.,Summer,R.P.,Rambaut,A.,Chester,M.,Maishman,T.,Babatunde,O.,Mba,N.,Aruna,O.,Pullan,S.T.,Gannon,B.,Brown,C.,Ihekweazu,C.,Adetifa,I.,Ulaeto,D.O. Defence Science and Technology Laboratory, Chemical, Biological and Radiological Sciences -OP612686 OP612686.1 MPXV/053/19 2019-06-26 Africa Nigeria Homo sapiens 2022-10-11 Ndodo et al IIb hMPXV-1 A.1 0.9979666242412871 0 566 0 good good good good good OPG097:267-345 false Ndodo,N.,Ashcroft,J.,Lewandowski,K.,Yinka-Ogunleye,A.,Chukwu,C.,Ahmad,A.,King,D.,Akinpelu,A.,Maluquer de Motes,C.,Ribeca,P.,Summer,R.P.,Rambaut,A.,Chester,M.,Maishman,T.,Babatunde,O.,Mba,N.,Aruna,O.,Pullan,S.T.,Gannon,B.,Brown,C.,Ihekweazu,C.,Adetifa,I.,Ulaeto,D.O. Defence Science and Technology Laboratory, Chemical, Biological and Radiological Sciences -ON676708 ON676708.1 MPXV_USA_2021_MD 2021-11-XX North America USA Maryland Homo sapiens 2022-06-03 Gigante et al IIb hMPXV-1 A.1.1 0.9999645046625661 7 597 0 good good good good good false Gigante,C.M.,Myers,R.,Seabolt,M.H.,Wilkins,K.,McCollum,A.,Hutson,C.,Davidson,W.,Rao,A.,Blythe,D.,Li,Y. Division of High-Consequence Pathogens & Pathology, Centers for Disease Control & Prevention (US CDC), USA -OP612689 OP612689.1 MPXV/111/19 2019-12-17 Africa Nigeria Homo sapiens 2022-10-11 Ndodo et al IIb hMPXV-1 A.2 0.9937680328991071 270 556 0 good good good good good OPG174:290-347 false Ndodo,N.,Ashcroft,J.,Lewandowski,K.,Yinka-Ogunleye,A.,Chukwu,C.,Ahmad,A.,King,D.,Akinpelu,A.,Maluquer de Motes,C.,Ribeca,P.,Summer,R.P.,Rambaut,A.,Chester,M.,Maishman,T.,Babatunde,O.,Mba,N.,Aruna,O.,Pullan,S.T.,Gannon,B.,Brown,C.,Ihekweazu,C.,Adetifa,I.,Ulaeto,D.O. 
Defence Science and Technology Laboratory, Chemical, Biological and Radiological Sciences -OP642362 OP642362.1 MCL-22-H-MPxV-16-5316 2022-07-13 Asia India Kerala Homo sapiens 2022-10-14 Shete AM et al IIb hMPXV-1 A.2.1 0.9987576631898138 245 742 0 good good mediocre mediocre good OPG047:477-483,OPG051:75,OPG072:81-96,OPG078:62-74 false Shete AM,Yadav PD,Kumar A,Patil S,Patil DY,Joshi Y,Majumdar T,Relhan V,Sahay RR,Vasu M,Gawande P,Verma A,Dhakad S,Krishnan AB,Chenayil S,Kumar S,Abraham P,Yadav,P.D.,Shete,A.M.,Sahay,R.R.,Mohandas,S.,Abraham,P. National Institute of Virology, Maximum Containment Laboratory -OP331336 OP331336.1 MPXV_UK_2022_9000166 2022-06-29 Europe United Kingdom Homo sapiens 2022-08-31 Groves et al IIb hMPXV-1 A.2.2 1 0 588 0 good good good good good OPG174:290-347 false Groves,N.,Osman,K.L.,Lewandowski,K.S.,Carter,D.P.,Pullan,S.T.,Myers,R.,Vipond,R.,Chand,M. UKHSA, Research and Evaluation -OP555515 OP555515.1 Monkeypox/PT0428/2022 2022-08-01 Europe Portugal Homo sapiens 2022-09-30 Isidro et al IIb hMPXV-1 A.2.3 0.9475277497477296 9670 563 0 mediocre good good mediocre good OPG174:290-347,OPG175:60-126 false Isidro,J.,Borges,V.,Pinto,M.,Sobral,D.,Santos,J.,Nunes,A.,Mixao,V.,Ferreira,R.,Santos,D.,Duarte,S.,Vieira,L.,Borrego,M.J.,Nuncio,S.,Lopes de Carvalho,I.,Pelerito,A.,Cordeiro,R.,Gomes,J.P. National Institute of Health Doutor Ricardo Jorge, Portugal (INSA), Department of Infectious Diseases -OP413718 OP413718.1 9000360 2022-08-XX Europe United Kingdom Homo sapiens 2022-09-12 Groves et al IIb hMPXV-1 A.3 0.9999949292375094 1 592 0 good good good good good OPG174:2-347 false Groves,N.,Osman,K.L.,Lewandowski,K.S.,Carter,D.P.,Pullan,S.T.,Myers,R.,Vipond,R.,Chand,M. 
UKHSA, Research and Evaluation -OQ565449 OQ565449.1 NY-NYCPHL-000810 2022-07-13 North America USA New York New York City Homo sapiens 2023-03-08 Clabby et al IIb hMPXV-1 B.1 0.9826884168572428 1880 596 0 good good good good good false Clabby,T.T.,Amin,H.S.,Wang,J.C.,Taki,F.,Su,M.,Rahat,A.,De La Cruz,N.,Olsen,A.,Thi,C.,Silver,S.,Akther,S.,Chowdhury,M.,Omoregie,E.,Hughes,S. NYC Department of Health and Mental Hygiene, Public Health Laboratory -ON929064 ON929064.1 MPXV/Germany/2022/RKI106 2022-07-XX Europe Germany Homo sapiens 2022-07-06 Brinkmann et al IIb hMPXV-1 B.1.1 1 0 610 0 good good good good good false Brinkmann,A.,Kohl,C.,Pape,K.,Uddin,S.,Schrick,L.,Michel,J.,Schaade,L.,Nitsche,A. Robert Koch Institute, Centre for Biological Threats, Highly Pathogenic Viruses -OR004196 OR004196.1 ANT-LDSP-ANT-MPX-32351C 2022-XX-XX South America Colombia Antioquia Homo sapiens 2023-05-27 Betancur et al IIb hMPXV-1 B.1.10 0.949621974656329 7748 576 0 mediocre good good good good OPG199:321-345 false Betancur,I.I.B.,Velarde Hoyos,C.A.C.V.,Gomez,R.R.G.,Mercado-Reyes,M.M.R. Laboratorio Departamental de Salud Publica de Antioquia, Antioquia -OP440583 OP440583.1 hMPX/Human/USA/CA-LACPHL-MA00010/2022 2022-07-22 North America USA Homo sapiens 2022-09-15 Hemarajata et al IIb hMPXV-1 B.1.11 0.9556815358325431 7191 585 1 mediocre good good good good false Hemarajata,P.,Green,N.M. Los Angeles County Public Health Laboratories, Los Angeles County Department of Public Health -OQ419048 OQ419048.1 NY-NYCPHL-000613 2022-08-10 North America USA New York New York City Homo sapiens 2023-02-15 Wang et al IIb hMPXV-1 B.1.12 0.9922214503394875 0 606 0 good good good good good false Wang,J.C.,Amin,H.S.,Clabby,T.T.,Taki,F.,Su,M.,Rahat,A.,De La Cruz,N.,Olsen,A.,Thi,C.,Silver,S.,Akther,S.,Chowdhury,M.,Omoregie,E.,Hughes,S. 
NYC Department of Health and Mental Hygiene, Public Health Laboratory -OP820455 OP820455.1 hMpxV/Human/USA/IL-RIPHL-MPXV_10006/2022 2022-08-08 North America USA Illinois Cook County IL Homo sapiens 2022-11-14 Barbian et al IIb hMPXV-1 B.1.13 0.9959433900075554 108 610 0 good good good good good false Barbian,H.,Green,S.,Kunstman,K.,Araujo Perez,F.,Bobrovska,S.,Hayden,M.,Moore,N. Rush University Meical Center, Regional Innovative Public Health Laboratory (RIPHL) -OP523989 OP523989.1 MPXV/Germany/2022/RKI387 2022-09-XX Europe Germany Homo sapiens 2022-09-26 Brinkmann et al IIb hMPXV-1 B.1.14 0.9987120263273989 254 614 0 good good good good good false Brinkmann,A.,Kohl,C.,Pape,K.,Schrick,L.,Michel,J.,Schaade,L.,Nitsche,A. Robert Koch Institute, Centre for Biological Threats, Highly Pathogenic Viruses -OP881932 OP881932.1 ANT-LDSP-ANT-MPX-26250 2022-09-26 South America Colombia Antioquia Homo sapiens 2022-11-23 Betancur et al IIb hMPXV-1 B.1.15 0.8388055311877247 28670 488 0 bad good good good good false Betancur,I.I.B.,Velarde-Hoyos,C.-A.C.V.,Gomez,R.R.G.,Mercado-Reyes,M.M.R. Laboratorio Departamental de Salud Publica de Antioquia, Antioquia -OP459211 OP459211.1 ANT-LDSP-ANT-MPX-23596-O 2022-08-29 South America Colombia Antioquia Medellin Homo sapiens 2022-09-19 Betancur et al IIb hMPXV-1 B.1.16 0.8801930946356403 23527 519 0 bad good good good good false Betancur,I.I.B.,Velarde-Hoyos,C.-A.C.V.,Gomez,R.R.G.,Mercado-Reyes,M.M.R. Laboratorio Departamental de Salud Publica de Antioquia -OQ503819 OQ503819.2 MPXV/Human/USA/CA-LACPHL-MA00426/2022 2022-12-14 North America USA Homo sapiens 2023-02-28 Hemarajata et al IIb hMPXV-1 B.1.17 0.9754524387832199 3293 600 0 good good good good good false Hemarajata,P.,Green,N.M.,Garrigues,J.M. 
Los Angeles County Public Health Laboratories, Los Angeles County Department of Public Health -OQ504379 OQ504379.1 NY-NYCPHL-000019 2022-07-25 North America USA New York New York City Homo sapiens 2023-02-28 Wang et al IIb hMPXV-1 B.1.18 0.9848587032032007 1443 599 0 good good good good good false Wang,J.C.,Amin,H.S.,Clabby,T.T.,Taki,F.,Su,M.,Rahat,A.,De La Cruz,N.,Olsen,A.,Thi,C.,Silver,S.,Akther,S.,Chowdhury,M.,Omoregie,E.,Hughes,S. NYC Department of Health and Mental Hygiene, Public Health Laboratory -OQ427121 OQ427121.1 MPXV_USA_2022_OR0008 2022-06-XX North America USA Oregon Homo sapiens 2023-02-18 Gigante et al IIb hMPXV-1 B.1.19 0.9997920987378872 41 609 0 good good good good good false Gigante,C.,Cogswell,K.,Zhao,H.,Batra,D.,Hetrick,E.,Howard,D.,Kovar,L.,Seabolt,M.,Morrison,S.,Desch,M.,Knipe,K.,Weigand,M.,Sheth,M.,Burgin,A.,Burroughs,M.,Lee,J.,Wilkins,K.,McCollum,A.,Hutson,C.,Davidson,W.,Rao,A.,Grenz,L.,Li,Y. CDC, DHCPP-PRB -OR209316 OR209316.1 MPXV_USA_2022_LA0013 2022-07-XX North America USA Louisiana Homo sapiens 2023-07-05 Li et al IIb hMPXV-1 B.1.2 0.9997870279753966 41 611 0 good good good good good false Li,C.M. CDC, DHCPP-PRB -OR499977 OR499977.1 MPXV/Human/USA/CA-LACPHL-MA00520/2023 2023-05-23 North America USA Homo sapiens 2023-09-05 Garrigues et al IIb hMPXV-1 B.1.20 0.988895030145683 1441 608 2 good good good good good false Garrigues,J.M.,Green,N.M. Los Angeles County Public Health Laboratories -OP879722 OP879722.1 NJ-MPXV-001 2022-07-26 North America USA New Jersey Homo sapiens 2022-11-22 SRR22321727 Palmateer et al IIb hMPXV-1 B.1.3 0.9920997520397142 0 602 0 good good good good good false Palmateer,N.C. 
New Jersey Department of Health, Public Health and Environmental Laboratories -OP279033 OP279033.1 MpxV/human/CAN/UN-NML-4094/2022 2022-07-06 North America Canada Homo sapiens 2022-08-23 Duggan et al IIb hMPXV-1 B.1.4 0.975097485408881 4911 585 0 good good good good good false Duggan,A.,Hole,D.,Yadav,C.,Knox,N.,Tyler,A.,Haidl,E.,Chapel,M.,Domselaar,G.V.,Graham,M.,Audet,J.,Fernando,L.,Antonation,K.,Safronetz,D.,Hagan,M.,Peters,G.,Go,A.,Laminman,V.,Kaplen,B.,Leung,A.,Griffiths,E.,Jolly,G.,Eshaghi,A.,Gubbay,J.B.,Hasso,M.,Marchand-Austin,A.,Olsha,R.,Patel,S.N. National Microbiology Laboratory, Public Health Agency of Canada -OQ468871 OQ468871.1 NY-NYCPHL-000426 2022-09-12 North America USA New York New York City Homo sapiens 2023-02-26 Wang et al IIb hMPXV-1 B.1.5 0.9850868875152756 1407 608 0 good good good good good false Wang,J.C.,Amin,H.S.,Clabby,T.T.,Taki,F.,Su,M.,Rahat,A.,De La Cruz,N.,Olsen,A.,Thi,C.,Silver,S.,Akther,S.,Chowdhury,M.,Omoregie,E.,Hughes,S. NYC Department of Health and Mental Hygiene, Public Health Laboratory -OQ644785 OQ644785.1 MPXV/Human/USA/CA-LACPHL-MA00479/2022 2022-12-01 North America USA Homo sapiens 2023-03-22 Hemarajata et al IIb hMPXV-1 B.1.6 0.9853201425898412 2154 607 0 good good good good good OPG178:158-205 false Hemarajata,P.,Green,N.M.,Garrigues,J.M. Los Angeles County Public Health Laboratories, Los Angeles County Department of Public Health -OR449631 OR449631.1 MPXV/Germany/2022/ON/RKI917 2022-06-XX Europe Germany Homo sapiens 2023-08-22 Brinkmann et al IIb hMPXV-1 B.1.7 0.9647277761156945 446 580 0 good good good good good OPG193:264-268 false Brinkmann,A.,Pape,K.,Kohl,C.,Schrick,L.,Michel,J.,Schaade,L.,Nitsche,A. Robert Koch Institute, Centre for Biological Threats, Highly Pathogenic Viruses -OX044338 OX044338.1 MPXV_347_SP_2022 2022-XX-XX Europe Spain Homo sapiens 2022-07-30 ERR10297607 Cuesta et al IIb hMPXV-1 B.1.8 0.9729018452504703 5344 604 0 good good good good good false Cuesta,I. 
Institute of Health Carlos III, Bioinformatics Unit, Carretera Majadahonda-Pozuelo km2 -ON649713 ON649713.1 Monkeypox/PT0020/2022 2022-05-19 Europe Portugal Homo sapiens 2022-06-01 Isidro et al IIb hMPXV-1 B.1.9 0.9999695754250567 6 604 0 good good good good good OPG023:526-661 false Isidro,J.,Borges,V.,Pinto,M.,Sobral,D.,Santos,J.D.,Nunes,A.,Mixao,V.,Ferreira,R.,Santos,D.,Duarte,S.,Vieira,L.,Borrego,M.J.,Nuncio,S.,Carvalho,I.L.,Pelerito,A.,Cordeiro,R.,Gomes,J.P. Institute National de Saude Doutor Ricardo Jorge (INSA), Portugal -LC756924 LC756924.1 MPXV/human/Japan/Tokyo/2023/TKY220325 2023-01-30 Asia Japan Tokyo Homo sapiens 2023-02-22 Kasuya et al IIb hMPXV-1 C.1 1 0 613 0 good good good good good false Kasuya,F.,Negishi,A.,Kumagai,R.,Hasegawa,M.,Fujiwara,T.,Miyake,H.,Nagashima,M.,Sadamasu,K. Department of Microbiology; 3-24-1,Hyakunin-cho, Shinjuku-ku, Tokyo 169-0073 -OK573144 OK573144.1 WRAIR 7-61 North America USA Maryland Macaca fascicularis 2022-07-30 SRR10027401 Hunt et al outgroup 0.007788691185493563 0 2 0 good good good good good OPG133:401-711 false Hunt,S.,Shashan,N.,Tallon,L.,Sadzewicz,L.,Vavikolanu,K.,Mehta,A.,Aluvathingal,J.,Nadendla,S.,Myers,T.,Yan,Y.,Sichtig,H. 
US Food and Drug Administration, Center for Devices and Radiological Health +accession genbank_accession_rev strain date region country division location host date_submitted sra_accession abbr_authors clade outbreak lineage coverage missing_data divergence nonACGTN QC_missing_data QC_mixed_sites QC_rare_mutations QC_frame_shifts QC_stop_codons frame_shifts is_reverse_complement authors institution +MK783032 MK783032.1 MK783032 2017-11-XX Africa Nigeria Rivers Homo sapiens 2019-07-24 Yinka-Ogunleye et al IIb hMPXV-1 A 0.997946341191325 5 551 0 good good good good good false Yinka-Ogunleye,A.,Aruna,O.,Dalhat,M.,Ogoina,D.,McCollum,A.,Disu,Y.,Mamadu,I.,Akinpelu,A.,Ahmad,A.,Burga,J.,Ndoreraho,A.,Nkunzimana,E.,Manneh,L.,Mohammed,A.,Adeoye,O.,Tom-Aba,D.,Silenou,B.,Ipadeola,O.,Saleh,M.,Adeyemo,A.,Nwadiutor,I.,Aworabhi,N.,Uke,P.,John,D.,Wakama,P.,Reynolds,M.,Mauldin,M.,Doty,J.,Wilkins,K.,Musa,J.,Khalakdina,A.,Adedeji,A.,Mba,N.,Ojo,O.,Krause,G.,Ihekweazu,C.,Mauldin,M.R.,Li,Y.,Gao,J. NCEZID/DHCPP/PRB, Centers for Disease Control & Prevention (US CDC), USA +MK783030 MK783030.1 MK783030 2017-11-30 Africa Nigeria Rivers Homo sapiens 2019-07-24 Yinka-Ogunleye et al IIb hMPXV-1 A 0.9979412704288344 6 552 0 good good good good good false Yinka-Ogunleye,A.,Aruna,O.,Dalhat,M.,Ogoina,D.,McCollum,A.,Disu,Y.,Mamadu,I.,Akinpelu,A.,Ahmad,A.,Burga,J.,Ndoreraho,A.,Nkunzimana,E.,Manneh,L.,Mohammed,A.,Adeoye,O.,Tom-Aba,D.,Silenou,B.,Ipadeola,O.,Saleh,M.,Adeyemo,A.,Nwadiutor,I.,Aworabhi,N.,Uke,P.,John,D.,Wakama,P.,Reynolds,M.,Mauldin,M.,Doty,J.,Wilkins,K.,Musa,J.,Khalakdina,A.,Adedeji,A.,Mba,N.,Ojo,O.,Krause,G.,Ihekweazu,C.,Mauldin,M.R.,Li,Y.,Gao,J. NCEZID/DHCPP/PRB, Centers for Disease Control & Prevention (US CDC), USA +GN354498 GN354498.1 GN354498 2009-05-12 Baptista et al Baptista,C.S.,Wu,X.,Munroe,D.J. +AY299036 AY299036.1 AY299036 2004-06-30 Meyer et al I 0.0018001206841472752 0 1 0 good good good good good true Meyer,H.,Pfeffer,M.,Babkin,I.V.,Shchelkunov,S.N.,Damon,I.K.,Esposito,J.J. 
Institute of Microbiology of the Bundeswehr +OK573051 OK573051.1 USA-2003 North America USA Maryland 2022-07-30 SRR10017689 Shashan et al II 0.003473472306030658 0 5 0 good good good good good OPG037:empty range false Shashan,N.,Makarenko,J.,Tallon,L.,Sadzewicz,L.,Vavikolanu,K.,Mehta,A.,Aluvathingal,J.,Nadendla,S.,Myers,T.,Yan,Y.,Sichtig,H. US Food and Drug Administration, Center for Devices and Radiological Health +AF375110 AF375110.1 mpv-pch Europe France Ile de France Paris FR Pan troglodytes 2002-10-19 Esposito et al IIa 0.004776658266103474 0 5 0 good good good good good false Esposito,J.J.,Ropp,S.L.,Jin,Q.,Cai,B.,Knight,J.C.,Yu,L.,Taubenberger,J.K.,Tsai,M.M.,Nowotny,N.,Meyer,H.,Cavallaro,K.F. Centers for Disease Control and Prevention, Poxvirus Section, VEHB, DVRD, NCID +L20439 L20439.1 L20439 1993-07-26 Douglass et al IIb 0.004695526066254583 0 4 0 good good good good good false Douglass,N.J.,Richardson,M.,Dumbell,K.R. +OP612680 OP612680.1 MPXV/021/19 2019-04-01 Africa Nigeria Homo sapiens 2022-10-11 Ndodo et al IIb hMPXV-1 A 0.9959586022950271 395 552 0 good good good mediocre good OPG097:267-345,OPG138:7-191 false Ndodo,N.,Ashcroft,J.,Lewandowski,K.,Yinka-Ogunleye,A.,Chukwu,C.,Ahmad,A.,King,D.,Akinpelu,A.,Maluquer de Motes,C.,Ribeca,P.,Summer,R.P.,Rambaut,A.,Chester,M.,Maishman,T.,Babatunde,O.,Mba,N.,Aruna,O.,Pullan,S.T.,Gannon,B.,Brown,C.,Ihekweazu,C.,Adetifa,I.,Ulaeto,D.O. Defence Science and Technology Laboratory, Chemical, Biological and Radiological Sciences +OP612686 OP612686.1 MPXV/053/19 2019-06-26 Africa Nigeria Homo sapiens 2022-10-11 Ndodo et al IIb hMPXV-1 A.1 0.9979666242412871 0 566 0 good good good good good OPG097:267-345 false Ndodo,N.,Ashcroft,J.,Lewandowski,K.,Yinka-Ogunleye,A.,Chukwu,C.,Ahmad,A.,King,D.,Akinpelu,A.,Maluquer de Motes,C.,Ribeca,P.,Summer,R.P.,Rambaut,A.,Chester,M.,Maishman,T.,Babatunde,O.,Mba,N.,Aruna,O.,Pullan,S.T.,Gannon,B.,Brown,C.,Ihekweazu,C.,Adetifa,I.,Ulaeto,D.O. 
Defence Science and Technology Laboratory, Chemical, Biological and Radiological Sciences +ON676708 ON676708.1 MPXV_USA_2021_MD 2021-11-XX North America USA Maryland Homo sapiens 2022-06-03 Gigante et al IIb hMPXV-1 A.1.1 0.9999645046625661 7 597 0 good good good good good false Gigante,C.M.,Myers,R.,Seabolt,M.H.,Wilkins,K.,McCollum,A.,Hutson,C.,Davidson,W.,Rao,A.,Blythe,D.,Li,Y. Division of High-Consequence Pathogens & Pathology, Centers for Disease Control & Prevention (US CDC), USA +OP612689 OP612689.1 MPXV/111/19 2019-12-17 Africa Nigeria Homo sapiens 2022-10-11 Ndodo et al IIb hMPXV-1 A.2 0.9937680328991071 270 556 0 good good good good good OPG174:290-347 false Ndodo,N.,Ashcroft,J.,Lewandowski,K.,Yinka-Ogunleye,A.,Chukwu,C.,Ahmad,A.,King,D.,Akinpelu,A.,Maluquer de Motes,C.,Ribeca,P.,Summer,R.P.,Rambaut,A.,Chester,M.,Maishman,T.,Babatunde,O.,Mba,N.,Aruna,O.,Pullan,S.T.,Gannon,B.,Brown,C.,Ihekweazu,C.,Adetifa,I.,Ulaeto,D.O. Defence Science and Technology Laboratory, Chemical, Biological and Radiological Sciences +OP642362 OP642362.1 MCL-22-H-MPxV-16-5316 2022-07-13 Asia India Kerala Homo sapiens 2022-10-14 Shete AM et al IIb hMPXV-1 A.2.1 0.9987576631898138 245 742 0 good good mediocre mediocre good OPG047:477-483,OPG051:75,OPG072:81-96,OPG078:62-74 false Shete AM,Yadav PD,Kumar A,Patil S,Patil DY,Joshi Y,Majumdar T,Relhan V,Sahay RR,Vasu M,Gawande P,Verma A,Dhakad S,Krishnan AB,Chenayil S,Kumar S,Abraham P,Yadav,P.D.,Shete,A.M.,Sahay,R.R.,Mohandas,S.,Abraham,P. National Institute of Virology, Maximum Containment Laboratory +OP331336 OP331336.1 MPXV_UK_2022_9000166 2022-06-29 Europe United Kingdom Homo sapiens 2022-08-31 Groves et al IIb hMPXV-1 A.2.2 1 0 588 0 good good good good good OPG174:290-347 false Groves,N.,Osman,K.L.,Lewandowski,K.S.,Carter,D.P.,Pullan,S.T.,Myers,R.,Vipond,R.,Chand,M. 
UKHSA, Research and Evaluation +OP555515 OP555515.1 Monkeypox/PT0428/2022 2022-08-01 Europe Portugal Homo sapiens 2022-09-30 Isidro et al IIb hMPXV-1 A.2.3 0.9475277497477296 9670 563 0 mediocre good good mediocre good OPG174:290-347,OPG175:60-126 false Isidro,J.,Borges,V.,Pinto,M.,Sobral,D.,Santos,J.,Nunes,A.,Mixao,V.,Ferreira,R.,Santos,D.,Duarte,S.,Vieira,L.,Borrego,M.J.,Nuncio,S.,Lopes de Carvalho,I.,Pelerito,A.,Cordeiro,R.,Gomes,J.P. National Institute of Health Doutor Ricardo Jorge, Portugal (INSA), Department of Infectious Diseases +OP413718 OP413718.1 9000360 2022-08-XX Europe United Kingdom Homo sapiens 2022-09-12 Groves et al IIb hMPXV-1 A.3 0.9999949292375094 1 592 0 good good good good good OPG174:2-347 false Groves,N.,Osman,K.L.,Lewandowski,K.S.,Carter,D.P.,Pullan,S.T.,Myers,R.,Vipond,R.,Chand,M. UKHSA, Research and Evaluation +OQ565449 OQ565449.1 NY-NYCPHL-000810 2022-07-13 North America USA New York New York City Homo sapiens 2023-03-08 Clabby et al IIb hMPXV-1 B.1 0.9826884168572428 1880 596 0 good good good good good false Clabby,T.T.,Amin,H.S.,Wang,J.C.,Taki,F.,Su,M.,Rahat,A.,De La Cruz,N.,Olsen,A.,Thi,C.,Silver,S.,Akther,S.,Chowdhury,M.,Omoregie,E.,Hughes,S. NYC Department of Health and Mental Hygiene, Public Health Laboratory +ON929064 ON929064.1 MPXV/Germany/2022/RKI106 2022-07-XX Europe Germany Homo sapiens 2022-07-06 Brinkmann et al IIb hMPXV-1 B.1.1 1 0 610 0 good good good good good false Brinkmann,A.,Kohl,C.,Pape,K.,Uddin,S.,Schrick,L.,Michel,J.,Schaade,L.,Nitsche,A. Robert Koch Institute, Centre for Biological Threats, Highly Pathogenic Viruses +OR004196 OR004196.1 ANT-LDSP-ANT-MPX-32351C 2022-XX-XX South America Colombia Antioquia Homo sapiens 2023-05-27 Betancur et al IIb hMPXV-1 B.1.10 0.949621974656329 7748 576 0 mediocre good good good good OPG199:321-345 false Betancur,I.I.B.,Velarde Hoyos,C.A.C.V.,Gomez,R.R.G.,Mercado-Reyes,M.M.R. 
Laboratorio Departamental de Salud Publica de Antioquia, Antioquia +OP440583 OP440583.1 hMPX/Human/USA/CA-LACPHL-MA00010/2022 2022-07-22 North America USA Homo sapiens 2022-09-15 Hemarajata et al IIb hMPXV-1 B.1.11 0.9556815358325431 7191 585 1 mediocre good good good good false Hemarajata,P.,Green,N.M. Los Angeles County Public Health Laboratories, Los Angeles County Department of Public Health +OQ419048 OQ419048.1 NY-NYCPHL-000613 2022-08-10 North America USA New York New York City Homo sapiens 2023-02-15 Wang et al IIb hMPXV-1 B.1.12 0.9922214503394875 0 606 0 good good good good good false Wang,J.C.,Amin,H.S.,Clabby,T.T.,Taki,F.,Su,M.,Rahat,A.,De La Cruz,N.,Olsen,A.,Thi,C.,Silver,S.,Akther,S.,Chowdhury,M.,Omoregie,E.,Hughes,S. NYC Department of Health and Mental Hygiene, Public Health Laboratory +OP820455 OP820455.1 hMpxV/Human/USA/IL-RIPHL-MPXV_10006/2022 2022-08-08 North America USA Illinois Cook County IL Homo sapiens 2022-11-14 Barbian et al IIb hMPXV-1 B.1.13 0.9959433900075554 108 610 0 good good good good good false Barbian,H.,Green,S.,Kunstman,K.,Araujo Perez,F.,Bobrovska,S.,Hayden,M.,Moore,N. Rush University Meical Center, Regional Innovative Public Health Laboratory (RIPHL) +OP523989 OP523989.1 MPXV/Germany/2022/RKI387 2022-09-XX Europe Germany Homo sapiens 2022-09-26 Brinkmann et al IIb hMPXV-1 B.1.14 0.9987120263273989 254 614 0 good good good good good false Brinkmann,A.,Kohl,C.,Pape,K.,Schrick,L.,Michel,J.,Schaade,L.,Nitsche,A. Robert Koch Institute, Centre for Biological Threats, Highly Pathogenic Viruses +OP881932 OP881932.1 ANT-LDSP-ANT-MPX-26250 2022-09-26 South America Colombia Antioquia Homo sapiens 2022-11-23 Betancur et al IIb hMPXV-1 B.1.15 0.8388055311877247 28670 488 0 bad good good good good false Betancur,I.I.B.,Velarde-Hoyos,C.-A.C.V.,Gomez,R.R.G.,Mercado-Reyes,M.M.R. 
Laboratorio Departamental de Salud Publica de Antioquia, Antioquia +OP459211 OP459211.1 ANT-LDSP-ANT-MPX-23596-O 2022-08-29 South America Colombia Antioquia Medellin Homo sapiens 2022-09-19 Betancur et al IIb hMPXV-1 B.1.16 0.8801930946356403 23527 519 0 bad good good good good false Betancur,I.I.B.,Velarde-Hoyos,C.-A.C.V.,Gomez,R.R.G.,Mercado-Reyes,M.M.R. Laboratorio Departamental de Salud Publica de Antioquia +OQ503819 OQ503819.2 MPXV/Human/USA/CA-LACPHL-MA00426/2022 2022-12-14 North America USA Homo sapiens 2023-02-28 Hemarajata et al IIb hMPXV-1 B.1.17 0.9754524387832199 3293 600 0 good good good good good false Hemarajata,P.,Green,N.M.,Garrigues,J.M. Los Angeles County Public Health Laboratories, Los Angeles County Department of Public Health +OQ504379 OQ504379.1 NY-NYCPHL-000019 2022-07-25 North America USA New York New York City Homo sapiens 2023-02-28 Wang et al IIb hMPXV-1 B.1.18 0.9848587032032007 1443 599 0 good good good good good false Wang,J.C.,Amin,H.S.,Clabby,T.T.,Taki,F.,Su,M.,Rahat,A.,De La Cruz,N.,Olsen,A.,Thi,C.,Silver,S.,Akther,S.,Chowdhury,M.,Omoregie,E.,Hughes,S. NYC Department of Health and Mental Hygiene, Public Health Laboratory +OQ427121 OQ427121.1 MPXV_USA_2022_OR0008 2022-06-XX North America USA Oregon Homo sapiens 2023-02-18 Gigante et al IIb hMPXV-1 B.1.19 0.9997920987378872 41 609 0 good good good good good false Gigante,C.,Cogswell,K.,Zhao,H.,Batra,D.,Hetrick,E.,Howard,D.,Kovar,L.,Seabolt,M.,Morrison,S.,Desch,M.,Knipe,K.,Weigand,M.,Sheth,M.,Burgin,A.,Burroughs,M.,Lee,J.,Wilkins,K.,McCollum,A.,Hutson,C.,Davidson,W.,Rao,A.,Grenz,L.,Li,Y. CDC, DHCPP-PRB +OR209316 OR209316.1 MPXV_USA_2022_LA0013 2022-07-XX North America USA Louisiana Homo sapiens 2023-07-05 Li et al IIb hMPXV-1 B.1.2 0.9997870279753966 41 611 0 good good good good good false Li,C.M. 
CDC, DHCPP-PRB +OR499977 OR499977.1 MPXV/Human/USA/CA-LACPHL-MA00520/2023 2023-05-23 North America USA Homo sapiens 2023-09-05 Garrigues et al IIb hMPXV-1 B.1.20 0.988895030145683 1441 608 2 good good good good good false Garrigues,J.M.,Green,N.M. Los Angeles County Public Health Laboratories +OP879722 OP879722.1 NJ-MPXV-001 2022-07-26 North America USA New Jersey Homo sapiens 2022-11-22 SRR22321727 Palmateer et al IIb hMPXV-1 B.1.3 0.9920997520397142 0 602 0 good good good good good false Palmateer,N.C. New Jersey Department of Health, Public Health and Environmental Laboratories +OP279033 OP279033.1 MpxV/human/CAN/UN-NML-4094/2022 2022-07-06 North America Canada Homo sapiens 2022-08-23 Duggan et al IIb hMPXV-1 B.1.4 0.975097485408881 4911 585 0 good good good good good false Duggan,A.,Hole,D.,Yadav,C.,Knox,N.,Tyler,A.,Haidl,E.,Chapel,M.,Domselaar,G.V.,Graham,M.,Audet,J.,Fernando,L.,Antonation,K.,Safronetz,D.,Hagan,M.,Peters,G.,Go,A.,Laminman,V.,Kaplen,B.,Leung,A.,Griffiths,E.,Jolly,G.,Eshaghi,A.,Gubbay,J.B.,Hasso,M.,Marchand-Austin,A.,Olsha,R.,Patel,S.N. National Microbiology Laboratory, Public Health Agency of Canada +OQ468871 OQ468871.1 NY-NYCPHL-000426 2022-09-12 North America USA New York New York City Homo sapiens 2023-02-26 Wang et al IIb hMPXV-1 B.1.5 0.9850868875152756 1407 608 0 good good good good good false Wang,J.C.,Amin,H.S.,Clabby,T.T.,Taki,F.,Su,M.,Rahat,A.,De La Cruz,N.,Olsen,A.,Thi,C.,Silver,S.,Akther,S.,Chowdhury,M.,Omoregie,E.,Hughes,S. NYC Department of Health and Mental Hygiene, Public Health Laboratory +OQ644785 OQ644785.1 MPXV/Human/USA/CA-LACPHL-MA00479/2022 2022-12-01 North America USA Homo sapiens 2023-03-22 Hemarajata et al IIb hMPXV-1 B.1.6 0.9853201425898412 2154 607 0 good good good good good OPG178:158-205 false Hemarajata,P.,Green,N.M.,Garrigues,J.M. 
Los Angeles County Public Health Laboratories, Los Angeles County Department of Public Health +OR449631 OR449631.1 MPXV/Germany/2022/ON/RKI917 2022-06-XX Europe Germany Homo sapiens 2023-08-22 Brinkmann et al IIb hMPXV-1 B.1.7 0.9647277761156945 446 580 0 good good good good good OPG193:264-268 false Brinkmann,A.,Pape,K.,Kohl,C.,Schrick,L.,Michel,J.,Schaade,L.,Nitsche,A. Robert Koch Institute, Centre for Biological Threats, Highly Pathogenic Viruses +OX044338 OX044338.1 MPXV_347_SP_2022 2022-XX-XX Europe Spain Homo sapiens 2022-07-30 ERR10297607 Cuesta et al IIb hMPXV-1 B.1.8 0.9729018452504703 5344 604 0 good good good good good false Cuesta,I. Institute of Health Carlos III, Bioinformatics Unit, Carretera Majadahonda-Pozuelo km2 +ON649713 ON649713.1 Monkeypox/PT0020/2022 2022-05-19 Europe Portugal Homo sapiens 2022-06-01 Isidro et al IIb hMPXV-1 B.1.9 0.9999695754250567 6 604 0 good good good good good OPG023:526-661 false Isidro,J.,Borges,V.,Pinto,M.,Sobral,D.,Santos,J.D.,Nunes,A.,Mixao,V.,Ferreira,R.,Santos,D.,Duarte,S.,Vieira,L.,Borrego,M.J.,Nuncio,S.,Carvalho,I.L.,Pelerito,A.,Cordeiro,R.,Gomes,J.P. Institute National de Saude Doutor Ricardo Jorge (INSA), Portugal +LC756924 LC756924.1 MPXV/human/Japan/Tokyo/2023/TKY220325 2023-01-30 Asia Japan Tokyo Homo sapiens 2023-02-22 Kasuya et al IIb hMPXV-1 C.1 1 0 613 0 good good good good good false Kasuya,F.,Negishi,A.,Kumagai,R.,Hasegawa,M.,Fujiwara,T.,Miyake,H.,Nagashima,M.,Sadamasu,K. Department of Microbiology; 3-24-1,Hyakunin-cho, Shinjuku-ku, Tokyo 169-0073 +OK573144 OK573144.1 WRAIR 7-61 North America USA Maryland Macaca fascicularis 2022-07-30 SRR10027401 Hunt et al outgroup 0.007788691185493563 0 2 0 good good good good good OPG133:401-711 false Hunt,S.,Shashan,N.,Tallon,L.,Sadzewicz,L.,Vavikolanu,K.,Mehta,A.,Aluvathingal,J.,Nadendla,S.,Myers,T.,Yan,Y.,Sichtig,H. 
US Food and Drug Administration, Center for Devices and Radiological Health diff --git a/phylogenetic/scripts/reverse_reversed_sequences.py b/phylogenetic/scripts/reverse_reversed_sequences.py index d3393cfa..1bf5e408 100644 --- a/phylogenetic/scripts/reverse_reversed_sequences.py +++ b/phylogenetic/scripts/reverse_reversed_sequences.py @@ -19,7 +19,7 @@ with open(args.sequences, 'r') as f_in: with open(args.output, 'w') as f_out: for seq in SeqIO.parse(f_in, 'fasta'): - # Check if metadata['reverse'] is True + # Check if metadata['is_reverse_complement'] is True if metadata.loc[metadata['accession'] == seq.id, 'is_reverse_complement'].values[0] == True: # Reverse-complement sequence seq.seq = seq.seq.reverse_complement()