diff --git a/lib/bio/bio.go b/lib/bio/bio.go index 8ebbee9..de90fa5 100644 --- a/lib/bio/bio.go +++ b/lib/bio/bio.go @@ -20,6 +20,7 @@ import ( "github.com/koeng101/dnadesign/lib/bio/fastq" "github.com/koeng101/dnadesign/lib/bio/genbank" "github.com/koeng101/dnadesign/lib/bio/pileup" + "github.com/koeng101/dnadesign/lib/bio/sam" "github.com/koeng101/dnadesign/lib/bio/slow5" "github.com/koeng101/dnadesign/lib/bio/uniprot" "golang.org/x/sync/errgroup" @@ -33,6 +34,7 @@ const ( Fastq Genbank Slow5 + Sam Pileup ) @@ -48,6 +50,7 @@ var DefaultMaxLengths = map[Format]int{ Fastq: 8 * 1024 * 1024, // The longest single nanopore sequencing read so far is 4Mb. A 8mb buffer should be large enough for any sequencing. Genbank: defaultMaxLineLength, Slow5: 128 * 1024 * 1024, // 128mb is used because slow5 lines can be massive, since a single read can be many millions of base pairs. + Sam: defaultMaxLineLength, Pileup: defaultMaxLineLength, } @@ -89,36 +92,36 @@ type Parser[Data io.WriterTo, Header io.WriterTo] struct { } // NewFastaParser initiates a new FASTA parser from an io.Reader. -func NewFastaParser(r io.Reader) (*Parser[*fasta.Record, *fasta.Header], error) { +func NewFastaParser(r io.Reader) *Parser[*fasta.Record, *fasta.Header] { return NewFastaParserWithMaxLineLength(r, DefaultMaxLengths[Fasta]) } // NewFastaParserWithMaxLineLength initiates a new FASTA parser from an // io.Reader and a user-given maxLineLength. -func NewFastaParserWithMaxLineLength(r io.Reader, maxLineLength int) (*Parser[*fasta.Record, *fasta.Header], error) { - return &Parser[*fasta.Record, *fasta.Header]{parserInterface: fasta.NewParser(r, maxLineLength)}, nil +func NewFastaParserWithMaxLineLength(r io.Reader, maxLineLength int) *Parser[*fasta.Record, *fasta.Header] { + return &Parser[*fasta.Record, *fasta.Header]{parserInterface: fasta.NewParser(r, maxLineLength)} } // NewFastqParser initiates a new FASTQ parser from an io.Reader. -func NewFastqParser(r io.Reader) (*Parser[*fastq.Read, *fastq.Header], error) { +func NewFastqParser(r io.Reader) *Parser[*fastq.Read, *fastq.Header] { return NewFastqParserWithMaxLineLength(r, DefaultMaxLengths[Fastq]) } // NewFastqParserWithMaxLineLength initiates a new FASTQ parser from an // io.Reader and a user-given maxLineLength. -func NewFastqParserWithMaxLineLength(r io.Reader, maxLineLength int) (*Parser[*fastq.Read, *fastq.Header], error) { - return &Parser[*fastq.Read, *fastq.Header]{parserInterface: fastq.NewParser(r, maxLineLength)}, nil +func NewFastqParserWithMaxLineLength(r io.Reader, maxLineLength int) *Parser[*fastq.Read, *fastq.Header] { + return &Parser[*fastq.Read, *fastq.Header]{parserInterface: fastq.NewParser(r, maxLineLength)} } // NewGenbankParser initiates a new Genbank parser form an io.Reader. -func NewGenbankParser(r io.Reader) (*Parser[*genbank.Genbank, *genbank.Header], error) { +func NewGenbankParser(r io.Reader) *Parser[*genbank.Genbank, *genbank.Header] { return NewGenbankParserWithMaxLineLength(r, DefaultMaxLengths[Genbank]) } // NewGenbankParserWithMaxLineLength initiates a new Genbank parser from an // io.Reader and a user-given maxLineLength. -func NewGenbankParserWithMaxLineLength(r io.Reader, maxLineLength int) (*Parser[*genbank.Genbank, *genbank.Header], error) { - return &Parser[*genbank.Genbank, *genbank.Header]{parserInterface: genbank.NewParser(r, maxLineLength)}, nil +func NewGenbankParserWithMaxLineLength(r io.Reader, maxLineLength int) *Parser[*genbank.Genbank, *genbank.Header] { + return &Parser[*genbank.Genbank, *genbank.Header]{parserInterface: genbank.NewParser(r, maxLineLength)} } // NewSlow5Parser initiates a new SLOW5 parser from an io.Reader. @@ -133,15 +136,27 @@ func NewSlow5ParserWithMaxLineLength(r io.Reader, maxLineLength int) (*Parser[*s return &Parser[*slow5.Read, *slow5.Header]{parserInterface: parser}, err } +// NewSamParser initiates a new SAM parser from an io.Reader. +func NewSamParser(r io.Reader) (*Parser[*sam.Alignment, *sam.Header], error) { + return NewSamParserWithMaxLineLength(r, DefaultMaxLengths[Sam]) +} + +// NewSamParserWithMaxLineLength initiates a new SAM parser from an io.Reader +// and a user-given maxLineLength. +func NewSamParserWithMaxLineLength(r io.Reader, maxLineLength int) (*Parser[*sam.Alignment, *sam.Header], error) { + parser, _, err := sam.NewParser(r, maxLineLength) + return &Parser[*sam.Alignment, *sam.Header]{parserInterface: parser}, err +} + // NewPileupParser initiates a new Pileup parser from an io.Reader. -func NewPileupParser(r io.Reader) (*Parser[*pileup.Line, *pileup.Header], error) { +func NewPileupParser(r io.Reader) *Parser[*pileup.Line, *pileup.Header] { return NewPileupParserWithMaxLineLength(r, DefaultMaxLengths[Pileup]) } // NewPileupParserWithMaxLineLength initiates a new Pileup parser from an // io.Reader and a user-given maxLineLength. -func NewPileupParserWithMaxLineLength(r io.Reader, maxLineLength int) (*Parser[*pileup.Line, *pileup.Header], error) { - return &Parser[*pileup.Line, *pileup.Header]{parserInterface: pileup.NewParser(r, maxLineLength)}, nil +func NewPileupParserWithMaxLineLength(r io.Reader, maxLineLength int) *Parser[*pileup.Line, *pileup.Header] { + return &Parser[*pileup.Line, *pileup.Header]{parserInterface: pileup.NewParser(r, maxLineLength)} } // NewUniprotParser initiates a new Uniprot parser from an io.Reader. No diff --git a/lib/bio/example_test.go b/lib/bio/example_test.go index fa3f16b..26ddf2b 100644 --- a/lib/bio/example_test.go +++ b/lib/bio/example_test.go @@ -17,7 +17,7 @@ import ( func Example_read() { // Read lets you read files from disk into a parser. file, _ := os.Open("fasta/data/base.fasta") - parser, _ := bio.NewFastaParser(file) + parser := bio.NewFastaParser(file) records, _ := parser.Parse() @@ -29,7 +29,7 @@ func Example_read() { func Example_readGz() { fileGz, _ := os.Open("fasta/data/base.fasta.gz") file, _ := gzip.NewReader(fileGz) - parser, _ := bio.NewFastaParser(file) + parser := bio.NewFastaParser(file) records, _ := parser.Parse() fmt.Println(records[1].Sequence) @@ -55,7 +55,7 @@ DIDGDGQVNYEEFVQMMTAK*`)) zipWriter.Close() fileDecompressed, _ := gzip.NewReader(&file) // Decompress the file - parser, _ := bio.NewFastaParser(fileDecompressed) + parser := bio.NewFastaParser(fileDecompressed) records, _ := parser.Parse() // Parse all data records from file fmt.Println(records[1].Sequence) @@ -93,7 +93,7 @@ IENY ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTID FPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREA DIDGDGQVNYEEFVQMMTAK*`) - parser, _ := bio.NewFastaParser(file) + parser := bio.NewFastaParser(file) channel := make(chan *fasta.Record) ctx := context.Background() @@ -120,8 +120,8 @@ IENY ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTID FPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREA DIDGDGQVNYEEFVQMMTAK*`) - parser1, _ := bio.NewFastaParser(file1) - parser2, _ := bio.NewFastaParser(file2) + parser1 := bio.NewFastaParser(file1) + parser2 := bio.NewFastaParser(file2) channel := make(chan *fasta.Record) ctx := context.Background() @@ -182,7 +182,7 @@ IENY ADQLTEEQIAEFKEAFSLFDKDGDGTITTKELGTVMRSLGQNPTEAELQDMINEVDADGNGTID FPEFLTMMARKMKDTDSEEEIREAFRVFDKDGNGYISAAELRHVMTNLGEKLTDEEVDEMIREA DIDGDGQVNYEEFVQMMTAK*`) - parser, _ := bio.NewFastaParser(file) + parser := bio.NewFastaParser(file) records, _ := parser.Parse() // Parse all data records from file fmt.Println(records[1].Sequence) @@ -196,7 +196,7 @@ func ExampleNewFastqParser() { GATGTGCGCCGTTCCAGTTGCGACGTACTATAATCCCCGGCAACACGGTGCTGATTCTCTTCCTGTTCCAGAAAGCATAAACAGATGCAAGTCTGGTGTGATTAACTTCACCAAAGGGCTGGTTGTAATATTAGGAAATCTAACAATAGATTCTGTTGGTTGGACTCTAAAATTAGAAATTTGATAGATTCCTTTTCCCAAATGAAAGTTTAACGTACACTTTGTTTCTAAAGGAAGGTCAAATTACAGTCTACAGCATCGTAATGGTTCATTTTCATTTATATTTTAATACTAGAAAAGTCCTAGGTTGAAGATAACCACATAATAAGCTGCAACTTCAGCTGTCCCAACCTGAAGAAGAATCGCAGGAGTCGAAATAACTTCTGTAAAGCAAGTAGTTTGAACCTATTGATGTTTCAACATGAGCAATACGTAACT + $$&%&%#$)*59;/767C378411,***,('11<;:,0039/0&()&'2(/*((4.1.09751).601+'#&&&,-**/0-+3558,/)+&)'&&%&$$'%'%'&*/5978<9;**'3*'&&A?99:;:97:278?=9B?CLJHGG=9<@AC@@=>?=>D>=3<>=>3362$%/((+/%&+//.-,%-4:+..000,&$#%$$%+*)&*0%.//*?<<;>DE>.8942&&//074&$033)*&&&%**)%)962133-%'&*99><<=1144??6.027639.011/-)($#$(/422*4;:=122>?@6964:.5'8:52)*675=:4@;323&&##'.-57*4597)+0&:7<7-550REGB21/0+*79/&/6538())+)+23665+(''$$$'-2(&&*-.-#$&%%$$,-)&$$#$'&,);;AFDCBD{LNKKGIL{{JLKI{{IFG>==86668789=<><;056<;>=87:840/++1,++)-,-0{{&&%%&&),-13;<{HGVKCGFI{J{L{G{INJHEA@C540/3568;>EOI{{{I0000HHRJ{{{{{{{RH{N@@?AKLQEEC?==<433345588==FTA??A@G?@@@EC?==;10//2333?AB?<<<--(++*''&&-(((+@DBJQHJHGGPJH{.---@B?<''-++'--&%%&,,,FC:999IEGJ{HJHIGIFEGIFMDEF;8878{KJGFIJHIHDCAA=<<<<;DDB>:::EK{{@{E<==HM{{{KF{{{MDEQM{ECA?=>9--,.3))'')*++.-,**()%% NM:i:8 ms:i:408 AS:i:408 nn:i:0 tp:A:P cm:i:29 s1:i:195 s2:i:0 de:f:0.0345 SA:Z:pOpen_V3_amplified,2348,-,236S134M1D92S,60,1; rl:i:0`) + parser, _ := bio.NewSamParser(file) + records, _ := parser.Parse() // Parse all data records from file + + fmt.Println(records[0].CIGAR) + // Output: 8S54M1D3M1D108M1D1M1D62M226S +} diff --git a/lib/bio/sam/SAMv1.pdf b/lib/bio/sam/SAMv1.pdf new file mode 100644 index 0000000..b0e140f Binary files /dev/null and b/lib/bio/sam/SAMv1.pdf differ diff --git a/lib/bio/sam/data/aln.sam b/lib/bio/sam/data/aln.sam new file mode 100644 index 0000000..6198339 --- /dev/null +++ b/lib/bio/sam/data/aln.sam @@ -0,0 +1,25 @@ +@HD VN:1.6 SO:unsorted GO:query +@SQ SN:pOpen_V3_amplified LN:2482 +@PG ID:minimap2 PN:minimap2 VN:2.24-r1155-dirty CL:minimap2 -acLx map-ont - APX814_pass_barcode17_e229f2c8_109f9b91_0.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_1.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_10.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_11.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_12.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_13.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_14.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_15.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_16.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_17.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_18.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_19.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_2.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_20.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_21.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_22.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_23.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_24.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_25.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_26.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_27.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_28.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_29.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_3.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_30.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_31.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_32.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_33.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_34.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_35.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_36.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_37.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_38.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_39.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_4.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_40.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_41.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_42.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_43.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_44.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_45.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_46.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_47.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_48.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_49.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_5.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_50.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_51.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_52.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_6.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_7.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_8.fastq.gz APX814_pass_barcode17_e229f2c8_109f9b91_9.fastq.gz +ae9a66f5-bf71-4572-8106-f6f8dbd3b799 16 pOpen_V3_amplified 1 60 8S54M1D3M1D108M1D1M1D62M226S * 0 0 AGCATGCCGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGTGCTGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCGACGTTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTTACTGTTGATGTTCATGTAGGTGCTGATCAGAGGTACTTTCCTGGAGGGTTTAACCTTAGCAATACGTAACGGAACGAAGTACAGGGCAT %,AFDCBD{LNKKGIL{{JLKI{{IFG>==86668789=<><;056<;>=87:840/++1,++)-,-0{{&&%%&&),-13;<{HGVKCGFI{J{L{G{INJHEA@C540/3568;>EOI{{{I0000HHRJ{{{{{{{RH{N@@?AKLQEEC?==<433345588==FTA??A@G?@@@EC?==;10//2333?AB?<<<--(++*''&&-(((+@DBJQHJHGGPJH{.---@B?<''-++'--&%%&,,,FC:999IEGJ{HJHIGIFEGIFMDEF;8878{KJGFIJHIHDCAA=<<<<;DDB>:::EK{{@{E<==HM{{{KF{{{MDEQM{ECA?=>9--,.3))'')*++.-,**()%% NM:i:8 ms:i:408 AS:i:408 nn:i:0 tp:A:P cm:i:29 s1:i:195 s2:i:0 de:f:0.0345 SA:Z:pOpen_V3_amplified,2348,-,236S134M1D92S,60,1; rl:i:0 +ae9a66f5-bf71-4572-8106-f6f8dbd3b799 2064 pOpen_V3_amplified 2348 60 236H64M1D70M92H * 0 0 AGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTG H{N@@?AKLQEEC?==<433345588==FTA??A@G?@@@EC?==;10//2333?AB?<<<--(++*''&&-(((+@DBJQHJHGGPJH{.---@B?<''-++'--&%%&,,,FC:999IEGJ{HJHIGIFEGI NM:i:1 ms:i:262 AS:i:262 nn:i:0 tp:A:P cm:i:19 s1:i:124 s2:i:0 de:f:0.0074 SA:Z:pOpen_V3_amplified,1,-,8S228M4D226S,60,8; rl:i:0 +ee583067-e453-4393-81c8-89302f71f29f 0 pOpen_V3_amplified 1 60 85S63M1D3M1D2M1D4M2D14M1D61M2D4M1I66M2I1M1D5M1D137M1D112M1D5M3D9M2D311M1I1M1I127M1I43M1D11M2D63M1I3M1D12M2I7M2S * 0 0 GGTGTTGTATTGTACTTCGTTCAGTTACGTATTGCTAAGGTTAAACCCTCCAGGAAAGTACCTCTGATCAGCACCTACCCCATATGCTTTTCTGTGACTAGTGAGTGCTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGAGTCATTGTCTGCCGCGTCAATACGAGAAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCCTCGGGGCCAACTTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTAGCGCATCTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCACAAAAAAGGGAATAAGGGCAACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATCTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGAACGAAGCTGACTTCGTAGGTGAATGGGGAGTAGTCTTCGCCATCGCTCTTAAAAGCCAGATAACAGTATGCGTATTTGCGCGCTGATTTTTGCGGTATAAGAATATATACTGATATGTATACCCGAAGTATGTCAAAAAGAGGTATGCTATGAAGCAGCGTATTACAGTGACAGTTGACAGCGACAGCTATCAGTTGCTCAAGGCATATATGATGTCAATATCTCCGGTCTGGTAAGCACAACCATGCAGAATGAAGCCCGTCGTCTGCATACCGAACGCTGGAAAGCGGAAAATCAGGAAGGGATGGCTCAGGTGGCCCGGTTTATATCGAAATGAACGGCTCTTTTGCTGACGAGAACAGGGGCTGGTGAAATGCAGTTTAAGGTTTACACCTATAAAAGAGAGAGCCGTTATCGTCTGTTTGTGGATGTACAGAGTGATATTATTGACACACCCAGGGCGACGGATGGTGATCCCCCTGGCCAGTGCACGTCTGCTGTAGATAAAGTCTCGTGAACTTTACCCGGTGGTGCATATCGGGGATGAAAGCTGGCGCATGATGACCACCGATATGAGCCGTGTGCCAGTATTACCGTTATGC +++*+*''%#$%('''(++/010/.*()''-2867:87===?===>?@?885872670.-.24332389:;<776533=>CB@<;;;>?AB=<:;:333+++)))+&''&''''+('(034234236=>>><<<<<==?;;93.*(&&'''')*--0)'')+*,,+,,''%&&(+++*459=888:6566997689;9::@>>>?@B???>?A><:>::7.-.*)&')+-44*'(%''$%**268?<9;;:;=;::;;;:::6678<>==<:;;;;88<>?@A<75559@===>=;;:;<+****,/8;=@>><::385556@A0000666>CD>>>>B>?==ACA@;;7)(''5@EEBC?BC?<;;=AA'''(=889:@?::9:=>;;;;>>=999875374442C?7502{+(()(/02'''3,'9::;:;<;:;0/-,*+=<==>;;;<@?==2/./3=;;:;2/::<=DDB?@AE>9984210-&%%%()-389??@AAABAA?::::<=@E@9996778BA=;:9;<=@AA>;;;;7722321+))'(++,'''+.1'1..-/05:70/03435545;=>F5/..,-+((-9777222:34<&%&+../,,+,:6+>?B{DC=>=BGEO{J{H{GA::;8999:{{F====EKHPM{K{GJ@?EDIIGGEFCEGG{L{PLEDECFABCA:99;EFHOFHGEGJEDDEEIL{JE{PKR==<678=ADDIK@@A@PKJOCH{GK{PHL{{M;;:AIDELI{NJ{EDBJDY{I@@2222:;AAFGGNK{MLK{{H{HKUHE665333499((((((1-****/33FJGDODIH@@@:::=3//?@AJK{JEFHPEBBF{=<<<=>?EJJ{{{K{{WIIFC?62;66-,{/{75==?ECDFDCDDF9:95=>CH{IGKEJFEJJSNL{DC=??>?@11/++,)(((6>?----@AAF9997545=<.-0>?>?@ECA=>>?EGC>8(&&)*++)))+%$$*:=@EDECC43+@6<;:*)(((,''''KKECEABACFDCEPHJE?@@AGEFDDHIHIFM-+++1000341-''''''%&&'2977+& NM:i:56 ms:i:1831 AS:i:1824 nn:i:0 tp:A:P cm:i:137 s1:i:787 s2:i:0 de:f:0.0442 rl:i:0 +4ecff99b-8513-4705-9c38-2d8480ef044b 4 * 0 0 * * 0 0 GTGGTGTAGCGTGTACTTCAGTACGTACGTATTGCTAAGGTTAAACCCTCCAGGAAAGTACCTCTGATCAGCACCTGCACAAGCTTAAGCTGAAGCCACGATGCCTGTAGCAATGGATTACAGAACGAAAACTTCACTGCTGGCGACGCTGGATTACCTGAAGCCGTTACGCAGGAAAGAACCCAATACAGCAAATAGCGTTGATATTAGTCAACGCTGGCAGGAAATTACTCTGAAAACGCAGCAACTACGCCAAATGAATCAACATAACGGCTGGTTCTGGAAGGACAGTTCAGCGCAATCAACAGGCGCTGGAAATGTTGAAACCGCATCAGGAACCGACGCTATATGGGGCGAACGGTCAGACCTTCAACAACCCATCGCGGCGGTCAAAGAATTCGATCTGAAGATTATCTCCGGCCTGCACTGCAGGCCGGAATGCATTGTTACACCGTCCGGCGGGCAAATTACAGCTCGAAGCCCAGTACCGCCAGTGCAGCGAAGATCACGGCAATCCCCGCCAGCACGACCGCCATTAAACGCAGTAAACGCCAGGCAGTAACCAATGACCACTCCGGCATGATATGTAACATACCTAAAAGCACGCCAGACATCACCAGTACCGCCACCACCAGACACAGAACAGAAACGCCATCCAGCCGGGTTGCGGGGTAACCTATTACGCAACTGCCAGTAAAGCAGCGAAGCGTTCGACGCGCCGCCAGACCAATAGAAAGTGACAGCCCGGCATGTTTAAACAGACCAATAAACGCCAGGTTCATCAATTGCGTTAAAATCAGCGTAACGATGGCAATTTTCACTGGCGTTTTAATGTCCTGGCGGGAATAAAAGCCAGGAGCCAACACTTTCACTACAATCAGGCCGATCAAACCCACCGAGTAGGCAATTAACGCCCGCTGGGTCATCAGCGCATCAAACGCGGTAAATTTACCGTACTGGAACAGCGAAACGGTCAACAGACGGAAAGAATGCCCAACGCAACCGCACTGGGCGGCGCCAACAGGAAACAGACGCAACCCCCAGTCCATCAAACGGTTGTATTCATCATGATTGCCACTGGCAAAACTTTTGACAGCGACGGCAGCAAAATGGTGCGGCAGCCGCCCACGCACCGGACGGAAACTCCATTAAGCGGTCGGCGTAATACGTCCAACACGAACCGGAAGCAGAAACCAGGCAAAAATGGTGTTGATGATTAAGGAGATGTCGCTCACAGAGACGCCAAGGATCGCCGGTCCATCTGTTTCACCACGCGCATGCTACAGGCATCGTGGCTTCAGCTTAAGCTTCTGCAGGTGCTGAGTCGAGGTACTTTCCTGGAGGGTTTAACCTTAGCAAATGGTTATG $$%$$$&%%%&'&&$$&'&%$$%&''/0<>?ACA>>;@FP{{{CCBADF544473211,**,*)))+78>@9:BC{HGBB@EE<;;**))136899999:;679877)((()>?9::788))(*//.-(((%%&),,/0./1?GKFHFH[I{FH{LD))))96556<::<:=<:200/'''..3.0,---,,-*++//88F{{HMF{{IH{{IHA{A>661-,-183.455/////>D@@@?FIIMO{GBBBM{AA@?///0101:<<<()=>ADDB@;IF:9688)(('''/@**')'))))656C{GDH<=;:0---/46/---@@BBDCEE;7,5,/2112=ABCF@?;:99=BBB7797''''0%$%)242011;9<9--,./*'((-*)%%&(((',20..,---6,++,338++**3./>6556?EJDCBL{`{IGEDECGDG;:/))))(&%$''%'''*&'**)))+-;@BGL{FM{{M{{IKFEA71)*,-,+*)%%&(-,/01=>BCBIHGUJABC;;;;I{97664{2/))*+**-=>5545;)''''''&'''(&*-'.-.3<=<,)))0?C@@A7767:68421/.0,&'')((''**/3678:CCDEGIHE8888GEJGF{KJF@ABCECDECACAB4444MIGP{{L{U{DCJDIJGCA;744359111169;F8F=555410))++,)),-,&''11011?EHGIIELFBABA7++**51>;63/00/-&%%$%%@CCJFFK2223GC;99:D??CGECLJI{MEGIG{JEIMMHOEA<<<>BBCBCHHFPTLIFI<;;:66275443469B=@>8779:1789:88<==>)))&))))(()++(+,133/;.-&$''),569;@AJF?>GFKHJFCEGFEG{MIH{B;;<>>EDDDDIHLIH{HKIFGDM??><9)))?EDFG{CJFEBD=899:''''(%$%%%$$$$'1*('')*;22;;<;;923666CDECB@@<==:/..--'')''%%&'&%&'.'&'())+(2)*-)(*,,/032-+**24355><44113/.-,(((((''(()%%%&----.6H{A<<<<@;<9:1000.../-',((),/5.004,79:EACE{EAADHHIII{6543433/('%%2.*-+--.&&((-,*+++-&%&$'''',3433374<;7668?<66444.../02533440,.*((& rl:i:0 +e353ee52-5532-46db-b95a-075c29716104 0 pOpen_V3_amplified 1 60 85S39M2D20M1D161M1D90M1D7M1D35M3D211M2S * 0 0 ATGTACCTCGTTCGTTACGTATTGCTAAGGTTAAACCCTCCAGGAAAGTACCTCTGATCAGCACCTGGATATGCATATCTATATGGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTCAATAGTGTATGCGGCGACCCGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTAATCTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACAGAAAGCTAAATCTCATACTCTTCCTTTTTCAATATTATTGAAGCATTCTGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTTCAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTAAGGTGGAGTGGGAGTAGTCTTCGCCATCGCTACTAAAAGCCAGATAACAGTATGCGTATTTGCGCGCTGTA %%&&$#$%%(*+.((*///0311///,.BDJ{IQIVHIF3337<>>?BEFDKA==>C0..-336:=<=655654,+,-899799:A<433.,*))''(''''1-'';<=EBBBFBBBEDEGHIGI{{CBAAEIFFQ{JGBCABDCB==??2@AJHFHFD;;;(''(,,,,-B>443*,,.000LG>==DC;824//8:=?>=@:45JEFI{A?=;556AI{K{GGGOGGIMF{{{FFP8888{MJ{E8889,,,,200=>CEH@@@=*(():6BH4333EILKGPJJHM5555FDFGHLF{{KIGICDDFJ{EEFBD?@@J{GHGFILKJGGJGG000.0*)).4.''$##&$$$(32333:A=8=@FEIKL{EFDIFDHPFH{P<9/'((''6;:;632.,,-89;?AADDFHMMGJHDB?@@BGGHEJ?6((((LHHFFJJ.../,,,,B@::;:A{JHEA?>=A@CB{D::::AD{LKBBB<=<==;<;764...987410007:;9::>@=;854)(((+&&'(2123F=00//58AB65---++**.7>DDEAA?9878<=?89CB??>?DCB::12<CA<;;=KLOZ{FKFD;?EHH{CGEGE6553)''&&%$$%&'.-,''&'')*((*6;>@ACAC?@CECB@32222:;@CCCD@??=@AA@@<:<++++++++>>JIECA??>@CADEFJF?@CACCCFHJ{?=>MHDG<<7788CDDFFCEAE@776466&%%2('(*)&0.)(''+56AC@AA>AF<==>>=32+)*5547==><<<=>=<BBFIEAA@BCA@9779FD>>FBEC?>>?BIEDDD3333777?<856:8>>>>444::21&'$$%%&,8;;8<;<:544476+**'('*-77102**++064235)))*8;388>@>B<888;:;::<;<=<;433&&&&''(),++&&%$%&)*89:8877;<<=,+++++659888:;=2///:883333)&&&&&&'333,-))5:98::;<<=??><<;:;:=<>@><;8778;>?990../1-,,+-0*'&&(&$##$%(+6:996678:7=>>>==;;;;=;9879?:>?<;:'%%$ NM:i:25 ms:i:745 AS:i:742 nn:i:0 tp:A:P cm:i:50 s1:i:318 s2:i:0 de:f:0.0490 rl:i:0 +e10f8c1a-cb75-40b8-a849-13d2ff024b3a 0 pOpen_V3_amplified 1 60 95S61M1D13M2D19M1I201M2S * 0 0 ATGTTAACCCTTACTTCGTTCAGTTACGTATTGCTAAGGTTAAACCCTCCAGGAAAGTACCTCTGATCAGCACCTCGTATGCGGATGCGCATGCCGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCAGTTGCTCTTACCACGTCGATAAGGGATAATACCCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTTTCAAGGATCTTACCGCTGCCGAGATCAAGTTCGATGTAACCCACTGGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAAGA '((')%####$&%(&''*3554349;<=?EEDFCFAAG{E{EJ{FB@@77@BGFL{R{EGHEGHDEDBD{F{IIKJFGC@??FKHILKE{LKHLC<75:42215799HFI{SHCAC=/...9:5111498::76...///5555;::>>;;;4441&&&))-/)))%%'(*)*('())'''(5;333+****;?ACD{B@9678422/12.*=EIMFEGL@@?2111<<9>CJA>==>?>-+++:;755:?=<0.-.00241011)))'&)0,,++-144,*)*<:9:>>?B221&%%'99=@?<<<<=<<;<<<=;A?===:;::DI@?<6535*' NM:i:13 ms:i:515 AS:i:514 nn:i:0 tp:A:P cm:i:32 s1:i:202 s2:i:0 de:f:0.0404 rl:i:0 +ec8167f4-6448-480d-8810-3cee76950c52 16 pOpen_V3_amplified 1258 60 8S7M2D7M1D37M1I76M1D56M1D44M1I10M1I4M1D44M1I60M2I4M1I11M2D256M1D3M3D73M1I3M1I18M1I25M1D22M1D6M1I149M1I143M2I2M4D22M1D9M2D45M1D8M1D21M1D36M91S * 0 0 TTATCTTCGGCCTCTATGGGTCTCAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAAGCTCGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCGTCCCCCCGACCAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCACAGGACTATAAAGATACCAGGCGTTTCCCCCTGGAAGCTCCCTCCGCGCGCTCTCCCTGTCCGACCCTGCCGCTTACCGGATACCCGTCCCGCTTTCTCCCTTCCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGCGTGCCGCTTTCGCCTCGAAGCTGGTCTGTGCACCAACCCCCCGTTCAGCCCTACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTATCTAAATAAACCACCGCTGGTAGCCGTGGTTTTTTTGTTTGCAAGCAGCAGATTACACGCAGAAAAAAAGGATCTCCAACAAAGGCCTACGATTAGCAGACAATGATCCTTTGATCTTTTCTACGGGTCTGACGCTCAGTGGAACGAAACTCTACGTTAAGGGATTTTGGTTATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTGCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAGGGTCAGCCAGCCGGAAGGGCCGAGCCAGAAGTGGCTGGAGCTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAGCTATAGAAGTAGTTTGCCAGTTAATAGTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGCTTCAGCTTAAGCTTGTGCAGGTGCTGATCAGAGGTACTTTCCTGGAGGGTTTAACCTTAGCAATACGTAACTGAACGAAGCACACGCAC $$&'*'&'-01/(&(,+,-*(&&'6888=AIEA@@DA@>=>>ACB?>?>ACC>;:.-,*..('&&,11?>==A@@@C{{F?>>=>B@7;:77788989A7776>@@EED@A>>>=?@@B=559>GA:7667931-,,,012,)))'''(A@A>>B=>685762311221224433455;;?CF@//('%%''&')'.'&'(''*&()./,())%'.//215102>?@B<<;;@=31133+**+*.+**++*&),-*()*3,099=>,=--437@@AA@>?@;=8899=8898;:;@>33213+'%%$$$$#$)()*(*-(((*20/2&&&'')--3++++347@B;;94555@8(&'&011@>>==>==<=>53149::::9<;:;;;:5343257<;::9;<<===<;<<>:89;;B??A@AABA@A@>>>?>==93:;?=;998:=?BABA?>?B<:;;=>?=>=>BBB@ABACC?===?A@AE4333CBB@>><<889:?75--,.+*(('%%$$&+4:;;>?=;:99:<:8410.''/-..4;;:::AABB@B?>@@@??>41''2//&'&',)()%$$'%%&*))&,'''+111.--,((&&''21347<<=877553362345@A<;;;@@@ABBB?>>ADEB=B9C@A====*)))));70((('((*-*()%%%'2(''':>?@30//035>A?876588;::9:;;974445963/..,+*-'&&&,,657++1555122...,,-.4++.+--.@@>?;9701117::8+*))*56::BEGC@,***=>?HD323240//-)))/777>{{A?@??@BDCA?>?;:0++()1-.--...+,..35677988?@954441)(('&$$%))112111020///A@:995555AC@<;;;=>>6667;<=IJ>=2222?=<<<==>>>>>?>><<=196675432234566566?GH?>>BAABAFCA@>>>>?==<=8889DB=>>>?<<@=987/'''*,-,*)))99:::D6B=215A>@8866'&&*'&'((),,+)'%)()('&&'(,66559;>=??B====<<=A@@<:8423-2'&&&$$%%)(*)'',(*)')'$$$$%&&&**17??<::96776666@@ABA7667:?BF><<==?DBEFL{MNFEFFG{GD>=32449>;;;@CB55.'()62234BBFFEBA@A<<=??:::;OMSC=HGIKIJ??=A:44:5:544797//DDLHF{DFD>?LHEEPG7767;?;;210///8:4433013*(,))56>?ABBC?ED<:/./0//00004--+++,@AGEDCABCFIEFKEEEC8.*.,,-*****1.**BBFCCC))))BF{HFIFG>=>>CM{DHIIFXJ{{K{FG=<<7888DEH{K{{@@@:8889::IG;;222535566=DBAGFBFFIEEBCB??FCIGMKJNKL{PHGJFOG{PFJJLEHGF{P{H{{GJKJGJ{KIGG{{HGFC?=)))=2EGHHIJKJJGCB?AA{5555?=CIHNF{H{GFLGGL{JGG{KF{GD@H{GGD5?:2'''++++/--2>CHFGLBBBCD@>)*34:ABFHHFFJ{{{GPIGF{{O@@?@JIF5+(0-544444;<98/.++-()2.//2:CEF>=<>5552233?HHEELC>?@BC{KFHCFA+)))),('''''&%%%'/;=>CCK{JG{GIH?766448FLEGEFEFDI{EFDFINFH{{DEEFMCDDE>>?CC@>=;9''',2(++,(((+:=7666=CCKEHEBCBAB?DEEGJFFJEGGII{NNEJII{OJJF{N{{G>>>DDDCDCBHGH{H{BA::FHFI{U{GHHKEGCEBD{{{C?@??BCFEHED::92*))-0**))()--=;745837;=;;;=BDEHIGHEGJGEJHIGHDHJILM{HJ{{FPNHHE{HPOFGDBCCHIH{JNHPJ{HJGM{GDBGED@667--,.))*,8788:{FC<==>>>>>A=;=<=>::::<=<6444;>?=5;4.('&''&%%&&$$&%&&&%)19=9<<<=<:;9:<9888;=<>CCB><666))))32278;BB====?@??@>AAAAA@=;9;;>@=@==<<8/../133{{{{:5555=?B<9::>*((24??=<<===>;:9:;:;;;;;3323655579:=889>3336943468998(''9:)''))+))(**,,-11121<422261123421,,,+00163448574->?AB@A@?=<<::;;@?;987/-*('(----688:8999<@@=@?>><=?BB?>>8<72-,,.888===:<3333@@@AABBAABBD=::9=33323>;=:9;<=??ACC@<:;;>?<;;<=???@?>@??.../EA><:>7679:3*('''(,+&&'&+)))+((((122283/*+;<889>;<<=>===@==989:==>>?@?B@;99...18:8////*(((,.,,,0..-%&'A??=54,,3422220/.+)()&$$%%(,.9;;;:<=?:32156A6ABCEBACC>>?7667966)(((/+{/,,,,-13-)32.1248<=;9811/022+++785110/4/)((()('&()'''464+++./543'&&)$%%&&&&466:::2223==>FEBDFDEDBBDCCFGIED????DA@@@@B95/,*%%&'+,340.-.1>9>;;<=9999{194%%$$('%%(*7@8450111)0004=>===?C<557><9;:7A??@@E><=>BEB@@@ABDCEABDA@668,?@BBEGCDDE{GMKF><640+))***--<<=>>DE=>=>==;44.,,-8+++CG==D{GC=>32223>?B?@A:999ACEHEF:99,,,3E;;::7664.{%%%+,--=?FHJO{{HJ{G{MJHHIHJKQ{@@A555&&%$$%+**+-)((454001889?K{PH{XEJ?78JDA>BCHIJJIL{{FMG{GGHH{{{II{KJH??=)'&')'%%%),-*/AA@@AACIKFGJEFF=>??{N;;:@555+4,/77?BA:=;7541FHIJKIHGGFKMI{JM{KNVNHKF{{KHMLNFJH{{IIO{{K<;5;GCFE>::A@EJGINFJ{GFE@>=>CBBDEEAAD<<7:..044BH===<9555++++>-,8-*((+*())+,+-.,;@@:>322349;???=:864667B888ABA850'&&&'1??::?>9648=@888:E>=E{U{HHJJGIZ{OJGKE;:::IGN{JJIFL{{{EEDFFBBAAC>?@@{JIYFI{IG{KGI{F{GA>>@8888;99<;;E{DFEABDCAA?><<;>=>?>>?>=CDGEEDMFJ{HFIMUPKIFDIGF31/16:6?7777DAAABBCC;;>=@@AKCBDGEEFEC9888BD89AHOC8950////2256=;5:991333@>=>DCEDE::99BAEFDB;91/+)))>A@??,+*,-,,-2..-.3440.0561666666B;;67-,&%%'''+):A=@@@AC85,-.AFBCCAAA9110)& NM:i:72 ms:i:4559 AS:i:4540 nn:i:0 tp:A:P cm:i:349 s1:i:2096 s2:i:0 de:f:0.0222 rl:i:0 +28562250-774a-4697-a941-7f52276d1fdd 0 pOpen_V3_amplified 1 60 81S6M1D58M2D45M2I3M1D24M1I92M1D40M2I14M2I62M1D29M1I13M1I8M4D17M1I39M1D6M1I8M2D35M1I43M1I6M1D57M1D19M2D1M1D166M1D49M291S * 0 0 TATGTTAACCTGCTTCGTTCAGTTACGTATTGCTAAGGTTAAACCCTCCAGGAAAGTACCTCTGATCAGCACCTGCATGCCGCTTTTTGTGACTGGTGAGTACTCAACAGAATCATACTGAGAATAGTGTATGCGGCGACCGAGTCTATTGCCCGGCGTCAATACGGGATAATACCACGCCACATAGCAGAAAACTTAAAAGTGCTCATCATTGGAAAACCGTTCTTCGGGGCGAAAACCTTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACGGGAAATTACAAAATGCCGCAAGAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATGAAGCATTTATCAGGGTTATTGTTGCATAGAGCGGTTACGTATTTTGAATGTCGAAAAATAAACGAATGCGGGTTTCACCGCACCTGCACCAGTCAGTAAAACGACGGCGGTTGATTGAAAGCCAGCCAGGAGGCTTTTGATTTGGTTGAGGTGAAGTGGGGAGTAGTCTTCGCCATCGCTACTAAAAGCCAGACAACGGTATTGCATATTGCGCGCTGATTTTTGCGGTATAAGAATATATACTGATATGTATACCCGAAGTATGGAAAAAGAGGTATGCTATGGAGCGTATTACAGTGACAGTTGACAGCGACAGCTATCAGTTGCTCAAGGCATATATGATGTCAATATCTCCGGTCTGGTAAGCACAACCATGCAGAATGAAGCCCGTCGTCTGCGTGCCGAACGCTGGAAAGCGGAAAATCAGGAAGGGATGGCTGGGGTCACCCAGTTATTGAAATGAACGGCTCTTTTGCTGACGAGAACAGGGGCTGGTGAAAGTAAAAGATGCTGAAGATGAGTTGGGTGCACGAGTGGGTTACATCGAACTAGATATCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGCGCGGTATTATCCGTATTGACGCCGGGCAAGGCGACTCGGTCGCCGCATACACTATTCTGGGATGAACTGGTTGAGTACTCACCAGTCACAAAAAGCGGGCGTGCGGTGCTGATCAGAGGTATTTTCCTGGAGGGTTTAACCTAACAATGGTGG &''**)&$##$$'**-256;>==<7666<>B<33212))1)AA<32138533+))*7777<=C5556ECDFIK{J{EIFG?>{{LKLFFOIFFFA>=>=-+**--)'')''13%$$$)*@>AAA@AGH{J{{J{=<<<30+*)(&&&%(:=?>?FEF^JRNIG533'''(;;>..'&'&(-+*,2690@I{P{H{{B@A>@AAFGIFNE6-*+2112/101:7(''//-&%&'(,-0<=>@@>F{G{GJ{THDDEFCGEE{IDBAA;853556222'''(=76>31BAANJ{{{JDFMHLHF=AD;;<=X?DGJHHSCDFBDD---/----9:=>87&%%%&%&%%%&(5:;7877=;3-*)&'''%(&('(,,-483220348CE{HPLHEEFGB@=)(19;;:;<+9;@@8(''()))()'%'''&.0',,+{<;>B{FIJPHGN{{{JIHILFGDKBA((((HH{J{HGMGECFEBCCNKEJECA><>?C<<ABEHHGCCCE@@@IIDBKFE{{NPGR{{KJDIN{M{M{H{QNGOIN{LMK{IJK{QN{GJX{{{GJDEECKEBAC;68642/01444.21.,)')*()))''+-7779999854(((),,+-+&&'(''&&)%#$%())*-,,-./78<::9;667CD80--*-('%%''(5;?:88..../43/,++32630//45626334/00&$$$%%$$$## NM:i:70 ms:i:1304 AS:i:1296 nn:i:0 tp:A:P cm:i:87 s1:i:579 s2:i:0 de:f:0.0707 SA:Z:pOpen_V3_amplified,1,-,57S244M4D924S,60,12; rl:i:0 +28562250-774a-4697-a941-7f52276d1fdd 2064 pOpen_V3_amplified 1 60 57H6M1D28M1D32M1D18M1D160M924H * 0 0 GCTTTTTGTGACTGGTGAGTACTCAACCAGTTCATCCCAGAATAGTGTATGCGGCGACCGAGTCGCCTTGCCCGGCGTCAATACGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGATATCTAGTTCGATGTAACCCACTCGTGCACCCAACTCATCTTCAGCATCTTTTACTTTCACCAGC 08DC766;9::<87/.-,,-*))(%$#%)&&''('&&+-+,,)(((4589999777-+'')))(*)'),.12.44410/24686;CABEKCEEDJG{{{XJG{NQ{KJI{KML{NIOGNQ{H{M{M{NIDJK{{RGPN{{EFKBDII@@@ECCCGHHEBA>=<=?D{{JG{{{{{H====E<<<>ACEJEKNCCBEFCEGMGH{J{HH((((ABKDGFLIHIJ{{{NGHPJIF{B>>=:: NM:i:12 ms:i:416 AS:i:416 nn:i:0 tp:A:P cm:i:24 s1:i:171 s2:i:0 de:f:0.0484 SA:Z:pOpen_V3_amplified,1,+,81S853M6D291S,60,70; rl:i:0 +17455a4d-f6d8-48ec-9bbb-6e9db765c287 0 pOpen_V3_amplified 1 60 92S76M1D20M2D61M1D1M1D35M1I48M1D2M1D8M1D8M1I136M4D91M1I21M1I1M1I56M3D6M3I19M1I163M2I175M1I95M1I28M3D71M3D2M1D62M1D72M5D164M3I1M1D43M1D116M1I163M1D3M1D2M1D28M1I33M1I103M1I114M1I299M1D1M1D57M1I63M71S * 0 0 GTGTCCTGTTACTTGGTTCAGTTACGTATTGCACCATTAAACCCTCCAGGAAAGTACCTCTGATCAGCACCTATGCCTATGCCGATGCCCATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCACATAAATACGGGATAATGCCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTACTTCGGGGCAAAAACTTAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAAACTTACACGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCACGTTCTCGGTAGCAAAAACCAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTAAATATATAAAAATAAACAAATTGGGGTTCCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGGTTCAGGTGGAGTGGGAGGATGATCTTCGCCATCGCTACTAAAAGCCAGATAACAGTATGCGTATTTGCGCGCTGATTTCGGTGTAGAAAGAATATCCTCTGGTATGTTATACCCGAAGTATGTCAAAAAGAGGTATGCTATGAAGCAGCGTATTACAGTGACAGTTGACAGCGACAGCTATCAGTTGCTCAAGGCATATATGATGTCAATATCTCCGGTCTGGTAAGCACAACCATGCAGAATGAAGCCCGTCGTCTGCGTGCCGGACGCTTTGGAAAGCGGAAAATCAGGAAGGGATGGCTGAGGTCGCCCGGTTTATTGAAATGAACGGCTCTTTTGCTGACGAGAACAGGGGCTGGTGAAATGCAGTTTAAGGTTTACACCTATAAAAGAGAGAGCCGTTATCGTCTGTTTGTGGATGTACAGAGTGATATTATTGACACGCCCGGGGCGACGGATGGTGATCCCCCTGGCCAGTGCACGTCTGCTGTCAGATAAAGTCTCCCGTGAACTTTACCCGGTGGTGCATATCGGGGATGAAAGACTGGCGCATGATGACCACCGATATGGCCGTGCCGGTCTCCGTTATCGGGGAAGAAGTGGCTGATCTCAGCCACCGCGAAAATGACATCAAAAACGCCATCCAATATTCTGGGGAATATAAATGTCAGGCTCCCTTATACACAGGCGATGTTGAAGACCACGCTAGGTGTCAATAGTCGGAGCCGCTGAGCAATGACTGGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGTTCCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAAACTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGACTGCTATAAAGATACCAGGCGTTTCCCCTTGGAGACTCCCTCGTGCCGCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCATTCAGCTCCAAGCTGGGCTGTGTGCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACGAGTCTGAAGTGGTAGCCTAACTACGGCTACACTTCGAAGAACAGTATTTGGTATCTGCGCTCTGCTAGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTCTTCATACGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACACACAAGAAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTGAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACAATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAGCCGCGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCGAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTCAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGTAGCCTTGCCCTTCCCTTAAGGTGCTGATCAGAGGTACTTTCCTGGAGGGTTTAACCTTAGCAATACGTAG #%$&'&())+&&&+&**+753340.))**,'%%%&&'''(-3331689?BEYEDIFHGSJJEHGFCHIEJF{GNEDFFEG{E42128;>=543.-.,&'%%%%%(((((1+45566421/:::9:555544731155I{JIGH{G5445>;;CGFLIMF39(('))(''*)'&'))(**&,-110..0/:<=@@@@A@@A==5''2'')'&(,+''(()(&'%$%'---.982:::4334CEB@@A<<;:332('',@95554))95%=-***)'',,5:?@2**DGROKIEIFI{FGCBA=77GEHFAHHDDEFCDI{A@A7788OGRJKI{BAAB{H\999---->A>>>=?=N@999<@A455479;778A==>=>EJNHNI{RMH?>>>E8778877742//*'((&(*/,{1/-,2862../0*))(&$%)/..,.,-;>CB?@@AIMEEEE>>>>IJJ{R>>>;<<<555,,,,.67G{A@@AC@AAB<<=BA@>?BA76665''%')''(,(+)''&(.-.)&'()*///101>FJHI{QII{EL]{JHJENLM{OFJIG{F{{{JHN{{GJSJJ{GDC@.-4&%&/--*)))*)))+,-&&&$$&)..././*,,-951>@EKGC@?=<=>M{{EKIG{HF9999==>{K;:;=>AI{E::;H{{EFMP{EHEHFNIKHH{{{{EJ;;:9;401-,**.@DBA@?BB@--+++*+++((()-')((&%%%+*-/154556?865654447ABC@CA@<;;:;::;C<;;<@B@BECCDHEDEGFDDCB@?CA>>@A@?>64)))48:86:=>ABBDFHTEGDCFCBBBEEGHFBBBDFEICGEECH{{{LGGDCEBF999:DE>=7699?@B@?<;;<>BDFIDEGH<<;777:{/..-*34;<''''5*'(+)---+..3<<<=@DFEADDDE>>>?DEGBBC?ABBIFJIMG{CE?;???DED@@>?A6;?6<<;=<::88(''(---,***.1'((8??A>?ABFFFC???CBIEHF////++,888;?BAB?@ABEB/...??>;B@?<<<@A9999B<??HJFFFEHDFFEEHC>?BCGD@?@?CBABFEBAFDDFDCC:99&&(.99)(((+**+555>:::BD3338CEKGEDDDGNHBCACFFY{:001:78266.-.?@@BCH@?:0//+',*+*-01====<;<900/.-/1433{1,-.+,<>@@@ABA=<=={:{873)))()),+(%%'46?@CC==>@A56///8>=CDCEFGGJCEGMQ2H<;;?@@BCCBGFEHJFK=555100..,',.000=>?BB?BCDCBB@**)*0((..-,)*$$$%$&$$$$$$$343/-,@@A2,-9450012;<((((/+'-3**.>77787>>?AICHLHFHHEUCDCDMDAA?GD<<;??=<837>>JHJ?;<9;=4333:;G@???HLEAC{FLHGGJMI{HJBA@BA@C75212277788@H;;==6'''',(''+21,+,1(()4B==?{B===879?<;867611CJFECCDGQJEGOJFLJEHGJ{IHIIEHEEGBCCIGDDCCCEIGHKZII@BBCDFIDCDBFF?D?///0??>EH998998887912DCDCC{HCCD:88/**1=@ABBD;:86('*-::99=<;4$$$)%%&&*,.245((((2''')'')*4=EA...,++++7<<667222983:<>?;88833339841**)*-EHNHFIIEH{{DHDCFI{{IDDA>7@==>==>EA---%$$$'&'::<>?EECFECEHJHNRHDFOH@=;5:;=CEBA@B@JFHIFHIIHKDC@?AA{803*((35;DD{{DBA?<6698;<87:=<>>>>>===DCGJKJ{DFDEOKCD@A?BBAC?;64111-+,.2110''''6BFF{@??FGJEBCFFELEDJLPFDGBB?><***))(()*/./1011DG?==A?@A>@GIHD????A=BB@5**)*6>=?../???BABFGFHHH===;==>>C{{<100157@>A?=>DEHHDOGDCCGJIEBDGQ{FDCC0000EHDEGG{AA@@@@:798*))&*((&&',5*&&'23678BADNCCH{KJ=55GH<:;7445=>>JECEE;:::?<))))95534:32/3:4687;6666BCCCB<<?;>677/3/,+(&&%'%',,*&&'&'''7::F?DA@>ECBFFDOHPIEDA@>=>=?>;9.)+1.+-.79655'''24;>@@C=<<<>6788A@>8885555A=;<>=>---.877<<>;:::E@DF988<=>==-1+)+0,%&)17;64)(&$ NM:i:100 ms:i:4390 AS:i:4370 nn:i:0 tp:A:P cm:i:318 s1:i:1996 s2:i:0 de:f:0.0326 rl:i:0 +7f9007b3-33ec-4fd7-9de2-a0d62395d54b 16 pOpen_V3_amplified 1 60 9S29M1D57M1D292M1I3M2D2M2D41M1I181M2D8M1D245M1D79M2I4M1I2M2D52M1I72M2I74M1D20M1D208M2D2M1I77M1I13M1D11M1I127M1D104M3D154M1D5M1I263M3I2M1D31M1D4M1D31M2I3M1D28M1I161M1D70M92S * 0 0 ATGCATGCCGCTTTTCTGTGACTGGTGAGTACTCAACCAGTCATTCTGAGAATAGTGTATGCGGTGACTGAGTTGCTCTTGCCCGGCGTCAATACGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAAATCCAGTTCGATGTAACCCACTCGTGCACCCAACTCATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTGATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAAGCATATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCTGCGCACCTGCACCAGTCAGTAAAATCATGGCCAACAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTTGGTTCAGGTGGAGTGGGAGTAGTCTTCGCCATCGCTACTAAAAGCCAGATAACAGTATGCGTATTTGCGCGCTGATTTTTGCGGTATAAGAATATATACGAATATGTATACCCAGTATGTCAAAAGAGGTATGCTATGAAGCAGCGTATTACAGTGACAGTTGACAGCGACAGCTATCAGTTGCTCAAGGCATATATGATGTCAATATCTCCGGTCTGGTAAGCACAACCATGCAGAATGAAGCCCGTCGTCTGCGTGCCGAACGCTGGAAAGCGGAAAATCAGGAAGGGATGGCTGAGGTCGCCCGGTTTATTGAAATGAACGGCTCTTTTGCTGACGAGAACAGGGGCTGGTGAAATGCAGTTTAGGTTTACACCTATAAAAGAGAGAGCCGTTATCGTCTGTTTGTGGATGTACAGAGTGATATTATTGACACGCCCGGGCGCAACGGAATTTATCCCCCTGGCCAGTGCACGTCTGCTGTCAGATAAAGTTCCCCGCGGAGTCTCACCCGGTGGTGCATATCGGGGATGAAAGCTGGCGCATGATGACCACCGATATGGCCAGTGTGCCGGTCCCCTCGTTATCGGGGAAGAAGTGGCTGATCTCAGCCACCGCGAAAATGACATCAAAAACGCCATTAACCTGATGTTCTGGGAATATAAATGTCAGAATCCTTATACACAGGCGATGTTGAAGACCACGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATAACTAGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTCTCCAGAAGAATGCCATTAATTTCCATAATAACTTTACCCTGACGAGCATCACAAAAATCGACGCTCAAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCGAGGCGTTTCCCCCGGAGTTTCCCTCCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTCGTTCGCTCCAGCTGGGCCGCGTGCACTGACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCTACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTACCTTCGGAAAAAGAGTTGGTAGCTTTGATCCCGGCAAACAAACCACCGCTGGTAGCGGTGGTTTTTTTGTTTGCAAGCAGCAGATTACACGCAGAAAAAAAGGATCTCAAGAAGGCCTACTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGGTCTGACGCTCAGTGGAACGAAAACTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATGAGTAAACTTGGTGTCCTACAGTTACCAATGCTTAATCAGTGAGGCACCATCTAGCGATCTGTCTATTTCGTTCATCCATAATTCGGCCCACTCCCCGTCGTGTAGATAACTACGATAATGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGCTTCAGCTTAAGCTTGTGCAGGTGCTGATCAGAGGTACTTTCCTGGAGGGTTTAACCTTAGCAATACGTAACTGAACGAGTAGCAGGACAT %&07:;=DDICCD@@AG{LBDEEJIDFDGHCCDDNG))79545655..00;:;CBCFHAB@,,,,,)))+/...AB<;;<:5325821118755)9:;:<<=@?A{*&&&2233>>AFILGMIFDD>33489<=>F?><>FICND{@@?GH[DCHFFG{FKGPD@43/045,5374444ACBA?C>9*'+**&''(7:;98;))))MHEHFKHL77@{FGBGGKNIFGCEFIGG{HFEL8;CHJFHFGLFHDJG5D9BNIONH{OLGFCCDHK{{LGLEC=<''644.-(()768@@>@@ABA==?@6569<21211888=6667MCE{SMQI{IQECBA,+++*)))),.+..)*,0{73448:?@?EHLJDFEHH{{J{BA???@<22----59***-:>={C:::
<<==?>?0*&&%'''''*+++*..+*+->??CCJG?>@@B;;>?FCBBC{K7779CFEGGL{JIK{IKGOHIJC==<>BBDCJIH=<;9000>CGFCBB>?1,++664222556>85:><6664.+*''''039>@@ABADFIFDIJMCACG?CFU?666>=@CC@?AAAB?@?E{{IKSJH{QJEF?DGRJDDCA<@ANGJLKG{??=???<<>=A>==?9999=;:56000///040//=:884333/,+,-+,*+0.-&&'&&&'&''''((--+**/0>>@CLF{{FOFGEGD{{{{{LGEEDKHDD;:977@,+++{{LKPECCFGHGCQECE7556;;:9::;<<<<;6668<7>8*))0/0--00,,,+,.237888CEEG{{HHFIJ{G{{GHFH@EEDFDBDBCA55559::DCC8888DBDBDJIAB?><88721-./*(%&)+,.267IIGDVM@CBAD;....10116==>>@==?@{IFJHII[KEJJ{{FH{N{KMPJJI{G{JKDAABCA4:0//:0HDBAA=986/../=?>?@{I{GF{EEBCKJB>:9----===764*))///.322.-,-??@E{HFFHNJKGH@>>>:8866:;G?AL^JKEDNJGZB@9646389=:@BJD@??BEEFG{M{{B;;;;=<>=?ACC@@AAGBJ?><4687779<>DHG{GJIGH{{B{{COLIIOH{KGIE=;987554../0689''''556BE7777C@AHDC:100*&&&&))))))+.%(-155AJF{GECCBBDAB978865+++,C=<88>@:>=:72375:::<;<>?CBA@?><-/.,+)&$$$&),74456@EB+)(+*+++/7;>430123{DHJ{FCB@<=7,,,-.)*-2=BBGRMJIG{FC6644...015..+--/1:AD9996=A?AAABBCMEK{CND@??@HGH@@??D@AABBCCBA@@A@BGK>>>>E{MHHGHEGI?=<<64**)122-1,,**,,55.'&&%&/6;?334.../B>=9:>@@+6.,---212013261222FFAC@47-+)''')'&'*+?@FML><;9=<<<;:77?@@:::;KQFG=:::@=89:=GCA@D)))).-75574567PGJIAABCHG:::9/.1,4@ADJMJ{PK{GGJLNKMEHJLMI;;;<<:90++,-/-(,.')))366:=4444767<548D=;;=FSE{NI{{{CCEEBB@{??@?EDHCBCD=?{{{==789;=?=650(&%('((&&*+....DECEEG{H{H???>BD9788;665..,+,,++,:55;;::;A@;;=8888?/0/*))'(),)&&&'-.AH:9----{FIKEFB@2223A@?('''(+++7?C<;;>>ADD998?@?EDFFD==<=CADA7701&&&')'''::=====BAB>=>@E{F{{{JF{LFGOLKMHJHEEJG{FJD:9BIJ<1119OEEPI{FDEC54446;<>?>@GMQEGGHHEFHAA@==GHGGGFDGIGGHIO{B@@BCEG{GSFHE445DBEEC@>?DDC>=@K{JJF{{FEBAADG{JKGIEFCDEAAB@K@00.044DGHM{{GJIFECDI{22200./.***,1''%%'&$$$$$$$&&'% NM:i:85 ms:i:4458 AS:i:4448 nn:i:0 tp:A:P cm:i:346 s1:i:2106 s2:i:0 de:f:0.0293 rl:i:0 +bdc37787-3a82-47ef-9537-a6c5a4e70223 0 pOpen_V3_amplified 1 60 93S75M2D69M2D14M1I69M1D104M2D90M1D67M1D88M1D691M1D77M1D68M1I48M1D338M4I45M1I92M1I24M1D57M1I313M1I55M1D4M1I4M2D20M1I53M71S * 0 0 TATGTTGGTCTACTTGGTTCAGTTACGTATTGCTAAGGTTAAACCCTTCCAGAAGTACCTCTGATCAGCACCTATGCCTATGCCGATGCCCATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACATTCTGGGGCGAAAACTCTACAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTGAGCATTCTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACGCGGAAATGTTGAATACTCATACTCTTCCTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACGAATAGGGATTCGCGCACCTGCACCAGTCAGTAAAACGACGGCCAGTAGTCAAAAGCCTCCGACCGGAGGCTTTTGACTGGTTCAGGTGGAGTGGGAGTAGTCTTCGCCATCGCTACTAAAAGCCAGATAACAGTATGCGTATTTGCGCGCTGATTTTTGCGGTATTGGATCTATACTGATATGTATACCCGAAGTATGTCAAAAAGAGGTATGCTATGAAGCAGCGTATTACAGTGACAGTTGACAGCGACAGCTATCAGTTGCTCAAGGCATATATGATGTCAATATCTACGGTCTGGTAAGCACAACCATGCAGAATGAGACCCGTCGTCTGCGTGCCGAACGCTGGAAAGCGGAAGATGAGGAAGGGATGGCTGAGGTCGCCCGGTTTATTGAAATGAACGGCTCTTTTGCTGACGAGAACAGGGGCTGGTGAAATGCAGTTTAAGGTTTACACCTATAAAAGAGAGAGCCGTTATCGTCTGTTTGTGGATGTACAGAGTGATATTATTGACACGCCCGGGCGACGGATGGTGATCCCCCTGGCCAGTGCACGTCTGCTGTAGGATGAATTCTACCGTGAACTTTACCCGGTGGTGCATATCGGGGATGAAAGCTGGCGCATGATGACCACCGATATGGCCAGTGTGCCGGTCTCCGTTATCGGGGAAGAAGTGGCTGATCTCAGCCACCGCGAAAATGACATCAAAAACGCCTTTAACCTGATGTTCTGGGGAATATAAATGTCAGGCTCCCTTATACACAGGCGATGTTGAAGACCACGCTGAGGTGTCAATCGTCGGAGCCGCTGAGCAATGACTGGCATAACCCCTTGGGGCCTCTAAACGGGTCTTGAGGGTTTTTTCCATGGTCATAGCTGTTTCCTGAGAGCTTGGCAGGTGATGACACACATTAACAAATTTCGTGAGGAGTTCCAGAAGAATGCCATTAATTTCCATAGGCTCCGCCCCCCTGACGAGCATCACAAAAATCGACGCTCAGAGTCAGAGGTGGCGAAACCCGACAGGACTATAAAGATACCAGGCGTTTCCCCTGGAAGCTCCCTCGTGCGCTCTCCTGTTCCGACCCTGCCGCTTACCGGATACCTGTCCGCCTTTCTCCCTTCGGGAAGCGTGGCGCTTTCTCATAGCTCACGCTGTAGGTATCTCAGTTCGGTGTAGGTGACTCGCTCCAAGCTGGGCTGTGTCCACGAACCCCCCGTTCAGCCCGACCGCTGCGCCTTATCCGGTAACTATCGTCTTGAGTCCAACCCGGTAAGACACGACTTATCGCCACTGGCAGCAGCCACTGGTAACAGGATTAGCAGAGCGAGGTATGTAGGCGGTGCTACAGAGTTCTTGAAGTGGTGGCCTAACTACGGCTACACTTCGAAGAAGAACAGTATTTGGTATCTGCGCTCTGCTGAAGCCAGTTGCCTTTCGGAAAAAGAGTTGGTAGCTCTTAATACGGCAAACAAACCACCGCTGGTAGCGGTGATTTTTTTGTTTGCAAGCAGCAGATTACACGCAGAAAAAAAAGGATCTCAAGAAGGCCTCTATTAGCAACAACGATCCTTTGATCTTTTCTACGGGTGCTGACGCTCAGTGGAACGAAAAATTCACGTTAAGGGATTTTGGTCATGAGATTATCAAAAAGGATCTTCACCTAGATCCTTTTAAATTAAAAATGAAGTTTTAAATCAATCTAAAGTATATATCAGTAAACTTGGTCTGACAGTTACCAATGCTTAATCAGTGAGGCACCTATCTCAGCGATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGAACCACGCTCACCGGCTCCAGATTTATCAGCAATGAACCAGCCAGCGCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCGGTCTTTAATTTGTACGGGAAGCTAGAGTTGGTACGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGTCCGCTTACGCTTGAGCTCAGGTGCTGATCAGAGGTACTTTCCTGGAGGGTTTAACCTTAGCAATACGTAA #&&)('$$%$%%%()&''&'''((',,-/0004333;;9*'(*,251.()',)*&+''',((1167AB??>8888IDHFFIEDCA>>?BEHCGG{HOHWGGHIM{LMJ{J{PMEH>===GLM{{GJIQJKI{CCACEDE{JHP{FIQOH=>=;::7778987+++())'&%&''/.++'''0>?@AAA@=@ACB7767?>GEGJIMGLHH{TOIMJJN8889GFPFGEGGD@.../6'''96,++,.05**&&&'**,))),+03001-,+*)).,+,2389;AACBFDDDDD?H{CBCC{{HKJHIGG999:BB)(();71-01+49:C>AB>==>99970.))*+((((*))//02799@AACCDFJ{EHC97'''*3341.4,'''''(-&&&&'&'5?BBCD9666>877228;<<:=72+((:3DFFE{GB?>8;=>;.*)(''''(((4647,++('$$'--77ADEDGDCBEBD{KFI{K{H{I{EA@ABGGE?>?>+&&&''',,--.-.9EBABCQFKVA878799:BF{JIFDEFFEDMGJCDCCEGG{GSRJGEHDD;99===<<=@9*(((-...---18NAAAA{EHGKGFCDA@@@CDCG{{KJJHJIGG{JGEN{K{HCHE@==?IB@:;=:::;ACC{{GJAHHX{HCE?;332,%%&&'&&'{>==>MHGS{IDEFFE@CCCMIDA@<667KGC:8:20011335<;=AAE{FHEI{ECFBEFEOSJFNGN{IGKPIJFJJEEEGB??@HCBBBDNJJDHHF{{KEBCCA4,,,+.*+%&&&),1@BACCCENGHEHEEDCDD:///-.((((),.*)*)/..../1E@AAADDC@{CBCBEEB=?(((&()+2=??;;=?<8778C====BABDIFFEEHIEGGTHNGFMDDDDDA>@>9-18+++.--++..,.{{035584==;;;8777:8888==@?>=<=?>557DKE{FJHHBCEGNFHJH,,,,,**+****+***:@CFCHFF?FA44AAJFFHJEDDF2225F92,,,,-..,002::;>><>@=IGIIEZHGHHXDDLE=8889;:;ADDDE<<<=HIFECGDBBCDGBBBBCEDJEHL{F:::78A?84,,,-30++//-('&%$$$):;???CBBGFE=>?RMHC@:5357;?4344CDCC@GD>?BDEDGKHF5555?433A@B@@=CAAB?@CGHG{=988:<<8998HC?<87('(-9;<0001,,,-?AAB@@AB@+++A>?6:9345CCFGFEA@4)(.(&&)...,,*/9:>@AC===>?@<@>>=A{{{DD@EDBDBCB:,++*''.--(&&'35>A?AF@ADDCEDBBCA7?93BCBBECA@=@6?>@A@4595555A@76>DDNJ?@4/..100*)'&(+.0017BCFDGEGEHH><7672222;;;=:88=@;;;>7776<=;721:>@77:<1,)*+9=87842,++((%%)--(&(-:9:=UD;-(--+)*+,@CA??@DKA995AA>E42@<:EFGAABCGGCB>::<;3333B:>88@AABA((@@ACBA/--.770.,'&&337<>D@?==?++**('())+&&&(0*)-(((((+(((+6678@BCFFD?@;:;<>BAAA?=>>GB72229.012>@?AIKEFKFEEDFE43334=<>?>=<=>>>664210///1;{7788=9?:>FDFB0++,***>BEIDRHJH::;BEBEG=>>7331446>3>5-?EC{F{TA@<<4404)'%$%%$%%&-=><>>>E,++-<:))))899:79FJFE>95644=?AB:::442.<@@?A{GI?=?=@;5000/.,./5:7:>E@D=>C;BD{{LC{{HHJ{{{{GWB;:;9;::=98334{{4'&&'=HLELFGFNK{NLIKJEG]{{{JFJFJNIG{BAAB{GHGG???>@A751011>>=0///;;>=DHKHJHOGHFICM87768;@?>>@E<==7332555;?>=>333332134492246.+)()-('*+6)))*<=>=00/352-*%%%%%&%&&%&&'389;<=A??DAD{GH{KKJ{FFDHEHGM>777---&&&&-)))*+)(%&&&'8:?B9@76667A=;85/0CLAIFGMMIGO{JGF?;;3336DB<877.,+,2**+50:1*1&&&&07=984+&&'*,%$%#%%%/26HGDFCEEDN{CADAD>=<;<;/.-.00-+++967555E{E666.---FFI=7547'>CFIHIIEDDAD?B@?8.121:;8711'% NM:i:70 ms:i:4545 AS:i:4538 nn:i:0 tp:A:P cm:i:340 s1:i:2096 s2:i:0 de:f:0.0253 rl:i:0 +22782c40-bbbf-46fb-8f5e-44990acc2321 16 pOpen_V3_amplified 1 60 73S210M1D181M1D55M5S * 0 0 CCATTGCCATTGATGCGTCAACCCTCCAAGGAAGGTACCTCTGATCAGCACCTGTGGTAGTGGGAGTGGCAGTGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGATATCTAGTTCGATGTAACCCACTCGTGATCGTACCGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGCGAAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGAAATGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACAGATCGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCACAAAG $%%$$$''%$#$#$'%%%%(+,-1+&&/6-)'&(++/00/./6===@@BBCCC{EDEBCHF{TNIIKGF==>>EGHB;;=@CBBDF@@@@JDBD@BBBFB889A=??DHK{C>?>GFFEFBABAAAAACBB8889>888:;=AAC4222677BCBE<<<7@<<;6662///{>=<>BCEDEEID=>=?IA===:99899;=FFHIF=FI{G@;0/??8556>966359;9>F{>>>C>CDBDDG<<=810...,,*&&&&)))+*+0214549FDBBCDIDCBDINFHJMCCEDGHD332023136<;:>7=C?B>CBGEGNFJBACFDEC@?AE{H@CGIEDB?L{@@AAIOZPHQ{{{K{{NNNLIIAAAA{PMMOI{IH{PJ{FFDMDHFEHDECFJOKLF{NGHJK{JNJIGDHD@>455E?>BAB?@..+)*%%%%$)+-379868>CICHC@@DJJABA;B??DIIFFHEECGJAA?CDEEGGDFFDJEFHFKIIQF{LDB?@NEADENHCBBADB@10-/,+11=;977>AACHK{FGFIIKDDD;;;>BGF{IKGSGLIGDD?>@A@AEIGED;:9+)(((*&&&20*+93-2**684658:...BFLGC@???;<<;5---092278>??GC8778KDG{LEPHGDFFCHFCDHFREFFGFC-,**62677.,&&'3.*,*,-1....658:80/...;=>DBKFIFHCF;:;>>===*))(*553337=88://00BDDCBBAA+***-<=>KF?>?>52-:???8667('''+*)&&&('((>2223=BGJCBCDEEHKGLHIKA?@::::222+++++44GB=>@A@ACEA@@DEHIHC?611123'&&&&&'-.---&'))70/.243244DDEHHIHJE<:::8DFMEB==;<=>G>=>BB@@;;98<==FAB{AIB@AAA5>><'&&&)**'('*659@{{98CDEEDJGEHGGFF=0//2.(''$%$$$+:6603)+-1,+++BBDFDSJEGGCDCCCFDDEGFHEFFHEFEJDFPGDFDDIKBAABFEEHNH{KGFDJD{{GEEDHIGGDDGJFHNGBEEFO{HJFEBCDBBBCCCDYK{CAA>=;:9:;;11&&&&)('&&'(**,+*((*+2.*+@FHEBA<;=///-'$$()(&%$$#$%&,&$$%$%%%0:@?DFFHDJFBDDBADAD>;;JJEGDGDIBBBACA8777GB3555411345---,,-2;9??>>>?ABBDC@A?C?FDO{{{IEDAA@@BEA@?@BBDDDDGCFEFEDB9+((35AABBBB@@C@CCC{763002(68////6679222,++.:888=FFGHFHDICCABD?;:;4../446=?>86457+?@?EBB@FABA;:D6;:8:654+%&)--**))**'%%%)6:<;;>=BBLFGNFHIIFDEBCHFFCBBEDB?<==>BBAABB@A:89:DA5G{{22=>>E8888@108?BEFB?=6226861129;<;;;?>222889:>><>?>BCCAAAHGA?>@<===ABBABADA=<==:=89;;<<<=@@?>?>>>?@=997,,,19::4)))+,,)*+)5**)--'&&',-.)))%%'&(&%((870247732,,+&&**9<>>?CBBC>><<><9755.+&%&7;?@>=<=>?A:;@?BA88530++3))<3222@?==<==={{@==:=<?@?<;:::;==A;::::>?@><=<<5;47,,-*++9<><<<AAFEABBBBA<8::5443./(()&&''.86***&(&%%**1139??>872((((/81--.1333;:4-,-.33573.(%%&,39==<=:96-+'%%%'&&*,-..+%%%&((1.-*&&&'+*.(///.22,**())),+'''&(-,+)($%+)%$#$$&-/5751/%%%&++)-2=<<=;921/0,+(&%%%%)().-.,%%%%&%$$%'&'%$$%%$$(('(*+))-1)('&%&$%%()+++3**+)''&&&$$$%&$$#$%+),*)&% NM:i:61 ms:i:2584 AS:i:2572 nn:i:0 tp:A:P cm:i:201 s1:i:1204 s2:i:0 de:f:0.0327 SA:Z:pOpen_V3_amplified,593,-,12S845M30D1559S,60,128; rl:i:0 +903ca7fa-2851-4f9f-9ab1-3930b86c58a1 2064 pOpen_V3_amplified 593 60 12H5M1D21M1I7M1D15M1D2M1I34M1I4M1I57M2I13M2I45M1I6M1I17M1D2M1D4M1D40M8D56M1I43M3I9M1D48M5D12M2I21M1I8M2I12M1D3M1D2M1D27M3D9M1D16M3D2M2D17M1I27M7D13M2I16M7D3M2D7M1D46M1D1M2D31M1D4M1I25M1D24M1I67M1559H * 0 0 TATACGATATGTATACCCGAAGCATGCTGAAAAAAGGTGTACCGTGATGAGACGTATTACAGTGACAAATCACAGCGACAGCTATCAAATTCGCTCAAGGCATATATGATGTCAATATCTCCGGTCTGGTAAGCACAACCATGCAGAATCCAAAGTGCGTCGTCCGCGCGCGCCGAACGCTGGAAAGCGGAAAATCAGGAAGGGACGGCCGCAGGTCGCCCCGGTTTATTCAAGTGACGCTCTTTGCTGACGAGAACAGGGGCTGGTGAAATGCAGTTTAAGCTATAAAAGAGAGAGCCGTTATCGTCTGTTTGCCTATGTACAGAGTGATATTATTGCACACGCCCGGCGGACCAATGGCCATCCCCCTGGCCAGTGCACGTGCTCTGCTGTCGATAAAGTCTCCCGTGAACTTTACCCGGTGGTGCATATCGGGGATGAAGCGCATGATCCCCACACCGATATGGCCAGTGTGCCTGGTTTCCGCCTCATCGGGGAAGAGTGCGATCTCAGCCACCGCGAAAATGACATCAACGTGATTACCTGATGTTCTGGGGATATGTTAGGCTCCCTTATAACACAGGCGATGTTGAAGACCACGCTGAATCGTCGGAGCCTGGCTGAACAATAACTAGGCTCGGCCTCAAACGGGTCTTGAGGGGTTTTTTGCATGGTCATAGCTGTTTCCTGAATTGGCAGGTGATGACACACATTAAGATGTTTGTGAGGGAGTCTCCAGAAGAATGCCATTTATTCGATAGGCTCTGCCCCCCCTACTGAGCATCACAAAATTCGACGCTCAAGCCAGAGGTGGCGAAACCTGACAGGACTATAAAGATACCAGG &%$$$&&&'')+**3+++)(%%$&%&'()1-))+*('(($$%%$$%'&'%$$%&%%%%,.-.)()%%%%&(+,0/129;=<<=2-)++&%%%/157>77447{86622375-+,,,-/./1()(('&'+***+,))*%'$$$&&&&&%%&'(%%%$%&%'&&&&&'')+*(++,/30027/8,39*(*/2595556//.987,**+,(''+++''')+,**)+))))'%%&)----.--,-8--+)))&'&&)(**+,..,,.)))-.022469931.)'$$&*1488114(((*&&&&01)(('&&&())'%$&&'((**)-2121002100.%&%'(),,-754++)%&&%$&1237666:;=A::::?82//0+)'&&'%$&&$')()++*+*&)''(*,457764578<;;:+**(+,,...3:9878563(),,*+&&&%$$$%#$**,{.-++,,21450--()((*'&%%&)'))()'%$%%&),++,,*+'%$%%$$(///02=:=:99310*))('%&''''&'&&&&*)))*''%$&%&'((')+&&(''%%%&00018-(((''%&'&)*+('').+)('(8998;;2('%&&&&'','&)1-(*&&*))'&&$$%'''%&$$#$)+085911.*(('*)**++,.:<9811.-'(*1683212345...-,./+.+*+'&)')))(&&&&778<{{?<5/-&$$#$%)+%$()+,-(&'''+,)))(**,22.///(.*+'&&&*-.1((&%%%+..-,*&&'%%%'+-69:=<==93,&%%(.37533.-,-4:;3331.--18/((((278>??9311**%%&(&***68. NM:i:128 ms:i:1065 AS:i:1022 nn:i:0 tp:A:P cm:i:53 s1:i:396 s2:i:0 de:f:0.1056 SA:Z:pOpen_V3_amplified,1,+,92S1460M6D864S,60,61; rl:i:0 +5a0486fa-cae7-48bf-99cb-41da62c92d4b 16 pOpen_V3_amplified 1 60 9S126M1D36M1D41M1D46M1I59M2D135M102S * 0 0 GGCGGCAGTGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCTGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAGGTGCCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACGGTGCACCCAACTCATCTTCAGCATCTTTTACTTTCACCAGCGTTTCCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGGACGTTTTATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCACAGAAAAGCACTGCCACTCCCACTACCACAGGTGCTGATCAGAGGTACTTTCCTGGAGGGTTTAACCTTAGCAATACGTAACGGAACCAAGTACACAACATA $''.//2338;:;<=?@C?:88////282213442...:9<=={J{CEK{QGHFEDFEJDBDBBCBDOEDH>=>>{HGGI{A*)))10/117756+79:CCFJEDFG{IGAA@>==?ANEFFIF<7365./8666>=;8779>B{BBBA{ELCA@<=<;7/..***,''&&**1:;==AGFN{JDDD<<==DEGFOBA@BIGEA@C>6(')'(''%%%%%%-1126CDFIGAA@@ACAB8***+>982.+*+,-02)'*56<<;::;;==:999317@HH{GMGD@?77=@@=<<=AOEDBBCEJK{{LJGDCACEFGDCBCHHKT{TI{HUKHHCCDFLJHHMEBDKHGFDHJ{;:63((1/+('(*>DBB99?442243:126:<8''&%%%$&')*)(((0'''(;<;9:<=>?>BA@84'&(12)(('*(%%%()&((+,,.*/.245,+++122/+**&/00555/...8{{;>B@A?<;;4222<=0..;0//1103<;@;9?@=:9:CC:6:71003((''0666999<<<(((2)(()'&&(01195230.(&#$&&('%*.'&&%&$%%'(),,,-/788BDDFFEH;::;;8///166:956A9GDFCCEE{{NJKMDCCAABCBDDG????@@@@KDLKKEBC>DDJGHEECACABAABBBAA><::;;?B6?:84+*'&&''(468CJDDBDI@AE<;:;B?>B@EBA>;442/..'''))))%$$$&)4589;=;;<;=<567>65+*'')**233-02334??>?NMIGCD{{KZG???BMGDEHFEA>6557@@@@DFD?=>:955768004677DHFGKGFDGECDF=87521114))((+++,,7;;;7644/.2...@A:6401>@?>9:::IH66:;?40001444?=>1666788;<<;444:9879;;784555644//((((?BBA@?900/252)-100****.0-*,-)(''(&$#$#$()4@BAE{FCGKLF{KE328>I{EB;;;?>9961/.)(('**,-+*..301124335;<<98900;=?9777777<;;<{IFDE&-**2..-04388,4=;{{8.../55500/&**+/221+++,542./*.,,+((&)(%%%(*'(()21(&'48@AB>?>=<:9;<;('''0((()*)'&$%%%&''8<:621:342244>*'%%&&&&+*&&%&%%$%&$$'&&''(,02'&$$$$###$(**-(&%$$&%((())+(('',,'&%'&'(')(*2//1984'&&(4446534+++-./-,,,,,.-.,/0/,((*%$%%%),.-)'''+%%)))()*+220*,-+'&&').,-)..01('').;=653366;849542&$$''&'&&&(%&'(.('''-,,,+*)(&&&%&+,2213301/((%%'''(//-+,)()* NM:i:69 ms:i:568 AS:i:560 nn:i:0 tp:A:P cm:i:32 s1:i:196 s2:i:0 de:f:0.1200 SA:Z:pOpen_V3_amplified,1953,-,542S528M2D92S,60,21; rl:i:0 +e7d58ab3-5d74-4f0c-be96-1b82985c5d65 0 pOpen_V3_amplified 1 60 91S7M2D13M1D19M1D20M1D3M1D2M1D4M2D69M2D12M3D69M1D67M1D31M1D3M2D5M1I14M2D11M1D9M1D17M2D2M2D24M1D19M62S * 0 0 TTGCCTGTACTTGGTTCAGTTACGTATTGCTAAGGTTAAACCCTCCAGGAAAGTACCTCTGATCAGCACCTGTGGTAGTGGGAGTGGCAGTGCTTTTCTGACTGGTGAGTATAAACCAAGTCATTCTGAGATAGTGTATGCGGCAGCCCATTGTCTGCCACGTGGATCCGGGATAATACCTCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACATTCTGGGGCAAAAACTAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATGAGGCAACACGGAAATGTTGAATACTCATACTCTCCTTTCAGATATTATTGAAGCATATCAGGGTTATCTCTAGTGGCGGATACATATTTGAATATAGAAAAATAAACAAATAGGGGTTCGCGCACCTGCACCAGTCACAGAAAAGCACTGCCACTCCCACTACCACAGGTGCTGATCAGAGGTACTTTCCTGGAGGCAT %&'&('/0(((*'*++4+*))''$$%&+()++-//01/000014223463341/(((*))%%11146566786752323555644667844412/00)&&&*+0121/.-.+&&'(/1334334553((()/,,,+0/.0.**+%%$$#$#$%(&&%%*%'&&&&()****/4321/+++,+()),30.)))*++++4466557555666444311113445400003///00(((10**((+0.,++-.01/1132211421125644300011000357633,,,,433365333321236656210125111334001043001258435544455568:8664443334455200-''%&)-.34643211//00433''&&.17753322434220.-,-.*,((()5-11../332231121+*-)'**++01212..-4&%&%&'%%%&&&(*''()-/-*'('(*++210.//1.,,+(&%%&%%$%)-,./434566+***32550.1132344433357677:87.-..586543443.+****+++)))((('0852/)'$## NM:i:45 ms:i:614 AS:i:606 nn:i:0 tp:A:P cm:i:37 s1:i:241 s2:i:0 de:f:0.0818 rl:i:0 +244cfbbf-305e-42c6-bb76-cad3399504e4 16 pOpen_V3_amplified 1 60 72S59M1I28M1D115M2I3M1D92M2D10M1D136M110S * 0 0 CTACGTATTGCTAAGGTTAAACCCTCCAGGAAAGTACCTCTGATCAGCACCTATGCCTATGCCGATGCCCATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCCAAGTCGCTCTTGCCCGGCGTCAATACGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTGACCGCTGTTGAGATCCAGTTCGATGTAACCCGAACTGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAGGCGACACGGAACGTTGAATACTCATACTCTTCCTTTTTCAATATTATTGAAGCATTTATCAGGGTTATTGTCTCATGAGCGGATACATATTTGAATGTATTTAGAAAAATAAACAAATAGGGGTTCCGCGCACCTGCACCAGTCACAGAAAAGCAAATATGGGCATCGGCATAGCTAACAGGCGGTGCTGATCAGAGGTACTTTCCTGGAGGGTTTAACCTTAGCAATACGTAACTGAACGAAAGGCTACAAAGC %&'''(+))++/AA@@BB@225204698888@?>;31**06>=96676644550+,,-06874477>@A?@ABBCDIGFDCG9999>=<=?EA:....../+**()-1556?@@CCEDB7-,)))''',----22358BE;228:8888@<<<0:@CCFEBFCEQ{HGHDA8897:99@A?::555<=>>DDFAAABGD@DH{{JKGIFF@>>,,130.-,*'(**0..24312----:==F{:998<>>EDGHF:::;EFE<;<<,)((0)*.<=@@A><<<---.CCACEGHGH{LPJGGLHK{{L????FHGHDHEUH<;;:>==@B;?AF{DM@{ABJGKFDC{KWLFIJDCGACD@77((+&&'))+++-+,,-32++--,+++-<;::;=C@455779==BFGS{J{GDIFGEL{HJ{LKK{I{JECAA>44))(22274322111011/++,3B@ABDKRAABG{{{POJHHIPIMFDHPGMEFC@C3211215782222@743()1/*+(('&())),1111,+&%'&&&%%"##$###$%(.23789:=4447;;::??BD>@;866?>@AB?;**44/,++**,$'&&%&&&&&-*(&%%%++/+*'& NM:i:12 ms:i:824 AS:i:822 nn:i:0 tp:A:P cm:i:62 s1:i:384 s2:i:0 de:f:0.0223 rl:i:0 diff --git a/lib/bio/sam/sam.go b/lib/bio/sam/sam.go new file mode 100644 index 0000000..6cbf6b0 --- /dev/null +++ b/lib/bio/sam/sam.go @@ -0,0 +1,532 @@ +/* +Package sam implements a SAM file parser and writer. + +SAM is a tab-delimited text format for storing DNA/RNA sequence alignment data. +It is the most widely used alignment format, complementing its binary +equivalent, BAM, which stores the same data in a compressed format. + +DNA sequencing works in the following way: + + - DNA is read in with some raw signal format from the sequencer machine. + - Raw signal is converted to fastq reads using basecalling software. + - Fastq reads are aligned to target template, producing SAM files. + - SAM files are used to answer bioinformatic queries. + +This parser allows parsing and writing of SAM files in Go. Unlike other SAM +parsers in Golang, we aim to be as close to underlying data types as possible, +with a goal of being as simple as possible, and no simpler. + +Paper: https://doi.org/10.1093%2Fbioinformatics%2Fbtp352 +Spec: http://samtools.github.io/hts-specs/SAMv1.pdf +Spec(locally): `dnadesign/lib/bio/sam/SAMv1.pdf` +*/ +package sam + +import ( + "bufio" + "errors" + "fmt" + "io" + "regexp" + "strconv" + "strings" +) + +const DefaultMaxLineSize int = 1024 * 32 * 2 // // 32kB is a magic number often used by the Go stdlib for parsing. We multiply it by two. + +// Each header in a SAM file begins with an @ followed by a two letter record +// code type. Each line is tab delimited, and contains TAG:VALUE pairs. HD, the +// first line, only occurs once, while SQ, RG, and PG can appear multiple +// times. Finally, @CO contains user generated comments. +// +// For more information, check section 1.3 of the reference document. +type Header struct { + HD map[string]string // File-level metadata. Optional. If present, there must be only one @HD line and it must be the first line of the file. + SQ []map[string]string // Reference sequence dictionary. The order of @SQ lines defines the alignment sorting order. + RG []map[string]string // Read group. Unordered multiple @RG lines are allowed. + PG []map[string]string // Program. + CO []string // One-line text comment. Unordered multiple @CO lines are allowed. UTF-8 encoding may be used. +} + +// headerWriteHelper helps write SAM headers in an ordered way. +func headerWriteHelper(sb io.StringWriter, headerString string, headerMap map[string]string, orderedKeys []string) { + _, _ = sb.WriteString(headerString) + // Write orderedKeys first, if they exist + for _, key := range orderedKeys { + if value, exists := headerMap[key]; exists { + _, _ = sb.WriteString(fmt.Sprintf("\t%s:%s", key, value)) + } + } + // Write the remaining key-value pairs + for key, value := range headerMap { + // Skip if the key is one of the specific keys + var skip bool + for _, orderedKey := range orderedKeys { + if key == orderedKey { + skip = true + } + } + if skip { + continue + } + _, _ = sb.WriteString(fmt.Sprintf("\t%s:%s", key, value)) + } + _, _ = sb.WriteString("\n") +} + +// WriteTo writes a SAM header to an io.Writer. +func (header *Header) WriteTo(w io.Writer) (int64, error) { + // Here we write the header into a SAM file. Please check the official + // documentation for the meaning of each tag used as ordered keys. + // Here, we iterate through each, and write it to a file. + var sb strings.Builder + if len(header.HD) > 0 { + headerWriteHelper(&sb, "@HD", header.HD, []string{"VN", "SO", "GO", "SS"}) + } + for _, sq := range header.SQ { + headerWriteHelper(&sb, "@SQ", sq, []string{"SN", "LN", "AH", "AN", "AS", "DS", "M5", "SP", "TP", "UR"}) + } + for _, rg := range header.RG { + headerWriteHelper(&sb, "@RG", rg, []string{"ID", "BC", "CN", "DS", "DT", "FO", "KS", "LB", "PG", "PI", "PL", "PM", "PU", "SM"}) + } + for _, pg := range header.PG { + headerWriteHelper(&sb, "@PG", pg, []string{"ID", "PN", "VN", "CL", "PP", "DS"}) + } + for _, co := range header.CO { + _, _ = sb.WriteString(fmt.Sprintf("@CO %s\n", co)) + } + + newWrittenBytes, err := w.Write([]byte(sb.String())) + return int64(newWrittenBytes), err +} + +// Validate validates that the header has all required information, as +// described in the SAMv1 specification document. Not implemented yet. +func (header *Header) Validate() error { + /* The following rules apply: + 1. @HD.VN: Format version. Accepted format: /^[0-9]+\.[0-9]+$/. + 2. @HD.SO: Valid values: unknown (default), unsorted, queryname and coordinate + 3. @HD.GO: Valid values: none (default), query (alignments are grouped by QNAME), and reference (alignments are grouped by RNAME/POS) + 4. @HD.SS: Regular expression: (coordinate|queryname|unsorted)(:[A-Za-z0-9_-]+)+ + 5. @SQ.SN: Regular expression: [:rname:^*=][:rname:]* + 6. @SQ.SN/AN: The SN tags and all individual AN names in all @SQ lines must be distinct + 7. @SQ.LN: Reference sequence length. Range: [1, 2^31 − 1] + 8. @SQ.AN: Regular expression: name(,name)* where name is [:rname:^*=][:rname:]* (definition of 6) + 9. @SQ.TP: Valid values: linear (default) and circular + 10. @RG.ID: Each @RG line must have a unique ID + 11. @RG.DT: Date the run was produced (ISO8601 date or date/time). + 12. @RG.FO: Format: /\*|[ACMGRSVTWYHKDBN]+/ + 13. @RG.PL: Valid values: CAPILLARY, DNBSEQ (MGI/BGI), ELEMENT, HELICOS, ILLUMINA, IONTORRENT, LS454, ONT (Oxford Nanopore), PACBIO (Pacific Bio-sciences), SOLID, and ULTIMA + 14. @PG.ID: Each @PG line must have a unique ID. + 15. @PG.PP: Previous @PG-ID. Must match another @PG header’s ID tag. @PG records may be chained using PP tag, with the last record in the chain having no PP tag + */ + + // Validate @HD tags + if len(header.HD) > 0 { + // Accessing HD map directly as it's not a function returning two values + hd := header.HD + + // 1. @HD VN + if vn, ok := hd["VN"]; ok { + matched, _ := regexp.MatchString(`^[0-9]+\.[0-9]+$`, vn) + if !matched { + return fmt.Errorf("Invalid format for @HD VN. Accepted format: /^[0-9]+\\.[0-9]+$/.\nGot: %s", vn) + } + } + // 2. @HD SO + if so, ok := hd["SO"]; ok { + validValues := map[string]bool{"unknown": true, "unsorted": true, "queryname": true, "coordinate": true} + if _, valid := validValues[so]; !valid { + return fmt.Errorf("Invalid value for @HD SO. Valid values: unknown (default), unsorted, queryname and coordinate. Got: %s", so) + } + } + // 3. @HD GO + if goTag, ok := hd["GO"]; ok { + validValues := map[string]bool{"none": true, "query": true, "reference": true} + if _, valid := validValues[goTag]; !valid { + return fmt.Errorf("Invalid value for @HD GO. Valid values: none (default), query (alignments are grouped by QNAME), and reference (alignments are grouped by RNAME/POS). Got: %s", goTag) + } + } + // 4. @HD SS + if ss, ok := hd["SS"]; ok { + matched, _ := regexp.MatchString(`(coordinate|queryname|unsorted)(:[A-Za-z0-9_-]+)+`, ss) + if !matched { + return fmt.Errorf("Invalid format for @HD SS. Needs to match: Regular expression: (coordinate|queryname|unsorted)(:[A-Za-z0-9_-]+)+\nGot: %s", ss) + } + } + } + + // Validate @SQ tags + snMap := make(map[string]bool) + for _, sq := range header.SQ { + // 5. @SQ SN + if sn, ok := sq["SN"]; ok { + // [:rname:^*=][:rname:]* isn't actually a valid regexp, so I'm not + // sure why they've used this as the definition. We skip this check + // because it doesn't make much sense. + if snMap[sn] { + return fmt.Errorf("Non-unique @SQ SN: %s", sn) + } + snMap[sn] = true + } + // 7. @SQ LN + if ln, ok := sq["LN"]; ok { + lnInt, err := strconv.Atoi(ln) + if err != nil || lnInt < 1 || lnInt > 2147483647 { + return fmt.Errorf("Invalid value for @SQ LN. Range: [1, 231 − 1], Got: %d", lnInt) + } + } + // 9. @SQ TP + if tp, ok := sq["TP"]; ok { + validValues := map[string]bool{"linear": true, "circular": true} + if _, valid := validValues[tp]; !valid { + return fmt.Errorf("Invalid value for @SQ TP. Valid values: linear (default) and circular, Got: %s", tp) + } + } + } + + // Validate @RG tags + rgIDMap := make(map[string]bool) + rgFoRegexp := regexp.MustCompile(`\*|[ACMGRSVTWYHKDBN]+`) + for _, rg := range header.RG { + // 10. @RG ID + if id, ok := rg["ID"]; ok { + if rgIDMap[id] { + return fmt.Errorf("Non-unique @RG ID. Got: %s", id) + } + rgIDMap[id] = true + } + // 12. @RG FO + if fo, ok := rg["FO"]; ok { + matched := rgFoRegexp.MatchString(fo) + if !matched { + return fmt.Errorf("Invalid format for @RG FO. Required regexp format: /\\*|[ACMGRSVTWYHKDBN]+/\nGot: %s", fo) + } + } + // 13. @RG PL + if pl, ok := rg["PL"]; ok { + validValues := map[string]bool{ + "CAPILLARY": true, "DNBSEQ": true, "ELEMENT": true, "HELICOS": true, "ILLUMINA": true, + "IONTORRENT": true, "LS454": true, "ONT": true, "PACBIO": true, "SOLID": true, "ULTIMA": true, + } + if _, valid := validValues[pl]; !valid { + return fmt.Errorf("Invalid value for @RG PL. Valid values: CAPILLARY, DNBSEQ (MGI/BGI), ELEMENT, HELICOS, ILLUMINA, IONTORRENT, LS454, ONT (Oxford Nanopore), PACBIO (Pacific Bio-sciences), SOLID, and ULTIMA. Got: %s", pl) + } + } + } + + // Validate @PG tags + pgIDMap := make(map[string]bool) + for _, pg := range header.PG { + // 14. @PG ID + if id, ok := pg["ID"]; ok { + if pgIDMap[id] { + return fmt.Errorf("Non-unique @PG ID. Got: %s", id) + } + pgIDMap[id] = true + } + } + return nil +} + +// Optional fields in SAM alignments are structured as TAG:TYPE:DATA, where +// the type identifiers the typing of the data. +// +// For more information, check section 1.5 of http://samtools.github.io/hts-specs/SAMv1.pdf. +type Optional struct { + Tag string // Tag is typically a two letter tag corresponding to what the optional represents. + Type rune // The type may be one of A (character), B (general array), f (real number), H (hexadecimal array), i (integer), or Z (string). + Data string // Optional data +} + +// Each alignment is a single line of a SAM file, representing a linear +// alignment of a segment, consisting of 11 or more tab delimited fields. The +// 11 fields (QNAME -> QUAL) are always available (if the data isn't there, a +// placeholder '0' or '*' is used instead), with additional optional fields +// following. +// +// For more information, check section 1.4 of the reference document. +type Alignment struct { + QNAME string // Query template NAME + FLAG uint16 // bitwise FLAG + RNAME string // References sequence NAME + POS int32 // 1- based leftmost mapping POSition + MAPQ byte // MAPping Quality + CIGAR string // CIGAR string + RNEXT string // Ref. name of the mate/next read + PNEXT int32 // Position of the mate/next read + TLEN int32 // observed Template LENgth + SEQ string // segment SEQuence + QUAL string // ASCII of Phred-scaled base QUALity+33 + Optionals []Optional // Map of TAG to {TYPE:DATA} +} + +// Alignment_WriteTo implements the io.WriterTo interface. It writes an +// alignment line. +func (alignment *Alignment) WriteTo(w io.Writer) (int64, error) { + var sb strings.Builder + _, _ = sb.WriteString(fmt.Sprintf("%s\t%d\t%s\t%d\t%d\t%s\t%s\t%d\t%d\t%s\t%s", alignment.QNAME, alignment.FLAG, alignment.RNAME, alignment.POS, alignment.MAPQ, alignment.CIGAR, alignment.RNEXT, alignment.PNEXT, alignment.TLEN, alignment.SEQ, alignment.QUAL)) + for _, optional := range alignment.Optionals { + _, _ = sb.WriteString(fmt.Sprintf("\t%s:%c:%s", optional.Tag, optional.Type, optional.Data)) + } + _, _ = sb.WriteString("\n") + newWrittenBytes, err := w.Write([]byte(sb.String())) + return int64(newWrittenBytes), err +} + +// Alignment_Validate validates an alignment as valid, given the REGEXP/range +// defined in the SAM document. Not implemented yet. +func (alignment *Alignment) Validate() error { + /* The following rules apply: + + 1 QNAME String [!-?A-~]{1,254} Query template NAME + 2 FLAG Int [0, 216 − 1] bitwise FLAG + 3 RNAME String \*|[:rname:∧*=][:rname:]* Reference sequence NAME11 + 4 POS Int [0, 231 − 1] 1-based leftmost mapping POSition + 5 MAPQ Int [0, 28 − 1] MAPping Quality + 6 CIGAR String \*|([0-9]+[MIDNSHPX=])+ CIGAR string + 7 RNEXT String \*|=|[:rname:∧*=][:rname:]* Reference name of the mate/next read + 8 PNEXT Int [0, 231 − 1] Position of the mate/next read + 9 TLEN Int [−231 + 1, 231 − 1] observed Template LENgth + 10 SEQ String \*|[A-Za-z=.]+ segment SEQuence + 11 QUAL String [!-~]+ ASCII of Phred-scaled base QUALity+33 + */ + // 1. Validate QNAME + qnameRegex := `^[!-?A-~]{1,254}$` + if matched, _ := regexp.MatchString(qnameRegex, alignment.QNAME); !matched { + return errors.New("Invalid QNAME: must match " + qnameRegex) + } + + // 2. FLAG is validated through uint16 typing. + + // 3. Validate RNAME + rnameRegex := `^\*|[:rname:^\*=][:rname:]*$` + if matched, _ := regexp.MatchString(rnameRegex, alignment.RNAME); !matched { + return errors.New("Invalid RNAME: must match " + rnameRegex) + } + + // 4. Validate POS + if alignment.POS < 0 || alignment.POS > 2147483647 { // 2^31 - 1 + return errors.New("Invalid POS: must be in range [0, 2147483647]") + } + + // 5. MAPQ is validated through byte typing. + + // 6. Validate CIGAR + cigarRegex := `^\*|([0-9]+[MIDNSHPX=])+$` + if matched, _ := regexp.MatchString(cigarRegex, alignment.CIGAR); !matched { + return errors.New("Invalid CIGAR: must match " + cigarRegex) + } + + // 7. Validate RNEXT + rnextRegex := `^\*|=\|[:rname:^\*=][:rname:]*$` + if matched, _ := regexp.MatchString(rnextRegex, alignment.RNEXT); !matched { + return errors.New("Invalid RNEXT: must match " + rnextRegex) + } + + // 8. Validate PNEXT + if alignment.PNEXT < 0 || alignment.PNEXT > 2147483647 { // 2^31 - 1 + return errors.New("Invalid PNEXT: must be in range [0, 2147483647]") + } + + // 9. TLEN is validated through int32 typing. + + // 10. Validate SEQ + seqRegex := `^\*|[A-Za-z=.]+$` + if matched, _ := regexp.MatchString(seqRegex, alignment.SEQ); !matched { + return errors.New("Invalid SEQ: must match " + seqRegex) + } + + // 11. Validate QUAL + qualRegex := `^[!-~]+$` + if matched, _ := regexp.MatchString(qualRegex, alignment.QUAL); !matched { + return errors.New("Invalid QUAL: must match " + qualRegex) + } + + return nil +} + +// Parser is a sam file parser that provide sample control over reading sam +// alignments. It should be initialized with NewParser. +type Parser struct { + reader bufio.Reader + line uint + FileHeader Header + firstLine string + readFirstLine bool +} + +// Header returns the parsed sam header. +func (p *Parser) Header() (*Header, error) { + return &p.FileHeader, nil +} + +func checkIfValidSamLine(lineBytes []byte) bool { + return len(strings.Split(strings.TrimSpace(string(lineBytes)), "\t")) >= 11 +} + +// NewParser creates a parser from an io.Reader for sam data. For larger +// alignments, you will want to increase the maxLineSize. +func NewParser(r io.Reader, maxLineSize int) (*Parser, Header, error) { + parser := &Parser{ + reader: *bufio.NewReaderSize(r, maxLineSize), + } + var header Header + var hdParsed bool + // Initialize header maps + header.HD = make(map[string]string) + header.SQ = []map[string]string{} + header.RG = []map[string]string{} + header.PG = []map[string]string{} + header.CO = []string{} + + // We need to first read the header before returning the parser to the + // user for analyzing alignments. + for { + lineBytes, err := parser.reader.ReadSlice('\n') + line := strings.TrimSpace(string(lineBytes)) + if err != nil { + // Check if we have an EOF, if we have a validSamLine, and we are + // not parsing a header. We do not check EOF + header line without + // any validSamLine because that is useless. + // + // This, on the other hand, will catch if we have a single line sam + // file with an EOF at the end, like we often have in tests. + if err == io.EOF && checkIfValidSamLine(lineBytes) && line[0] != '@' { + parser.firstLine = line + break + } + return parser, Header{}, err + } + parser.line++ + if len(line) == 0 { + return parser, Header{}, fmt.Errorf("Line %d is empty. Empty lines are not allowed in headers.", parser.line) + } + // If this line is the start of the alignments, set the firstLine + // into memory, and then break this loop. + if line[0] != '@' { + parser.firstLine = line + break + } + values := strings.Split(line, "\t") + if len(values) < 1 { + return parser, Header{}, fmt.Errorf("Line %d should contain at least 1 value. Got: %d. Line text: %s", parser.line, len(values), line) + } + + // If we haven't parsed HD, it is always the first line: lets parse it. + if !hdParsed { + if values[0] != "@HD" { + return parser, Header{}, fmt.Errorf("First line (%d) should always contain @HD first. Line text: %s", parser.line, line) + } + // Now parse the rest of the HD header + for _, value := range values[1:] { + valueSplit := strings.Split(value, ":") + header.HD[valueSplit[0]] = valueSplit[1] + } + hdParsed = true + continue + } + + // CO lines are unique in that they are just strings. So we try to parse them + // first. We include the entire comment line for these. + if values[0] == "@CO" { + header.CO = append(header.CO, line) + continue + } + + // HD/CO lines have been successfully parsed, now we work on SQ, RG, and PG. + // Luckily, each one has an identical form ( TAG:DATA ), so we can parse that + // first and then just apply it to the respect top level tag. + genericMap := make(map[string]string) + for _, value := range values[1:] { + valueSplit := strings.Split(value, ":") + genericMap[valueSplit[0]] = valueSplit[1] + } + switch values[0] { + case "@SQ": + header.SQ = append(header.SQ, genericMap) + case "@RG": + header.RG = append(header.RG, genericMap) + case "@PG": + header.PG = append(header.PG, genericMap) + default: + return parser, Header{}, fmt.Errorf("Line %d should contain @SQ, @RG, @PG or @CO as top level tags, but they weren't found. Line text: %s", parser.line, line) + } + } + parser.FileHeader = header + return parser, header, nil +} + +// Next parsers the next read from a parser. Returns an `io.EOF` upon EOF. +func (p *Parser) Next() (*Alignment, error) { + var alignment Alignment + var finalLine bool + var line string + + // We need to handle the firstLine after the header, as well as EOF checks. + if !p.readFirstLine { + line = p.firstLine + p.readFirstLine = true + } else { + lineBytes, err := p.reader.ReadSlice('\n') + if err != nil { + if err == io.EOF { + // This checks if the EOF is at the end of a line. If there is a + // final SAM line, skip the EOF till the next Next() + if len(strings.Split(strings.TrimSpace(string(lineBytes)), "\t")) >= 11 { + finalLine = true + } + } + } + if !finalLine { + if err != nil { + return nil, err + } + } + line = strings.TrimSpace(string(lineBytes)) + } + p.line++ + values := strings.Split(line, "\t") + if len(values) < 11 { + return nil, fmt.Errorf("Line %d had error: must have at least 11 tab-delimited values. Had %d", p.line, len(values)) + } + alignment.QNAME = values[0] + flag64, err := strconv.ParseUint(values[1], 10, 16) // convert string to uint16 + if err != nil { + return nil, fmt.Errorf("Line %d had error: %s", p.line, err) + } + alignment.FLAG = uint16(flag64) + alignment.RNAME = values[2] + pos64, err := strconv.ParseInt(values[3], 10, 32) // convert string to int32 + if err != nil { + return nil, fmt.Errorf("Line %d had error: %s", p.line, err) + } + alignment.POS = int32(pos64) + mapq64, err := strconv.ParseUint(values[4], 10, 8) // convert string to uint8 (otherwise known as byte) + if err != nil { + return nil, fmt.Errorf("Line %d had error: %s", p.line, err) + } + alignment.MAPQ = uint8(mapq64) + alignment.CIGAR = values[5] + alignment.RNEXT = values[6] + pnext64, err := strconv.ParseInt(values[7], 10, 32) + if err != nil { + return nil, fmt.Errorf("Line %d had error: %s", p.line, err) + } + alignment.PNEXT = int32(pnext64) + tlen64, err := strconv.ParseInt(values[8], 10, 32) + if err != nil { + return nil, fmt.Errorf("Line %d had error: %s", p.line, err) + } + alignment.TLEN = int32(tlen64) + alignment.SEQ = values[9] + alignment.QUAL = values[10] + + var optionals []Optional + for _, value := range values[11:] { + valueSplit := strings.Split(value, ":") + optionals = append(optionals, Optional{Tag: valueSplit[0], Type: rune(valueSplit[1][0]), Data: valueSplit[2]}) + } + alignment.Optionals = optionals + return &alignment, nil +} diff --git a/lib/bio/sam/sam_test.go b/lib/bio/sam/sam_test.go new file mode 100644 index 0000000..66c22b9 --- /dev/null +++ b/lib/bio/sam/sam_test.go @@ -0,0 +1,268 @@ +package sam + +import ( + "bytes" + "errors" + "fmt" + "io" + "os" + "strings" + "testing" +) + +func TestParse(t *testing.T) { + file, err := os.Open("data/aln.sam") + if err != nil { + t.Errorf("Failed to open aln.sam: %s", err) + } + parser, header, err := NewParser(file, DefaultMaxLineSize) + if err != nil { + t.Errorf("Got error on new parser: %s", err) + } + if len(header.HD) != 3 { + t.Errorf("HD should have 3 TAG:DATA pairs") + } + for { + _, err := parser.Next() + if err != nil { + if !errors.Is(err, io.EOF) { + t.Errorf("Got unknown error: %s", err) + } + break + } + } +} + +func ExampleNewParser() { + file := strings.NewReader(`@HD VN:1.6 SO:unsorted GO:query +@SQ SN:pOpen_V3_amplified LN:2482 +@PG ID:minimap2 PN:minimap2 VN:2.24-r1155-dirty CL:minimap2 -acLx map-ont - APX814_pass_barcode17_e229f2c8_109f9b91_0.fastq.gz +ae9a66f5-bf71-4572-8106-f6f8dbd3b799 16 pOpen_V3_amplified 1 60 8S54M1D3M1D108M1D1M1D62M226S * 0 0 AGCATGCCGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGTGCTGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCGACGTTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTTACTGTTGATGTTCATGTAGGTGCTGATCAGAGGTACTTTCCTGGAGGGTTTAACCTTAGCAATACGTAACGGAACGAAGTACAGGGCAT %,AFDCBD{LNKKGIL{{JLKI{{IFG>==86668789=<><;056<;>=87:840/++1,++)-,-0{{&&%%&&),-13;<{HGVKCGFI{J{L{G{INJHEA@C540/3568;>EOI{{{I0000HHRJ{{{{{{{RH{N@@?AKLQEEC?==<433345588==FTA??A@G?@@@EC?==;10//2333?AB?<<<--(++*''&&-(((+@DBJQHJHGGPJH{.---@B?<''-++'--&%%&,,,FC:999IEGJ{HJHIGIFEGIFMDEF;8878{KJGFIJHIHDCAA=<<<<;DDB>:::EK{{@{E<==HM{{{KF{{{MDEQM{ECA?=>9--,.3))'')*++.-,**()%% NM:i:8 ms:i:408 AS:i:408 nn:i:0 tp:A:P cm:i:29 s1:i:195 s2:i:0 de:f:0.0345 SA:Z:pOpen_V3_amplified,2348,-,236S134M1D92S,60,1; rl:i:0`) + parser, _, _ := NewParser(file, DefaultMaxLineSize) + samLine, _ := parser.Next() + + fmt.Println(samLine.CIGAR) + // Output: 8S54M1D3M1D108M1D1M1D62M226S +} + +func TestWriteTo(t *testing.T) { + fileString := `@HD VN:1.6 SO:unsorted GO:query +@SQ SN:pOpen_V3_amplified LN:2482 +@PG ID:minimap2 PN:minimap2 VN:2.24-r1155-dirty CL:minimap2 -acLx map-ont - APX814_pass_barcode17_e229f2c8_109f9b91_0.fastq.gz +ae9a66f5-bf71-4572-8106-f6f8dbd3b799 16 pOpen_V3_amplified 1 60 8S54M1D3M1D108M1D1M1D62M226S * 0 0 AGCATGCCGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGTGCTGAGTTGCTCTTGCCCGGCGTCAATACGGGATAATACCGCGCCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCGACGTTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTACAGGCATCGTGGTTACTGTTGATGTTCATGTAGGTGCTGATCAGAGGTACTTTCCTGGAGGGTTTAACCTTAGCAATACGTAACGGAACGAAGTACAGGGCAT %,AFDCBD{LNKKGIL{{JLKI{{IFG>==86668789=<><;056<;>=87:840/++1,++)-,-0{{&&%%&&),-13;<{HGVKCGFI{J{L{G{INJHEA@C540/3568;>EOI{{{I0000HHRJ{{{{{{{RH{N@@?AKLQEEC?==<433345588==FTA??A@G?@@@EC?==;10//2333?AB?<<<--(++*''&&-(((+@DBJQHJHGGPJH{.---@B?<''-++'--&%%&,,,FC:999IEGJ{HJHIGIFEGIFMDEF;8878{KJGFIJHIHDCAA=<<<<;DDB>:::EK{{@{E<==HM{{{KF{{{MDEQM{ECA?=>9--,.3))'')*++.-,**()%% NM:i:8 ms:i:408 AS:i:408 nn:i:0 tp:A:P cm:i:29 s1:i:195 s2:i:0 de:f:0.0345 SA:Z:pOpen_V3_amplified,2348,-,236S134M1D92S,60,1; rl:i:0 +` + file := strings.NewReader(fileString) + parser, _, _ := NewParser(file, DefaultMaxLineSize) + read, _ := parser.Next() + header, _ := parser.Header() + var buffer bytes.Buffer + _, _ = header.WriteTo(&buffer) + _, _ = read.WriteTo(&buffer) + + if fileString != buffer.String() { + t.Errorf("Got diff! First:\n%s\nSecond:\n%s\n====", fileString, buffer.String()) + } +} + +// TestValidate ensures that every aspect of validation is covered +func TestValidate(t *testing.T) { + // Construct an alignment that is correct in all aspects + validAlignment := Alignment{ + QNAME: "ValidName", + FLAG: 255, + RNAME: "*", + POS: 123456, + MAPQ: 50, + CIGAR: "10M1I4M", + RNEXT: "*", + PNEXT: 234567, + TLEN: 1000, + SEQ: "ACTGACTGAC", + QUAL: "~~~~~~~~~~", + } + + // Should pass (no error) + if err := validAlignment.Validate(); err != nil { + t.Errorf("Valid alignment did not pass validation: %s", err) + } + + // Test cases for each field + testCases := []struct { + modify func(a *Alignment) + expected string + }{ + { // Invalid QNAME + func(a *Alignment) { a.QNAME = "Invalid QNAME due to length and spaces" }, + "Invalid QNAME", + }, + { // Invalid RNAME + func(a *Alignment) { a.RNAME = "Invalid RNAME" }, + "Invalid RNAME", + }, + { // Invalid POS, out of range + func(a *Alignment) { a.POS = -1 }, + "Invalid POS", + }, + { // Invalid CIGAR + func(a *Alignment) { a.CIGAR = "X" }, + "Invalid CIGAR", + }, + { // Invalid RNEXT + func(a *Alignment) { a.RNEXT = "Invalid RNEXT" }, + "Invalid RNEXT", + }, + { // Invalid PNEXT, out of range + func(a *Alignment) { a.PNEXT = -1 }, + "Invalid PNEXT", + }, + { // Invalid SEQ + func(a *Alignment) { a.SEQ = "ACTG123" }, + "Invalid SEQ", + }, + { // Invalid QUAL + func(a *Alignment) { a.QUAL = "qual string with lower case or invalid characters" }, + "Invalid QUAL", + }, + } + + for _, tc := range testCases { + // Copy the valid alignment and modify it for the test + invalidAlignment := validAlignment + tc.modify(&invalidAlignment) + + // Now validate it + err := invalidAlignment.Validate() + if err == nil || !contains(err.Error(), tc.expected) { + t.Errorf("Expected error for %s but got none or wrong error: %s", tc.expected, err) + } + } +} + +// contains is a helper function to check if errStr contains the expected substring +func contains(errStr, expected string) bool { + return errStr != "" && strings.Contains(errStr, expected) +} + +// TestValidateAllInOne - testing all validation rules in one function +func TestValidateAllInOne(t *testing.T) { + // Define a series of headers to test different validation scenarios + tests := []struct { + name string + header *Header + expectedError error + }{ + // Valid Complete Header + { + name: "Valid Complete Header", + header: &Header{ + HD: map[string]string{"VN": "1.0", "SO": "unsorted", "GO": "none", "SS": "coordinate:example"}, + SQ: []map[string]string{{"SN": "chr1", "LN": "1000", "TP": "linear"}}, + RG: []map[string]string{{"ID": "rg1", "PL": "ILLUMINA", "FO": "*", "DT": "2023-01-01"}}, + PG: []map[string]string{{"ID": "pg1"}}, + CO: []string{"This is a comment."}, + }, + expectedError: nil, + }, + // Invalid @HD VN format + { + name: "Invalid @HD VN format", + header: &Header{ + HD: map[string]string{"VN": "abc"}, // Invalid VN format + }, + expectedError: fmt.Errorf("Invalid format for @HD VN. Accepted format: /^[0-9]+\\.[0-9]+$/.\nGot: %s", "abc"), + }, + // Invalid @HD SO value + { + name: "Invalid @HD SO value", + header: &Header{ + HD: map[string]string{"VN": "1.0", "SO": "invalid_so"}, // Invalid SO value + }, + expectedError: fmt.Errorf("Invalid value for @HD SO. Valid values: unknown (default), unsorted, queryname and coordinate. Got: %s", "invalid_so"), + }, + // Invalid @HD GO value + { + name: "Invalid @HD GO value", + header: &Header{ + HD: map[string]string{"VN": "1.0", "GO": "invalid_go"}, // Invalid GO value + }, + expectedError: fmt.Errorf("Invalid value for @HD GO. Valid values: none (default), query (alignments are grouped by QNAME), and reference (alignments are grouped by RNAME/POS). Got: %s", "invalid_go"), + }, + // Invalid @HD SS format + { + name: "Invalid @HD SS format", + header: &Header{ + HD: map[string]string{"VN": "1.0", "SS": "invalid_ss"}, // Invalid SS format + }, + expectedError: fmt.Errorf("Invalid format for @HD SS. Needs to match: Regular expression: (coordinate|queryname|unsorted)(:[A-Za-z0-9_-]+)+\nGot: %s", "invalid_ss"), + }, + // Invalid @SQ LN range + { + name: "Invalid @SQ LN range", + header: &Header{ + SQ: []map[string]string{{"SN": "chr1", "LN": "2147483648"}}, // Invalid LN range + }, + expectedError: fmt.Errorf("Invalid value for @SQ LN. Range: [1, 231 − 1], Got: %d", 2147483648), + }, + // Invalid @SQ TP value + { + name: "Invalid @SQ TP value", + header: &Header{ + SQ: []map[string]string{{"SN": "chr1", "LN": "1000", "TP": "invalid_tp"}}, // Invalid TP value + }, + expectedError: fmt.Errorf("Invalid value for @SQ TP. Valid values: linear (default) and circular, Got: %s", "invalid_tp"), + }, + // Non-unique @RG ID + { + name: "Non-unique @RG ID", + header: &Header{ + RG: []map[string]string{{"ID": "rg1", "PL": "ILLUMINA"}, {"ID": "rg1", "PL": "SOLID"}}, + }, + expectedError: fmt.Errorf("Non-unique @RG ID. Got: %s", "rg1"), + }, + // Invalid @RG FO format + { + name: "Invalid @RG FO format", + header: &Header{ + RG: []map[string]string{{"ID": "rg1", "FO": "invalid_fo"}}, + }, + expectedError: fmt.Errorf("Invalid format for @RG FO. Required regexp format: /\\*|[ACMGRSVTWYHKDBN]+/\nGot: %s", "invalid_fo"), + }, + // Invalid @RG PL value + { + name: "Invalid @RG PL value", + header: &Header{ + RG: []map[string]string{{"ID": "rg1", "PL": "invalid_pl"}}, + }, + expectedError: fmt.Errorf("Invalid value for @RG PL. Valid values: CAPILLARY, DNBSEQ (MGI/BGI), ELEMENT, HELICOS, ILLUMINA, IONTORRENT, LS454, ONT (Oxford Nanopore), PACBIO (Pacific Bio-sciences), SOLID, and ULTIMA. Got: %s", "invalid_pl"), + }, + // Non-unique @PG ID + { + name: "Non-unique @PG ID", + header: &Header{ + PG: []map[string]string{{"ID": "pg1"}, {"ID": "pg1"}}, + }, + expectedError: fmt.Errorf("Non-unique @PG ID. Got: %s", "pg1"), + }, + // Non-unique @SN SQ + { + name: "Invalid @SQ SN format", + header: &Header{ + SQ: []map[string]string{{"SN": "invalid_sn", "LN": "1000"}, {"SN": "invalid_sn"}}, // Invalid SN format + }, + expectedError: fmt.Errorf("Non-unique @SQ SN: %s", "invalid_sn"), + }, + } + + // Iterate through each test case + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + // Run the validate function on the header + err := tc.header.Validate() + + // Check if the error matches the expected error + if (err != nil && tc.expectedError == nil) || (err == nil && tc.expectedError != nil) || (err != nil && tc.expectedError != nil && err.Error() != tc.expectedError.Error()) { + t.Errorf("Test %v - Got error %v, want %v", tc.name, err, tc.expectedError) + } + }) + } +} diff --git a/lib/seqhash/example_test.go b/lib/seqhash/example_test.go index 600d7b7..b9595b4 100644 --- a/lib/seqhash/example_test.go +++ b/lib/seqhash/example_test.go @@ -23,7 +23,7 @@ func Example_basic() { func ExampleRotateSequence() { file, _ := os.Open("../data/puc19.gbk") defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() sequenceLength := len(sequence.Sequence) diff --git a/lib/seqhash/seqhash_test.go b/lib/seqhash/seqhash_test.go index 6e6e8ce..82e3622 100644 --- a/lib/seqhash/seqhash_test.go +++ b/lib/seqhash/seqhash_test.go @@ -68,7 +68,7 @@ func TestHash2(t *testing.T) { func TestLeastRotation(t *testing.T) { file, _ := os.Open("../data/puc19.gbk") defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() var sequenceBuffer bytes.Buffer diff --git a/lib/synthesis/codon/codon_test.go b/lib/synthesis/codon/codon_test.go index bc9ba6c..7f0781e 100644 --- a/lib/synthesis/codon/codon_test.go +++ b/lib/synthesis/codon/codon_test.go @@ -54,7 +54,7 @@ func TestOptimize(t *testing.T) { file, _ := os.Open("../../bio/genbank/data/puc19.gbk") defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() table := NewTranslationTable(11) @@ -78,7 +78,7 @@ func TestOptimizeSameSeed(t *testing.T) { var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" file, _ := os.Open(puc19path) defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() optimizationTable := NewTranslationTable(11) @@ -101,7 +101,7 @@ func TestOptimizeDifferentSeed(t *testing.T) { var gfpTranslation = "MASKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTTGKLPVPWPTLVTTFSYGVQCFSRYPDHMKRHDFFKSAMPEGYVQERTISFKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSHNVYITADKQKNGIKANFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPDNHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK*" file, _ := os.Open(puc19path) defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() optimizationTable := NewTranslationTable(11) @@ -210,7 +210,7 @@ Codon Compromise + Add related tests begin here. func TestCompromiseCodonTable(t *testing.T) { file, _ := os.Open(puc19path) defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() // weight our codon optimization table using the regions we collected from the genbank file above @@ -222,7 +222,7 @@ func TestCompromiseCodonTable(t *testing.T) { file2, _ := os.Open("../../data/phix174.gb") defer file2.Close() - parser2, _ := bio.NewGenbankParser(file2) + parser2 := bio.NewGenbankParser(file2) sequence2, _ := parser2.Next() optimizationTable2 := NewTranslationTable(11) err = optimizationTable2.UpdateWeightsWithSequence(*sequence2) @@ -243,7 +243,7 @@ func TestCompromiseCodonTable(t *testing.T) { func TestAddCodonTable(t *testing.T) { file, _ := os.Open(puc19path) defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() // weight our codon optimization table using the regions we collected from the genbank file above @@ -256,7 +256,7 @@ func TestAddCodonTable(t *testing.T) { file2, _ := os.Open("../../data/phix174.gb") defer file2.Close() - parser2, _ := bio.NewGenbankParser(file2) + parser2 := bio.NewGenbankParser(file2) sequence2, _ := parser2.Next() optimizationTable2 := NewTranslationTable(11) err = optimizationTable2.UpdateWeightsWithSequence(*sequence2) @@ -285,7 +285,7 @@ func TestCapitalizationRegression(t *testing.T) { file, _ := os.Open(puc19path) defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() optimizationTable := NewTranslationTable(11) @@ -311,7 +311,7 @@ func TestOptimizeSequence(t *testing.T) { puc19 = func() genbank.Genbank { file, _ := os.Open("../../bio/genbank/data/puc19.gbk") defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() return *sequence }() diff --git a/lib/synthesis/codon/example_test.go b/lib/synthesis/codon/example_test.go index a9cf064..d9a84e9 100644 --- a/lib/synthesis/codon/example_test.go +++ b/lib/synthesis/codon/example_test.go @@ -26,7 +26,7 @@ func ExampleTranslationTable_Optimize() { file, _ := os.Open(puc19path) defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() codonTable := codon.NewTranslationTable(11) _ = codonTable.UpdateWeightsWithSequence(*sequence) @@ -83,7 +83,7 @@ func ExampleWriteCodonJSON() { func ExampleCompromiseCodonTable() { file, _ := os.Open(puc19path) defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() // weight our codon optimization table using the regions we collected from the genbank file above @@ -95,7 +95,7 @@ func ExampleCompromiseCodonTable() { file2, _ := os.Open(phix174path) defer file2.Close() - parser2, _ := bio.NewGenbankParser(file2) + parser2 := bio.NewGenbankParser(file2) sequence2, _ := parser2.Next() optimizationTable2 := codon.NewTranslationTable(11) @@ -118,7 +118,7 @@ func ExampleCompromiseCodonTable() { func ExampleAddCodonTable() { file, _ := os.Open(puc19path) defer file.Close() - parser, _ := bio.NewGenbankParser(file) + parser := bio.NewGenbankParser(file) sequence, _ := parser.Next() // weight our codon optimization table using the regions we collected from the genbank file above @@ -130,7 +130,7 @@ func ExampleAddCodonTable() { file2, _ := os.Open(phix174path) defer file2.Close() - parser2, _ := bio.NewGenbankParser(file2) + parser2 := bio.NewGenbankParser(file2) sequence2, _ := parser2.Next() optimizationTable2 := codon.NewTranslationTable(11)