diff --git a/bt2_search.cpp b/bt2_search.cpp index 30ad5d1a..d8757d69 100644 --- a/bt2_search.cpp +++ b/bt2_search.cpp @@ -1714,7 +1714,7 @@ static void parseOptions(int argc, const char **argv) { << "sequences must be specified with -1 and -2." << endl; throw 1; } - if(interleaved && (format != FASTA && format != FASTQ)) { + if(interleaved && (format != FASTA && format != FASTQ && format != BAM)) { cerr << "Error: --interleaved only works in combination with FASTA (-f) and FASTQ (-q) formats." << endl; throw 1; } diff --git a/pat.cpp b/pat.cpp index 4b6845fe..b5a9bb72 100644 --- a/pat.cpp +++ b/pat.cpp @@ -55,7 +55,7 @@ static uint32_t genRandSeed( size_t qlen = qry.length(); // Throw all the characters of the read into the random seed for(size_t i = 0; i < qlen; i++) { - int p = (int)qry[i]; + int p = (int)qry[i]; assert_leq(p, 4); size_t off = ((i & 15) << 1); rseed ^= ((uint32_t)p << off); @@ -302,20 +302,28 @@ pair DualPatternComposer::nextBatch(PerThreadReadBuf& pt) { * the read and quality input, create a list of pattern sources to * dispense them. */ -PatternComposer* PatternComposer::setupPatternComposer( - const EList& si, // singles, from argv - const EList& m1, // mate1's, from -1 arg - const EList& m2, // mate2's, from -2 arg - const EList& m12, // both mates on each line, from --12 arg - const EList& q, // qualities associated with singles - const EList& q1, // qualities associated with m1 - const EList& q2, // qualities associated with m2 +PatternComposer *PatternComposer::setupPatternComposer( + const EList &si, // singles, from argv + const EList &m1, // mate1's, from -1 arg + const EList &m2, // mate2's, from -2 arg + const EList &m12, // both mates on each line, from --12 arg + const EList &q, // qualities associated with singles + const EList &q1, // qualities associated with m1 + const EList &q2, // qualities associated with m2 #ifdef USE_SRA - const EList& sra_accs, // SRA accessions + const EList &sra_accs, // SRA accessions #endif - PatternParams& p, // read-in parameters - bool verbose) // be talkative? + PatternParams &p, // read-in parameters + bool verbose) // be talkative? { + // We save the value of PatternParams::align_paired_reads + // so that `--align-paired-reads` works as expected. + bool align_paired_reads = p.align_paired_reads; + // This serves as an indicator to the BAM pattern source + // that we intend on parsing paired reads from separate files + p.align_paired_reads = true; + + EList* a = new EList(); EList* b = new EList(); // Create list of pattern sources for paired reads appearing @@ -354,8 +362,7 @@ PatternComposer* PatternComposer::setupPatternComposer( } } #endif - - // Create list of pattern sources for paired reads + // Create list of pattern sources for paired reads for(size_t i = 0; i < m1.size(); i++) { const EList* qs = &m1; EList tmpSeq; @@ -391,7 +398,11 @@ PatternComposer* PatternComposer::setupPatternComposer( // All mates/mate files must be paired assert_eq(a->size(), b->size()); - // Create list of pattern sources for the unpaired reads + // Create list of pattern sources for the unpaired reads + p.align_paired_reads = align_paired_reads; + if (p.align_paired_reads) { + p.interleaved = true; + } for(size_t i = 0; i < si.size(); i++) { const EList* qs = &si; PatternSource* patsrc = NULL; @@ -1370,7 +1381,8 @@ std::pair BAMPatternSource::get_alignments(PerThreadReadBuf& pt, bool i = i + sizeof(l_name) + l_name + sizeof(uint32_t); } first_ = false; - } + } + while (readi < pt.max_buf_) { if (i >= alignment_batch.size()) { return make_pair(false, readi); @@ -1397,19 +1409,26 @@ std::pair BAMPatternSource::get_alignments(PerThreadReadBuf& pt, bool i += sizeof(block_size); memcpy(&flag, &alignment_batch[0] + i + offset[BAMField::flag], sizeof(flag)); if (currentlyBigEndian()) - flag = endianSwapU16(flag); - EList& readbuf = (pp_.align_paired_reads && (flag & 0x80)) != 0 ? pt.bufb_ : pt.bufa_; + flag = endianSwapU16(flag); + + if (interleaved_) { + batch_a = (flag & 0x80) != 0 ? false : true; + } + + EList &readbuf = batch_a ? pt.bufa_ : pt.bufb_; + if ((flag & 0x4) == 0) { readbuf[readi].readOrigBuf.clear(); i += block_size; continue; } - if (!pp_.align_paired_reads && ((flag & 0x40) != 0 || (flag & 0x80) != 0)) { + if (!align_paired_reads_ && (flag & 0x1) != 0) { readbuf[readi].readOrigBuf.clear(); i += block_size; continue; } - if (pp_.align_paired_reads && ((flag & 0x40) == 0 && (flag & 0x80) == 0)) { + if (align_paired_reads_ && + ((batch_a && ((flag & 0x40) == 0)) || (!batch_a && (flag & 0x80) == 0))) { readbuf[readi].readOrigBuf.clear(); i += block_size; continue; @@ -1418,7 +1437,7 @@ std::pair BAMPatternSource::get_alignments(PerThreadReadBuf& pt, bool readbuf[readi].readOrigBuf.resize(block_size); memcpy(readbuf[readi].readOrigBuf.wbuf(), &alignment_batch[0] + i, block_size); i += block_size; - readi += (pp_.align_paired_reads && + readi += (interleaved_ && pt.bufb_[readi].readOrigBuf.length() == 0) ? 0 : 1; } diff --git a/pat.h b/pat.h index 53356aac..3aab84fd 100644 --- a/pat.h +++ b/pat.h @@ -851,22 +851,16 @@ class BAMPatternSource : public CFilePatternSource { }; public: - - BAMPatternSource( - const EList& infiles, - const PatternParams& p) : - CFilePatternSource(infiles, p), - first_(true), - alignment_batch(0), - alignment_offset(0), - delta_(0), - pp_(p) - { - stream.zalloc = Z_NULL; - stream.zfree = Z_NULL; - stream.opaque = Z_NULL; - alignment_batch.reserve(1 << 16); - } + BAMPatternSource(const EList &infiles, const PatternParams &p) + : CFilePatternSource(infiles, p), first_(true), alignment_batch(0), + alignment_offset(0), delta_(0), pp_(p), interleaved_(pp_.interleaved), + align_paired_reads_(pp_.align_paired_reads) + { + stream.zalloc = Z_NULL; + stream.zfree = Z_NULL; + stream.opaque = Z_NULL; + alignment_batch.reserve(1 << 16); + } virtual void reset() { first_ = true; @@ -913,7 +907,9 @@ class BAMPatternSource : public CFilePatternSource { size_t delta_; z_stream stream; - PatternParams pp_; + PatternParams pp_; + bool interleaved_; + bool align_paired_reads_; }; /**