From cae3c397ef10599b31c39f266da6f4da4bc444e2 Mon Sep 17 00:00:00 2001 From: marekkokot Date: Fri, 8 Dec 2023 11:24:51 +0100 Subject: [PATCH] fix to detect unexpected end of gz file --- kmc_core/fastq_reader.cpp | 41 ++++++++++++++++++++++++++++++--------- kmc_core/fastq_reader.h | 5 +++-- 2 files changed, 35 insertions(+), 11 deletions(-) diff --git a/kmc_core/fastq_reader.cpp b/kmc_core/fastq_reader.cpp index 8a840ea..2cff368 100644 --- a/kmc_core/fastq_reader.cpp +++ b/kmc_core/fastq_reader.cpp @@ -979,9 +979,15 @@ void CFastqReaderDataSrc::init_stream() } //---------------------------------------------------------------------------------- -bool CFastqReaderDataSrc::pop_pack(uchar*& data, uint64& size, FilePart& file_part, CompressionType& mode) +bool CFastqReaderDataSrc::pop_pack(uchar*& data, uint64& size, FilePart& file_part, CompressionType& mode, bool& last_in_file) { - end_reached = !binary_pack_queue->pop(in_data, in_data_size, file_part, compression_type); + end_reached = !binary_pack_queue->pop(data, size, file_part, mode); + if (file_part == FilePart::End) + { + last_in_file = true; + in_progress = false; + return false; + } return !end_reached; } @@ -1005,7 +1011,7 @@ uint64 CFastqReaderDataSrc::read(uchar* buff, uint64 size, bool& last_in_file, b first_in_file = false; if (!in_progress) { - if (!pop_pack(in_data, in_data_size, file_part, compression_type)) + if (!pop_pack(in_data, in_data_size, file_part, compression_type, last_in_file)) return 0; in_progress = true; first_in_file = true; @@ -1017,15 +1023,32 @@ uint64 CFastqReaderDataSrc::read(uchar* buff, uint64 size, bool& last_in_file, b { stream.next_out = buff; stream.avail_out = (uint32)size; - int ret; + int ret = Z_OK; do { if (!stream.avail_in) { pmm_binary_file_reader->free(in_data); in_data = nullptr; - if (!pop_pack(in_data, in_data_size, file_part, compression_type)) - return 0; + if (!pop_pack(in_data, in_data_size, file_part, compression_type, last_in_file)) { + auto ret_val = size - stream.avail_out; + + if (inflateEnd(&stream) != Z_OK) { + std::ostringstream ostr; + ostr << "Some error while reading gzip file (inflateEnd) in (" << __FILE__ << ": " << __LINE__ << ")"; + CCriticalErrorHandler::Inst().HandleCriticalError(ostr.str()); + } + + assert(last_in_file); + + if (ret != Z_STREAM_END) { + std::ostringstream ostr; + ostr << "Unexpected end of gzip file"; + CCriticalErrorHandler::Inst().HandleCriticalError(ostr.str()); + } + + return ret_val; + } stream.avail_in = (uint32)in_data_size; stream.next_in = in_data; } @@ -1090,7 +1113,7 @@ uint64 CFastqReaderDataSrc::read(uchar* buff, uint64 size, bool& last_in_file, b inflateEnd(&stream); in_progress = false; //pull end - bool queue_end = !pop_pack(in_data, in_data_size, file_part, compression_type); + bool queue_end = !pop_pack(in_data, in_data_size, file_part, compression_type, last_in_file); if (!queue_end && file_part != FilePart::End && !garbage) { std::ostringstream ostr; @@ -1106,7 +1129,7 @@ uint64 CFastqReaderDataSrc::read(uchar* buff, uint64 size, bool& last_in_file, b uchar* tmp; uint64 tmpsize; CompressionType tmpcomptype; - pop_pack(tmp, tmpsize, tmpfilepart, tmpcomptype); + pop_pack(tmp, tmpsize, tmpfilepart, tmpcomptype, last_in_file); } } last_in_file = true; @@ -1134,7 +1157,7 @@ uint64 CFastqReaderDataSrc::read(uchar* buff, uint64 size, bool& last_in_file, b { pmm_binary_file_reader->free(in_data); in_data = nullptr; - pop_pack(in_data, in_data_size, file_part, compression_type); + pop_pack(in_data, in_data_size, file_part, compression_type, last_in_file); if (file_part == FilePart::End) { in_progress = false; diff --git a/kmc_core/fastq_reader.h b/kmc_core/fastq_reader.h index c6ee53f..37fd964 100644 --- a/kmc_core/fastq_reader.h +++ b/kmc_core/fastq_reader.h @@ -36,7 +36,7 @@ class CFastqReaderDataSrc uint64 in_data_size; uint64 in_data_pos; //for plain void init_stream(); - bool pop_pack(uchar*& data, uint64& size, FilePart& file_part, CompressionType& mode); + bool pop_pack(uchar*& data, uint64& size, FilePart& file_part, CompressionType& mode, bool& last_in_file); public: inline void SetQueue(CBinaryPackQueue* _binary_pack_queue, CMemoryPool *_pmm_binary_file_reader); inline bool Finished(); @@ -48,7 +48,8 @@ class CFastqReaderDataSrc pmm_binary_file_reader->free(in_data); in_data = nullptr; //clean queue - while (pop_pack(in_data, in_data_size, file_part, compression_type)) + bool last_in_file_tmp = false; + while (pop_pack(in_data, in_data_size, file_part, compression_type, last_in_file_tmp)) { if(in_data_size) pmm_binary_file_reader->free(in_data);