From 4d85d7eabed05d37cdcee8f6a3c6bb0193ba876c Mon Sep 17 00:00:00 2001 From: zzzxl1993 Date: Sun, 10 Nov 2024 20:54:07 +0800 Subject: [PATCH] [opt](inverted index) add performance profiling for remote io access in inverted index --- be/src/clucene | 2 +- be/src/index-tools/index_tool.cpp | 2 +- be/src/olap/compaction.cpp | 9 +- .../olap/rowset/segment_v2/column_reader.cpp | 2 +- .../query/conjunction_query.cpp | 5 +- .../inverted_index/query/conjunction_query.h | 3 +- .../query/disjunction_query.cpp | 6 +- .../inverted_index/query/disjunction_query.h | 3 +- .../query/phrase_edge_query.cpp | 2 +- .../inverted_index/query/phrase_edge_query.h | 2 +- .../query/phrase_prefix_query.cpp | 3 +- .../query/phrase_prefix_query.h | 2 +- .../inverted_index/query/phrase_query.cpp | 8 +- .../inverted_index/query/phrase_query.h | 3 +- .../segment_v2/inverted_index/query/query.h | 1 + .../inverted_index/query/regexp_query.cpp | 4 +- .../inverted_index/query/regexp_query.h | 2 +- .../inverted_index_compound_reader.cpp | 15 +++ .../segment_v2/inverted_index_file_reader.cpp | 5 +- .../segment_v2/inverted_index_file_reader.h | 4 +- .../inverted_index_fs_directory.cpp | 37 +++++- .../segment_v2/inverted_index_fs_directory.h | 7 +- .../segment_v2/inverted_index_reader.cpp | 94 ++++++++------- .../rowset/segment_v2/inverted_index_reader.h | 83 +++++++------ .../compaction/index_compaction_test.cpp | 4 +- .../index_compaction_with_deleted_term.cpp | 4 +- .../test_index_io_context.out | 73 +++++++++++ .../test_index_io_context.groovy | 113 ++++++++++++++++++ 28 files changed, 373 insertions(+), 125 deletions(-) create mode 100644 regression-test/data/fault_injection_p0/test_index_io_context.out create mode 100644 regression-test/suites/fault_injection_p0/test_index_io_context.groovy diff --git a/be/src/clucene b/be/src/clucene index 7cf6cf410d41d95..48fa9cc4ec32b40 160000 --- a/be/src/clucene +++ b/be/src/clucene @@ -1 +1 @@ -Subproject commit 7cf6cf410d41d95456edba263cc55b7b6f5ab027 +Subproject commit 48fa9cc4ec32b40bf3b02338d0a1b2cdbc6408cf diff --git a/be/src/index-tools/index_tool.cpp b/be/src/index-tools/index_tool.cpp index adea2cd84c95f63..ca0575dc545459d 100644 --- a/be/src/index-tools/index_tool.cpp +++ b/be/src/index-tools/index_tool.cpp @@ -170,7 +170,7 @@ void search(lucene::store::Directory* dir, std::string& field, std::string& toke std::vector terms = split(token, '|'); doris::TQueryOptions queryOptions; - ConjunctionQuery conjunct_query(s, queryOptions); + ConjunctionQuery conjunct_query(s, queryOptions, nullptr); conjunct_query.add(field_ws, terms); conjunct_query.search(result); diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index a40e28669e90cc7..8563c98367173ed 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -613,11 +613,9 @@ Status Compaction::do_inverted_index_compaction() { fs, std::string {InvertedIndexDescriptor::get_index_file_path_prefix(seg_path)}, _cur_tablet_schema->get_inverted_index_storage_format(), rowset->rowset_meta()->inverted_index_file_info(seg_id)); - bool open_idx_file_cache = false; RETURN_NOT_OK_STATUS_WITH_WARN( - inverted_index_file_reader->init(config::inverted_index_read_buffer_size, - open_idx_file_cache), - "inverted_index_file_reader init failed"); + inverted_index_file_reader->init(config::inverted_index_read_buffer_size), + "inverted_index_file_reader init faiqled"); inverted_index_file_readers[m.second] = std::move(inverted_index_file_reader); } @@ -783,9 +781,8 @@ void Compaction::construct_index_compaction_columns(RowsetWriterContext& ctx) { InvertedIndexDescriptor::get_index_file_path_prefix(*seg_path)}, _cur_tablet_schema->get_inverted_index_storage_format(), rowset->rowset_meta()->inverted_index_file_info(i)); - bool open_idx_file_cache = false; auto st = inverted_index_file_reader->init( - config::inverted_index_read_buffer_size, open_idx_file_cache); + config::inverted_index_read_buffer_size); index_file_path = inverted_index_file_reader->get_index_file_path(index_meta); DBUG_EXECUTE_IF( "Compaction::construct_skip_inverted_index_index_file_reader_init_" diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index aad3725d5a3f6e1..6e3d64628e8a443 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -344,7 +344,7 @@ Status ColumnReader::new_inverted_index_iterator( { std::shared_lock rlock(_load_index_lock); if (_inverted_index) { - RETURN_IF_ERROR(_inverted_index->new_iterator(read_options.stats, + RETURN_IF_ERROR(_inverted_index->new_iterator(read_options.io_ctx, read_options.stats, read_options.runtime_state, iterator)); } } diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp index fb2479517166fc1..6e9d61db7fddb42 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.cpp @@ -20,8 +20,9 @@ namespace doris::segment_v2 { ConjunctionQuery::ConjunctionQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options) + const TQueryOptions& query_options, const io::IOContext* io_ctx) : _searcher(searcher), + _io_ctx(io_ctx), _index_version(_searcher->getReader()->getIndexVersion()), _conjunction_ratio(query_options.inverted_index_conjunction_opt_threshold) {} @@ -48,7 +49,7 @@ void ConjunctionQuery::add(const std::wstring& field_name, const std::vectorgetReader()->termDocs(t); + TermDocs* term_doc = _searcher->getReader()->termDocs(t, _io_ctx); _term_docs.push_back(term_doc); iterators.emplace_back(term_doc); } diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.h index 2571392d5294e94..b9bfee2bfb1f7ae 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/conjunction_query.h @@ -27,7 +27,7 @@ namespace doris::segment_v2 { class ConjunctionQuery : public Query { public: ConjunctionQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options); + const TQueryOptions& query_options, const io::IOContext* io_ctx); ~ConjunctionQuery() override; void add(const std::wstring& field_name, const std::vector& terms) override; @@ -41,6 +41,7 @@ class ConjunctionQuery : public Query { public: std::shared_ptr _searcher; + const io::IOContext* _io_ctx = nullptr; IndexVersion _index_version = IndexVersion::kV0; int32_t _conjunction_ratio = 1000; diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.cpp index 650a88c064611ce..852357073d3b1d0 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.cpp @@ -20,8 +20,8 @@ namespace doris::segment_v2 { DisjunctionQuery::DisjunctionQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options) - : _searcher(searcher) {} + const TQueryOptions& query_options, const io::IOContext* io_ctx) + : _searcher(searcher), _io_ctx(io_ctx) {} void DisjunctionQuery::add(const std::wstring& field_name, const std::vector& terms) { if (terms.empty()) { @@ -36,7 +36,7 @@ void DisjunctionQuery::search(roaring::Roaring& roaring) { auto func = [this, &roaring](const std::string& term, bool first) { std::wstring ws_term = StringUtil::string_to_wstring(term); auto* t = _CLNEW Term(_field_name.c_str(), ws_term.c_str()); - auto* term_doc = _searcher->getReader()->termDocs(t); + auto* term_doc = _searcher->getReader()->termDocs(t, _io_ctx); TermIterator iterator(term_doc); DocRange doc_range; diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h index 357831461571c7a..8d0559ee4b0c982 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/disjunction_query.h @@ -27,7 +27,7 @@ namespace doris::segment_v2 { class DisjunctionQuery : public Query { public: DisjunctionQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options); + const TQueryOptions& query_options, const io::IOContext* io_ctx); ~DisjunctionQuery() override = default; void add(const std::wstring& field_name, const std::vector& terms) override; @@ -35,6 +35,7 @@ class DisjunctionQuery : public Query { private: std::shared_ptr _searcher; + const io::IOContext* _io_ctx = nullptr; std::wstring _field_name; std::vector _terms; diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.cpp index ec1b5bdd9e4d35d..f82433826e95815 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.cpp @@ -30,7 +30,7 @@ namespace doris::segment_v2 { PhraseEdgeQuery::PhraseEdgeQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options) + const TQueryOptions& query_options, const io::IOContext* io_ctx) : _searcher(searcher), _query(std::make_unique()), _max_expansions(query_options.inverted_index_max_expansions) {} diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.h index 5daf382e0d08fa7..9eb3bd57c4a9163 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_edge_query.h @@ -31,7 +31,7 @@ namespace doris::segment_v2 { class PhraseEdgeQuery : public Query { public: PhraseEdgeQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options); + const TQueryOptions& query_options, const io::IOContext* io_ctx); ~PhraseEdgeQuery() override = default; void add(const std::wstring& field_name, const std::vector& terms) override; diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp index 407e515dc9212f1..88bb3c1171fa307 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.cpp @@ -23,7 +23,8 @@ namespace doris::segment_v2 { PhrasePrefixQuery::PhrasePrefixQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options) + const TQueryOptions& query_options, + const io::IOContext* io_ctx) : _searcher(searcher), _query(std::make_unique()), _max_expansions(query_options.inverted_index_max_expansions) {} diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h index e565c0409cf4cd1..5cac597951eac78 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_prefix_query.h @@ -31,7 +31,7 @@ namespace doris::segment_v2 { class PhrasePrefixQuery : public Query { public: PhrasePrefixQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options); + const TQueryOptions& query_options, const io::IOContext* io_ctx); ~PhrasePrefixQuery() override = default; void add(const std::wstring& field_name, const std::vector& terms) override; diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.cpp index 9a3ecc68f89fa0c..38e60b0f089dc0e 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.cpp @@ -123,8 +123,8 @@ bool OrderedSloppyPhraseMatcher::stretch_to_order(PostingsAndPosition* prev_post } PhraseQuery::PhraseQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options) - : _searcher(searcher) {} + const TQueryOptions& query_options, const io::IOContext* io_ctx) + : _searcher(searcher), _io_ctx(io_ctx) {} PhraseQuery::~PhraseQuery() { for (auto& term_doc : _term_docs) { @@ -173,7 +173,7 @@ void PhraseQuery::add(const std::wstring& field_name, const std::vectorgetReader()->termDocs(t); + TermDocs* term_doc = _searcher->getReader()->termDocs(t, _io_ctx); _term_docs.push_back(term_doc); _lead1 = TermIterator(term_doc); return; @@ -185,7 +185,7 @@ void PhraseQuery::add(const std::wstring& field_name, const std::vectorgetReader()->termPositions(t); + TermPositions* term_pos = _searcher->getReader()->termPositions(t, _io_ctx); _term_docs.push_back(term_pos); if (is_save_iter) { iterators.emplace_back(term_pos); diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.h index 35a479ff7f9781d..a2c3a7ae91afcc7 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/phrase_query.h @@ -87,7 +87,7 @@ using Matcher = std::variant; class PhraseQuery : public Query { public: PhraseQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options); + const TQueryOptions& query_options, const io::IOContext* io_ctx); ~PhraseQuery() override; void add(const InvertedIndexQueryInfo& query_info) override; @@ -112,6 +112,7 @@ class PhraseQuery : public Query { private: std::shared_ptr _searcher; + const io::IOContext* _io_ctx = nullptr; TermIterator _lead1; TermIterator _lead2; diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/query.h index c295765ec63478e..c0eac69deaeaf37 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/query.h @@ -27,6 +27,7 @@ #include #include "common/status.h" +#include "io/io_common.h" #include "roaring/roaring.hh" CL_NS_USE(index) diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp index 007da8289dcdb07..69de4b7818b870c 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.cpp @@ -25,10 +25,10 @@ namespace doris::segment_v2 { RegexpQuery::RegexpQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options) + const TQueryOptions& query_options, const io::IOContext* io_ctx) : _searcher(searcher), _max_expansions(query_options.inverted_index_max_expansions), - _query(searcher, query_options) {} + _query(searcher, query_options, io_ctx) {} void RegexpQuery::add(const std::wstring& field_name, const std::vector& patterns) { if (patterns.size() != 1) { diff --git a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h index 336b2d0b6a671da..650ad2bf10b0029 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h +++ b/be/src/olap/rowset/segment_v2/inverted_index/query/regexp_query.h @@ -28,7 +28,7 @@ namespace doris::segment_v2 { class RegexpQuery : public Query { public: RegexpQuery(const std::shared_ptr& searcher, - const TQueryOptions& query_options); + const TQueryOptions& query_options, const io::IOContext* io_ctx); ~RegexpQuery() override = default; void add(const std::wstring& field_name, const std::vector& patterns) override; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp index 7613df112ed9aad..60006ea84550a23 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_compound_reader.cpp @@ -59,6 +59,8 @@ class CSIndexInput : public lucene::store::BufferedIndexInput { CL_NS(store)::IndexInput* base; int64_t fileOffset; int64_t _length; + const io::IOContext* _io_ctx = nullptr; + bool _is_index_file = false; // Indicates if the file is a TII file protected: void readInternal(uint8_t* /*b*/, const int32_t /*len*/) override; @@ -75,6 +77,8 @@ class CSIndexInput : public lucene::store::BufferedIndexInput { const char* getDirectoryType() const override { return DorisCompoundReader::getClassName(); } const char* getObjectName() const override { return getClassName(); } static const char* getClassName() { return "CSIndexInput"; } + void setIoContext(const void* io_ctx) override; + void setIndexFile(bool isIndexFile) override; }; CSIndexInput::CSIndexInput(CL_NS(store)::IndexInput* base, const int64_t fileOffset, @@ -92,9 +96,12 @@ void CSIndexInput::readInternal(uint8_t* b, const int32_t len) { if (start + len > _length) { _CLTHROWA(CL_ERR_IO, "read past EOF"); } + base->setIoContext(_io_ctx); + base->setIndexFile(_is_index_file); base->seek(fileOffset + start); bool read_from_buffer = true; base->readBytes(b, len, read_from_buffer); + base->setIoContext(nullptr); } CSIndexInput::~CSIndexInput() = default; @@ -111,6 +118,14 @@ CSIndexInput::CSIndexInput(const CSIndexInput& clone) : BufferedIndexInput(clone void CSIndexInput::close() {} +void CSIndexInput::setIoContext(const void* io_ctx) { + _io_ctx = static_cast(io_ctx); +} + +void CSIndexInput::setIndexFile(bool isIndexFile) { + _is_index_file = isIndexFile; +} + DorisCompoundReader::DorisCompoundReader(CL_NS(store)::IndexInput* stream, int32_t read_buffer_size) : _ram_dir(new lucene::store::RAMDirectory()), _stream(stream), diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp index e0c75922c98bb20..113833d560fd060 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp @@ -27,10 +27,9 @@ namespace doris::segment_v2 { -Status InvertedIndexFileReader::init(int32_t read_buffer_size, bool open_idx_file_cache) { +Status InvertedIndexFileReader::init(int32_t read_buffer_size) { if (!_inited) { _read_buffer_size = read_buffer_size; - _open_idx_file_cache = open_idx_file_cache; if (_storage_format == InvertedIndexStorageFormatPB::V2) { auto st = _init_from_v2(read_buffer_size); if (!st.ok()) { @@ -76,7 +75,6 @@ Status InvertedIndexFileReader::_init_from_v2(int32_t read_buffer_size) { "CLuceneError occur when open idx file {}, error msg: {}", index_file_full_path, err.what()); } - index_input->setIdxFileCache(_open_idx_file_cache); _stream = std::unique_ptr(index_input); // 3. read file @@ -198,7 +196,6 @@ Result> InvertedIndexFileReader::_open( } // 3. read file in DorisCompoundReader - index_input->setIdxFileCache(_open_idx_file_cache); compound_reader = std::make_unique(index_input, _read_buffer_size); } catch (CLuceneError& err) { return ResultError(Status::Error( diff --git a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h index 8bc28b1882f9d85..3b7161c7643cefe 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_file_reader.h @@ -58,8 +58,7 @@ class InvertedIndexFileReader { _storage_format(storage_format), _idx_file_info(idx_file_info) {} - Status init(int32_t read_buffer_size = config::inverted_index_read_buffer_size, - bool open_idx_file_cache = false); + Status init(int32_t read_buffer_size = config::inverted_index_read_buffer_size); Result> open(const TabletIndex* index_meta) const; void debug_file_entries(); std::string get_index_file_cache_key(const TabletIndex* index_meta) const; @@ -80,7 +79,6 @@ class InvertedIndexFileReader { const io::FileSystemSPtr _fs; std::string _index_path_prefix; int32_t _read_buffer_size = -1; - bool _open_idx_file_cache = false; InvertedIndexStorageFormatPB _storage_format; mutable std::shared_mutex _mutex; // Use mutable for const read operations bool _inited = false; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp index ded71c8a6cc73e6..29caf29936dddf0 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp @@ -219,6 +219,27 @@ void DorisFSDirectory::FSIndexInput::close() { }*/ } +void DorisFSDirectory::FSIndexInput::setIoContext(const void* io_ctx) { + if (io_ctx) { + const auto& ctx = static_cast(io_ctx); + _io_ctx.reader_type = ctx->reader_type; + _io_ctx.query_id = ctx->query_id; + _io_ctx.file_cache_stats = ctx->file_cache_stats; + } else { + _io_ctx.reader_type = ReaderType::UNKNOWN; + _io_ctx.query_id = nullptr; + _io_ctx.file_cache_stats = nullptr; + } +} + +const void* DorisFSDirectory::FSIndexInput::getIoContext() { + return &_io_ctx; +} + +void DorisFSDirectory::FSIndexInput::setIndexFile(bool isIndexFile) { + _io_ctx.is_index_data = isIndexFile; +} + void DorisFSDirectory::FSIndexInput::seekInternal(const int64_t position) { CND_PRECONDITION(position >= 0 && position < _handle->_length, "Seeking out of range"); _pos = position; @@ -239,9 +260,23 @@ void DorisFSDirectory::FSIndexInput::readInternal(uint8_t* b, const int32_t len) _handle->_fpos = _pos; } + DBUG_EXECUTE_IF( + "DorisFSDirectory::FSIndexInput::readInternal", ({ + static thread_local std::unordered_map + thread_file_cache_map; + auto it = thread_file_cache_map.find(_io_ctx.query_id); + if (it != thread_file_cache_map.end()) { + if (_io_ctx.file_cache_stats != it->second) { + _CLTHROWA(CL_ERR_IO, "File cache statistics mismatch"); + } + } else { + thread_file_cache_map[_io_ctx.query_id] = _io_ctx.file_cache_stats; + } + })); + Slice result {b, (size_t)len}; size_t bytes_read = 0; - auto st = _handle->_reader->read_at(_pos, result, &bytes_read, &_io_ctx); + Status st = _handle->_reader->read_at(_pos, result, &bytes_read, &_io_ctx); DBUG_EXECUTE_IF("DorisFSDirectory::FSIndexInput::readInternal_reader_read_at_error", { st = Status::InternalError( "debug point: DorisFSDirectory::FSIndexInput::readInternal_reader_read_at_error"); diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h index 59ae6db1a9630d3..fd92873c9707bfe 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h @@ -180,8 +180,6 @@ class DorisFSDirectory::FSIndexInput : public lucene::store::BufferedIndexInput : BufferedIndexInput(buffer_size) { this->_pos = 0; this->_handle = std::move(handle); - this->_io_ctx.reader_type = ReaderType::READER_QUERY; - this->_io_ctx.is_index_data = false; } protected: @@ -199,8 +197,9 @@ class DorisFSDirectory::FSIndexInput : public lucene::store::BufferedIndexInput const char* getDirectoryType() const override { return DorisFSDirectory::getClassName(); } const char* getObjectName() const override { return getClassName(); } static const char* getClassName() { return "FSIndexInput"; } - - void setIdxFileCache(bool index) override { _io_ctx.is_index_data = index; } + void setIoContext(const void* io_ctx) override; + const void* getIoContext() override; + void setIndexFile(bool isIndexFile) override; std::mutex _this_lock; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp index b7cfe7dfaffb318..889fee1fc87ef9f 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.cpp @@ -102,7 +102,8 @@ std::string InvertedIndexReader::get_index_file_path() { return _inverted_index_file_reader->get_index_file_path(&_index_meta); } -Status InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats, +Status InvertedIndexReader::read_null_bitmap(const io::IOContext* io_ctx, + OlapReaderStatistics* stats, InvertedIndexQueryCacheHandle* cache_handle, lucene::store::Directory* dir) { SCOPED_RAW_TIMER(&stats->inverted_index_query_null_bitmap_timer); @@ -120,9 +121,7 @@ Status InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats, if (!dir) { // TODO: ugly code here, try to refact. - bool open_idx_file_cache = true; - auto st = _inverted_index_file_reader->init(config::inverted_index_read_buffer_size, - open_idx_file_cache); + auto st = _inverted_index_file_reader->init(config::inverted_index_read_buffer_size); if (!st.ok()) { LOG(WARNING) << st; return st; @@ -138,6 +137,7 @@ Status InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats, InvertedIndexDescriptor::get_temporary_null_bitmap_file_name(); if (dir->fileExists(null_bitmap_file_name)) { null_bitmap_in = dir->openInput(null_bitmap_file_name); + null_bitmap_in->setIoContext(io_ctx); size_t null_bitmap_size = null_bitmap_in->length(); faststring buf; buf.resize(null_bitmap_size); @@ -165,7 +165,8 @@ Status InvertedIndexReader::read_null_bitmap(OlapReaderStatistics* stats, } Status InvertedIndexReader::handle_searcher_cache( - InvertedIndexCacheHandle* inverted_index_cache_handle, OlapReaderStatistics* stats) { + InvertedIndexCacheHandle* inverted_index_cache_handle, const io::IOContext* io_ctx, + OlapReaderStatistics* stats) { auto index_file_key = _inverted_index_file_reader->get_index_file_cache_key(&_index_meta); InvertedIndexSearcherCache::CacheKey searcher_cache_key(index_file_key); if (InvertedIndexSearcherCache::instance()->lookup(searcher_cache_key, @@ -179,9 +180,7 @@ Status InvertedIndexReader::handle_searcher_cache( SCOPED_RAW_TIMER(&stats->inverted_index_searcher_open_timer); IndexSearcherPtr searcher; - bool open_idx_file_cache = true; - auto st = _inverted_index_file_reader->init(config::inverted_index_read_buffer_size, - open_idx_file_cache); + auto st = _inverted_index_file_reader->init(config::inverted_index_read_buffer_size); if (!st.ok()) { LOG(WARNING) << st; return st; @@ -191,7 +190,7 @@ Status InvertedIndexReader::handle_searcher_cache( // to avoid open directory additionally for null_bitmap // TODO: handle null bitmap procedure in new format. InvertedIndexQueryCacheHandle null_bitmap_cache_handle; - static_cast(read_null_bitmap(stats, &null_bitmap_cache_handle, dir.get())); + static_cast(read_null_bitmap(io_ctx, stats, &null_bitmap_cache_handle, dir.get())); RETURN_IF_ERROR(create_index_searcher(dir.release(), &searcher, mem_tracker.get(), type())); auto* cache_value = new InvertedIndexSearcherCache::CacheValue( std::move(searcher), mem_tracker->consumption(), UnixMillis()); @@ -211,22 +210,21 @@ Status InvertedIndexReader::create_index_searcher(lucene::store::Directory* dir, auto searcher_result = DORIS_TRY(index_searcher_builder->get_index_searcher(dir)); *searcher = searcher_result; - if (std::string(dir->getObjectName()) == "DorisCompoundReader") { - static_cast(dir)->getDorisIndexInput()->setIdxFileCache(false); - } + // NOTE: before mem_tracker hook becomes active, we caculate reader memory size by hand. mem_tracker->consume(index_searcher_builder->get_reader_size()); return Status::OK(); }; Status InvertedIndexReader::match_index_search( - OlapReaderStatistics* stats, RuntimeState* runtime_state, InvertedIndexQueryType query_type, - const InvertedIndexQueryInfo& query_info, const FulltextIndexSearcherPtr& index_searcher, + const io::IOContext* io_ctx, OlapReaderStatistics* stats, RuntimeState* runtime_state, + InvertedIndexQueryType query_type, const InvertedIndexQueryInfo& query_info, + const FulltextIndexSearcherPtr& index_searcher, const std::shared_ptr& term_match_bitmap) { TQueryOptions queryOptions = runtime_state->query_options(); try { SCOPED_RAW_TIMER(&stats->inverted_index_searcher_search_timer); - auto query = QueryFactory::create(query_type, index_searcher, queryOptions); + auto query = QueryFactory::create(query_type, index_searcher, queryOptions, io_ctx); if (!query) { return Status::Error( "query type " + query_type_to_string(query_type) + ", query is nullptr"); @@ -240,15 +238,17 @@ Status InvertedIndexReader::match_index_search( return Status::OK(); } -Status FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState* runtime_state, +Status FullTextIndexReader::new_iterator(const io::IOContext& io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, std::unique_ptr* iterator) { - *iterator = InvertedIndexIterator::create_unique(stats, runtime_state, shared_from_this()); + *iterator = + InvertedIndexIterator::create_unique(io_ctx, stats, runtime_state, shared_from_this()); return Status::OK(); } -Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, - const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, +Status FullTextIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, const std::string& column_name, + const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) { SCOPED_RAW_TIMER(&stats->inverted_index_query_timer); @@ -314,12 +314,12 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run FulltextIndexSearcherPtr* searcher_ptr = nullptr; InvertedIndexCacheHandle inverted_index_cache_handle; - RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, stats)); + RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats)); auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); searcher_ptr = std::get_if(&searcher_variant); if (searcher_ptr != nullptr) { term_match_bitmap = std::make_shared(); - RETURN_IF_ERROR(match_index_search(stats, runtime_state, query_type, query_info, + RETURN_IF_ERROR(match_index_search(io_ctx, stats, runtime_state, query_type, query_info, *searcher_ptr, term_match_bitmap)); term_match_bitmap->runOptimize(); cache->insert(cache_key, term_match_bitmap, &cache_handler); @@ -337,13 +337,15 @@ InvertedIndexReaderType FullTextIndexReader::type() { } Status StringTypeInvertedIndexReader::new_iterator( - OlapReaderStatistics* stats, RuntimeState* runtime_state, + const io::IOContext& io_ctx, OlapReaderStatistics* stats, RuntimeState* runtime_state, std::unique_ptr* iterator) { - *iterator = InvertedIndexIterator::create_unique(stats, runtime_state, shared_from_this()); + *iterator = + InvertedIndexIterator::create_unique(io_ctx, stats, runtime_state, shared_from_this()); return Status::OK(); } -Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, +Status StringTypeInvertedIndexReader::query(const io::IOContext* io_ctx, + OlapReaderStatistics* stats, RuntimeState* runtime_state, const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, @@ -387,7 +389,7 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, auto result = std::make_shared(); FulltextIndexSearcherPtr* searcher_ptr = nullptr; InvertedIndexCacheHandle inverted_index_cache_handle; - RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, stats)); + RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats)); auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); searcher_ptr = std::get_if(&searcher_variant); if (searcher_ptr != nullptr) { @@ -396,7 +398,7 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, case InvertedIndexQueryType::MATCH_ANY_QUERY: case InvertedIndexQueryType::MATCH_ALL_QUERY: case InvertedIndexQueryType::EQUAL_QUERY: { - RETURN_IF_ERROR(match_index_search(stats, runtime_state, + RETURN_IF_ERROR(match_index_search(io_ctx, stats, runtime_state, InvertedIndexQueryType::MATCH_ANY_QUERY, query_info, *searcher_ptr, result)); break; @@ -404,8 +406,8 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats, case InvertedIndexQueryType::MATCH_PHRASE_QUERY: case InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY: case InvertedIndexQueryType::MATCH_REGEXP_QUERY: { - RETURN_IF_ERROR(match_index_search(stats, runtime_state, query_type, query_info, - *searcher_ptr, result)); + RETURN_IF_ERROR(match_index_search(io_ctx, stats, runtime_state, query_type, + query_info, *searcher_ptr, result)); break; } case InvertedIndexQueryType::LESS_THAN_QUERY: @@ -470,9 +472,11 @@ InvertedIndexReaderType StringTypeInvertedIndexReader::type() { return InvertedIndexReaderType::STRING_TYPE; } -Status BkdIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeState* runtime_state, +Status BkdIndexReader::new_iterator(const io::IOContext& io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, std::unique_ptr* iterator) { - *iterator = InvertedIndexIterator::create_unique(stats, runtime_state, shared_from_this()); + *iterator = + InvertedIndexIterator::create_unique(io_ctx, stats, runtime_state, shared_from_this()); return Status::OK(); } @@ -600,12 +604,12 @@ Status BkdIndexReader::invoke_bkd_query(const void* query_value, InvertedIndexQu return Status::OK(); } -Status BkdIndexReader::try_query(OlapReaderStatistics* stats, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, - uint32_t* count) { +Status BkdIndexReader::try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, + const std::string& column_name, const void* query_value, + InvertedIndexQueryType query_type, uint32_t* count) { try { std::shared_ptr r; - auto st = get_bkd_reader(r, stats); + auto st = get_bkd_reader(r, io_ctx, stats); if (!st.ok()) { LOG(WARNING) << "get bkd reader for " << _inverted_index_file_reader->get_index_file_path(&_index_meta) @@ -637,15 +641,15 @@ Status BkdIndexReader::try_query(OlapReaderStatistics* stats, const std::string& return Status::OK(); } -Status BkdIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state, - const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, +Status BkdIndexReader::query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, const std::string& column_name, + const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) { SCOPED_RAW_TIMER(&stats->inverted_index_query_timer); try { std::shared_ptr r; - auto st = get_bkd_reader(r, stats); + auto st = get_bkd_reader(r, io_ctx, stats); if (!st.ok()) { LOG(WARNING) << "get bkd reader for " << _inverted_index_file_reader->get_index_file_path(&_index_meta) @@ -681,11 +685,11 @@ Status BkdIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_ } } -Status BkdIndexReader::get_bkd_reader(BKDIndexSearcherPtr& bkd_reader, +Status BkdIndexReader::get_bkd_reader(BKDIndexSearcherPtr& bkd_reader, const io::IOContext* io_ctx, OlapReaderStatistics* stats) { BKDIndexSearcherPtr* bkd_searcher = nullptr; InvertedIndexCacheHandle inverted_index_cache_handle; - RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, stats)); + RETURN_IF_ERROR(handle_searcher_cache(&inverted_index_cache_handle, io_ctx, stats)); auto searcher_variant = inverted_index_cache_handle.get_index_searcher(); bkd_searcher = std::get_if(&searcher_variant); if (bkd_searcher) { @@ -1115,8 +1119,8 @@ Status InvertedIndexIterator::read_from_inverted_index( } } - RETURN_IF_ERROR( - _reader->query(_stats, _runtime_state, column_name, query_value, query_type, bit_map)); + RETURN_IF_ERROR(_reader->query(&_io_ctx, _stats, _runtime_state, column_name, query_value, + query_type, bit_map)); return Status::OK(); } @@ -1130,7 +1134,8 @@ Status InvertedIndexIterator::try_read_from_inverted_index(const std::string& co query_type == InvertedIndexQueryType::LESS_EQUAL_QUERY || query_type == InvertedIndexQueryType::LESS_THAN_QUERY || query_type == InvertedIndexQueryType::EQUAL_QUERY) { - RETURN_IF_ERROR(_reader->try_query(_stats, column_name, query_value, query_type, count)); + RETURN_IF_ERROR( + _reader->try_query(&_io_ctx, _stats, column_name, query_value, query_type, count)); } return Status::OK(); } @@ -1148,4 +1153,5 @@ template class InvertedIndexVisitor; template class InvertedIndexVisitor; template class InvertedIndexVisitor; template class InvertedIndexVisitor; + } // namespace doris::segment_v2 diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index d3a0ff3cf118ba7..a14456032866191 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -182,17 +182,18 @@ class InvertedIndexReader : public std::enable_shared_from_this* iterator) = 0; - virtual Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, - const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, + virtual Status query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, const std::string& column_name, + const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) = 0; - virtual Status try_query(OlapReaderStatistics* stats, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, - uint32_t* count) = 0; + virtual Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, + const std::string& column_name, const void* query_value, + InvertedIndexQueryType query_type, uint32_t* count) = 0; - Status read_null_bitmap(OlapReaderStatistics* stats, + Status read_null_bitmap(const io::IOContext* io_ctx, OlapReaderStatistics* stats, InvertedIndexQueryCacheHandle* cache_handle, lucene::store::Directory* dir = nullptr); @@ -223,15 +224,15 @@ class InvertedIndexReader : public std::enable_shared_from_this& term_match_bitmap); @@ -253,15 +254,16 @@ class FullTextIndexReader : public InvertedIndexReader { : InvertedIndexReader(index_meta, inverted_index_file_reader) {} ~FullTextIndexReader() override = default; - Status new_iterator(OlapReaderStatistics* stats, RuntimeState* runtime_state, + Status new_iterator(const io::IOContext& io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, std::unique_ptr* iterator) override; - Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, - const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, + Status query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, const std::string& column_name, + const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; - Status try_query(OlapReaderStatistics* stats, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, - uint32_t* count) override { + Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, + const std::string& column_name, const void* query_value, + InvertedIndexQueryType query_type, uint32_t* count) override { return Status::Error( "FullTextIndexReader not support try_query"); } @@ -279,15 +281,16 @@ class StringTypeInvertedIndexReader : public InvertedIndexReader { : InvertedIndexReader(index_meta, inverted_index_file_reader) {} ~StringTypeInvertedIndexReader() override = default; - Status new_iterator(OlapReaderStatistics* stats, RuntimeState* runtime_state, + Status new_iterator(const io::IOContext& io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, std::unique_ptr* iterator) override; - Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, - const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, + Status query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, const std::string& column_name, + const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; - Status try_query(OlapReaderStatistics* stats, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, - uint32_t* count) override { + Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, + const std::string& column_name, const void* query_value, + InvertedIndexQueryType query_type, uint32_t* count) override { return Status::Error( "StringTypeInvertedIndexReader not support try_query"); } @@ -338,16 +341,17 @@ class BkdIndexReader : public InvertedIndexReader { : InvertedIndexReader(index_meta, inverted_index_file_reader) {} ~BkdIndexReader() override = default; - Status new_iterator(OlapReaderStatistics* stats, RuntimeState* runtime_state, + Status new_iterator(const io::IOContext& io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, std::unique_ptr* iterator) override; - Status query(OlapReaderStatistics* stats, RuntimeState* runtime_state, - const std::string& column_name, const void* query_value, - InvertedIndexQueryType query_type, + Status query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, const std::string& column_name, + const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr& bit_map) override; - Status try_query(OlapReaderStatistics* stats, const std::string& column_name, - const void* query_value, InvertedIndexQueryType query_type, - uint32_t* count) override; + Status try_query(const io::IOContext* io_ctx, OlapReaderStatistics* stats, + const std::string& column_name, const void* query_value, + InvertedIndexQueryType query_type, uint32_t* count) override; Status invoke_bkd_try_query(const void* query_value, InvertedIndexQueryType query_type, std::shared_ptr r, uint32_t* count); Status invoke_bkd_query(const void* query_value, InvertedIndexQueryType query_type, @@ -359,7 +363,8 @@ class BkdIndexReader : public InvertedIndexReader { InvertedIndexVisitor* visitor); InvertedIndexReaderType type() override; - Status get_bkd_reader(BKDIndexSearcherPtr& reader, OlapReaderStatistics* stats); + Status get_bkd_reader(BKDIndexSearcherPtr& reader, const io::IOContext* io_ctx, + OlapReaderStatistics* stats); private: const TypeInfo* _type_info {}; @@ -447,9 +452,12 @@ class InvertedIndexIterator { ENABLE_FACTORY_CREATOR(InvertedIndexIterator); public: - InvertedIndexIterator(OlapReaderStatistics* stats, RuntimeState* runtime_state, - std::shared_ptr reader) - : _stats(stats), _runtime_state(runtime_state), _reader(std::move(reader)) {} + InvertedIndexIterator(const io::IOContext& io_ctx, OlapReaderStatistics* stats, + RuntimeState* runtime_state, std::shared_ptr reader) + : _io_ctx(io_ctx), + _stats(stats), + _runtime_state(runtime_state), + _reader(std::move(reader)) {} Status read_from_inverted_index(const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type, uint32_t segment_num_rows, @@ -460,7 +468,7 @@ class InvertedIndexIterator { Status read_null_bitmap(InvertedIndexQueryCacheHandle* cache_handle, lucene::store::Directory* dir = nullptr) { - return _reader->read_null_bitmap(_stats, cache_handle, dir); + return _reader->read_null_bitmap(&_io_ctx, _stats, cache_handle, dir); } [[nodiscard]] InvertedIndexReaderType get_inverted_index_reader_type() const; @@ -470,6 +478,7 @@ class InvertedIndexIterator { const InvertedIndexReaderPtr& reader() { return _reader; } private: + io::IOContext _io_ctx; OlapReaderStatistics* _stats = nullptr; RuntimeState* _runtime_state = nullptr; std::shared_ptr _reader; diff --git a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp index aed83201a6349bb..0f1b27fd4fa11f7 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_test.cpp @@ -223,7 +223,7 @@ bool query_string(const TabletIndex* index, for (int i = 0; i < query_data.size(); i++) { TQueryOptions queryOptions; auto query = QueryFactory::create(InvertedIndexQueryType::EQUAL_QUERY, *string_searcher, - queryOptions); + queryOptions, nullptr); EXPECT_TRUE(query != nullptr); InvertedIndexQueryInfo query_info; query_info.field_name = column_name_ws; @@ -253,7 +253,7 @@ bool query_fulltext(const TabletIndex* index, for (int i = 0; i < query_data.size(); i++) { TQueryOptions queryOptions; auto query = QueryFactory::create(InvertedIndexQueryType::MATCH_ANY_QUERY, *string_searcher, - queryOptions); + queryOptions, nullptr); EXPECT_TRUE(query != nullptr); InvertedIndexQueryInfo query_info; query_info.field_name = column_name_ws; diff --git a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp index 8b5d403fca4ba5c..a46f5f210dfc15e 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index/compaction/index_compaction_with_deleted_term.cpp @@ -124,7 +124,7 @@ static bool query_string(const TabletIndex* index, for (int i = 0; i < query_data.size(); i++) { TQueryOptions queryOptions; auto query = QueryFactory::create(InvertedIndexQueryType::EQUAL_QUERY, *string_searcher, - queryOptions); + queryOptions, nullptr); EXPECT_TRUE(query != nullptr); InvertedIndexQueryInfo query_info; query_info.field_name = column_name_ws; @@ -155,7 +155,7 @@ static bool query_fulltext(const TabletIndex* index, for (int i = 0; i < query_data.size(); i++) { TQueryOptions queryOptions; auto query = QueryFactory::create(InvertedIndexQueryType::MATCH_ANY_QUERY, *string_searcher, - queryOptions); + queryOptions, nullptr); EXPECT_TRUE(query != nullptr); InvertedIndexQueryInfo query_info; query_info.field_name = column_name_ws; diff --git a/regression-test/data/fault_injection_p0/test_index_io_context.out b/regression-test/data/fault_injection_p0/test_index_io_context.out new file mode 100644 index 000000000000000..3dc2880233e5efa --- /dev/null +++ b/regression-test/data/fault_injection_p0/test_index_io_context.out @@ -0,0 +1,73 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +177 + +-- !sql -- +177 + +-- !sql -- +177 + +-- !sql -- +177 + +-- !sql -- +177 + +-- !sql -- +177 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + +-- !sql -- +2 + diff --git a/regression-test/suites/fault_injection_p0/test_index_io_context.groovy b/regression-test/suites/fault_injection_p0/test_index_io_context.groovy new file mode 100644 index 000000000000000..9e9a26748974451 --- /dev/null +++ b/regression-test/suites/fault_injection_p0/test_index_io_context.groovy @@ -0,0 +1,113 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_index_io_context", "nonConcurrent") { + def tableName1 = "test_index_io_context1" + def tableName2 = "test_index_io_context2" + + def create_table = {table_name, index_format -> + sql """ DROP TABLE IF EXISTS ${table_name} """ + sql """ + CREATE TABLE IF NOT EXISTS ${table_name} ( + `@timestamp` int(11) NULL COMMENT "", + `clientip` varchar(20) NULL COMMENT "", + `request` text NULL COMMENT "", + `status` int(11) NULL COMMENT "", + `size` int(11) NULL COMMENT "", + INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = "english", "support_phrase" = "true") COMMENT '' + ) + DISTRIBUTED BY HASH(`@timestamp`) PROPERTIES( + "replication_num" = "1", + "disable_auto_compaction" = "true", + "inverted_index_storage_format" = "${index_format}" + ); + """ + } + + def load_httplogs_data = {table_name, label, read_flag, format_flag, file_name, ignore_failure=false, + expected_succ_rows = -1, load_to_single_tablet = 'true' -> + + // load the json data + streamLoad { + table "${table_name}" + + // set http request header params + set 'label', label + "_" + UUID.randomUUID().toString() + set 'read_json_by_line', read_flag + set 'format', format_flag + file file_name // import json file + time 10000 // limit inflight 10s + if (expected_succ_rows >= 0) { + set 'max_filter_ratio', '1' + } + + // if declared a check callback, the default check condition will ignore. + // So you must check all condition + check { result, exception, startTime, endTime -> + if (ignore_failure && expected_succ_rows < 0) { return } + if (exception != null) { + throw exception + } + log.info("Stream load result: ${result}".toString()) + def json = parseJson(result) + } + } + } + + try { + create_table(tableName1, "V1"); + create_table(tableName2, "V2"); + + load_httplogs_data.call(tableName1, 'test_index_io_context1', 'true', 'json', 'documents-1000.json') + load_httplogs_data.call(tableName2, 'test_index_io_context2', 'true', 'json', 'documents-1000.json') + + sql "sync" + sql """ set enable_common_expr_pushdown = true; """ + + try { + GetDebugPoint().enableDebugPointForAllBEs("DorisFSDirectory::FSIndexInput::readInternal") + + qt_sql """ select count() from ${tableName1} where request match_any 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName1} where request match_any 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName1} where request match_any 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName2} where request match_any 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName2} where request match_any 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName2} where request match_any 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName1} where request match_all 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName1} where request match_all 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName1} where request match_all 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName2} where request match_all 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName2} where request match_all 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName2} where request match_all 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName1} where request match_phrase 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName1} where request match_phrase 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName1} where request match_phrase 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName2} where request match_phrase 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName2} where request match_phrase 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName2} where request match_phrase 'ticket_quest_bg2.jpg'; """ + qt_sql """ select count() from ${tableName1} where request match_phrase 'ticket_quest_bg2.jpg ~10+'; """ + qt_sql """ select count() from ${tableName1} where request match_phrase 'ticket_quest_bg2.jpg ~10+'; """ + qt_sql """ select count() from ${tableName1} where request match_phrase 'ticket_quest_bg2.jpg ~10+'; """ + qt_sql """ select count() from ${tableName2} where request match_phrase 'ticket_quest_bg2.jpg ~10+'; """ + qt_sql """ select count() from ${tableName2} where request match_phrase 'ticket_quest_bg2.jpg ~10+'; """ + qt_sql """ select count() from ${tableName2} where request match_phrase 'ticket_quest_bg2.jpg ~10+'; """ + } finally { + GetDebugPoint().disableDebugPointForAllBEs("DorisFSDirectory::FSIndexInput::readInternal") + } + } finally { + } +} \ No newline at end of file