Skip to content

Commit

Permalink
[Optimize](inverted index) optimize inverted index bitmap copy (apach…
Browse files Browse the repository at this point in the history
  • Loading branch information
airborne12 authored Mar 16, 2024
1 parent 277b3cf commit 7be5a5c
Show file tree
Hide file tree
Showing 9 changed files with 85 additions and 75 deletions.
8 changes: 4 additions & 4 deletions be/src/olap/comparison_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,11 @@ class ComparisonPredicateBase : public ColumnPredicate {
return Status::InvalidArgument("invalid comparison predicate type {}", PT);
}

roaring::Roaring roaring;
std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();

auto&& value = PrimitiveTypeConvertor<Type>::to_storage_field_type(_value);
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &value, query_type,
num_rows, &roaring));
num_rows, roaring));

// mask out null_bitmap, since NULL cmp VALUE will produce NULL
// and be treated as false in WHERE
Expand All @@ -118,9 +118,9 @@ class ComparisonPredicateBase : public ColumnPredicate {
}

if constexpr (PT == PredicateType::NE) {
*bitmap -= roaring;
*bitmap -= *roaring;
} else {
*bitmap &= roaring;
*bitmap &= *roaring;
}

return Status::OK();
Expand Down
6 changes: 3 additions & 3 deletions be/src/olap/in_list_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,10 +194,10 @@ class InListPredicateBase : public ColumnPredicate {
auto&& value = PrimitiveTypeConvertor<Type>::to_storage_field_type(
*reinterpret_cast<const T*>(ptr));
InvertedIndexQueryType query_type = InvertedIndexQueryType::EQUAL_QUERY;
roaring::Roaring index;
std::shared_ptr<roaring::Roaring> index = std::make_shared<roaring::Roaring>();
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &value, query_type,
num_rows, &index));
indices |= index;
num_rows, index));
indices |= *index;
iter->next();
}

Expand Down
8 changes: 4 additions & 4 deletions be/src/olap/match_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ Status MatchPredicate::evaluate(const vectorized::NameAndTypePair& name_with_typ
}
auto type = name_with_type.second;
const std::string& name = name_with_type.first;
roaring::Roaring roaring;
std::shared_ptr<roaring::Roaring> roaring = std::make_shared<roaring::Roaring>();
auto inverted_index_query_type = _to_inverted_index_query_type(_match_type);
TypeDescriptor column_desc = type->get_type_as_type_descriptor();
if (is_string_type(column_desc.type) ||
Expand All @@ -67,7 +67,7 @@ Status MatchPredicate::evaluate(const vectorized::NameAndTypePair& name_with_typ
char* buffer = const_cast<char*>(_value.c_str());
match_value.replace(buffer, length); //is it safe?
RETURN_IF_ERROR(iterator->read_from_inverted_index(
name, &match_value, inverted_index_query_type, num_rows, &roaring));
name, &match_value, inverted_index_query_type, num_rows, roaring));
} else if (column_desc.type == TYPE_ARRAY &&
is_numeric_type(
TabletColumn::get_field_type_by_type(column_desc.children[0].type))) {
Expand All @@ -76,7 +76,7 @@ Status MatchPredicate::evaluate(const vectorized::NameAndTypePair& name_with_typ
TabletColumn::get_field_type_by_type(column_desc.children[0].type));
RETURN_IF_ERROR(type_info->from_string(buf, _value));
RETURN_IF_ERROR(iterator->read_from_inverted_index(name, buf, inverted_index_query_type,
num_rows, &roaring, true));
num_rows, roaring, true));
}

// mask out null_bitmap, since NULL cmp VALUE will produce NULL
Expand All @@ -91,7 +91,7 @@ Status MatchPredicate::evaluate(const vectorized::NameAndTypePair& name_with_typ
}
}

*bitmap &= roaring;
*bitmap &= *roaring;
return Status::OK();
}

Expand Down
11 changes: 10 additions & 1 deletion be/src/olap/rowset/segment_v2/inverted_index_file_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,12 +192,21 @@ Result<std::unique_ptr<DorisCompoundReader>> InvertedIndexFileReader::open(
return _open(index_id, index_suffix);
}

std::string InvertedIndexFileReader::get_index_file_path(const TabletIndex* index_meta) const {
// Builds the per-index identifier for `index_meta` from the segment file
// location plus the index id and index suffix. Callers in the inverted index
// reader use the returned string as the file component of their cache keys.
std::string InvertedIndexFileReader::get_index_file_key(const TabletIndex* index_meta) const {
    // Location of the segment file this reader belongs to.
    const auto segment_location = _index_file_dir / _segment_file_name;
    return InvertedIndexDescriptor::get_index_file_name(segment_location, index_meta->index_id(),
                                                        index_meta->get_index_suffix());
}

// Returns the path of the file that holds the index described by `index_meta`.
//  - Storage format V1: a per-index file name derived from the segment file
//    location, the index id and the index suffix.
//  - Any other storage format: `_index_file_dir / _index_file_name`;
//    `index_meta` is not consulted in this case.
// NOTE(review): presumably non-V1 formats keep every index of a segment in a
// single file, which is why the result is independent of `index_meta` — confirm.
std::string InvertedIndexFileReader::get_index_file_path(const TabletIndex* index_meta) const {
    if (_storage_format != InvertedIndexStorageFormatPB::V1) {
        return _index_file_dir / _index_file_name;
    }
    const auto segment_location = _index_file_dir / _segment_file_name;
    return InvertedIndexDescriptor::get_index_file_name(segment_location, index_meta->index_id(),
                                                        index_meta->get_index_suffix());
}

Status InvertedIndexFileReader::index_file_exist(const TabletIndex* index_meta, bool* res) const {
if (_storage_format == InvertedIndexStorageFormatPB::V1) {
auto index_file_path = _index_file_dir / InvertedIndexDescriptor::get_index_file_name(
Expand Down
1 change: 1 addition & 0 deletions be/src/olap/rowset/segment_v2/inverted_index_file_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class InvertedIndexFileReader {
bool open_idx_file_cache = false);
Result<std::unique_ptr<DorisCompoundReader>> open(const TabletIndex* index_meta) const;
void debug_file_entries();
std::string get_index_file_key(const TabletIndex* index_meta) const;
std::string get_index_file_path(const TabletIndex* index_meta) const;
Status index_file_exist(const TabletIndex* index_meta, bool* res) const;
Status has_null(const TabletIndex* index_meta, bool* res) const;
Expand Down
83 changes: 41 additions & 42 deletions be/src/olap/rowset/segment_v2/inverted_index_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,9 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach
bool owned_dir = false;
try {
// try to get query bitmap result from cache and return immediately on cache hit
auto index_file_path = _inverted_index_file_reader->get_index_file_path(&_index_meta);
auto index_file_key = _inverted_index_file_reader->get_index_file_key(&_index_meta);
InvertedIndexQueryCache::CacheKey cache_key {
index_file_path, "", InvertedIndexQueryType::UNKNOWN_QUERY, "null_bitmap"};
index_file_key, "", InvertedIndexQueryType::UNKNOWN_QUERY, "null_bitmap"};
auto* cache = InvertedIndexQueryCache::instance();
if (cache->lookup(cache_key, cache_handle)) {
return Status::OK();
Expand Down Expand Up @@ -196,7 +196,7 @@ Status InvertedIndexReader::read_null_bitmap(InvertedIndexQueryCacheHandle* cach

Status InvertedIndexReader::handle_searcher_cache(
InvertedIndexCacheHandle* inverted_index_cache_handle, OlapReaderStatistics* stats) {
auto index_file_key = _inverted_index_file_reader->get_index_file_path(&_index_meta);
auto index_file_key = _inverted_index_file_reader->get_index_file_key(&_index_meta);
InvertedIndexSearcherCache::CacheKey searcher_cache_key(index_file_key);
if (InvertedIndexSearcherCache::instance()->lookup(searcher_cache_key,
inverted_index_cache_handle)) {
Expand Down Expand Up @@ -252,7 +252,8 @@ Status FullTextIndexReader::new_iterator(OlapReaderStatistics* stats, RuntimeSta

Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state,
const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, roaring::Roaring* bit_map) {
InvertedIndexQueryType query_type,
std::shared_ptr<roaring::Roaring>& bit_map) {
SCOPED_RAW_TIMER(&stats->inverted_index_query_timer);

std::string search_str = reinterpret_cast<const StringRef*>(query_value)->to_string();
Expand All @@ -262,10 +263,10 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run
try {
std::vector<std::string> analyse_result;
InvertedIndexQueryCache::CacheKey cache_key;
auto index_file_path = _inverted_index_file_reader->get_index_file_path(&_index_meta);
auto index_file_key = _inverted_index_file_reader->get_index_file_key(&_index_meta);

if (query_type == InvertedIndexQueryType::MATCH_REGEXP_QUERY) {
cache_key = {index_file_path, column_name, query_type, search_str};
cache_key = {index_file_key, column_name, query_type, search_str};
analyse_result.emplace_back(search_str);
} else {
InvertedIndexCtxSPtr inverted_index_ctx = std::make_shared<InvertedIndexCtx>(
Expand Down Expand Up @@ -301,7 +302,6 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run
std::unique_ptr<lucene::search::Query> query;
std::wstring field_ws = std::wstring(column_name.begin(), column_name.end());

roaring::Roaring query_match_bitmap;
if (query_type == InvertedIndexQueryType::MATCH_PHRASE_QUERY ||
query_type == InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY ||
query_type == InvertedIndexQueryType::MATCH_PHRASE_EDGE_QUERY ||
Expand All @@ -310,7 +310,7 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run
query_type == InvertedIndexQueryType::MATCH_ANY_QUERY) {
std::string str_tokens = join(analyse_result, " ");

cache_key = {index_file_path, column_name, query_type, str_tokens};
cache_key = {index_file_key, column_name, query_type, str_tokens};
}
auto* cache = InvertedIndexQueryCache::instance();
InvertedIndexQueryCacheHandle cache_handler;
Expand All @@ -337,10 +337,8 @@ Status FullTextIndexReader::query(OlapReaderStatistics* stats, RuntimeState* run
}
term_match_bitmap->runOptimize();
cache->insert(cache_key, term_match_bitmap, &cache_handler);
bit_map = term_match_bitmap;
}
query_match_bitmap = *term_match_bitmap;

bit_map->swap(query_match_bitmap);
return Status::OK();
} catch (const CLuceneError& e) {
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
Expand Down Expand Up @@ -385,7 +383,7 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats,
RuntimeState* runtime_state,
const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type,
roaring::Roaring* bit_map) {
std::shared_ptr<roaring::Roaring>& bit_map) {
SCOPED_RAW_TIMER(&stats->inverted_index_query_timer);

const auto* search_query = reinterpret_cast<const StringRef*>(query_value);
Expand All @@ -402,10 +400,10 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats,
[](lucene::index::Term* term) { _CLDECDELETE(term); }};
std::unique_ptr<lucene::search::Query> query;

auto index_file_path = _inverted_index_file_reader->get_index_file_path(&_index_meta);
auto index_file_key = _inverted_index_file_reader->get_index_file_key(&_index_meta);

// try to get query bitmap result from cache and return immediately on cache hit
InvertedIndexQueryCache::CacheKey cache_key {index_file_path, column_name, query_type,
InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type,
search_str};
auto* cache = InvertedIndexQueryCache::instance();
InvertedIndexQueryCacheHandle cache_handler;
Expand Down Expand Up @@ -498,7 +496,7 @@ Status StringTypeInvertedIndexReader::query(OlapReaderStatistics* stats,
term_match_bitmap->runOptimize();
cache->insert(cache_key, term_match_bitmap, &cache_handler);

bit_map->swap(result);
bit_map = term_match_bitmap;
}
return Status::OK();
}
Expand Down Expand Up @@ -590,43 +588,43 @@ Status BkdIndexReader::invoke_bkd_try_query(const void* query_value,

Status BkdIndexReader::invoke_bkd_query(const void* query_value, InvertedIndexQueryType query_type,
std::shared_ptr<lucene::util::bkd::bkd_reader> r,
roaring::Roaring* bit_map) {
std::shared_ptr<roaring::Roaring>& bit_map) {
switch (query_type) {
case InvertedIndexQueryType::LESS_THAN_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::LESS_THAN_QUERY>>(
r.get(), bit_map);
r.get(), bit_map.get());
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get()));
r->intersect(visitor.get());
break;
}
case InvertedIndexQueryType::LESS_EQUAL_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::LESS_EQUAL_QUERY>>(
r.get(), bit_map);
r.get(), bit_map.get());
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get()));
r->intersect(visitor.get());
break;
}
case InvertedIndexQueryType::GREATER_THAN_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::GREATER_THAN_QUERY>>(
r.get(), bit_map);
r.get(), bit_map.get());
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get()));
r->intersect(visitor.get());
break;
}
case InvertedIndexQueryType::GREATER_EQUAL_QUERY: {
auto visitor =
std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::GREATER_EQUAL_QUERY>>(
r.get(), bit_map);
r.get(), bit_map.get());
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get()));
r->intersect(visitor.get());
break;
}
case InvertedIndexQueryType::EQUAL_QUERY: {
auto visitor = std::make_unique<InvertedIndexVisitor<InvertedIndexQueryType::EQUAL_QUERY>>(
r.get(), bit_map);
r.get(), bit_map.get());
RETURN_IF_ERROR(construct_bkd_query_value(query_value, r, visitor.get()));
r->intersect(visitor.get());
break;
Expand All @@ -641,25 +639,26 @@ Status BkdIndexReader::try_query(OlapReaderStatistics* stats, const std::string&
const void* query_value, InvertedIndexQueryType query_type,
uint32_t* count) {
try {
auto index_file_path = _inverted_index_file_reader->get_index_file_path(&_index_meta);

std::shared_ptr<lucene::util::bkd::bkd_reader> r;
auto st = get_bkd_reader(r, stats);
if (!st.ok()) {
LOG(WARNING) << "get bkd reader for " << index_file_path << " failed: " << st;
LOG(WARNING) << "get bkd reader for "
<< _inverted_index_file_reader->get_index_file_path(&_index_meta)
<< " failed: " << st;
return st;
}
std::string query_str;
_value_key_coder->full_encode_ascending(query_value, &query_str);

InvertedIndexQueryCache::CacheKey cache_key {index_file_path, column_name, query_type,
auto index_file_key = _inverted_index_file_reader->get_index_file_key(&_index_meta);
InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type,
query_str};
auto* cache = InvertedIndexQueryCache::instance();
InvertedIndexQueryCacheHandle cache_handler;
roaring::Roaring bit_map;
auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, &bit_map);
std::shared_ptr<roaring::Roaring> bit_map;
auto cache_status = handle_query_cache(cache, cache_key, &cache_handler, stats, bit_map);
if (cache_status.ok()) {
*count = bit_map.cardinality();
*count = bit_map->cardinality();
return Status::OK();
}

Expand All @@ -675,22 +674,24 @@ Status BkdIndexReader::try_query(OlapReaderStatistics* stats, const std::string&

Status BkdIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_state,
const std::string& column_name, const void* query_value,
InvertedIndexQueryType query_type, roaring::Roaring* bit_map) {
InvertedIndexQueryType query_type,
std::shared_ptr<roaring::Roaring>& bit_map) {
SCOPED_RAW_TIMER(&stats->inverted_index_query_timer);

try {
auto index_file_path = _inverted_index_file_reader->get_index_file_path(&_index_meta);

std::shared_ptr<lucene::util::bkd::bkd_reader> r;
auto st = get_bkd_reader(r, stats);
if (!st.ok()) {
LOG(WARNING) << "get bkd reader for " << index_file_path << " failed: " << st;
LOG(WARNING) << "get bkd reader for "
<< _inverted_index_file_reader->get_index_file_path(&_index_meta)
<< " failed: " << st;
return st;
}
std::string query_str;
_value_key_coder->full_encode_ascending(query_value, &query_str);

InvertedIndexQueryCache::CacheKey cache_key {index_file_path, column_name, query_type,
auto index_file_key = _inverted_index_file_reader->get_index_file_key(&_index_meta);
InvertedIndexQueryCache::CacheKey cache_key {index_file_key, column_name, query_type,
query_str};
auto* cache = InvertedIndexQueryCache::instance();
InvertedIndexQueryCacheHandle cache_handler;
Expand All @@ -700,16 +701,16 @@ Status BkdIndexReader::query(OlapReaderStatistics* stats, RuntimeState* runtime_
}

RETURN_IF_ERROR(invoke_bkd_query(query_value, query_type, r, bit_map));
std::shared_ptr<roaring::Roaring> query_bitmap =
std::make_shared<roaring::Roaring>(*bit_map);
query_bitmap->runOptimize();
cache->insert(cache_key, query_bitmap, &cache_handler);
bit_map->runOptimize();
cache->insert(cache_key, bit_map, &cache_handler);

VLOG_DEBUG << "BKD index search column: " << column_name
<< " result: " << bit_map->cardinality();

return Status::OK();
} catch (const CLuceneError& e) {
LOG(ERROR) << "BKD Query CLuceneError Occurred, error msg: " << e.what() << " file_path:"
<< _inverted_index_file_reader->get_index_file_path(&_index_meta);
return Status::Error<ErrorCode::INVERTED_INDEX_CLUCENE_ERROR>(
"BKD Query CLuceneError Occurred, error msg: {}", e.what());
}
Expand Down Expand Up @@ -1123,11 +1124,9 @@ lucene::util::bkd::relation InvertedIndexVisitor<QT>::compare(std::vector<uint8_
}
}

Status InvertedIndexIterator::read_from_inverted_index(const std::string& column_name,
const void* query_value,
InvertedIndexQueryType query_type,
uint32_t segment_num_rows,
roaring::Roaring* bit_map, bool skip_try) {
Status InvertedIndexIterator::read_from_inverted_index(
const std::string& column_name, const void* query_value, InvertedIndexQueryType query_type,
uint32_t segment_num_rows, std::shared_ptr<roaring::Roaring>& bit_map, bool skip_try) {
if (UNLIKELY(_reader == nullptr)) {
throw CLuceneError(CL_ERR_NullPointer, "bkd index reader is null", false);
}
Expand Down
Loading

0 comments on commit 7be5a5c

Please sign in to comment.