Skip to content

Commit

Permalink
update case
Browse files Browse the repository at this point in the history
  • Loading branch information
bobhan1 committed Dec 18, 2024
1 parent 5554a55 commit 286cb7a
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 92 deletions.
14 changes: 13 additions & 1 deletion be/src/vec/olap/block_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ Status BlockReader::next_block_with_aggregation(Block* block, bool* eof) {
}

bool BlockReader::_rowsets_not_mono_asc_disjoint(const ReaderParams& read_params) {
LOG(INFO) << fmt::format("xxx enter BlockReader::_rowsets_not_mono_asc_disjoint");
std::string pre_rs_last_key;
bool pre_rs_key_bounds_truncated {false};
const std::vector<RowSetSplits>& rs_splits = read_params.rs_splits;
Expand All @@ -90,6 +91,17 @@ bool BlockReader::_rowsets_not_mono_asc_disjoint(const ReaderParams& read_params
}
bool cur_rs_key_bounds_truncated {
rs_split.rs_reader->rowset()->is_segments_key_bounds_truncated()};
{
// delete me later
bool res = Slice::origin_is_strictly_less_than(
Slice {pre_rs_last_key}, pre_rs_key_bounds_truncated, Slice {rs_first_key},
cur_rs_key_bounds_truncated);
LOG(INFO) << fmt::format(
"xxx result={}\npre_rs_last_key={}, len={}, truncated={}\nrs_first_key={}, "
"len={}, trunacted={}",
res, pre_rs_last_key, pre_rs_last_key.size(), pre_rs_key_bounds_truncated,
rs_first_key, rs_first_key.size(), cur_rs_key_bounds_truncated);
}
if (!Slice::origin_is_strictly_less_than(Slice {pre_rs_last_key},
pre_rs_key_bounds_truncated, Slice {rs_first_key},
cur_rs_key_bounds_truncated)) {
Expand All @@ -99,7 +111,6 @@ bool BlockReader::_rowsets_not_mono_asc_disjoint(const ReaderParams& read_params
pre_rs_key_bounds_truncated = cur_rs_key_bounds_truncated;
CHECK(has_last_key);
}

return false;
}

Expand All @@ -115,6 +126,7 @@ Status BlockReader::_init_collect_iter(const ReaderParams& read_params) {
}
// check if rowsets are non-overlapping
_is_rowsets_overlapping = _rowsets_not_mono_asc_disjoint(read_params);
LOG_INFO("xxx _is_rowsets_overlapping={}", _is_rowsets_overlapping);
_vcollect_iter.init(this, _is_rowsets_overlapping, read_params.read_orderby_key,
read_params.read_orderby_key_reverse);

Expand Down
277 changes: 186 additions & 91 deletions be/test/olap/segments_key_bounds_truncation_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,9 @@ class SegmentsKeyBoundsTruncationTest : public testing::Test {
rs_split.rs_reader = rs_reader;
rs_splits.emplace_back(rs_split);
}
for (std::size_t i {0}; i < truncate_lengths.size(); i++) {
EXPECT_EQ((truncate_lengths[i] > 0), rowsets[i]->is_segments_key_bounds_truncated());
}
reader_params.rs_splits = std::move(rs_splits);
return reader_params;
}
Expand Down Expand Up @@ -376,117 +379,209 @@ TEST_F(SegmentsKeyBoundsTruncationTest, BlockReaderJudgeFuncTest) {
auto tablet_schema = create_schema(100);

{
// either all rowsets are truncated or none of them are truncated
// all rowsets are truncated to same size
// keys are distinguishable from any index
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbb"},
{"cccccc", "dddddd"},
{"eeeeeee", "fffffff"},
{"xxxxxxx", "yyyyyyyy"}};
{
// keys are distinguishable from any index
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbb"},
{"cccccc", "dddddd"},
{"eeeeeee", "fffffff"},
{"xxxxxxx", "yyyyyyyy"}};
{
config::enable_segments_key_bounds_truncation = false;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
config::enable_segments_key_bounds_truncation = false;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}

{
config::enable_segments_key_bounds_truncation = true;
config::segments_key_bounds_truncation_threshold = 3;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
// can still determine that rowsets are mono ascending and disjoint after truncation
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
config::enable_segments_key_bounds_truncation = true;
config::segments_key_bounds_truncation_threshold = 3;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
// can still determine that rowsets are mono ascending and disjoint after truncation
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
}

{
// all rowsets are truncated to same size
// keys are distinguishable from any index before truncation
// some keys are not comparable after truncation
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbb"},
{"cccccccccccc", "ccdddddddd"},
{"cceeeeeeee", "fffffff"},
{"xxxxxxx", "yyyyyyyy"}};
{
// keys are distinguishable from any index before truncation
// some keys are not comparable after truncation
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbb"},
{"cccccccccccc", "ccdddddddd"},
{"cceeeeeeee", "fffffff"},
{"xxxxxxx", "yyyyyyyy"}};
{
config::enable_segments_key_bounds_truncation = false;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
config::enable_segments_key_bounds_truncation = false;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}

{
config::enable_segments_key_bounds_truncation = true;
config::segments_key_bounds_truncation_threshold = 6;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
config::enable_segments_key_bounds_truncation = true;
config::segments_key_bounds_truncation_threshold = 6;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}

{
config::enable_segments_key_bounds_truncation = true;
config::segments_key_bounds_truncation_threshold = 3;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
// cannot determine whether rowset 2 and rowset 3 are mono ascending
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
config::enable_segments_key_bounds_truncation = true;
config::segments_key_bounds_truncation_threshold = 3;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
// cannot determine whether rowset 2 and rowset 3 are mono ascending
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
}

{
// all rowsets are truncated to same size
// keys are not mono ascending before truncation
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbb"},
{"bbbbb", "cccccccc"},
{"cccccccc", "xxxxxxx"},
{"xxxxxxx", "yyyyyyyy"}};
{
// keys are not mono ascending before truncation
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbb"},
{"bbbbb", "cccccccc"},
{"cccccccc", "xxxxxxx"},
{"xxxxxxx", "yyyyyyyy"}};
{
config::enable_segments_key_bounds_truncation = false;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
config::enable_segments_key_bounds_truncation = false;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}

{
config::enable_segments_key_bounds_truncation = true;
config::segments_key_bounds_truncation_threshold = 3;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
config::enable_segments_key_bounds_truncation = true;
config::segments_key_bounds_truncation_threshold = 3;
TabletReader::ReaderParams read_params = create_reader_params(tablet_schema, data);
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
}

{
// some rowsets are truncated, some are not
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbbbccccccc"},
{"bbbbbbddddddd", "dddddd"}};
{
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbbbccccccc"},
{"bbbbbbddddddd", "dddddd"}};
{
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {-1, 9});
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {-1, 9});
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}

{
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {-1, 4});
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {-1, 4});
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}

{
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {9, -1});
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {9, -1});
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}

{
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {4, -1});
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {4, -1});
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
}

{
// some rowsets are truncated, some are not, truncated lengths may be different
{
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbbbbb"},
{"ccccccccc", "dddddd"},
{"eeeeeee", "ffffffggggg"},
{"ffffffhhhhhh", "hhhhhhh"},
{"iiiiiiii", "jjjjjjjjj"}};
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {4, 5, 4, -1, 6});
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbbbbb"},
{"ccccccccc", "dddddd"},
{"eeeeeee", "ffffffggggg"},
{"ffffffhhhhhh", "hhhhhhh"},
{"iiiiiiii", "jjjjjjjjj"}};
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {4, 5, 8, -1, 6});
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}

{
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbbbbb"},
{"ccccccccc", "dddddd"},
{"eeeeeee", "ffffffggggg"},
{"ffffffhhhhhh", "hhhhhhh"},
{"iiiiiiii", "jjjjjjjjj"}};
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {4, 5, -1, 4, 6});
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbbbbb"},
{"ccccccccc", "dddddd"},
{"eeeeeee", "ffffffggggg"},
{"ffffffhhhhhh", "hhhhhhh"},
{"iiiiiiii", "jjjjjjjjj"}};
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {4, 5, -1, 8, 6});
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}

{
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbbbbb"},
{"ccccccccc", "dddddd"},
{"eeeeeee", "ffffffggggg"},
{"ffffffhhhhhh", "hhhhhhh"},
{"iiiiiiii", "jjjjjjjjj"}};
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {4, 5, 8, 4, 6});
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbbbbb"},
{"ccccccccc", "dddddd"},
{"eeeeeee", "ffffffggggg"},
{"ffffffhhhhhh", "hhhhhhh"},
{"iiiiiiii", "jjjjjjjjj"}};
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {4, 5, 4, 8, 6});
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbbbbb"},
{"ccccccccc", "dddddd"},
{"eeeeeee", "ffffffggggg"},
{"ffffffhhhhhh", "hhhhhhh"},
{"iiiiiiii", "jjjjjjjjj"}};
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {4, 5, 8, 9, 6});
vectorized::BlockReader block_reader;
EXPECT_FALSE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
{
std::vector<std::vector<std::string>> data {{"aaaaaaaaa", "bbbbbbbb"},
{"ccccccccc", "dddddd"},
{"eeeeeee", "ffffffggggg"},
{"ffffffhhhhhh", "hhhhhhh"},
{"iiiiiiii", "jjjjjjjjj"}};
TabletReader::ReaderParams read_params =
create_reader_params(tablet_schema, data, {4, 5, 3, 4, 6});
vectorized::BlockReader block_reader;
EXPECT_TRUE(block_reader._rowsets_not_mono_asc_disjoint(read_params));
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ suite("test_key_bounds_truncation_basic", "nonConcurrent") {
logger.info("\nk1=${k1}, size=${k1.size()}, k2=${k2}, size=${k2.size()}")
logger.info("\nexpected_min_key=${expected_min_key}, size=${expected_min_key.size()}, expected_max_key=${expected_max_key}, size=${expected_max_key.size()}")
logger.info("\nmin_key=${min_key}, size=${min_key.size()}\nmax_key=${max_key}, size=${max_key.size()}")
logger.info("\nsegments_key_bounds_truncated=${rowsetMeta.segments_key_bounds_truncated}, turnedOn=${turnedOn}")
assertEquals(min_key, expected_min_key)
assertEquals(max_key, expected_max_key)

Expand Down
Loading

0 comments on commit 286cb7a

Please sign in to comment.