From 0aba1ed7784decace38efd9f97d677da75dc6bcf Mon Sep 17 00:00:00 2001 From: Pxl Date: Wed, 13 Nov 2024 11:51:14 +0800 Subject: [PATCH] [Bug](runtime-filter) fix core dump on rf between varchar and char (#43758) ### What problem does this PR solve? fix core dump on rf between varchar and char Problem Summary: F20241112 15:33:12.916148 3455401 assert_cast.h:48] Bad cast from type:doris::BloomFilterFunc<(doris::PrimitiveType)15>* to doris::BloomFilterFunc<(doris::PrimitiveType)10>* ```c++ 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk1/xiaolei/incubator-doris/be/src/common/signal_handler.h:421 1# 0x00007F73ACB75B50 in /lib64/libc.so.6 2# gsignal in /lib64/libc.so.6 3# __GI_abort in /lib64/libc.so.6 4# 0x00005645EDABC138 in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be 5# 0x00005645EDAAD89A in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be 6# google::LogMessage::SendToLog() in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be 7# google::LogMessage::Flush() in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be 8# google::LogMessageFatal::~LogMessageFatal() in /mnt/disk1/xiaolei/incubator-doris/output/be/lib/doris_be 9# doris::BloomFilterFunc<(doris::PrimitiveType)10>* assert_cast<doris::BloomFilterFunc<(doris::PrimitiveType)10>*, (TypeCheckOnRelease)1, doris::BloomFilterFuncBase*>(doris::BloomFilterFuncBase*&&)::{lambda(auto:1&&)#1}::operator()(doris::BloomFilterFuncBase*&&) const at /mnt/disk1/xiaolei/incubator-doris/be/src/vec/common/assert_cast.h:48 10# doris::BloomFilterFunc<(doris::PrimitiveType)10>* assert_cast<doris::BloomFilterFunc<(doris::PrimitiveType)10>*, (TypeCheckOnRelease)1, doris::BloomFilterFuncBase*>(doris::BloomFilterFuncBase*&&) at /mnt/disk1/xiaolei/incubator-doris/be/src/vec/common/assert_cast.h:64 11# doris::BloomFilterColumnPredicate<(doris::PrimitiveType)10>::BloomFilterColumnPredicate(unsigned int, std::shared_ptr<doris::BloomFilterFuncBase> const&) at /mnt/disk1/xiaolei/incubator-doris/be/src/olap/bloom_filter_predicate.h:44 12# doris::ColumnPredicate* 
doris::create_olap_column_predicate<(doris::PrimitiveType)10>(unsigned int, std::shared_ptr const&, int, doris::TabletColumn const*) at /mnt/disk1/xiaolei/incubator-doris/be/src/exprs/create_predicate_function.h:237 13# doris::ColumnPredicate* doris::create_column_predicate(unsigned int, std::shared_ptr const&, doris::FieldType, int, doris::TabletColumn const*) at /mnt/disk1/xiaolei/incubator-doris/be/src/exprs/create_predicate_function.h:290 14# doris::TabletReader::_parse_to_predicate(std::pair, std::allocator >, std::shared_ptr > const&) at /mnt/disk1/xiaolei/incubator-doris/be/src/olap/tablet_reader.cpp:582 15# doris::TabletReader::_init_conditions_param(doris::TabletReader::ReaderParams const&) at /mnt/disk1/xiaolei/incubator-doris/be/src/olap/tablet_reader.cpp:511 16# doris::TabletReader::_init_params(doris::TabletReader::ReaderParams const&) at /mnt/disk1/xiaolei/incubator-doris/be/src/olap/tablet_reader.cpp:294 17# doris::TabletReader::init(doris::TabletReader::ReaderParams const&) at /mnt/disk1/xiaolei/incubator-doris/be/src/olap/tablet_reader.cpp:125 18# doris::vectorized::BlockReader::init(doris::TabletReader::ReaderParams const&) at /mnt/disk1/xiaolei/incubator-doris/be/src/vec/olap/block_reader.cpp:193 19# doris::vectorized::NewOlapScanner::open(doris::RuntimeState*) at /mnt/disk1/xiaolei/incubator-doris/be/src/vec/exec/scan/new_olap_scanner.cpp:231 20# doris::vectorized::ScannerScheduler::_scanner_scan(std::shared_ptr, std::shared_ptr) at /mnt/disk1/xiaolei/incubator-doris/be/src/vec/exec/scan/scanner_scheduler.cpp:247 ``` --- be/src/exprs/create_predicate_function.h | 3 +- be/src/olap/bitmap_filter_predicate.h | 6 +-- be/src/olap/bloom_filter_predicate.h | 6 +-- be/src/olap/comparison_predicate.h | 6 +-- be/src/olap/in_list_predicate.h | 12 ++--- .../correctness_p0/test_runtime_filter.out | 33 ++++++++++++++ .../correctness_p0/test_runtime_filter.groovy | 44 ++++++++++++++++++- .../join/test_runtimefilter_on_datev2.groovy | 1 + 
.../join/test_runtimefilter_on_decimal.groovy | 1 + .../join/test_runtime_filter_boolean.groovy | 1 + .../test_runtime_filter_decimal256.groovy | 1 + .../query_p0/join/test_runtimefilter_2.groovy | 9 ++-- .../join/test_runtimefilter_on_datev2.groovy | 2 + 13 files changed, 101 insertions(+), 24 deletions(-) create mode 100644 regression-test/data/correctness_p0/test_runtime_filter.out diff --git a/be/src/exprs/create_predicate_function.h b/be/src/exprs/create_predicate_function.h index 4808caa00f37d0..387be1f9f0b11c 100644 --- a/be/src/exprs/create_predicate_function.h +++ b/be/src/exprs/create_predicate_function.h @@ -234,7 +234,8 @@ ColumnPredicate* create_olap_column_predicate(uint32_t column_id, std::shared_ptr filter_olap; filter_olap.reset(create_bloom_filter(PT)); filter_olap->light_copy(filter.get()); - return new BloomFilterColumnPredicate(column_id, filter); + // create a new filter to match the input filter and PT. For example, filter may be varchar, but PT is char + return new BloomFilterColumnPredicate(column_id, filter_olap); } template diff --git a/be/src/olap/bitmap_filter_predicate.h b/be/src/olap/bitmap_filter_predicate.h index 48e93642f4c368..7518099796fd2e 100644 --- a/be/src/olap/bitmap_filter_predicate.h +++ b/be/src/olap/bitmap_filter_predicate.h @@ -30,8 +30,6 @@ #include "vec/exprs/vruntimefilter_wrapper.h" namespace doris { - -// only use in runtime filter and segment v2 template class BitmapFilterColumnPredicate : public ColumnPredicate { public: @@ -89,7 +87,7 @@ class BitmapFilterColumnPredicate : public ColumnPredicate { uint16_t new_size = 0; new_size = _specific_filter->find_fixed_len_olap_engine( - (char*)reinterpret_cast< + (char*)assert_cast< const vectorized::PredicateColumnType>*>(&column) ->get_data() .data(), @@ -113,7 +111,7 @@ uint16_t BitmapFilterColumnPredicate::_evaluate_inner(const vectorized::IColu uint16_t* sel, uint16_t size) const { uint16_t new_size = 0; if (column.is_nullable()) { - const auto* nullable_col = 
reinterpret_cast(&column); + const auto* nullable_col = assert_cast(&column); const auto& null_map_data = nullable_col->get_null_map_column().get_data(); new_size = evaluate(nullable_col->get_nested_column(), null_map_data.data(), sel, size); diff --git a/be/src/olap/bloom_filter_predicate.h b/be/src/olap/bloom_filter_predicate.h index 2c49ff2ea8d1a2..d9d37d13198bbd 100644 --- a/be/src/olap/bloom_filter_predicate.h +++ b/be/src/olap/bloom_filter_predicate.h @@ -30,8 +30,6 @@ namespace doris { -// only use in runtime filter and segment v2 - template class BloomFilterColumnPredicate : public ColumnPredicate { public: @@ -41,7 +39,7 @@ class BloomFilterColumnPredicate : public ColumnPredicate { const std::shared_ptr& filter) : ColumnPredicate(column_id), _filter(filter), - _specific_filter(reinterpret_cast(_filter.get())) {} + _specific_filter(assert_cast(_filter.get())) {} ~BloomFilterColumnPredicate() override = default; PredicateType type() const override { return PredicateType::BF; } @@ -105,7 +103,7 @@ template uint16_t BloomFilterColumnPredicate::_evaluate_inner(const vectorized::IColumn& column, uint16_t* sel, uint16_t size) const { if (column.is_nullable()) { - const auto* nullable_col = reinterpret_cast(&column); + const auto* nullable_col = assert_cast(&column); const auto& null_map_data = nullable_col->get_null_map_column().get_data(); return evaluate(nullable_col->get_nested_column(), null_map_data.data(), sel, size); } else { diff --git a/be/src/olap/comparison_predicate.h b/be/src/olap/comparison_predicate.h index f17dae4a72b6f1..2ce2ca57f3db7e 100644 --- a/be/src/olap/comparison_predicate.h +++ b/be/src/olap/comparison_predicate.h @@ -274,7 +274,7 @@ class ComparisonPredicateBase : public ColumnPredicate { const auto* nullable_column_ptr = vectorized::check_and_get_column(column); const auto& nested_column = nullable_column_ptr->get_nested_column(); - const auto& null_map = reinterpret_cast( + const auto& null_map = assert_cast( 
nullable_column_ptr->get_null_map_column()) .get_data(); @@ -379,7 +379,7 @@ class ComparisonPredicateBase : public ColumnPredicate { const auto* nullable_column_ptr = vectorized::check_and_get_column(column); const auto& nested_column = nullable_column_ptr->get_nested_column(); - const auto& null_map = reinterpret_cast( + const auto& null_map = assert_cast( nullable_column_ptr->get_null_map_column()) .get_data(); @@ -465,7 +465,7 @@ class ComparisonPredicateBase : public ColumnPredicate { const auto* nullable_column_ptr = vectorized::check_and_get_column(column); const auto& nested_column = nullable_column_ptr->get_nested_column(); - const auto& null_map = reinterpret_cast( + const auto& null_map = assert_cast( nullable_column_ptr->get_null_map_column()) .get_data(); diff --git a/be/src/olap/in_list_predicate.h b/be/src/olap/in_list_predicate.h index c88ac0cdd6cfae..deb3f666f0e94b 100644 --- a/be/src/olap/in_list_predicate.h +++ b/be/src/olap/in_list_predicate.h @@ -232,9 +232,9 @@ class InListPredicateBase : public ColumnPredicate { if (column.is_nullable()) { const auto* nullable_col = vectorized::check_and_get_column(column); - const auto& null_bitmap = reinterpret_cast( - nullable_col->get_null_map_column()) - .get_data(); + const auto& null_bitmap = + assert_cast(nullable_col->get_null_map_column()) + .get_data(); const auto& nested_col = nullable_col->get_nested_column(); if (_opposite) { @@ -355,9 +355,9 @@ class InListPredicateBase : public ColumnPredicate { if (column.is_nullable()) { const auto* nullable_col = vectorized::check_and_get_column(column); - const auto& null_map = reinterpret_cast( - nullable_col->get_null_map_column()) - .get_data(); + const auto& null_map = + assert_cast(nullable_col->get_null_map_column()) + .get_data(); const auto& nested_col = nullable_col->get_nested_column(); if (_opposite) { diff --git a/regression-test/data/correctness_p0/test_runtime_filter.out b/regression-test/data/correctness_p0/test_runtime_filter.out new file 
mode 100644 index 00000000000000..d1ae375f7eaf63 --- /dev/null +++ b/regression-test/data/correctness_p0/test_runtime_filter.out @@ -0,0 +1,33 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !test -- +a a a a +b b b b + +-- !test -- +a a a a +b b b b + +-- !test -- +a a a a +b b b b + +-- !test -- +a a a a +b b b b + +-- !test -- +a a a a +b b b b + +-- !test -- +a a a a +b b b b + +-- !test -- +a a a a +b b b b + +-- !test -- +a a a a +b b b b + diff --git a/regression-test/suites/correctness_p0/test_runtime_filter.groovy b/regression-test/suites/correctness_p0/test_runtime_filter.groovy index 54b7aec52747b9..efc4b13927450e 100644 --- a/regression-test/suites/correctness_p0/test_runtime_filter.groovy +++ b/regression-test/suites/correctness_p0/test_runtime_filter.groovy @@ -20,7 +20,7 @@ // and modified by Doris. suite("test_runtime_filter") { - + sql "set enable_runtime_filter_prune=false;" sql """ DROP TABLE IF EXISTS rf_tblA """ sql """ CREATE TABLE IF NOT EXISTS rf_tblA ( @@ -109,5 +109,45 @@ suite("test_runtime_filter") { contains "runtime filters: RF001[max] <- c" contains "runtime filters: RF002[max] <- c" - } + } + + sql """ DROP TABLE IF EXISTS v_table """ + sql """ + create table v_table ( + kc char(100), + kv varchar(100) + ) + duplicate key (kc) + distributed BY hash(kv) buckets 1 + properties("replication_num" = "1"); + """ + + sql """ DROP TABLE IF EXISTS c_table """ + sql """ + create table c_table ( + kc char(100), + kv varchar(100) + ) + duplicate key (kc) + distributed BY hash(kv) buckets 1 + properties("replication_num" = "1"); + """ + sql """ + insert into c_table values ('a','a'),('b','b'); + """ + sql """ + insert into v_table values ('a','a'),('b','b'),('c','c'); + """ + sql "set runtime_filter_type='1';" + qt_test "select * from c_table,v_table where c_table.kc=v_table.kv;" + qt_test "select * from c_table,v_table where c_table.kv=v_table.kc;" + sql "set runtime_filter_type='2';" + 
qt_test "select * from c_table,v_table where c_table.kc=v_table.kv;" + qt_test "select * from c_table,v_table where c_table.kv=v_table.kc;" + sql "set runtime_filter_type='4';" + qt_test "select * from c_table,v_table where c_table.kc=v_table.kv;" + qt_test "select * from c_table,v_table where c_table.kv=v_table.kc;" + sql "set runtime_filter_type='8';" + qt_test "select * from c_table,v_table where c_table.kc=v_table.kv;" + qt_test "select * from c_table,v_table where c_table.kv=v_table.kc;" } diff --git a/regression-test/suites/nereids_p0/join/test_runtimefilter_on_datev2.groovy b/regression-test/suites/nereids_p0/join/test_runtimefilter_on_datev2.groovy index 768ef7da586583..fca7361a8c9645 100644 --- a/regression-test/suites/nereids_p0/join/test_runtimefilter_on_datev2.groovy +++ b/regression-test/suites/nereids_p0/join/test_runtimefilter_on_datev2.groovy @@ -16,6 +16,7 @@ // under the License. suite("test_runtimefilter_on_datev2", "nereids_p0") { + sql "set enable_runtime_filter_prune=false;" sql "SET enable_nereids_planner=true" sql "SET enable_fallback_to_original_planner=false" def dateTable = "dateTable" diff --git a/regression-test/suites/nereids_p0/join/test_runtimefilter_on_decimal.groovy b/regression-test/suites/nereids_p0/join/test_runtimefilter_on_decimal.groovy index f1bef86bbb2c4f..093568db8a76f7 100644 --- a/regression-test/suites/nereids_p0/join/test_runtimefilter_on_decimal.groovy +++ b/regression-test/suites/nereids_p0/join/test_runtimefilter_on_decimal.groovy @@ -16,6 +16,7 @@ // under the License. 
suite("test_runtimefilter_on_decimal", "nereids_p0") { + sql "set enable_runtime_filter_prune=false;" sql "SET enable_nereids_planner=true" sql "SET enable_fallback_to_original_planner=false" diff --git a/regression-test/suites/query_p0/join/test_runtime_filter_boolean.groovy b/regression-test/suites/query_p0/join/test_runtime_filter_boolean.groovy index e241909e79c1cb..56f5c502481b86 100644 --- a/regression-test/suites/query_p0/join/test_runtime_filter_boolean.groovy +++ b/regression-test/suites/query_p0/join/test_runtime_filter_boolean.groovy @@ -16,6 +16,7 @@ // under the License. suite("test_runtime_filter_boolean", "query_p0") { + sql "set enable_runtime_filter_prune=false;" sql "drop table if exists test_runtime_filter_boolean0;" sql """ create table test_runtime_filter_boolean0(k1 int, v1 boolean) DUPLICATE KEY(`k1`) diff --git a/regression-test/suites/query_p0/join/test_runtime_filter_decimal256.groovy b/regression-test/suites/query_p0/join/test_runtime_filter_decimal256.groovy index 46d8a23982ec33..d65bacc86621ec 100644 --- a/regression-test/suites/query_p0/join/test_runtime_filter_decimal256.groovy +++ b/regression-test/suites/query_p0/join/test_runtime_filter_decimal256.groovy @@ -16,6 +16,7 @@ // under the License. suite("test_runtime_filter_decimal256", "query_p0") { + sql "set enable_runtime_filter_prune=false;" sql "set enable_nereids_planner = true;" sql "set enable_decimal256 = true;" sql "set parallel_fragment_exec_instance_num = 4;" diff --git a/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy b/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy index 50a61a366b1bd2..5dad4dda3cf214 100644 --- a/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy +++ b/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. 
- suite("test_runtimefilter_2", "query_p0") { - sql "drop table if exists t_ods_tpisyncjpa4_2;" - sql """ create table t_ods_tpisyncjpa4_2(INTERNAL_CODE varchar(50), USER_ID varchar(50), USER_NAME varchar(50), STATE_ID varchar(50)) distributed by hash(INTERNAL_CODE) properties('replication_num'='1'); """ +suite("test_runtimefilter_2", "query_p0") { + sql "set enable_runtime_filter_prune=false;" + sql "drop table if exists t_ods_tpisyncjpa4_2;" + sql """ create table t_ods_tpisyncjpa4_2(INTERNAL_CODE varchar(50), USER_ID varchar(50), USER_NAME varchar(50), STATE_ID varchar(50)) distributed by hash(INTERNAL_CODE) properties('replication_num'='1'); """ sql """ insert into t_ods_tpisyncjpa4_2 values('1', '2', '3', '1');""" @@ -41,4 +42,4 @@ select DISTINCT tpisyncjpa4.USER_ID as USER_ID, tpisyncjpa4.USER_NAME as USER_NAME, tpisyncjpp1.POST_ID AS "T4_POST_ID" FROM t_ods_tpisyncjpa4_2 tpisyncjpa4 cross join [shuffle] t_ods_tpisyncjpp1_2 tpisyncjpp1 inner join ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID )jpa4 on tpisyncjpa4.USER_ID=jpa4.USER_ID and tpisyncjpa4.INTERNAL_CODE=jpa4.INTERNAL_CODE inner join [shuffle] ( SELECT POST_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpp1_2 WHERE STATE_ID = '1' GROUP BY POST_ID )jpp1 on tpisyncjpp1.POST_ID=jpp1.POST_ID and tpisyncjpp1.INTERNAL_CODE=jpp1.INTERNAL_CODE where tpisyncjpa4.USER_ID = tpisyncjpp1.USER_ID AND tpisyncjpp1.STATE_ID ='1' AND tpisyncjpa4.STATE_ID ='1' AND tpisyncjpp1.POST_ID='BSDSAE1018'; """ - } \ No newline at end of file +} \ No newline at end of file diff --git a/regression-test/suites/query_p0/join/test_runtimefilter_on_datev2.groovy b/regression-test/suites/query_p0/join/test_runtimefilter_on_datev2.groovy index 32435ffd73a83e..a9a23401ed0254 100644 --- a/regression-test/suites/query_p0/join/test_runtimefilter_on_datev2.groovy +++ b/regression-test/suites/query_p0/join/test_runtimefilter_on_datev2.groovy @@ -23,6 +23,8 @@ 
suite("test_runtimefilter_on_datev2", "query_p0") { def dateV2Table2 = "dateV2Table2" def dateTimeV2Table2 = "dateTimeV2Table2" + sql "set enable_runtime_filter_prune=false;" + sql "DROP TABLE IF EXISTS ${dateTable}" sql """ CREATE TABLE IF NOT EXISTS ${dateTable} (