From 1523b6a9beb7a4686629d7873db441ce42c37783 Mon Sep 17 00:00:00 2001 From: zhiqiang Date: Mon, 16 Dec 2024 01:26:34 +0800 Subject: [PATCH] [opt] Optimization for short circuit of CompoundPred (#45422) ### What problem does this PR solve? Related PR: https://github.com/apache/doris/pull/45414 Problem Summary: https://github.com/apache/doris/pull/45414 optimized the short circuit of CompoundPred. However, in some cases it introduces a performance regression, because the AND/OR processing in CompoundPred is not as efficient as that of Function AND/OR. Previous ```sql mysqlslap -hd3 -uroot -P9130 --create-schema=test_db -c 10 -i 100 -q "SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183 OR k BETWEEN 17788920 AND 27291942 OR k BETWEEN 23293962 AND 24940261 OR k BETWEEN 28108000 AND 28870202 OR k BETWEEN 23014347 AND 26008115 OR k BETWEEN 17817024 AND 29749077 OR k BETWEEN 8776291 AND 19869309 OR k BETWEEN 12846851 AND 15917660 OR k BETWEEN 21899521 AND 25614482 OR k BETWEEN 4850578 AND 8454295;" Benchmark Average number of seconds to run all queries: 0.315 seconds Minimum number of seconds to run all queries: 0.307 seconds Maximum number of seconds to run all queries: 0.328 seconds Number of clients running queries: 10 Average number of queries per client: 1 ``` Now ```sql mysqlslap -hd3 -uroot -P9130 --create-schema=test_db -c 10 -i 100 -q "SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183 OR k BETWEEN 17788920 AND 27291942 OR k BETWEEN 23293962 AND 24940261 OR k BETWEEN 28108000 AND 28870202 OR k BETWEEN 23014347 AND 26008115 OR k BETWEEN 17817024 AND 29749077 OR k BETWEEN 8776291 AND 19869309 OR k BETWEEN 12846851 AND 15917660 OR k BETWEEN 21899521 AND 25614482 OR k BETWEEN 4850578 AND 8454295;" Benchmark Average number of seconds to run all queries: 0.222 seconds Minimum number of seconds to run all queries: 0.215 seconds Maximum number of seconds to run all queries: 0.263 seconds Number of clients running queries: 10 Average number of queries per client: 1 ``` 
BTW, the performance regression only occurs when the compound predicate can be short-circuited in most cases. Data distribution ```sql mysql> desc sbtest1; +-------+--------------+------+-------+---------+-------+ | Field | Type | Null | Key | Default | Extra | +-------+--------------+------+-------+---------+-------+ | id | int | No | true | NULL | | | k | int | Yes | false | NULL | NONE | | c | varchar(360) | Yes | false | NULL | NONE | | pad | varchar(180) | Yes | false | NULL | NONE | +-------+--------------+------+-------+---------+-------+ 4 rows in set (0.00 sec) mysql> select count(*) from sbtest1; +----------+ | count(*) | +----------+ | 5069205 | +----------+ 1 row in set (0.02 sec) mysql> SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183; +----------+ | count(k) | +----------+ | 4726682 | +----------+ 1 row in set (0.02 sec) mysql> SELECT count(k) FROM sbtest1 WHERE k BETWEEN 10809931 AND 16922183 OR k BETWEEN 17788920 AND 27291942 OR k BETWEEN 23293962 AND 24940261 OR k BETWEEN 28108000 AND 28870202 OR k BETWEEN 23014347 AND 26008115 OR k BETWEEN 17817024 AND 29749077 OR k BETWEEN 8776291 AND 19869309 OR k BETWEEN 12846851 AND 15917660 OR k BETWEEN 21899521 AND 25614482 OR k BETWEEN 4850578 AND 8454295; +----------+ | count(k) | +----------+ | 5066731 | +----------+ 1 row in set (0.06 sec) ``` TODO: Investigate why the code is not vectorized. 
--- be/src/vec/exprs/vcompound_pred.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/be/src/vec/exprs/vcompound_pred.h b/be/src/vec/exprs/vcompound_pred.h index 8c65e6c8adbfef..88f3e474b58da3 100644 --- a/be/src/vec/exprs/vcompound_pred.h +++ b/be/src/vec/exprs/vcompound_pred.h @@ -234,24 +234,27 @@ class VCompoundPred : public VectorizedFnCall { auto vector_vector_null = [&]() { auto col_res = ColumnUInt8::create(size); auto col_nulls = ColumnUInt8::create(size); + auto* __restrict res_datas = assert_cast(col_res)->get_data().data(); auto* __restrict res_nulls = assert_cast(col_nulls)->get_data().data(); ColumnPtr temp_null_map = nullptr; // maybe both children are nullable / or one of children is nullable - lhs_null_map = create_null_map_column(temp_null_map, lhs_null_map); - rhs_null_map = create_null_map_column(temp_null_map, rhs_null_map); + auto* __restrict lhs_null_map_tmp = create_null_map_column(temp_null_map, lhs_null_map); + auto* __restrict rhs_null_map_tmp = create_null_map_column(temp_null_map, rhs_null_map); + auto* __restrict lhs_data_column_tmp = lhs_data_column; + auto* __restrict rhs_data_column_tmp = rhs_data_column; if constexpr (is_and_op) { for (size_t i = 0; i < size; ++i) { - res_nulls[i] = apply_and_null(lhs_data_column[i], lhs_null_map[i], - rhs_data_column[i], rhs_null_map[i]); - res_datas[i] = lhs_data_column[i] & rhs_data_column[i]; + res_nulls[i] = apply_and_null(lhs_data_column_tmp[i], lhs_null_map_tmp[i], + rhs_data_column_tmp[i], rhs_null_map_tmp[i]); + res_datas[i] = lhs_data_column_tmp[i] & rhs_data_column_tmp[i]; } } else { for (size_t i = 0; i < size; ++i) { - res_nulls[i] = apply_or_null(lhs_data_column[i], lhs_null_map[i], - rhs_data_column[i], rhs_null_map[i]); - res_datas[i] = lhs_data_column[i] | rhs_data_column[i]; + res_nulls[i] = apply_or_null(lhs_data_column_tmp[i], lhs_null_map_tmp[i], + rhs_data_column_tmp[i], rhs_null_map_tmp[i]); + res_datas[i] = lhs_data_column_tmp[i] | 
rhs_data_column_tmp[i]; } } auto result_column = ColumnNullable::create(std::move(col_res), std::move(col_nulls));