From b05c789d723b7ce4682be8ef8ea02a45de3d088a Mon Sep 17 00:00:00 2001 From: zhangstar333 Date: Fri, 15 Nov 2024 10:32:54 +0800 Subject: [PATCH] [improve](join) reuse the join block to reduce malloc memory (#43738) Problem Summary: Previously, the pull function used tmp_block to reference the data and swapped it with output_block, and then created empty columns in join_block, so the next insert into join_block had to malloc memory again. After the fix: ``` mysql [ssb]>set parallel_pipeline_task_num = 0; mysql [ssb]>select count(c_custkey) from (select c_custkey from customer cross join dates)t; +------------------+ | count(c_custkey) | +------------------+ | 7668000000 | +------------------+ 1 row in set (0.32 sec) mysql [ssb]>set parallel_pipeline_task_num = 1; Query OK, 0 rows affected (0.00 sec) mysql [ssb]>select count(c_custkey) from (select c_custkey from customer cross join dates)t; +------------------+ | count(c_custkey) | +------------------+ | 7668000000 | +------------------+ 1 row in set (5.61 sec) ``` Before the fix: ``` mysql [ssb]>set parallel_pipeline_task_num = 0; Query OK, 0 rows affected (0.00 sec) mysql [ssb]>select count(c_custkey) from (select c_custkey from customer cross join dates)t; +------------------+ | count(c_custkey) | +------------------+ | 7668000000 | +------------------+ 1 row in set (2.79 sec) mysql [ssb]>set parallel_pipeline_task_num = 1; Query OK, 0 rows affected (0.00 sec) mysql [ssb]>select count(c_custkey) from (select c_custkey from customer cross join dates)t; +------------------+ | count(c_custkey) | +------------------+ | 7668000000 | +------------------+ 1 row in set (10.21 sec) ``` --- .../exec/nested_loop_join_probe_operator.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp index dd441e7db2268b..c33c231a6ca461 100644 --- a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp +++ 
b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp @@ -523,24 +523,20 @@ Status NestedLoopJoinProbeOperatorX::pull(RuntimeState* state, vectorized::Block local_state._matched_rows_done : local_state._matched_rows_done); + size_t join_block_column_size = local_state._join_block.columns(); { - vectorized::Block tmp_block = local_state._join_block; - - // Here make _join_block release the columns' ptr - local_state._join_block.set_columns(local_state._join_block.clone_empty_columns()); - - local_state.add_tuple_is_null_column(&tmp_block); + local_state.add_tuple_is_null_column(&local_state._join_block); { SCOPED_TIMER(local_state._join_filter_timer); RETURN_IF_ERROR(vectorized::VExprContext::filter_block( - local_state._conjuncts, &tmp_block, tmp_block.columns())); + local_state._conjuncts, &local_state._join_block, + local_state._join_block.columns())); } RETURN_IF_ERROR_OR_CATCH_EXCEPTION( - local_state._build_output_block(&tmp_block, block, false)); + local_state._build_output_block(&local_state._join_block, block, false)); local_state._reset_tuple_is_null_column(); } - local_state._join_block.clear_column_data(); - + local_state._join_block.clear_column_data(join_block_column_size); if (!(*eos) and !local_state._need_more_input_data) { auto func = [&](auto&& join_op_variants, auto set_build_side_flag, auto set_probe_side_flag) {