From 779e437443e7bcf4ec83c283fcdf6ea37b74fe36 Mon Sep 17 00:00:00 2001
From: zhangstar333 <2561612514@qq.com>
Date: Fri, 13 Oct 2023 18:14:56 +0800
Subject: [PATCH] [feature](function) support bitmap type in min/max_by agg function

---
 .../aggregate_function_min_max_by.h           | 72 +++++++++++++++++++
 .../test_aggregate_all_functions2.out         | 14 +++++
 .../test_aggregate_all_functions2.groovy      | 33 +++++++++-
 3 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h b/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h
index ca21db0c114b84..b7a2f5c159d366 100644
--- a/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h
+++ b/be/src/vec/aggregate_functions/aggregate_function_min_max_by.h
@@ -18,15 +18,82 @@
 #pragma once
 
+#include <utility>
+
 #include "common/logging.h"
+#include "util/bitmap_value.h"
 #include "vec/aggregate_functions/aggregate_function.h"
 #include "vec/aggregate_functions/aggregate_function_min_max.h"
 #include "vec/aggregate_functions/helpers.h"
+#include "vec/columns/column_complex.h"
 #include "vec/columns/column_decimal.h"
 #include "vec/columns/column_vector.h"
 #include "vec/common/assert_cast.h"
+#include "vec/data_types/data_type_bitmap.h"
 #include "vec/io/io_helper.h"
 
 namespace doris::vectorized {
+
+/// Holds at most one BitmapValue; this is the value state that min_by/max_by use
+/// for bitmap arguments. `has_value` tells whether `value` is meaningful.
+struct BitmapValueData {
+private:
+    using Self = BitmapValueData;
+    bool has_value = false;
+    BitmapValue value;
+
+public:
+    BitmapValueData() = default;
+    /// `value_` is taken by value and then moved into the member, so the
+    /// by-value parameter is not copied a second time.
+    BitmapValueData(bool has_value_, BitmapValue value_)
+            : has_value(has_value_), value(std::move(value_)) {}
+    [[nodiscard]] bool has() const { return has_value; }
+
+    /// Appends the stored bitmap to `to`, or a default entry when nothing is stored.
+    void insert_result_into(IColumn& to) const {
+        if (has()) {
+            assert_cast<ColumnBitmap&>(to).get_data().push_back(value);
+        } else {
+            assert_cast<ColumnBitmap&>(to).insert_default();
+        }
+    }
+
+    /// Marks the holder empty. The bitmap itself is not cleared, only flagged absent.
+    void reset() {
+        if (has()) {
+            has_value = false;
+        }
+    }
+
+    /// Writes the presence flag, followed by the bitmap only when one is present.
+    void write(BufferWritable& buf) const {
+        write_binary(has(), buf);
+        if (has()) {
+            DataTypeBitMap::serialize_as_stream(value, buf);
+        }
+    }
+
+    /// Reads the presence flag and, when it is set, the bitmap. The arena is not used.
+    void read(BufferReadable& buf, Arena*) {
+        read_binary(has_value, buf);
+        if (has()) {
+            DataTypeBitMap::deserialize_as_stream(value, buf);
+        }
+    }
+
+    /// Stores a copy of the bitmap found at `row_num` of `column`.
+    void change(const IColumn& column, size_t row_num, Arena*) {
+        has_value = true;
+        value = assert_cast<const ColumnBitmap&>(column).get_data()[row_num];
+    }
+
+    /// Stores a copy of the bitmap held by `to`.
+    void change(const Self& to, Arena*) {
+        has_value = true;
+        value = to.value;
+    }
+};
+
 template <typename VT, typename KT>
 struct AggregateFunctionMinMaxByBaseData {
 protected:
@@ -229,6 +296,11 @@ AggregateFunctionPtr create_aggregate_function_min_max_by(const String& name,
                                                          SingleValueDataFixed<UInt64>>(
                 argument_types, result_is_nullable);
     }
+    if (which.idx == TypeIndex::BitMap) {
+        return create_aggregate_function_min_max_by_impl<AggregateFunctionTemplate, Data,
+                                                         BitmapValueData>(argument_types,
+                                                                          result_is_nullable);
+    }
     return nullptr;
 }
 
diff --git a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions2.out b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions2.out
index a73761131e0a77..39895527107733 100644
--- a/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions2.out
+++ b/regression-test/data/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions2.out
@@ -104,3 +104,17 @@
 -- !select_count2 --
 15
 
+-- !select_minmax1 --
+20200622	1	\N
+20200622	2	\N
+20200622	3	\N
+
+-- !select_minmax2 --
+20200622
+
+-- !select_minmax3 --
+287667876573
+
+-- !select_minmax4 --
+243
+
diff --git a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions2.groovy b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions2.groovy
index b9ff46f22cf1a7..823a73e700d89d 100644
--- a/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions2.groovy
+++ b/regression-test/suites/query_p0/sql_functions/aggregate_functions/test_aggregate_all_functions2.groovy
@@ -81,5 +81,36 @@ suite("test_aggregate_all_functions2") {
qt_select_topn_array6 """ select topn_array(k11,3,100) from baseall; """ qt_select_count1 """ select count(distinct k1,k2,k5) from baseall; """ qt_select_count2 """ select count(distinct k1,k2,cast(k5 as decimalv3(38,18))) from baseall; """ - + + + sql "DROP DATABASE IF EXISTS metric_table" + sql """ + CREATE TABLE `metric_table` ( + `datekey` int(11) NULL, + `hour` int(11) NULL, + `device_id` bitmap BITMAP_UNION NOT NULL + ) ENGINE=OLAP + AGGREGATE KEY(`datekey`, `hour`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`datekey`, `hour`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "is_being_synced" = "false", + "storage_format" = "V2", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false" + ); + """ + sql """ + insert into metric_table values + (20200622, 1, to_bitmap(243)), + (20200622, 2, bitmap_from_array([1,2,3,4,5,434543])), + (20200622, 3, to_bitmap(287667876573)); + """ + + qt_select_minmax1 """ select * from metric_table order by hour; """ + qt_select_minmax2 """ select max_by(datekey,hour) from metric_table; """ + qt_select_minmax3 """ select bitmap_to_string(max_by(device_id,hour)) from metric_table; """ + qt_select_minmax4 """ select bitmap_to_string(min_by(device_id,hour)) from metric_table; """ }