From 989aa0f4280270bba78542844af491a07a1d67ca Mon Sep 17 00:00:00 2001 From: lihangyu Date: Wed, 18 Dec 2024 19:18:28 +0800 Subject: [PATCH] fix 4 (#45601) --- .../olap/rowset/segment_v2/column_reader.cpp | 174 +++++++++--------- be/src/olap/rowset/segment_v2/column_reader.h | 72 ++++---- .../segment_v2/hierarchical_data_reader.cpp | 31 ++-- .../segment_v2/hierarchical_data_reader.h | 1 + be/src/olap/rowset/segment_v2/segment.cpp | 38 ++-- be/src/olap/rowset/segment_v2/segment.h | 5 +- .../segment_v2/variant_column_writer_impl.cpp | 15 +- be/src/vec/columns/column_object.cpp | 11 +- be/src/vec/columns/column_object.h | 14 +- be/src/vec/data_types/data_type_object.cpp | 17 ++ 10 files changed, 194 insertions(+), 184 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 745ff3d93a3beb..2f303999aea308 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -282,18 +282,6 @@ Status VariantColumnReader::init(const ColumnReaderOptions& opts, const SegmentF io::FileReaderSPtr file_reader) { // init sub columns _subcolumn_readers = std::make_unique(); - std::unordered_map - column_path_to_footer_ordinal; - for (uint32_t ordinal = 0; ordinal < footer.columns().size(); ++ordinal) { - const auto& column_pb = footer.columns(ordinal); - // column path for accessing subcolumns of variant - if (column_pb.has_column_path_info()) { - vectorized::PathInData path; - path.from_protobuf(column_pb.column_path_info()); - column_path_to_footer_ordinal.emplace(path, ordinal); - } - } - const ColumnMetaPB& self_column_pb = footer.columns(column_id); for (const ColumnMetaPB& column_pb : footer.columns()) { if (column_pb.unique_id() != self_column_pb.unique_id()) { @@ -311,23 +299,25 @@ Status VariantColumnReader::init(const ColumnReaderOptions& opts, const SegmentF &_sparse_column_reader)); continue; } - // init subcolumns auto relative_path = 
path.copy_pop_front(); + auto get_data_type_fn = [&]() { + if (relative_path.empty()) { + return make_nullable(std::make_unique()); + } + return vectorized::DataTypeFactory::instance().create_data_type(column_pb); + }; + // init subcolumns if (_subcolumn_readers->get_root() == nullptr) { _subcolumn_readers->create_root(SubcolumnReader {nullptr, nullptr}); } if (relative_path.empty()) { // root column - _subcolumn_readers->get_mutable_root()->modify_to_scalar(SubcolumnReader { - std::move(reader), - vectorized::DataTypeFactory::instance().create_data_type(column_pb)}); + _subcolumn_readers->get_mutable_root()->modify_to_scalar( + SubcolumnReader {std::move(reader), get_data_type_fn()}); } else { // check the root is already a leaf node - _subcolumn_readers->add( - relative_path, - SubcolumnReader { - std::move(reader), - vectorized::DataTypeFactory::instance().create_data_type(column_pb)}); + _subcolumn_readers->add(relative_path, + SubcolumnReader {std::move(reader), get_data_type_fn()}); } } @@ -876,7 +866,9 @@ Status ColumnReader::new_iterator(ColumnIterator** iterator) { return new_map_iterator(iterator); } case FieldType::OLAP_FIELD_TYPE_VARIANT: { - *iterator = new VariantRootColumnIterator(new FileColumnIterator(this)); + // read from root data + // *iterator = new VariantRootColumnIterator(new FileColumnIterator(this)); + *iterator = new FileColumnIterator(this); return Status::OK(); } default: @@ -1738,75 +1730,75 @@ void DefaultValueColumnIterator::_insert_many_default(vectorized::MutableColumnP } } -Status VariantRootColumnIterator::_process_root_column( - vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr& root_column, - const vectorized::DataTypePtr& most_common_type) { - auto& obj = - dst->is_nullable() - ? 
assert_cast( - assert_cast(*dst).get_nested_column()) - : assert_cast(*dst); - - // fill nullmap - if (root_column->is_nullable() && dst->is_nullable()) { - vectorized::ColumnUInt8& dst_null_map = - assert_cast(*dst).get_null_map_column(); - vectorized::ColumnUInt8& src_null_map = - assert_cast(*root_column).get_null_map_column(); - dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); - } - - // add root column to a tmp object column - auto tmp = vectorized::ColumnObject::create(true, false); - auto& tmp_obj = assert_cast(*tmp); - tmp_obj.add_sub_column({}, std::move(root_column), most_common_type); - - // merge tmp object column to dst - obj.insert_range_from(*tmp, 0, tmp->size()); - - // finalize object if needed - if (!obj.is_finalized()) { - obj.finalize(); - } - -#ifndef NDEBUG - obj.check_consistency(); -#endif - - return Status::OK(); -} - -Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, - bool* has_null) { - // read root column - auto& obj = - dst->is_nullable() - ? assert_cast( - assert_cast(*dst).get_nested_column()) - : assert_cast(*dst); - - auto most_common_type = obj.get_most_common_type(); - auto root_column = most_common_type->create_column(); - RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null)); - - return _process_root_column(dst, root_column, most_common_type); -} - -Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, - vectorized::MutableColumnPtr& dst) { - // read root column - auto& obj = - dst->is_nullable() - ? 
assert_cast( - assert_cast(*dst).get_nested_column()) - : assert_cast(*dst); - - auto most_common_type = obj.get_most_common_type(); - auto root_column = most_common_type->create_column(); - RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, root_column)); - - return _process_root_column(dst, root_column, most_common_type); -} +// Status VariantRootColumnIterator::_process_root_column( +// vectorized::MutableColumnPtr& dst, vectorized::MutableColumnPtr& root_column, +// const vectorized::DataTypePtr& most_common_type) { +// auto& obj = +// dst->is_nullable() +// ? assert_cast( +// assert_cast(*dst).get_nested_column()) +// : assert_cast(*dst); +// +// // fill nullmap +// if (root_column->is_nullable() && dst->is_nullable()) { +// vectorized::ColumnUInt8& dst_null_map = +// assert_cast(*dst).get_null_map_column(); +// vectorized::ColumnUInt8& src_null_map = +// assert_cast(*root_column).get_null_map_column(); +// dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); +// } +// +// // add root column to a tmp object column +// auto tmp = vectorized::ColumnObject::create(true, false); +// auto& tmp_obj = assert_cast(*tmp); +// tmp_obj.add_sub_column({}, std::move(root_column), most_common_type); +// +// // merge tmp object column to dst +// obj.insert_range_from(*tmp, 0, tmp_obj.rows()); +// +// // finalize object if needed +// if (!obj.is_finalized()) { +// obj.finalize(); +// } +// +// #ifndef NDEBUG +// obj.check_consistency(); +// #endif +// +// return Status::OK(); +// } +// +// Status VariantRootColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst, +// bool* has_null) { +// // read root column +// auto& obj = +// dst->is_nullable() +// ? 
assert_cast( +// assert_cast(*dst).get_nested_column()) +// : assert_cast(*dst); +// +// auto most_common_type = obj.get_most_common_type(); +// auto root_column = most_common_type->create_column(); +// RETURN_IF_ERROR(_inner_iter->next_batch(n, root_column, has_null)); +// +// return _process_root_column(dst, root_column, most_common_type); +// } +// +// Status VariantRootColumnIterator::read_by_rowids(const rowid_t* rowids, const size_t count, +// vectorized::MutableColumnPtr& dst) { +// // read root column +// auto& obj = +// dst->is_nullable() +// ? assert_cast( +// assert_cast(*dst).get_nested_column()) +// : assert_cast(*dst); +// +// auto most_common_type = obj.get_most_common_type(); +// auto root_column = most_common_type->create_column(); +// RETURN_IF_ERROR(_inner_iter->read_by_rowids(rowids, count, root_column)); +// +// return _process_root_column(dst, root_column, most_common_type); +// } Status DefaultNestedColumnIterator::next_batch(size_t* n, vectorized::MutableColumnPtr& dst) { bool has_null = false; diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index d61393e820c227..189435c2095130 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -216,7 +216,7 @@ class ColumnReader : public MetadataAdder { void disable_index_meta_cache() { _use_index_page_cache = false; } - FieldType get_meta_type() { return _meta_type; } + virtual FieldType get_meta_type() { return _meta_type; } private: ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB& meta, uint64_t num_rows, @@ -309,6 +309,8 @@ class VariantColumnReader : public ColumnReader { ~VariantColumnReader() override = default; + FieldType get_meta_type() override { return FieldType::OLAP_FIELD_TYPE_VARIANT; } + private: std::unique_ptr _subcolumn_readers; std::unique_ptr _sparse_column_reader; @@ -661,40 +663,40 @@ class RowIdColumnIterator : public ColumnIterator { int32_t 
_segment_id = 0; }; -class VariantRootColumnIterator : public ColumnIterator { -public: - VariantRootColumnIterator() = delete; - - explicit VariantRootColumnIterator(FileColumnIterator* iter) { _inner_iter.reset(iter); } - - ~VariantRootColumnIterator() override = default; - - Status init(const ColumnIteratorOptions& opts) override { return _inner_iter->init(opts); } - - Status seek_to_first() override { return _inner_iter->seek_to_first(); } - - Status seek_to_ordinal(ordinal_t ord_idx) override { - return _inner_iter->seek_to_ordinal(ord_idx); - } - - Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) { - bool has_null; - return next_batch(n, dst, &has_null); - } - - Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* has_null) override; - - Status read_by_rowids(const rowid_t* rowids, const size_t count, - vectorized::MutableColumnPtr& dst) override; - - ordinal_t get_current_ordinal() const override { return _inner_iter->get_current_ordinal(); } - -private: - Status _process_root_column(vectorized::MutableColumnPtr& dst, - vectorized::MutableColumnPtr& root_column, - const vectorized::DataTypePtr& most_common_type); - std::unique_ptr _inner_iter; -}; +// class VariantRootColumnIterator : public ColumnIterator { +// public: +// VariantRootColumnIterator() = delete; +// +// explicit VariantRootColumnIterator(FileColumnIterator* iter) { _inner_iter.reset(iter); } +// +// ~VariantRootColumnIterator() override = default; +// +// Status init(const ColumnIteratorOptions& opts) override { return _inner_iter->init(opts); } +// +// Status seek_to_first() override { return _inner_iter->seek_to_first(); } +// +// Status seek_to_ordinal(ordinal_t ord_idx) override { +// return _inner_iter->seek_to_ordinal(ord_idx); +// } +// +// Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst) { +// bool has_null; +// return next_batch(n, dst, &has_null); +// } +// +// Status next_batch(size_t* n, vectorized::MutableColumnPtr& dst, bool* 
has_null) override; +// +// Status read_by_rowids(const rowid_t* rowids, const size_t count, +// vectorized::MutableColumnPtr& dst) override; +// +// ordinal_t get_current_ordinal() const override { return _inner_iter->get_current_ordinal(); } +// +// private: +// Status _process_root_column(vectorized::MutableColumnPtr& dst, +// vectorized::MutableColumnPtr& root_column, +// const vectorized::DataTypePtr& most_common_type); +// std::unique_ptr _inner_iter; +// }; // This iterator is used to read default value column class DefaultValueColumnIterator : public ColumnIterator { diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp index 2b8e58d47f10de..ca25b230bceec3 100644 --- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp +++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.cpp @@ -239,15 +239,17 @@ Status HierarchicalDataReader::_init_container(vectorized::MutableColumnPtr& con // add root first if (_path.get_parts().empty() && _root_reader) { - auto& root_var = - _root_reader->column->is_nullable() - ? assert_cast( - assert_cast(*_root_reader->column) - .get_nested_column()) - : assert_cast(*_root_reader->column); - auto column = root_var.get_root(); - auto type = root_var.get_root_type(); - container_variant.add_sub_column({}, std::move(column), type); + // auto& root_var = + // _root_reader->column->is_nullable() + // ? 
assert_cast( + // assert_cast(*_root_reader->column) + // .get_nested_column()) + // : assert_cast(*_root_reader->column); + // auto column = root_var.get_root(); + // auto type = root_var.get_root_type(); + MutableColumnPtr column = _root_reader->column->get_ptr(); + container_variant.add_sub_column({}, std::move(column), + ColumnObject::get_most_common_type()); } // parent path -> subcolumns std::map nested_subcolumns; @@ -361,7 +363,9 @@ Status HierarchicalDataReader::_init_null_map_and_clear_columns( return Status::OK(); })); container->clear(); - _sparse_column_reader->column->clear(); + if (_sparse_column_reader) { + _sparse_column_reader->column->clear(); + } if (_root_reader) { if (_root_reader->column->is_nullable()) { // fill nullmap @@ -372,13 +376,8 @@ Status HierarchicalDataReader::_init_null_map_and_clear_columns( dst_null_map.insert_range_from(src_null_map, 0, src_null_map.size()); // clear nullmap and inner data src_null_map.clear(); - assert_cast( - assert_cast(*_root_reader->column).get_nested_column()) - .clear_column_data(); - } else { - auto& root_column = assert_cast(*_root_reader->column); - root_column.clear_column_data(); } + _root_reader->column->clear(); } else { if (dst->is_nullable()) { // No nullable info exist in hirearchical data, fill nullmap with all none null diff --git a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h index 5d58f666f62bdd..83dab269dfc5b8 100644 --- a/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h +++ b/be/src/olap/rowset/segment_v2/hierarchical_data_reader.h @@ -22,6 +22,7 @@ #include #include +#include "common/exception.h" #include "common/status.h" #include "io/io_common.h" #include "olap/field.h" diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 441e839e6ef7bb..ededa493018aca 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ 
-201,22 +201,23 @@ Status Segment::_open() { // 0.01 comes from PrimaryKeyIndexBuilder::init _meta_mem_usage += BloomFilter::optimal_bit_num(_num_rows, 0.01) / 8; - uint32_t ordinal = 0; - for (const auto& column_meta : _footer_pb->columns()) { - // unique_id < 0 means this column is extracted column from variant - if (static_cast(column_meta.unique_id()) >= 0) { - _column_id_to_footer_ordinal[column_meta.unique_id()] = ordinal++; + // collect variant statistics + for (const auto& column_pb : _footer_pb->columns()) { + if (column_pb.has_variant_statistics()) { + _variant_column_stats.try_emplace(column_pb.unique_id(), + column_pb.variant_statistics()); } } + return Status::OK(); } -const ColumnMetaPB* Segment::get_column_meta(int32_t unique_id) const { - auto it = _column_id_to_footer_ordinal.find(unique_id); - if (it == _column_id_to_footer_ordinal.end()) { +const VariantStatisticsPB* Segment::get_stats(int32_t unique_id) const { + auto it = _variant_column_stats.find(unique_id); + if (it == _variant_column_stats.end()) { return nullptr; } - return &_footer_pb->columns(it->second); + return &it->second; } Status Segment::_open_inverted_index() { @@ -570,8 +571,9 @@ Status Segment::healthy_status() { vectorized::DataTypePtr Segment::get_data_type_of(const ColumnIdentifier& identifier, bool read_flat_leaves) const { // Path has higher priority - if (identifier.path != nullptr && !identifier.path->empty()) { - auto relative_path = identifier.path->copy_pop_front(); + auto relative_path = identifier.path != nullptr ? identifier.path->copy_pop_front() + : vectorized::PathInData(); + if (!relative_path.empty()) { int32_t unique_id = identifier.unique_id > 0 ? 
identifier.unique_id : identifier.parent_unique_id; const auto* node = _column_readers.contains(unique_id) @@ -605,11 +607,17 @@ Status Segment::_create_column_readers_once() { } Status Segment::_create_column_readers(const SegmentFooterPB& footer) { + // unique_id -> idx in footer.columns() + std::unordered_map column_id_to_footer_ordinal; + uint32_t ordinal = 0; + for (const auto& column_meta : _footer_pb->columns()) { + column_id_to_footer_ordinal.try_emplace(column_meta.unique_id(), ordinal++); + } // init by unique_id for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); ++ordinal) { const auto& column = _tablet_schema->column(ordinal); - auto iter = _column_id_to_footer_ordinal.find(column.unique_id()); - if (iter == _column_id_to_footer_ordinal.end()) { + auto iter = column_id_to_footer_ordinal.find(column.unique_id()); + if (iter == column_id_to_footer_ordinal.end()) { continue; } @@ -796,8 +804,8 @@ Status Segment::new_column_iterator(const TabletColumn& tablet_column, // } // For compability reason unique_id may less than 0 for variant extracted column - int32_t unique_id = tablet_column.unique_id() > 0 ? tablet_column.unique_id() - : tablet_column.parent_unique_id(); + int32_t unique_id = tablet_column.unique_id() >= 0 ? 
tablet_column.unique_id() + : tablet_column.parent_unique_id(); // init default iterator if (!_column_readers.contains(unique_id)) { RETURN_IF_ERROR(new_default_iterator(tablet_column, iter)); diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 877f74ae1c3a12..1c7b94271630d9 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -208,7 +208,7 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd const TabletSchemaSPtr& tablet_schema() { return _tablet_schema; } - const ColumnMetaPB* get_column_meta(int32_t unique_id) const; + const VariantStatisticsPB* get_stats(int32_t unique_id) const; private: DISALLOW_COPY_AND_ASSIGN(Segment); @@ -288,8 +288,7 @@ class Segment : public std::enable_shared_from_this, public MetadataAdd int _be_exec_version = BeExecVersionManager::get_newest_version(); OlapReaderStatistics* _pk_index_load_stats = nullptr; - // unique_id -> idx in footer.columns() - std::unordered_map _column_id_to_footer_ordinal; + std::unordered_map _variant_column_stats; }; } // namespace segment_v2 diff --git a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp index 5fbb7433e104fb..a3671f3afd3109 100644 --- a/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp +++ b/be/src/olap/rowset/segment_v2/variant_column_writer_impl.cpp @@ -68,22 +68,19 @@ Status VariantColumnWriterImpl::_get_subcolumn_paths_from_stats(std::setload_segments( std::static_pointer_cast(reader->rowset()), &segment_cache)); for (const auto& segment : segment_cache.get_segments()) { - const auto* column_meta_pb = segment->get_column_meta(_tablet_column->unique_id()); - if (!column_meta_pb) { + const VariantStatisticsPB* source_statistics = + segment->get_stats(_tablet_column->unique_id()); + if (!source_statistics) { continue; } - if (!column_meta_pb->has_variant_statistics()) { - continue; - } - 
const VariantStatisticsPB& source_statistics = column_meta_pb->variant_statistics(); - for (const auto& [path, size] : source_statistics.subcolumn_non_null_size()) { + for (const auto& [path, size] : source_statistics->subcolumn_non_null_size()) { auto it = path_to_total_number_of_non_null_values.find(path); if (it == path_to_total_number_of_non_null_values.end()) { it = path_to_total_number_of_non_null_values.emplace(path, 0).first; } it->second += size; } - for (const auto& [path, size] : source_statistics.sparse_column_non_null_size()) { + for (const auto& [path, size] : source_statistics->sparse_column_non_null_size()) { auto it = path_to_total_number_of_non_null_values.find(path); if (it == path_to_total_number_of_non_null_values.end()) { it = path_to_total_number_of_non_null_values.emplace(path, 0).first; @@ -256,7 +253,7 @@ Status VariantColumnWriterImpl::_process_sparse_column( } void VariantStatistics::to_pb(VariantStatisticsPB* stats) const { - for (const auto& [path, value] : _sparse_column_non_null_size) { + for (const auto& [path, value] : _subcolumns_non_null_size) { stats->mutable_subcolumn_non_null_size()->emplace(path.to_string(), value); } for (const auto& [path, value] : _sparse_column_non_null_size) { diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index f234ba7bfa43d6..eb397e85a322ad 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -2231,12 +2231,6 @@ void ColumnObject::clear() { _prev_positions.clear(); } -void ColumnObject::create_root() { - auto type = is_nullable ? 
make_nullable(std::make_shared()) - : std::make_shared(); - add_sub_column({}, type->create_column(), type); -} - void ColumnObject::create_root(const DataTypePtr& type, MutableColumnPtr&& column) { if (num_rows == 0) { num_rows = column->size(); @@ -2244,9 +2238,8 @@ void ColumnObject::create_root(const DataTypePtr& type, MutableColumnPtr&& colum add_sub_column({}, std::move(column), type); } -DataTypePtr ColumnObject::get_most_common_type() const { - auto type = is_nullable ? make_nullable(std::make_shared()) - : std::make_shared(); +const DataTypePtr& ColumnObject::get_most_common_type() { + static auto type = make_nullable(std::make_shared()); return type; } diff --git a/be/src/vec/columns/column_object.h b/be/src/vec/columns/column_object.h index 86ba60fffce752..647516f97cdc52 100644 --- a/be/src/vec/columns/column_object.h +++ b/be/src/vec/columns/column_object.h @@ -46,6 +46,7 @@ #include "vec/core/types.h" #include "vec/data_types/data_type.h" #include "vec/data_types/data_type_jsonb.h" +#include "vec/data_types/data_type_map.h" #include "vec/data_types/data_type_nullable.h" #include "vec/data_types/serde/data_type_serde.h" #include "vec/io/reader_buffer.h" @@ -307,15 +308,10 @@ class ColumnObject final : public COWHelper { // ensure root node is a certain type void ensure_root_node_type(const DataTypePtr& type); - // create jsonb root if missing - // notice: should only using in VariantRootColumnIterator - // since some datastructures(sparse columns are schema on read - void create_root(); - // create root with type and column if missing void create_root(const DataTypePtr& type, MutableColumnPtr&& column); - DataTypePtr get_most_common_type() const; + static const DataTypePtr& get_most_common_type(); // root is null or type nothing bool is_null_root() const; @@ -377,6 +373,12 @@ class ColumnObject final : public COWHelper { vectorized::ColumnArray::ColumnOffsets::create()); } + static const DataTypePtr& get_sparse_column_type() { + static DataTypePtr type 
= std::make_shared(std::make_shared(), + std::make_shared()); + return type; + } + void set_sparse_column(ColumnPtr column) { serialized_sparse_column = column; } Status finalize(FinalizeMode mode); diff --git a/be/src/vec/data_types/data_type_object.cpp b/be/src/vec/data_types/data_type_object.cpp index 0c795e542b0dd5..5829554d11884a 100644 --- a/be/src/vec/data_types/data_type_object.cpp +++ b/be/src/vec/data_types/data_type_object.cpp @@ -30,6 +30,7 @@ #include #include "agent/be_exec_version_manager.h" +#include "vec/columns/column.h" #include "vec/columns/column_object.h" #include "vec/common/assert_cast.h" #include "vec/common/typeid_cast.h" @@ -84,6 +85,11 @@ int64_t DataTypeObject::get_uncompressed_serialized_bytes(const IColumn& column, size += sizeof(uint32_t); } + // sparse column + // TODO: handle compatibility with sparse column + size += ColumnObject::get_sparse_column_type()->get_uncompressed_serialized_bytes( + *column_object.get_sparse_column(), be_exec_version); + return size; } @@ -134,6 +140,11 @@ char* DataTypeObject::serialize(const IColumn& column, char* buf, int be_exec_ve buf += sizeof(uint32_t); } + // serialize sparse column + // TODO: handle compatibility with sparse column + buf = ColumnObject::get_sparse_column_type()->serialize(*column_object.get_sparse_column(), buf, + be_exec_version); + return buf; } @@ -175,6 +186,12 @@ const char* DataTypeObject::deserialize(const char* buf, MutableColumnPtr* colum buf += sizeof(uint32_t); } + // deserialize sparse column + // TODO: handle compatibility with sparse column + MutableColumnPtr sparse_column = ColumnObject::get_sparse_column_type()->create_column(); + buf = ColumnObject::get_sparse_column_type()->deserialize(buf, &sparse_column, be_exec_version); + column_object->set_sparse_column(std::move(sparse_column)); + column_object->finalize(); #ifndef NDEBUG // DCHECK size