From 52e9e144595156e62022f978f613195f8284346a Mon Sep 17 00:00:00 2001
From: Sun Chenyang
Date: Tue, 17 Dec 2024 20:40:38 +0800
Subject: [PATCH] fix3 (#45554)

Serialize each cell directly into the destination ColumnString buffer.

The Number, String, JSONB and Array implementations of
write_one_cell_to_binary() no longer call ColumnString::insert_data()
once per field. Each of them now grows the destination chars buffer
once, memcpy()s the type tag, the optional length and the payload into
place, and appends exactly one offset for the cell.

For arrays the nested serde appends one offset per element, so the
array serde remembers the destination row count before writing the
elements and truncates back to it before appending the single offset
that covers the whole array cell.

ColumnObject constructors now initialize num_rows explicitly. The
constructor taking a sparse column reads the row count from the member
after the move, so it does not depend on member declaration order.

DataTypeNullableSerDe::write_one_cell_to_binary() is commented out, so
nullable columns fall back to the DataTypeSerDe default, which throws
NOT_IMPLEMENTED_ERROR.
---
Review notes (not part of the commit message):
- ColumnObject(bool, DataTypePtr, MutableColumnPtr&&) passes num_rows to
  insert_many_defaults() right after initializing it to 0; this assumes
  add_sub_column() updates num_rows -- please confirm.
- The length prefix is written as a raw size_t, so its width follows the
  platform; confirm that readers use the same type.
- The base-class parameter is renamed to `dst` while every override keeps
  `dst_column`.
- The post-image blob ids on the `index` lines of column_object.cpp,
  data_type_array_serde.cpp and data_type_jsonb_serde.cpp predate the
  latest edits and must be regenerated.

 be/src/vec/columns/column_object.cpp          | 14 +++++++---
 .../serde/data_type_array_serde.cpp           | 16 +++++++++--
 .../serde/data_type_jsonb_serde.cpp           | 15 +++++++---
 .../serde/data_type_nullable_serde.cpp        | 28 +++++++++----------
 .../serde/data_type_nullable_serde.h          |  4 +--
 .../serde/data_type_number_serde.cpp          | 11 ++++++--
 be/src/vec/data_types/serde/data_type_serde.h |  2 +-
 .../data_types/serde/data_type_string_serde.h | 18 ++++++++----
 8 files changed, 74 insertions(+), 34 deletions(-)

diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp
index 13ef6eb8d7183f..f234ba7bfa43d6 100644
--- a/be/src/vec/columns/column_object.cpp
+++ b/be/src/vec/columns/column_object.cpp
@@ -804,11 +804,17 @@ ColumnObject::ColumnObject(bool is_nullable_, bool create_root_)
 }
 
 ColumnObject::ColumnObject(MutableColumnPtr&& sparse_column)
-        : is_nullable(true), serialized_sparse_column(std::move(sparse_column)) {}
+        : is_nullable(true), num_rows(0), serialized_sparse_column(std::move(sparse_column)) {
+    // Read the row count from the member after the move: mem-initializers run in
+    // declaration order, so reading sparse_column in the initializer list is only safe
+    // when num_rows happens to be declared before serialized_sparse_column.
+    num_rows = serialized_sparse_column->size();
+}
 
 ColumnObject::ColumnObject(bool is_nullable_, DataTypePtr type, MutableColumnPtr&& column)
-        : is_nullable(is_nullable_) {
+        : is_nullable(is_nullable_), num_rows(0) {
     add_sub_column({}, std::move(column), type);
+    serialized_sparse_column->insert_many_defaults(num_rows);
 }
 
 ColumnObject::ColumnObject(Subcolumns&& subcolumns_, bool is_nullable_)
@@ -818,8 +824,8 @@ ColumnObject::ColumnObject(Subcolumns&& subcolumns_, bool is_nullable_)
     check_consistency();
 }
 
-ColumnObject::ColumnObject(size_t num_rows) : is_nullable(true) {
-    insert_many_defaults(num_rows);
+ColumnObject::ColumnObject(size_t size) : is_nullable(true), num_rows(0) {
+    insert_many_defaults(size);
     check_consistency();
 }
 
diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp
index 1b812781805ad9..57a43fbb38113f 100644
--- a/be/src/vec/data_types/serde/data_type_array_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp
@@ -438,7 +438,11 @@ Status DataTypeArraySerDe::read_column_from_pb(IColumn& column, const PValues& a
 void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column,
                                                   ColumnString* dst_column, int64_t row_num) const {
     const uint8_t type = static_cast<uint8_t>(TypeIndex::Array);
-    dst_column->insert_data(reinterpret_cast<const char*>(&type), sizeof(uint8_t));
+    ColumnString::Chars& chars = dst_column->get_chars();
+    const size_t old_size = chars.size();
+    const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t);
+    chars.resize(new_size);
+    memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
 
     const auto& array_col = assert_cast<const ColumnArray&>(src_column);
     const IColumn& nested_column = array_col.get_data();
@@ -446,10 +450,18 @@ void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column,
     size_t start = offsets[row_num - 1];
     size_t end = offsets[row_num];
     size_t size = end - start;
-    dst_column->insert_data(reinterpret_cast<const char*>(&size), sizeof(size_t));
+    memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&size),
+           sizeof(size_t));
+    // Every nested write_one_cell_to_binary() call terminates a row of its own in
+    // dst_column. Remember the current row count so those rows can be folded back into
+    // the single row that represents this array cell.
+    auto& dst_offsets = dst_column->get_offsets();
+    const size_t row_count = dst_offsets.size();
     for (size_t offset = start; offset != end; ++offset) {
         nested_serde->write_one_cell_to_binary(nested_column, dst_column, offset);
     }
+    dst_offsets.resize(row_count);
+    dst_offsets.push_back(chars.size());
 }
 
 } // namespace vectorized
diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
index f56bccc298c2d4..7279a0fc4a685b 100644
--- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
@@ -283,11 +283,18 @@ void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column,
     const uint8_t type = static_cast<uint8_t>(TypeIndex::JSONB);
     const auto& col = assert_cast<const ColumnString&>(src_column);
     const auto& data_ref = col.get_data_at(row_num);
-    const size_t size = data_ref.size;
+    const size_t data_size = data_ref.size;
 
-    dst_column->insert_data(reinterpret_cast<const char*>(&type), sizeof(uint8_t));
-    dst_column->insert_data(reinterpret_cast<const char*>(&size), sizeof(size_t));
-    dst_column->insert_data(data_ref.data, size);
+    ColumnString::Chars& chars = dst_column->get_chars();
+    const size_t old_size = chars.size();
+    const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size;
+    chars.resize(new_size);
+
+    memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
+    memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&data_size),
+           sizeof(size_t));
+    memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data, data_size);
+    dst_column->get_offsets().push_back(new_size);
 }
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index d45b39c6d63bc3..b325ec88e9fcca 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -393,20 +393,20 @@ Status DataTypeNullableSerDe::read_one_cell_from_json(IColumn& column,
     return Status::OK();
 }
 
-void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column,
-                                                     ColumnString* dst_column,
-                                                     int64_t row_num) const {
-    auto& col = assert_cast<const ColumnNullable&>(src_column);
-    uint8_t is_null = 0;
-    if (col.is_null_at(row_num)) [[unlikely]] {
-        is_null = 1;
-        dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
-    } else {
-        dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
-        auto& nested_col = col.get_nested_column();
-        nested_serde->write_one_cell_to_binary(nested_col, dst_column, row_num);
-    }
-}
+// void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column,
+//                                                      ColumnString* dst_column,
+//                                                      int64_t row_num) const {
+//     auto& col = assert_cast<const ColumnNullable&>(src_column);
+//     uint8_t is_null = 0;
+//     if (col.is_null_at(row_num)) [[unlikely]] {
+//         is_null = 1;
+//         dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
+//     } else {
+//         dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
+//         auto& nested_col = col.get_nested_column();
+//         nested_serde->write_one_cell_to_binary(nested_col, dst_column, row_num);
+//     }
+// }
 
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h
index d0f46ce7cbc319..33cf86ab694fde 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.h
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h
@@ -99,8 +99,8 @@ class DataTypeNullableSerDe : public DataTypeSerDe {
                                   int64_t row_num) const override;
     Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override;
 
-    void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
-                                  int64_t row_num) const override;
+    // void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
+    //                               int64_t row_num) const override;
 
     DataTypeSerDeSPtr get_nested_serde() { return nested_serde; }
 
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index 5ba7fdf293a1ee..c5f2994f6b2dc0 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -398,9 +398,16 @@ void DataTypeNumberSerDe<T>::write_one_cell_to_binary(const IColumn& src_column,
                                                       ColumnString* dst_column,
                                                       int64_t row_num) const {
     const uint8_t type = static_cast<uint8_t>(TypeId<T>::value);
-    dst_column->insert_data(reinterpret_cast<const char*>(&type), sizeof(uint8_t));
     const auto& data_ref = assert_cast<const ColumnType&>(src_column).get_data_at(row_num);
-    dst_column->insert_data(data_ref.data, data_ref.size);
+
+    ColumnString::Chars& chars = dst_column->get_chars();
+    const size_t old_size = chars.size();
+    const size_t new_size = old_size + sizeof(uint8_t) + data_ref.size;
+    chars.resize(new_size);
+
+    memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
+    memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data, data_ref.size);
+    dst_column->get_offsets().push_back(new_size);
 }
 
 /// Explicit template instantiations - to avoid code bloat in headers.
diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h
index 4634afa1449e40..38b2590b06265a 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -337,7 +337,7 @@ class DataTypeSerDe {
                                           Arena& mem_pool, int64_t row_num) const;
     virtual Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const;
 
-    virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
+    virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst,
                                           int64_t row_num) const {
         throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "write_one_cell_to_binary");
     }
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h
index 98cf89ada1e9ed..50acf28c6f271a 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -371,11 +371,19 @@ class DataTypeStringSerDeBase : public DataTypeSerDe {
         const uint8_t type = static_cast<uint8_t>(TypeIndex::String);
         const auto& col = assert_cast<const ColumnType&>(src_column);
         const auto& data_ref = col.get_data_at(row_num);
-        const size_t size = data_ref.size;
-
-        dst_column->insert_data(reinterpret_cast<const char*>(&type), sizeof(uint8_t));
-        dst_column->insert_data(reinterpret_cast<const char*>(&size), sizeof(size_t));
-        dst_column->insert_data(data_ref.data, size);
+        const size_t data_size = data_ref.size;
+
+        ColumnString::Chars& chars = dst_column->get_chars();
+        const size_t old_size = chars.size();
+        const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size;
+        chars.resize(new_size);
+
+        memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
+        memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&data_size),
+               sizeof(size_t));
+        memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data,
+               data_size);
+        dst_column->get_offsets().push_back(chars.size());
     }
 
 private: