diff --git a/be/src/vec/columns/column_object.cpp b/be/src/vec/columns/column_object.cpp index 31b77d549fb65a..e1b76f6e4df9da 100644 --- a/be/src/vec/columns/column_object.cpp +++ b/be/src/vec/columns/column_object.cpp @@ -1052,8 +1052,10 @@ void ColumnObject::Subcolumn::serialize_to_sparse_column(ColumnString* key, std: auto& nullable_col = assert_cast(*part); // insert value + ColumnString::Chars& chars = value->get_chars(); nullable_serde->get_nested_serde()->write_one_cell_to_binary( - nullable_col.get_nested_column(), value, row); + nullable_col.get_nested_column(), chars, row); + value->get_offsets().push_back(chars.size()); } return; } @@ -1113,6 +1115,11 @@ const char* parse_binary_from_sparse_column(TypeIndex type, const char* data, Fi end = data + size; break; } + case TypeIndex::Nothing: { + res = Null(); + end = data; + break; + } case TypeIndex::Array: { const size_t size = *reinterpret_cast(data); data += sizeof(size_t); @@ -1122,9 +1129,9 @@ const char* parse_binary_from_sparse_column(TypeIndex type, const char* data, Fi for (size_t i = 0; i < size; ++i) { Field nested_field; const auto nested_type = - assert_cast(*reinterpret_cast(data++)); + static_cast(*reinterpret_cast(data++)); data = parse_binary_from_sparse_column(nested_type, data, nested_field, info_res); - array.emplace_back(std::move(nested_field)); + array[i] = std::move(nested_field); } end = data; break; diff --git a/be/src/vec/data_types/serde/data_type_array_serde.cpp b/be/src/vec/data_types/serde/data_type_array_serde.cpp index 57a43fbb38113f..2c906ce4c49861 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_array_serde.cpp @@ -436,9 +436,9 @@ Status DataTypeArraySerDe::read_column_from_pb(IColumn& column, const PValues& a } void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column, - ColumnString* dst_column, int64_t row_num) const { + ColumnString::Chars& chars, + int64_t row_num) const { const uint8_t type = static_cast(TypeIndex::Array); - ColumnString::Chars& chars = dst_column->get_chars(); const size_t old_size = chars.size(); const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t); chars.resize(new_size); @@ -453,9 +453,8 @@ void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column, memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast(&size), sizeof(size_t)); for (size_t offset = start; offset != end; ++offset) { - nested_serde->write_one_cell_to_binary(nested_column, dst_column, offset); + nested_serde->write_one_cell_to_binary(nested_column, chars, offset); } - dst_column->get_offsets().push_back(chars.size()); } } // namespace vectorized diff --git a/be/src/vec/data_types/serde/data_type_array_serde.h b/be/src/vec/data_types/serde/data_type_array_serde.h index aaf1a42551218d..25da83f2cff425 100644 --- a/be/src/vec/data_types/serde/data_type_array_serde.h +++ b/be/src/vec/data_types/serde/data_type_array_serde.h @@ -101,7 +101,7 @@ class DataTypeArraySerDe : public DataTypeSerDe { nested_serde->set_return_object_as_string(value); } - void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column, + void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, int64_t row_num) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp index 7279a0fc4a685b..69dbae7241c38d 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.cpp @@ -279,13 +279,13 @@ Status DataTypeJsonbSerDe::read_column_from_pb(IColumn& column, const PValues& a } void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column, - ColumnString* dst_column, int64_t row_num) const { + ColumnString::Chars& chars, + int64_t row_num) const { const uint8_t type = static_cast(TypeIndex::JSONB); const auto& col = assert_cast(src_column); const auto& data_ref = col.get_data_at(row_num); size_t data_size = data_ref.size; - ColumnString::Chars& chars = dst_column->get_chars(); const size_t old_size = chars.size(); const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size; chars.resize(new_size); @@ -294,7 +294,6 @@ void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column, memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast(&data_size), sizeof(size_t)); memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data, data_size); - dst_column->get_offsets().push_back(new_size); } } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_jsonb_serde.h b/be/src/vec/data_types/serde/data_type_jsonb_serde.h index d6d29cce55611f..95e510516ed104 100644 --- a/be/src/vec/data_types/serde/data_type_jsonb_serde.h +++ b/be/src/vec/data_types/serde/data_type_jsonb_serde.h @@ -71,7 +71,7 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe { int64_t end) const override; Status read_column_from_pb(IColumn& column, const PValues& arg) const override; - void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column, + void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, int64_t row_num) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index b325ec88e9fcca..9193a3b0100a38 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -393,20 +393,21 @@ Status DataTypeNullableSerDe::read_one_cell_from_json(IColumn& column, return Status::OK(); } -// void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column, -// ColumnString* dst_column, -// int64_t row_num) const { -// auto& col = assert_cast(src_column); -// uint8_t is_null = 0; -// if (col.is_null_at(row_num)) [[unlikely]] { -// is_null = 1; -// dst_column->insert_data(reinterpret_cast(is_null), sizeof(uint8_t)); -// } else { -// dst_column->insert_data(reinterpret_cast(is_null), sizeof(uint8_t)); -// auto& nested_col = col.get_nested_column(); -// nested_serde->write_one_cell_to_binary(nested_col, dst_column, row_num); -// } -// } +void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column, + ColumnString::Chars& chars, + int64_t row_num) const { + auto& col = assert_cast(src_column); + if (col.is_null_at(row_num)) [[unlikely]] { + const uint8_t type = static_cast(TypeIndex::Nothing); + const size_t old_size = chars.size(); + const size_t new_size = old_size + sizeof(uint8_t); + chars.resize(new_size); + memcpy(chars.data() + old_size, reinterpret_cast(&type), sizeof(uint8_t)); + } else { + auto& nested_col = col.get_nested_column(); + nested_serde->write_one_cell_to_binary(nested_col, chars, row_num); + } +} } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h b/be/src/vec/data_types/serde/data_type_nullable_serde.h index 33cf86ab694fde..828c079244bd09 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.h +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h @@ -99,8 +99,8 @@ class DataTypeNullableSerDe : public DataTypeSerDe { int64_t row_num) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; - // void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column, - // int64_t row_num) const override; + void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, + int64_t row_num) const override; DataTypeSerDeSPtr get_nested_serde() { return nested_serde; } diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index c5f2994f6b2dc0..fcf72e6f992fe5 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -395,19 +395,17 @@ Status DataTypeNumberSerDe::write_column_to_orc(const std::string& timezone, template void DataTypeNumberSerDe::write_one_cell_to_binary(const IColumn& src_column, - ColumnString* dst_column, + ColumnString::Chars& chars, int64_t row_num) const { const uint8_t type = static_cast(TypeId::value); const auto& data_ref = assert_cast(src_column).get_data_at(row_num); - ColumnString::Chars& chars = dst_column->get_chars(); const size_t old_size = chars.size(); const size_t new_size = old_size + sizeof(uint8_t) + data_ref.size; chars.resize(new_size); memcpy(chars.data() + old_size, reinterpret_cast(&type), sizeof(uint8_t)); memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data, data_ref.size); - dst_column->get_offsets().push_back(new_size); } /// Explicit template instantiations - to avoid code bloat in headers. diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h b/be/src/vec/data_types/serde/data_type_number_serde.h index c9073f5e868710..db4373e646c7a9 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.h +++ b/be/src/vec/data_types/serde/data_type_number_serde.h @@ -107,7 +107,7 @@ class DataTypeNumberSerDe : public DataTypeSerDe { int64_t row_num) const override; Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override; - void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column, + void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, int64_t row_num) const override; private: diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 38b2590b06265a..8a879b5df26cb9 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -337,7 +337,7 @@ class DataTypeSerDe { Arena& mem_pool, int64_t row_num) const; virtual Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const; - virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst, + virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, int64_t row_num) const { throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "write_one_cell_to_binary"); } diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index 50acf28c6f271a..39a623316a27b4 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -366,14 +366,13 @@ class DataTypeStringSerDeBase : public DataTypeSerDe { return Status::OK(); } - void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column, + void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars, int64_t row_num) const override { const uint8_t type = static_cast(TypeIndex::String); const auto& col = assert_cast(src_column); const auto& data_ref = col.get_data_at(row_num); const size_t data_size = data_ref.size; - ColumnString::Chars& chars = dst_column->get_chars(); const size_t old_size = chars.size(); const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size; chars.resize(new_size); @@ -383,7 +382,6 @@ class DataTypeStringSerDeBase : public DataTypeSerDe { sizeof(size_t)); memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data, data_size); - dst_column->get_offsets().push_back(chars.size()); } private: