Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix](serialize) fix column serialize and deserialize #45667

Merged
merged 1 commit into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions be/src/vec/columns/column_object.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1052,8 +1052,10 @@ void ColumnObject::Subcolumn::serialize_to_sparse_column(ColumnString* key, std:
auto& nullable_col = assert_cast<const ColumnNullable&>(*part);

// insert value
ColumnString::Chars& chars = value->get_chars();
nullable_serde->get_nested_serde()->write_one_cell_to_binary(
nullable_col.get_nested_column(), value, row);
nullable_col.get_nested_column(), chars, row);
value->get_offsets().push_back(chars.size());
}
return;
}
Expand Down Expand Up @@ -1113,6 +1115,11 @@ const char* parse_binary_from_sparse_column(TypeIndex type, const char* data, Fi
end = data + size;
break;
}
case TypeIndex::Nothing: {
res = Null();
end = data;
break;
}
case TypeIndex::Array: {
const size_t size = *reinterpret_cast<const size_t*>(data);
data += sizeof(size_t);
Expand All @@ -1122,9 +1129,9 @@ const char* parse_binary_from_sparse_column(TypeIndex type, const char* data, Fi
for (size_t i = 0; i < size; ++i) {
Field nested_field;
const auto nested_type =
assert_cast<const TypeIndex>(*reinterpret_cast<const uint8_t*>(data++));
static_cast<const TypeIndex>(*reinterpret_cast<const uint8_t*>(data++));
data = parse_binary_from_sparse_column(nested_type, data, nested_field, info_res);
array.emplace_back(std::move(nested_field));
array[i] = std::move(nested_field);
}
end = data;
break;
Expand Down
7 changes: 3 additions & 4 deletions be/src/vec/data_types/serde/data_type_array_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -436,9 +436,9 @@ Status DataTypeArraySerDe::read_column_from_pb(IColumn& column, const PValues& a
}

void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column,
ColumnString* dst_column, int64_t row_num) const {
ColumnString::Chars& chars,
int64_t row_num) const {
const uint8_t type = static_cast<uint8_t>(TypeIndex::Array);
ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t);
chars.resize(new_size);
Expand All @@ -453,9 +453,8 @@ void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column,
memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&size),
sizeof(size_t));
for (size_t offset = start; offset != end; ++offset) {
nested_serde->write_one_cell_to_binary(nested_column, dst_column, offset);
nested_serde->write_one_cell_to_binary(nested_column, chars, offset);
}
dst_column->get_offsets().push_back(chars.size());
}

} // namespace vectorized
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/serde/data_type_array_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class DataTypeArraySerDe : public DataTypeSerDe {
nested_serde->set_return_object_as_string(value);
}

void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
int64_t row_num) const override;

private:
Expand Down
5 changes: 2 additions & 3 deletions be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,13 +279,13 @@ Status DataTypeJsonbSerDe::read_column_from_pb(IColumn& column, const PValues& a
}

void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column,
ColumnString* dst_column, int64_t row_num) const {
ColumnString::Chars& chars,
int64_t row_num) const {
const uint8_t type = static_cast<uint8_t>(TypeIndex::JSONB);
const auto& col = assert_cast<const ColumnString&>(src_column);
const auto& data_ref = col.get_data_at(row_num);
size_t data_size = data_ref.size;

ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size;
chars.resize(new_size);
Expand All @@ -294,7 +294,6 @@ void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column,
memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&data_size),
sizeof(size_t));
memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data, data_size);
dst_column->get_offsets().push_back(new_size);
}
} // namespace vectorized
} // namespace doris
2 changes: 1 addition & 1 deletion be/src/vec/data_types/serde/data_type_jsonb_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ class DataTypeJsonbSerDe : public DataTypeStringSerDe {
int64_t end) const override;
Status read_column_from_pb(IColumn& column, const PValues& arg) const override;

void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
int64_t row_num) const override;

private:
Expand Down
29 changes: 15 additions & 14 deletions be/src/vec/data_types/serde/data_type_nullable_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -393,20 +393,21 @@ Status DataTypeNullableSerDe::read_one_cell_from_json(IColumn& column,
return Status::OK();
}

// void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column,
// ColumnString* dst_column,
// int64_t row_num) const {
// auto& col = assert_cast<const ColumnNullable&>(src_column);
// uint8_t is_null = 0;
// if (col.is_null_at(row_num)) [[unlikely]] {
// is_null = 1;
// dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
// } else {
// dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
// auto& nested_col = col.get_nested_column();
// nested_serde->write_one_cell_to_binary(nested_col, dst_column, row_num);
// }
// }
/// Appends the binary encoding of one cell of a nullable column to `chars`.
///
/// - NULL cell: exactly one byte is appended, the type tag TypeIndex::Nothing
///   narrowed to uint8_t. No payload follows the tag.
/// - Non-NULL cell: the call is forwarded to the nested serde with the nested
///   column, the same `chars` buffer and the same `row_num`.
///
/// This function only appends to `chars`.
///
/// @param src_column  source column; it is cast to ColumnNullable via assert_cast
/// @param chars       byte buffer that receives the encoded cell (appended at the end)
/// @param row_num     index of the row to encode
void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column,
                                                     ColumnString::Chars& chars,
                                                     int64_t row_num) const {
    auto& col = assert_cast<const ColumnNullable&>(src_column);
    if (col.is_null_at(row_num)) [[unlikely]] {
        // NULL is represented by the type tag alone.
        const uint8_t type = static_cast<uint8_t>(TypeIndex::Nothing);
        const size_t old_size = chars.size();
        const size_t new_size = old_size + sizeof(uint8_t);
        // Grow by one byte, then write the tag into the newly added slot.
        chars.resize(new_size);
        memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
    } else {
        // Non-NULL: the nested serde writes the value from the nested column.
        auto& nested_col = col.get_nested_column();
        nested_serde->write_one_cell_to_binary(nested_col, chars, row_num);
    }
}

} // namespace vectorized
} // namespace doris
4 changes: 2 additions & 2 deletions be/src/vec/data_types/serde/data_type_nullable_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,8 +99,8 @@ class DataTypeNullableSerDe : public DataTypeSerDe {
int64_t row_num) const override;
Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override;

// void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
// int64_t row_num) const override;
void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
int64_t row_num) const override;

DataTypeSerDeSPtr get_nested_serde() { return nested_serde; }

Expand Down
4 changes: 1 addition & 3 deletions be/src/vec/data_types/serde/data_type_number_serde.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -395,19 +395,17 @@ Status DataTypeNumberSerDe<T>::write_column_to_orc(const std::string& timezone,

template <typename T>
void DataTypeNumberSerDe<T>::write_one_cell_to_binary(const IColumn& src_column,
ColumnString* dst_column,
ColumnString::Chars& chars,
int64_t row_num) const {
const uint8_t type = static_cast<uint8_t>(TypeId<T>::value);
const auto& data_ref = assert_cast<const ColumnType&>(src_column).get_data_at(row_num);

ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + data_ref.size;
chars.resize(new_size);

memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data, data_ref.size);
dst_column->get_offsets().push_back(new_size);
}

/// Explicit template instantiations - to avoid code bloat in headers.
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/serde/data_type_number_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class DataTypeNumberSerDe : public DataTypeSerDe {
int64_t row_num) const override;
Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override;

void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
int64_t row_num) const override;

private:
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/serde/data_type_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ class DataTypeSerDe {
Arena& mem_pool, int64_t row_num) const;
virtual Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const;

virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst,
virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
int64_t row_num) const {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "write_one_cell_to_binary");
}
Expand Down
4 changes: 1 addition & 3 deletions be/src/vec/data_types/serde/data_type_string_serde.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,14 +366,13 @@ class DataTypeStringSerDeBase : public DataTypeSerDe {
return Status::OK();
}

void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
void write_one_cell_to_binary(const IColumn& src_column, ColumnString::Chars& chars,
int64_t row_num) const override {
const uint8_t type = static_cast<uint8_t>(TypeIndex::String);
const auto& col = assert_cast<const ColumnType&>(src_column);
const auto& data_ref = col.get_data_at(row_num);
const size_t data_size = data_ref.size;

ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size;
chars.resize(new_size);
Expand All @@ -383,7 +382,6 @@ class DataTypeStringSerDeBase : public DataTypeSerDe {
sizeof(size_t));
memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data,
data_size);
dst_column->get_offsets().push_back(chars.size());
}

private:
Expand Down
Loading