Skip to content

Commit

Permalink
fix3 (apache#45554)
Browse files: browse the repository at this point in the history
  • Loading branch information
csun5285 authored Dec 17, 2024
1 parent 01ad180 commit 52e9e14
Show file tree
Hide file tree
Showing 8 changed files with 65 additions and 34 deletions.
11 changes: 7 additions & 4 deletions be/src/vec/columns/column_object.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -804,11 +804,14 @@ ColumnObject::ColumnObject(bool is_nullable_, bool create_root_)
}

ColumnObject::ColumnObject(MutableColumnPtr&& sparse_column)
: is_nullable(true), serialized_sparse_column(std::move(sparse_column)) {}
: is_nullable(true),
num_rows(sparse_column->size()),
serialized_sparse_column(std::move(sparse_column)) {}

ColumnObject::ColumnObject(bool is_nullable_, DataTypePtr type, MutableColumnPtr&& column)
: is_nullable(is_nullable_) {
: is_nullable(is_nullable_), num_rows(0) {
add_sub_column({}, std::move(column), type);
serialized_sparse_column->insert_many_defaults(num_rows);
}

ColumnObject::ColumnObject(Subcolumns&& subcolumns_, bool is_nullable_)
Expand All @@ -818,8 +821,8 @@ ColumnObject::ColumnObject(Subcolumns&& subcolumns_, bool is_nullable_)
check_consistency();
}

ColumnObject::ColumnObject(size_t num_rows) : is_nullable(true) {
insert_many_defaults(num_rows);
ColumnObject::ColumnObject(size_t size) : is_nullable(true), num_rows(0) {
insert_many_defaults(size);
check_consistency();
}

Expand Down
10 changes: 8 additions & 2 deletions be/src/vec/data_types/serde/data_type_array_serde.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -438,18 +438,24 @@ Status DataTypeArraySerDe::read_column_from_pb(IColumn& column, const PValues& a
void DataTypeArraySerDe::write_one_cell_to_binary(const IColumn& src_column,
ColumnString* dst_column, int64_t row_num) const {
const uint8_t type = static_cast<uint8_t>(TypeIndex::Array);
dst_column->insert_data(reinterpret_cast<const char*>(&type), sizeof(uint8_t));
ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t);
chars.resize(new_size);
memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));

const auto& array_col = assert_cast<const ColumnArray&>(src_column);
const IColumn& nested_column = array_col.get_data();
const auto& offsets = array_col.get_offsets();
size_t start = offsets[row_num - 1];
size_t end = offsets[row_num];
size_t size = end - start;
dst_column->insert_data(reinterpret_cast<const char*>(&size), sizeof(size_t));
memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&size),
sizeof(size_t));
for (size_t offset = start; offset != end; ++offset) {
nested_serde->write_one_cell_to_binary(nested_column, dst_column, offset);
}
dst_column->get_offsets().push_back(chars.size());
}

} // namespace vectorized
Expand Down
15 changes: 11 additions & 4 deletions be/src/vec/data_types/serde/data_type_jsonb_serde.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -283,11 +283,18 @@ void DataTypeJsonbSerDe::write_one_cell_to_binary(const IColumn& src_column,
const uint8_t type = static_cast<uint8_t>(TypeIndex::JSONB);
const auto& col = assert_cast<const ColumnString&>(src_column);
const auto& data_ref = col.get_data_at(row_num);
const size_t size = data_ref.size;
size_t data_size = data_ref.size;

dst_column->insert_data(reinterpret_cast<const char*>(&type), sizeof(uint8_t));
dst_column->insert_data(reinterpret_cast<const char*>(&size), sizeof(size_t));
dst_column->insert_data(data_ref.data, size);
ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size;
chars.resize(new_size);

memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&data_size),
sizeof(size_t));
memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data, data_size);
dst_column->get_offsets().push_back(new_size);
}
} // namespace vectorized
} // namespace doris
28 changes: 14 additions & 14 deletions be/src/vec/data_types/serde/data_type_nullable_serde.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -393,20 +393,20 @@ Status DataTypeNullableSerDe::read_one_cell_from_json(IColumn& column,
return Status::OK();
}

void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column,
ColumnString* dst_column,
int64_t row_num) const {
auto& col = assert_cast<const ColumnNullable&>(src_column);
uint8_t is_null = 0;
if (col.is_null_at(row_num)) [[unlikely]] {
is_null = 1;
dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
} else {
dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
auto& nested_col = col.get_nested_column();
nested_serde->write_one_cell_to_binary(nested_col, dst_column, row_num);
}
}
// void DataTypeNullableSerDe::write_one_cell_to_binary(const IColumn& src_column,
// ColumnString* dst_column,
// int64_t row_num) const {
// auto& col = assert_cast<const ColumnNullable&>(src_column);
// uint8_t is_null = 0;
// if (col.is_null_at(row_num)) [[unlikely]] {
// is_null = 1;
// dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
// } else {
// dst_column->insert_data(reinterpret_cast<const char*>(is_null), sizeof(uint8_t));
// auto& nested_col = col.get_nested_column();
// nested_serde->write_one_cell_to_binary(nested_col, dst_column, row_num);
// }
// }

} // namespace vectorized
} // namespace doris
4 changes: 2 additions & 2 deletions be/src/vec/data_types/serde/data_type_nullable_serde.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -99,8 +99,8 @@ class DataTypeNullableSerDe : public DataTypeSerDe {
int64_t row_num) const override;
Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const override;

void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
int64_t row_num) const override;
// void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
// int64_t row_num) const override;

DataTypeSerDeSPtr get_nested_serde() { return nested_serde; }

Expand Down
11 changes: 9 additions & 2 deletions be/src/vec/data_types/serde/data_type_number_serde.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -398,9 +398,16 @@ void DataTypeNumberSerDe<T>::write_one_cell_to_binary(const IColumn& src_column,
ColumnString* dst_column,
int64_t row_num) const {
const uint8_t type = static_cast<uint8_t>(TypeId<T>::value);
dst_column->insert_data(reinterpret_cast<const char*>(&type), sizeof(uint8_t));
const auto& data_ref = assert_cast<const ColumnType&>(src_column).get_data_at(row_num);
dst_column->insert_data(data_ref.data, data_ref.size);

ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + data_ref.size;
chars.resize(new_size);

memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
memcpy(chars.data() + old_size + sizeof(uint8_t), data_ref.data, data_ref.size);
dst_column->get_offsets().push_back(new_size);
}

/// Explicit template instantiations - to avoid code bloat in headers.
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/data_types/serde/data_type_serde.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -337,7 +337,7 @@ class DataTypeSerDe {
Arena& mem_pool, int64_t row_num) const;
virtual Status read_one_cell_from_json(IColumn& column, const rapidjson::Value& result) const;

virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst_column,
virtual void write_one_cell_to_binary(const IColumn& src_column, ColumnString* dst,
int64_t row_num) const {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "write_one_cell_to_binary");
}
Expand Down
18 changes: 13 additions & 5 deletions be/src/vec/data_types/serde/data_type_string_serde.h
Original file line number | Diff line number | Diff line change
Expand Up @@ -371,11 +371,19 @@ class DataTypeStringSerDeBase : public DataTypeSerDe {
const uint8_t type = static_cast<uint8_t>(TypeIndex::String);
const auto& col = assert_cast<const ColumnType&>(src_column);
const auto& data_ref = col.get_data_at(row_num);
const size_t size = data_ref.size;

dst_column->insert_data(reinterpret_cast<const char*>(&type), sizeof(uint8_t));
dst_column->insert_data(reinterpret_cast<const char*>(&size), sizeof(size_t));
dst_column->insert_data(data_ref.data, size);
const size_t data_size = data_ref.size;

ColumnString::Chars& chars = dst_column->get_chars();
const size_t old_size = chars.size();
const size_t new_size = old_size + sizeof(uint8_t) + sizeof(size_t) + data_ref.size;
chars.resize(new_size);

memcpy(chars.data() + old_size, reinterpret_cast<const char*>(&type), sizeof(uint8_t));
memcpy(chars.data() + old_size + sizeof(uint8_t), reinterpret_cast<const char*>(&data_size),
sizeof(size_t));
memcpy(chars.data() + old_size + sizeof(uint8_t) + sizeof(size_t), data_ref.data,
data_size);
dst_column->get_offsets().push_back(chars.size());
}

private:
Expand Down

0 comments on commit 52e9e14

Please sign in to comment.