Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[refine](column) ColumnArray does not implement the insert data function. #43926

Merged
merged 4 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 0 additions & 11 deletions be/src/vec/columns/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -612,23 +612,12 @@ class IColumn : public COW<IColumn> {
* To avoid confusion between these cases, we don't have isContiguous method.
*/

/// Reports whether the column's values are laid out as one continuous memory segment of
/// fixed-size elements (which implies the values have a fixed size).
/// This base implementation always answers false.
virtual bool is_fixed_and_contiguous() const {
    return false;
}

/// Returns the underlying data array for columns that are fixed and contiguous.
/// This base implementation never returns normally: it always throws doris::Exception with
/// ErrorCode::NOT_IMPLEMENTED_ERROR, naming the column via get_name().
virtual StringRef get_raw_data() const {
    // The throw is unconditional, so no return statement follows (it would be dead code).
    throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
                           "Column {} is not a contiguous block of memory", get_name());
}

/// Returns the size of one value for columns whose values have a fixed size.
/// This base implementation never returns normally: it always throws doris::Exception with
/// ErrorCode::NOT_IMPLEMENTED_ERROR, naming the column via get_name().
virtual size_t size_of_value_if_fixed() const {
    // The throw is unconditional, so no return statement follows (it would be dead code).
    throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
                           "Values of column {} are not fixed size.", get_name());
}

/// Returns the ratio of values in the column that are equal to the column's default value.
/// @param sample_ratio ratio of rows to check; this base implementation does not read it.
/// @return always 0.0 in this base implementation.
virtual double get_ratio_of_default_rows(double sample_ratio = 1.0) const {
    return 0.0;
}
Expand Down
52 changes: 6 additions & 46 deletions be/src/vec/columns/column_array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,60 +159,20 @@ void ColumnArray::get(size_t n, Field& res) const {
}

StringRef ColumnArray::get_data_at(size_t n) const {
/** Returns the range of memory that covers all elements of the array.
* Works for arrays of fixed length values.
* For arrays of strings and arrays of arrays, the resulting chunk of memory may not be one-to-one correspondence with the elements,
* since it contains only the data laid in succession, but not the offsets.
*/
size_t offset_of_first_elem = offset_at(n);
StringRef first;
if (offset_of_first_elem < get_data().size()) {
first = get_data().get_data_at(offset_of_first_elem);
}

size_t array_size = size_at(n);
if (array_size == 0) {
return StringRef(first.data, 0);
}

size_t offset_of_last_elem = offset_at(n + 1) - 1;
StringRef last = get_data().get_data_at(offset_of_last_elem);
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR,
"Method get_data_at is not supported for " + get_name());
}

return StringRef(first.data, last.data + last.size - first.data);
// insert_data is not supported for ColumnArray: this always throws doris::Exception with
// ErrorCode::NOT_IMPLEMENTED_ERROR. Neither pos nor length is read.
void ColumnArray::insert_data(const char* pos, size_t length) {
    const auto message = "Method insert_data is not supported for " + get_name();
    throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, message);
}

// Row n holds the default value when its offset equals the offset stored for the
// previous row, i.e. the row contributes no elements.
// NOTE(review): for n == 0 this reads index -1 of the offsets container — presumably the
// container guarantees a readable element there; confirm against its definition.
bool ColumnArray::is_default_at(size_t n) const {
    const auto& offs = get_offsets();
    const auto prev_row = static_cast<ssize_t>(n) - 1;
    return offs[n] == offs[prev_row];
}

/** Appends one array row whose elements are decoded from a raw byte range.
  * Only usable when the nested column is fixed-size and contiguous: the range
  * [pos, pos + length) is cut into chunks of size_of_value_if_fixed() each, every chunk is
  * inserted into the nested column, and one offset covering the inserted elements is appended.
  * A zero length appends a row with no elements.
  *
  * Throws doris::Exception(ErrorCode::INTERNAL_ERROR) when the nested column is not
  * fixed-size and contiguous, when it reports an element size of zero for a non-empty range,
  * or when length is not a multiple of the element size. The length is validated before any
  * element is inserted, so a bad length leaves the nested column untouched.
  */
void ColumnArray::insert_data(const char* pos, size_t length) {
    if (!data->is_fixed_and_contiguous()) {
        throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                               "Method insert_data should have_fixed_size, {} is not suitable",
                               get_name());
    }

    const size_t field_size = data->size_of_value_if_fixed();
    size_t elems = 0;

    if (length) {
        // A zero element size could never consume the range (the previous loop would spin
        // forever), so reject it explicitly.
        if (field_size == 0) {
            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                                   "Method insert_data requires a non-zero value size, {} is "
                                   "not suitable",
                                   get_name());
        }

        // Validate up front so that nothing is appended to the nested column on failure.
        if (length % field_size != 0) {
            throw doris::Exception(ErrorCode::INTERNAL_ERROR,
                                   "Incorrect length argument for method ColumnArray::insert_data");
        }

        elems = length / field_size;
        for (size_t i = 0; i < elems; ++i, pos += field_size) {
            data->insert_data(pos, field_size);
        }
        // Note: no __builtin_unreachable() here. Without braces it used to sit after the
        // length check and was executed on every successful insert, which is undefined
        // behaviour.
    }

    get_offsets().push_back(get_offsets().back() + elems);
}

StringRef ColumnArray::serialize_value_into_arena(size_t n, Arena& arena,
char const*& begin) const {
size_t array_size = size_at(n);
Expand Down
3 changes: 0 additions & 3 deletions be/src/vec/columns/column_complex.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,6 @@ class ColumnComplexType final : public COWHelper<IColumn, ColumnComplexType<T>>
// TODO add hash function
}

bool is_fixed_and_contiguous() const override { return true; }
size_t size_of_value_if_fixed() const override { return sizeof(T); }

/// Exposes the column's backing storage as a StringRef starting at the first element.
/// NOTE(review): the reported size is data.size() as-is — confirm whether callers expect an
/// element count or a byte count here.
StringRef get_raw_data() const override {
    const auto* first_byte = reinterpret_cast<const char*>(data.data());
    return StringRef(first_byte, data.size());
}
Expand Down
33 changes: 0 additions & 33 deletions be/src/vec/columns/column_const.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,39 +110,6 @@ ColumnPtr ColumnConst::permute(const Permutation& perm, size_t limit) const {
return ColumnConst::create(data, limit);
}

// Folds the single constant value into each of the first `rows` entries of `hashes`,
// using the existing entry as the seed and overwriting it with the result.
//   hashes    - in/out array of CRC seeds/results; entries [0, rows) are rewritten.
//   type      - primitive type forwarded to RawValue::zlib_crc32.
//   rows      - number of entries to update; debug builds check it equals size().
//   offset    - not read by this implementation.
//   null_data - must be nullptr (checked in debug builds).
// When the nested value at row 0 has no data pointer, the null-hash variant is applied.
void ColumnConst::update_crcs_with_value(uint32_t* __restrict hashes, doris::PrimitiveType type,
                                         uint32_t rows, uint32_t offset,
                                         const uint8_t* __restrict null_data) const {
    DCHECK(null_data == nullptr);
    DCHECK(rows == size());
    const auto real_data = data->get_data_at(0);
    // The index type matches `rows` so the comparison is not signed/unsigned and the
    // counter cannot overflow for row counts above INT_MAX.
    if (real_data.data == nullptr) {
        for (uint32_t i = 0; i < rows; ++i) {
            hashes[i] = HashUtil::zlib_crc_hash_null(hashes[i]);
        }
    } else {
        for (uint32_t i = 0; i < rows; ++i) {
            hashes[i] = RawValue::zlib_crc32(real_data.data, real_data.size, type, hashes[i]);
        }
    }
}

// Folds the single constant value into each of the first size() entries of `hashes`,
// using the existing entry as the seed and overwriting it with the result.
//   hashes    - in/out array of xxHash64 seeds/results; entries [0, size()) are rewritten.
//   null_data - must be nullptr (checked in debug builds).
// When the nested value at row 0 has no data pointer, the null-hash variant is applied.
void ColumnConst::update_hashes_with_value(uint64_t* __restrict hashes,
                                           const uint8_t* __restrict null_data) const {
    DCHECK(null_data == nullptr);
    const auto real_data = data->get_data_at(0);
    const size_t real_size = size();
    // The index type matches the row count so the comparison is not signed/unsigned and
    // the counter cannot overflow for large columns.
    if (real_data.data == nullptr) {
        for (size_t i = 0; i < real_size; ++i) {
            hashes[i] = HashUtil::xxHash64NullWithSeed(hashes[i]);
        }
    } else {
        for (size_t i = 0; i < real_size; ++i) {
            hashes[i] = HashUtil::xxHash64WithSeed(real_data.data, real_data.size, hashes[i]);
        }
    }
}

void ColumnConst::get_permutation(bool /*reverse*/, size_t /*limit*/, int /*nan_direction_hint*/,
Permutation& res) const {
res.resize(s);
Expand Down
10 changes: 0 additions & 10 deletions be/src/vec/columns/column_const.h
Original file line number Diff line number Diff line change
Expand Up @@ -208,14 +208,6 @@ class ColumnConst final : public COWHelper<IColumn, ColumnConst> {
data->update_hash_with_value(0, hash);
}

// (TODO.Amory) here may not use column_const update hash, and PrimitiveType is not used.
void update_crcs_with_value(uint32_t* __restrict hashes, PrimitiveType type, uint32_t rows,
uint32_t offset = 0,
const uint8_t* __restrict null_data = nullptr) const override;

void update_hashes_with_value(uint64_t* __restrict hashes,
const uint8_t* __restrict null_data) const override;

ColumnPtr filter(const Filter& filt, ssize_t result_size_hint) const override;
size_t filter(const Filter& filter) override;

Expand Down Expand Up @@ -262,8 +254,6 @@ class ColumnConst final : public COWHelper<IColumn, ColumnConst> {
// bool is_nullable() const override { return is_column_nullable(*data); }
bool only_null() const override { return data->is_null_at(0); }
bool is_numeric() const override { return data->is_numeric(); }
bool is_fixed_and_contiguous() const override { return data->is_fixed_and_contiguous(); }
size_t size_of_value_if_fixed() const override { return data->size_of_value_if_fixed(); }
StringRef get_raw_data() const override { return data->get_raw_data(); }

/// Not part of the common interface.
Expand Down
2 changes: 0 additions & 2 deletions be/src/vec/columns/column_decimal.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,6 @@ class ColumnDecimal final : public COWHelper<IColumn, ColumnDecimal<T>> {

bool is_numeric() const override { return false; }
bool is_column_decimal() const override { return true; }
bool is_fixed_and_contiguous() const override { return true; }
size_t size_of_value_if_fixed() const override { return sizeof(T); }

size_t size() const override { return data.size(); }
size_t byte_size() const override { return data.size() * sizeof(data[0]); }
Expand Down
4 changes: 0 additions & 4 deletions be/src/vec/columns/column_dictionary.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,6 @@ class ColumnDictionary final : public COWHelper<IColumn, ColumnDictionary<T>> {
__builtin_unreachable();
}

bool is_fixed_and_contiguous() const override { return true; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be deleted?
get_raw_data() may depend on it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now it is only used in the array column.


size_t size_of_value_if_fixed() const override { return sizeof(T); }

[[noreturn]] StringRef get_raw_data() const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"get_raw_data not supported in ColumnDictionary");
Expand Down
6 changes: 0 additions & 6 deletions be/src/vec/columns/column_nullable.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,18 +339,12 @@ class ColumnNullable final : public COWHelper<IColumn, ColumnNullable>, public N
bool is_column_array() const override { return get_nested_column().is_column_array(); }
bool is_column_map() const override { return get_nested_column().is_column_map(); }
bool is_column_struct() const override { return get_nested_column().is_column_struct(); }
bool is_fixed_and_contiguous() const override { return false; }

// Exclusive only when the base-class check, the nested column and the null-map column all
// report exclusive; the checks run in that order and stop at the first false.
bool is_exclusive() const override {
    if (!IColumn::is_exclusive()) {
        return false;
    }
    if (!nested_column->is_exclusive()) {
        return false;
    }
    return get_null_map_column().is_exclusive();
}

// Fixed value size of a nullable value: the null-map column's value size plus the nested
// column's value size. Whatever either call throws propagates to the caller.
size_t size_of_value_if_fixed() const override {
    const size_t null_map_size = get_null_map_column().size_of_value_if_fixed();
    const size_t nested_size = nested_column->size_of_value_if_fixed();
    return null_map_size + nested_size;
}

bool only_null() const override { return size() == 1 && is_null_at(0); }

// used in schema change
Expand Down
5 changes: 0 additions & 5 deletions be/src/vec/columns/column_object.h
Original file line number Diff line number Diff line change
Expand Up @@ -537,11 +537,6 @@ class ColumnObject final : public COWHelper<IColumn, ColumnObject> {
throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, "get_raw_data" + get_name());
}

// Not implemented for this column: always throws doris::Exception with
// ErrorCode::NOT_IMPLEMENTED_ERROR; the message is the method name followed by get_name().
size_t size_of_value_if_fixed() const override {
    const auto message = "size_of_value_if_fixed" + get_name();
    throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, message);
}

// Not implemented for this column: always throws doris::Exception with
// ErrorCode::NOT_IMPLEMENTED_ERROR; the row index argument is ignored.
StringRef get_data_at(size_t) const override {
    const auto message = "get_data_at" + get_name();
    throw doris::Exception(ErrorCode::NOT_IMPLEMENTED_ERROR, message);
}
Expand Down
2 changes: 0 additions & 2 deletions be/src/vec/columns/column_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -373,8 +373,6 @@ class ColumnVector final : public COWHelper<IColumn, ColumnVector<T>> {

ColumnPtr replicate(const IColumn::Offsets& offsets) const override;

bool is_fixed_and_contiguous() const override { return true; }
size_t size_of_value_if_fixed() const override { return sizeof(T); }
/// Exposes the column's backing storage as a StringRef starting at the first element.
/// NOTE(review): the reported size is data.size() as-is — confirm whether callers expect an
/// element count or a byte count here.
StringRef get_raw_data() const override {
    const auto* first_byte = reinterpret_cast<const char*>(data.data());
    return StringRef(first_byte, data.size());
}
Expand Down
3 changes: 0 additions & 3 deletions be/src/vec/columns/predicate_column.h
Original file line number Diff line number Diff line change
Expand Up @@ -379,9 +379,6 @@ class PredicateColumnType final : public COWHelper<IColumn, PredicateColumnType<
__builtin_unreachable();
}

bool is_fixed_and_contiguous() const override { return true; }
size_t size_of_value_if_fixed() const override { return sizeof(T); }

[[noreturn]] StringRef get_raw_data() const override {
throw doris::Exception(ErrorCode::INTERNAL_ERROR,
"get_raw_data not supported in PredicateColumnType");
Expand Down
6 changes: 3 additions & 3 deletions be/test/vec/columns/column_hash_func_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -71,11 +71,11 @@ TEST(HashFuncTest, ArrayTypeTest) {
DataTypePtr a = std::make_shared<DataTypeArray>(d);
ColumnPtr col_a = a->create_column_const_with_default_value(1);
// xxHash
EXPECT_NO_FATAL_FAILURE(col_a->update_hashes_with_value(xx_hashes));
EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_hashes_with_value(xx_hashes));
std::cout << xx_hashes[0] << std::endl;
// crcHash
EXPECT_NO_FATAL_FAILURE(
col_a->update_crcs_with_value(crc_hashes, PrimitiveType::TYPE_ARRAY, 1));
EXPECT_NO_FATAL_FAILURE(unpack_if_const(col_a).first->update_crcs_with_value(
crc_hashes, PrimitiveType::TYPE_ARRAY, 1));
std::cout << crc_hashes[0] << std::endl;
}
}
Expand Down
Loading