Skip to content

Commit

Permalink
[Fix](Variant) fix some nested explode_variant_array bug and add more…
Browse files Browse the repository at this point in the history
… test
  • Loading branch information
eldenmoon committed Nov 25, 2024
1 parent 647cd86 commit bf3489e
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 11 deletions.
40 changes: 34 additions & 6 deletions be/src/vec/exprs/table_function/vexplode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,17 @@

#include "common/status.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_nothing.h"
#include "vec/columns/column_object.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/data_types/data_type.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_nothing.h"
#include "vec/exprs/vexpr.h"
#include "vec/exprs/vexpr_context.h"
#include "vec/functions/function_helpers.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"
Expand All @@ -37,6 +42,34 @@ VExplodeTableFunction::VExplodeTableFunction() {
_fn_name = "vexplode";
}

// Prepares explode state for a variant input column.
//
// Reads the column at `value_column_idx` from `block`, strips its nullable wrapper and
// treats it as a ColumnObject (variant). On success `_array_column` holds the array to
// explode, `_detail.nested_type` holds the element type of that array, and
// `_detail.output_as_variant` is set so the exploded elements are wrapped back into a
// variant column.
//
// Returns:
//   - Status::OK() on success.
//   - Status::InternalError if the column is not a ColumnObject.
//   - Status::NotSupported if the variant root is present but is not an array type.
Status VExplodeTableFunction::_process_init_variant(Block* block, int value_column_idx) {
    // Hold the intermediate columns in named locals. The raw pointer obtained below is only
    // valid while an owning ColumnPtr exists; taking it from a temporary would leave it
    // dangling whenever convert_to_full_column_if_const()/remove_nullable() return a column
    // that is not owned elsewhere (NOTE(review): presumably the case for const inputs).
    ColumnPtr full_column =
            block->get_by_position(value_column_idx).column->convert_to_full_column_if_const();
    ColumnPtr unwrapped_column = remove_nullable(full_column);
    const auto* variant_column = check_and_get_column<ColumnObject>(unwrapped_column.get());
    if (variant_column == nullptr) {
        // The caller dispatches here based on the data type; fail cleanly instead of
        // dereferencing a null pointer if the column does not match that type.
        return Status::InternalError("explode expects a variant column but got {}",
                                     unwrapped_column->get_name());
    }

    _detail.output_as_variant = true;
    if (!variant_column->is_null_root()) {
        _array_column = variant_column->get_root();
        // We need to wrap the output nested column within a variant column,
        // otherwise the output type is mismatched.
        const auto* array_type = check_and_get_data_type<DataTypeArray>(
                remove_nullable(variant_column->get_root_type()).get());
        if (array_type == nullptr) {
            return Status::NotSupported("explode not support none array type {}",
                                        variant_column->get_root_type()->get_name());
        }
        _detail.nested_type = array_type->get_nested_type();
    } else {
        // Null root: there is no array to read, so build a nullable array of "nothing"
        // filled with one default value per input row and use the Nothing type for elements.
        _array_column = ColumnNullable::create(ColumnArray::create(ColumnNothing::create(0)),
                                               ColumnUInt8::create(0));
        _array_column->assume_mutable()->insert_many_defaults(variant_column->size());
        _detail.nested_type = std::make_shared<DataTypeNothing>();
    }
    return Status::OK();
}

Status VExplodeTableFunction::process_init(Block* block, RuntimeState* state) {
CHECK(_expr_context->root()->children().size() == 1)
<< "VExplodeTableFunction only support 1 child but has "
Expand All @@ -47,12 +80,7 @@ Status VExplodeTableFunction::process_init(Block* block, RuntimeState* state) {
&value_column_idx));
if (WhichDataType(remove_nullable(block->get_by_position(value_column_idx).type))
.is_variant_type()) {
// explode variant array
const auto& variant_column = check_and_get_column<ColumnObject>(
remove_nullable(block->get_by_position(value_column_idx)
.column->convert_to_full_column_if_const())
.get());
_array_column = variant_column->get_root();
RETURN_IF_ERROR(_process_init_variant(block, value_column_idx));
} else {
_array_column =
block->get_by_position(value_column_idx).column->convert_to_full_column_if_const();
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/exprs/table_function/vexplode.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class VExplodeTableFunction : public TableFunction {
int get_value(MutableColumnPtr& column, int max_step) override;

private:
Status _process_init_variant(Block* block, int value_column_idx);
ColumnPtr _array_column;
ColumnArrayExecutionData _detail;
size_t _array_offset; // start offset of array[row_idx]
Expand Down
4 changes: 2 additions & 2 deletions be/src/vec/functions/array/function_array_distance.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,8 +128,8 @@ class FunctionArrayDistance : public IFunction {

const auto& offsets1 = *arr1.offsets_ptr;
const auto& offsets2 = *arr2.offsets_ptr;
const auto& nested_col1 = assert_cast<const ColumnFloat64*>(arr1.nested_col);
const auto& nested_col2 = assert_cast<const ColumnFloat64*>(arr2.nested_col);
const auto& nested_col1 = assert_cast<const ColumnFloat64*>(arr1.nested_col.get());
const auto& nested_col2 = assert_cast<const ColumnFloat64*>(arr2.nested_col.get());
for (ssize_t row = 0; row < offsets1.size(); ++row) {
if (arr1.array_nullmap_data && arr1.array_nullmap_data[row]) {
dst_null_data[row] = true;
Expand Down
13 changes: 11 additions & 2 deletions be/src/vec/functions/array/function_array_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@

#include "vec/columns/column.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_object.h"
#include "vec/columns/column_vector.h"
#include "vec/data_types/data_type.h"

namespace doris::vectorized {

Expand All @@ -45,12 +47,19 @@ bool extract_column_array_info(const IColumn& src, ColumnArrayExecutionData& dat

// extract array offsets and nested column
data.offsets_ptr = &data.array_col->get_offsets();
data.nested_col = &data.array_col->get_data();
data.nested_col = data.array_col->get_data_ptr();
// extract nested column is nullable
if (data.nested_col->is_nullable()) {
const auto& nested_null_col = reinterpret_cast<const ColumnNullable&>(*data.nested_col);
data.nested_nullmap_data = nested_null_col.get_null_map_data().data();
data.nested_col = nested_null_col.get_nested_column_ptr().get();
data.nested_col = nested_null_col.get_nested_column_ptr();
}
if (data.output_as_variant &&
!WhichDataType(remove_nullable(data.nested_type)).is_variant_type()) {
// use the nested column (made nullable) and its type as the root of a new variant column
auto variant = ColumnObject::create(true /*always nullable*/);
variant->create_root(data.nested_type, make_nullable(data.nested_col)->assume_mutable());
data.nested_col = variant->get_ptr();
}
return true;
}
Expand Down
6 changes: 5 additions & 1 deletion be/src/vec/functions/array/function_array_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
// under the License.
#pragma once

#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/columns_number.h"
Expand Down Expand Up @@ -54,7 +55,10 @@ struct ColumnArrayExecutionData {
const ColumnArray* array_col = nullptr;
const ColumnArray::Offsets64* offsets_ptr = nullptr;
const UInt8* nested_nullmap_data = nullptr;
const IColumn* nested_col = nullptr;
ColumnPtr nested_col = nullptr;
DataTypePtr nested_type = nullptr;
// wrap the nested column as variant column
bool output_as_variant = false;

ColumnArrayMutableData to_mutable_data() const {
ColumnArrayMutableData dst;
Expand Down

0 comments on commit bf3489e

Please sign in to comment.