From 54da28c13c144202fe668e6d4ea599503f3a5d10 Mon Sep 17 00:00:00 2001 From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com> Date: Wed, 6 Nov 2024 15:29:34 +0800 Subject: [PATCH] [feature](table-function)support posexplode table function (#43221) ### What problem does this PR solve? Issue Number: close #xxx Related PR: #xxx Problem Summary: support posexplode table function doc: https://github.com/apache/doris-website/pull/1283 --- .../table_function/table_function_factory.cpp | 2 + .../vec/exprs/table_function/vposexplode.cpp | 155 ++++++++++++++++ be/src/vec/exprs/table_function/vposexplode.h | 50 ++++++ be/src/vec/functions/function_fake.cpp | 29 +++ .../BuiltinTableGeneratingFunctions.java | 6 +- .../functions/generator/PosExplode.java | 80 +++++++++ .../functions/generator/PosExplodeOuter.java | 80 +++++++++ .../TableGeneratingFunctionVisitor.java | 10 ++ .../table_function/posexplode.out | 166 ++++++++++++++++++ .../table_function/posexplode.groovy | 82 +++++++++ 10 files changed, 659 insertions(+), 1 deletion(-) create mode 100644 be/src/vec/exprs/table_function/vposexplode.cpp create mode 100644 be/src/vec/exprs/table_function/vposexplode.h create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java create mode 100644 regression-test/data/nereids_p0/sql_functions/table_function/posexplode.out create mode 100644 regression-test/suites/nereids_p0/sql_functions/table_function/posexplode.groovy diff --git a/be/src/vec/exprs/table_function/table_function_factory.cpp b/be/src/vec/exprs/table_function/table_function_factory.cpp index 0bef185351dc87..332eaed37d4483 100644 --- a/be/src/vec/exprs/table_function/table_function_factory.cpp +++ b/be/src/vec/exprs/table_function/table_function_factory.cpp @@ -33,6 +33,7 @@ #include 
"vec/exprs/table_function/vexplode_map.h" #include "vec/exprs/table_function/vexplode_numbers.h" #include "vec/exprs/table_function/vexplode_split.h" +#include "vec/exprs/table_function/vposexplode.h" #include "vec/utils/util.hpp" namespace doris::vectorized { @@ -61,6 +62,7 @@ const std::unordered_map()}, {"explode_map", TableFunctionCreator {}}, {"explode_json_object", TableFunctionCreator {}}, + {"posexplode", TableFunctionCreator {}}, {"explode", TableFunctionCreator {}}}; Status TableFunctionFactory::get_fn(const TFunction& t_fn, ObjectPool* pool, TableFunction** fn) { diff --git a/be/src/vec/exprs/table_function/vposexplode.cpp b/be/src/vec/exprs/table_function/vposexplode.cpp new file mode 100644 index 00000000000000..20d04a219f831a --- /dev/null +++ b/be/src/vec/exprs/table_function/vposexplode.cpp @@ -0,0 +1,155 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+#include "vec/exprs/table_function/vposexplode.h"
+
+#include <glog/logging.h>
+
+#include <algorithm>
+#include <ostream>
+
+#include "common/status.h"
+#include "vec/columns/column.h"
+#include "vec/columns/column_nullable.h"
+#include "vec/columns/columns_number.h"
+#include "vec/common/assert_cast.h"
+#include "vec/common/string_ref.h"
+#include "vec/core/block.h"
+#include "vec/core/column_with_type_and_name.h"
+#include "vec/exprs/vexpr.h"
+#include "vec/exprs/vexpr_context.h"
+
+namespace doris::vectorized {
+
+namespace {
+
+// Returns the two-field struct column inside `column` that receives the (pos, value) pairs.
+//
+// column:          output column handed to get_value() / get_same_many_values().
+// output_nullable: whether the whole output column is wrapped in a ColumnNullable.
+// rows:            number of struct rows the caller is about to append.
+//
+// When the output is nullable, `rows` default entries are appended to the outer null map.
+// Throws INTERNAL_ERROR when the output is not a struct column with exactly two fields.
+ColumnStruct* prepare_output_struct(MutableColumnPtr& column, bool output_nullable, size_t rows) {
+    ColumnStruct* struct_column = nullptr;
+    if (output_nullable) {
+        auto* nullable_column = assert_cast<ColumnNullable*>(column.get());
+        struct_column =
+                assert_cast<ColumnStruct*>(nullable_column->get_nested_column_ptr().get());
+        // A NULL array row never updates _cur_size, so callers treat it as current_empty()
+        // and do not reach this helper; the null map can therefore take plain defaults.
+        assert_cast<ColumnUInt8*>(nullable_column->get_null_map_column_ptr().get())
+                ->insert_many_defaults(rows);
+    } else if (column->is_column_struct()) {
+        struct_column = assert_cast<ColumnStruct*>(column.get());
+    } else {
+        throw Exception(ErrorCode::INTERNAL_ERROR,
+                        "only support array column explode to struct column");
+    }
+    if (struct_column == nullptr || struct_column->tuple_size() != 2) {
+        // The "{}" placeholder is required, otherwise the size argument is silently dropped.
+        throw Exception(ErrorCode::INTERNAL_ERROR,
+                        "only support array column explode to two column, but given: {}",
+                        struct_column == nullptr ? static_cast<size_t>(0)
+                                                 : struct_column->tuple_size());
+    }
+    return struct_column;
+}
+
+} // namespace
+
+// Start with a defined offset so that nothing reads an indeterminate value before the
+// first non-NULL row has been processed.
+VPosExplodeTableFunction::VPosExplodeTableFunction() : _collection_offset(0) {
+    _fn_name = "posexplode";
+}
+
+// Evaluates the single child expression on `block` and caches the resulting array column,
+// its offsets/null map (_array_detail) and its flattened element column.
+// Returns NotSupported when the evaluated column is not an array column.
+Status VPosExplodeTableFunction::process_init(Block* block, RuntimeState* state) {
+    CHECK(_expr_context->root()->children().size() == 1)
+            << "VPosExplodeTableFunction only support 1 child but has "
+            << _expr_context->root()->children().size();
+
+    int value_column_idx = -1;
+    RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block,
+                                                                  &value_column_idx));
+
+    _collection_column =
+            block->get_by_position(value_column_idx).column->convert_to_full_column_if_const();
+
+    if (!extract_column_array_info(*_collection_column, _array_detail)) {
+        return Status::NotSupported("column type {} not supported now, only support array",
+                                    block->get_by_position(value_column_idx).column->get_name());
+    }
+    if (is_column_nullable(*_collection_column)) {
+        _array_data_column =
+                assert_cast<const ColumnArray&>(
+                        assert_cast<const ColumnNullable&>(*_collection_column).get_nested_column())
+                        .get_data_ptr();
+    } else {
+        _array_data_column = assert_cast<const ColumnArray&>(*_collection_column).get_data_ptr();
+    }
+    return Status::OK();
+}
+
+// Positions the function on input row `row_idx`: records where that row's elements start
+// in the flattened data column and how many elements it has. NULL array rows are skipped
+// here and keep whatever size TableFunction::process_row() left in place.
+void VPosExplodeTableFunction::process_row(size_t row_idx) {
+    DCHECK(row_idx < _collection_column->size());
+    TableFunction::process_row(row_idx);
+
+    if (!_array_detail.array_nullmap_data || !_array_detail.array_nullmap_data[row_idx]) {
+        // NOTE(review): for row 0 this reads offsets[-1]; this assumes the offsets container
+        // tolerates index -1 and yields 0 there - confirm against the column implementation.
+        _collection_offset = (*_array_detail.offsets_ptr)[row_idx - 1];
+        _cur_size = (*_array_detail.offsets_ptr)[row_idx] - _collection_offset;
+    }
+}
+
+// Drops the references to the current block's columns and resets the cached offset.
+void VPosExplodeTableFunction::process_close() {
+    _collection_column = nullptr;
+    _array_data_column = nullptr;
+    _array_detail.reset();
+    _collection_offset = 0;
+}
+
+// Appends `length` copies of the current (pos, value) pair to `column`.
+// For an empty/NULL array row, `length` default values are appended instead.
+void VPosExplodeTableFunction::get_same_many_values(MutableColumnPtr& column, int length) {
+    // if current is empty array row, also append a default value
+    if (current_empty()) {
+        column->insert_many_defaults(length);
+        return;
+    }
+    // now we only support array column explode to struct column
+    const size_t pos = _collection_offset + _cur_offset;
+    // this _is_nullable is whole output column's nullable
+    ColumnStruct* ret = prepare_output_struct(column, _is_nullable, length);
+    auto& pose_column_nullable = assert_cast<ColumnNullable&>(ret->get_column(0));
+    pose_column_nullable.get_null_map_column().insert_many_defaults(length);
+    assert_cast<ColumnInt32&>(pose_column_nullable.get_nested_column())
+            .insert_many_vals(_cur_offset, length);
+    ret->get_column(1).insert_many_from(*_array_data_column, pos, length);
+}
+
+// Appends up to `max_step` consecutive (pos, value) pairs of the current row to `column`,
+// starting at the current offset, then advances the cursor.
+// For an empty/NULL array row a single default value is appended.
+// Returns the number of rows appended.
+int VPosExplodeTableFunction::get_value(MutableColumnPtr& column, int max_step) {
+    max_step = std::min(max_step, static_cast<int>(_cur_size - _cur_offset));
+    if (current_empty()) {
+        column->insert_default();
+        max_step = 1;
+    } else {
+        const size_t pos = _collection_offset + _cur_offset;
+        ColumnStruct* struct_column = prepare_output_struct(column, _is_nullable, max_step);
+        auto& pose_column_nullable = assert_cast<ColumnNullable&>(struct_column->get_column(0));
+        pose_column_nullable.get_null_map_column().insert_many_defaults(max_step);
+        // positions are the element indexes inside the array: [_cur_offset, _cur_offset + n)
+        assert_cast<ColumnInt32&>(pose_column_nullable.get_nested_column())
+                .insert_range_of_integer(_cur_offset, _cur_offset + max_step);
+        struct_column->get_column(1).insert_range_from(*_array_data_column, pos, max_step);
+    }
+    forward(max_step);
+    return max_step;
+}
+} // namespace doris::vectorized
diff --git a/be/src/vec/exprs/table_function/vposexplode.h b/be/src/vec/exprs/table_function/vposexplode.h
new file mode 100644
index 00000000000000..4e021fd58da918
--- /dev/null
+++ b/be/src/vec/exprs/table_function/vposexplode.h
@@ -0,0 +1,50 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "common/status.h"
+#include "vec/columns/column_map.h"
+#include "vec/data_types/data_type.h"
+#include "vec/data_types/data_type_array.h"
+#include "vec/exprs/table_function/table_function.h"
+#include "vec/functions/array/function_array_utils.h"
+
+namespace doris::vectorized {
+
+// Table function behind posexplode / posexplode_outer: for every element of an input array
+// it emits one struct row made of the element position and the element value.
+class VPosExplodeTableFunction : public TableFunction {
+    ENABLE_FACTORY_CREATOR(VPosExplodeTableFunction);
+
+public:
+    VPosExplodeTableFunction();
+
+    ~VPosExplodeTableFunction() override = default;
+
+    Status process_init(Block* block, RuntimeState* state) override;
+    void process_row(size_t row_idx) override;
+    void process_close() override;
+    void get_same_many_values(MutableColumnPtr& column, int length) override;
+    int get_value(MutableColumnPtr& column, int max_step) override;
+
+private:
+    ColumnPtr _collection_column; // evaluated array argument for the current block
+    ColumnPtr _array_data_column; // flattened elements of _collection_column
+    ColumnArrayExecutionData _array_detail;
+    // Initialised here so the member never holds an indeterminate value.
+    size_t _collection_offset = 0; // start offset of array[row_idx]
+};
+
+} // namespace doris::vectorized
diff --git a/be/src/vec/functions/function_fake.cpp b/be/src/vec/functions/function_fake.cpp
index 6a4f6275e1a89b..646da600b50c13 100644
--- a/be/src/vec/functions/function_fake.cpp
+++ b/be/src/vec/functions/function_fake.cpp
@@ -21,6 +21,7 @@ #include #include +#include #include #include
@@ -83,6 +84,25 @@ struct FunctionExplodeMap {
     static std::string get_error_msg() { return "Fake function do not support execute"; }
 };
 
+// posexplode: expands an array into struct rows of (nullable Int32 position, element).
+// AlwaysNullable selects the "_outer" flavour whose result struct is always nullable.
+template <bool AlwaysNullable = false>
+struct FunctionPoseExplode {
+    static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
+        DCHECK(is_array(arguments[0])) << arguments[0]->get_name() << " not supported";
+        DataTypes fieldTypes(2);
+        fieldTypes[0] = make_nullable(std::make_shared<DataTypeInt32>());
+        fieldTypes[1] =
+                check_and_get_data_type<DataTypeArray>(arguments[0].get())->get_nested_type();
+        auto struct_type = std::make_shared<DataTypeStruct>(fieldTypes);
+        if constexpr (AlwaysNullable) {
+            return make_nullable(struct_type);
+        } else {
+            return arguments[0]->is_nullable() ? make_nullable(struct_type) : struct_type;
+        }
+    }
+    static DataTypes get_variadic_argument_types() { return {}; }
+    static std::string get_error_msg() { return "Fake function do not support execute"; }
+};
+
 // explode json-object: expands json-object to struct with a pair of key and value in column string
 struct FunctionExplodeJsonObject {
     static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
@@ -138,6 +158,12 @@ void register_table_function_expand_outer_default(SimpleFunctionFactory& factory
                                                   COMBINATOR_SUFFIX_OUTER);
 };
 
+// Registers the fake function built from FunctionImpl under `name + suffix`.
+template <typename FunctionImpl>
+void register_table_function_with_impl(SimpleFunctionFactory& factory, const std::string& name,
+                                       const std::string& suffix = "") {
+    factory.register_function<FunctionFake<FunctionImpl>>(name + suffix);
+};
+
 void register_function_fake(SimpleFunctionFactory& factory) {
     register_function(factory, "esquery");
 
@@ -158,6 +184,9 @@ void register_function_fake(SimpleFunctionFactory& factory) {
     register_table_function_expand_outer_default(
             factory, "explode_json_array_double");
     register_table_function_expand_outer_default(factory, "explode_bitmap");
+    register_table_function_with_impl<FunctionPoseExplode<false>>(factory, "posexplode");
+    register_table_function_with_impl<FunctionPoseExplode<true>>(factory, "posexplode",
+                                                                 COMBINATOR_SUFFIX_OUTER);
     register_table_function_expand_outer_default(factory, "explode_variant_array");
 }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java
index 8f9679a0b51a09..d342b01ff7ee1e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinTableGeneratingFunctions.java
@@ -38,6 +38,8 @@
 import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeSplit;
 import
org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeSplitOuter;
 import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeVariantArray;
+import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplode;
+import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplodeOuter;
 
 import com.google.common.collect.ImmutableList;
 
@@ -71,7 +73,9 @@ public class BuiltinTableGeneratingFunctions implements FunctionHelper {
             tableGenerating(ExplodeJsonArrayStringOuter.class, "explode_json_array_string_outer"),
             tableGenerating(ExplodeJsonArrayJson.class, "explode_json_array_json"),
             tableGenerating(ExplodeJsonArrayJsonOuter.class, "explode_json_array_json_outer"),
-            tableGenerating(ExplodeVariantArray.class, "explode_variant_array")
+            tableGenerating(ExplodeVariantArray.class, "explode_variant_array"),
+            tableGenerating(PosExplode.class, "posexplode"),
+            tableGenerating(PosExplodeOuter.class, "posexplode_outer")
     );
 
     public static final BuiltinTableGeneratingFunctions INSTANCE = new BuiltinTableGeneratingFunctions();
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java
new file mode 100644
index 00000000000000..16f8232606ff5f
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplode.java
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.generator;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.StructField;
+import org.apache.doris.nereids.types.StructType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * PosExplode(array('a','b','c')) generate two columns and three rows with:
+ * pos column: 0, 1, 2
+ * value column: 'a', 'b', 'c'
+ */
+public class PosExplode extends TableGeneratingFunction implements UnaryExpression, PropagateNullable {
+
+    /**
+     * constructor with 1 argument: the array expression to explode.
+     */
+    public PosExplode(Expression arg) {
+        super("posexplode", arg);
+    }
+
+    /**
+     * withChildren: rebuilds the function around exactly one new child.
+     */
+    @Override
+    public PosExplode withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new PosExplode(children.get(0));
+    }
+
+    @Override
+    public void checkLegalityBeforeTypeCoercion() {
+        if (!(child().getDataType() instanceof ArrayType)) {
+            throw new AnalysisException("only support array type for posexplode function but got "
+                    + child().getDataType());
+        }
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        ArrayType arrayType = (ArrayType) child().getDataType();
+        StructType posAndValue = new StructType(ImmutableList.of(
+                new StructField("pos", IntegerType.INSTANCE, true, ""),
+                new StructField("col", arrayType.getItemType(), true, "")));
+        return ImmutableList.of(FunctionSignature.ret(posAndValue).args(arrayType));
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitPosExplode(this, context);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java
new file mode 100644
index 00000000000000..6d181354f414bc
--- /dev/null
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/PosExplodeOuter.java
@@ -0,0 +1,80 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.nereids.trees.expressions.functions.generator;
+
+import org.apache.doris.catalog.FunctionSignature;
+import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.trees.expressions.Expression;
+import org.apache.doris.nereids.trees.expressions.functions.AlwaysNullable;
+import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
+import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
+import org.apache.doris.nereids.types.ArrayType;
+import org.apache.doris.nereids.types.IntegerType;
+import org.apache.doris.nereids.types.StructField;
+import org.apache.doris.nereids.types.StructType;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.ImmutableList;
+
+import java.util.List;
+
+/**
+ * PosExplodeOuter(array('a','b','c')) generate two columns and three rows with:
+ * pos column: 0, 1, 2
+ * value column: 'a', 'b', 'c'
+ * The result is always nullable; the regression cases for this function expect a NULL or
+ * empty array to yield one row whose pos and value are both NULL.
+ */
+public class PosExplodeOuter extends TableGeneratingFunction implements UnaryExpression, AlwaysNullable {
+
+    /**
+     * constructor with 1 argument: the array expression to explode.
+     */
+    public PosExplodeOuter(Expression arg) {
+        super("posexplode_outer", arg);
+    }
+
+    /**
+     * withChildren: rebuilds the function around exactly one new child.
+     */
+    @Override
+    public PosExplodeOuter withChildren(List<Expression> children) {
+        Preconditions.checkArgument(children.size() == 1);
+        return new PosExplodeOuter(children.get(0));
+    }
+
+    @Override
+    public void checkLegalityBeforeTypeCoercion() {
+        if (!(child().getDataType() instanceof ArrayType)) {
+            throw new AnalysisException("only support array type for posexplode_outer function but got "
+                    + child().getDataType());
+        }
+    }
+
+    @Override
+    public List<FunctionSignature> getSignatures() {
+        ArrayType arrayType = (ArrayType) child().getDataType();
+        // Same named fields as PosExplode, so both variants expose an identical struct shape.
+        StructType posAndValue = new StructType(ImmutableList.of(
+                new StructField("pos", IntegerType.INSTANCE, true, ""),
+                new StructField("col", arrayType.getItemType(), true, "")));
+        return ImmutableList.of(FunctionSignature.ret(posAndValue).args(arrayType));
+    }
+
+    @Override
+    public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
+        return visitor.visitPosExplodeOuter(this, context);
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/TableGeneratingFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/TableGeneratingFunctionVisitor.java
index 94839b21fe7e27..9fae7c397cada9 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/TableGeneratingFunctionVisitor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/TableGeneratingFunctionVisitor.java
@@ -38,6 +38,8 @@
 import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeSplit;
 import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeSplitOuter;
 import org.apache.doris.nereids.trees.expressions.functions.generator.ExplodeVariantArray;
+import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplode;
+import org.apache.doris.nereids.trees.expressions.functions.generator.PosExplodeOuter;
 import org.apache.doris.nereids.trees.expressions.functions.generator.TableGeneratingFunction;
 import org.apache.doris.nereids.trees.expressions.functions.udf.JavaUdtf;
 
@@ -134,4 +136,12 @@ default R
visitExplodeJsonArrayJsonOuter(ExplodeJsonArrayJsonOuter explodeJsonAr
     default R visitJavaUdtf(JavaUdtf udtf, C context) {
         return visitTableGeneratingFunction(udtf, context);
     }
+    // Visits a posexplode call; by default it is handled like any table generating function.
+    default R visitPosExplode(PosExplode posExplode, C context) {
+        return visitTableGeneratingFunction(posExplode, context);
+    }
+    // Visits a posexplode_outer call; same default delegation as visitPosExplode.
+    default R visitPosExplodeOuter(PosExplodeOuter posExplodeOuter, C context) {
+        return visitTableGeneratingFunction(posExplodeOuter, context);
+    }
 }
diff --git a/regression-test/data/nereids_p0/sql_functions/table_function/posexplode.out b/regression-test/data/nereids_p0/sql_functions/table_function/posexplode.out
new file mode 100644
index 00000000000000..393e13a2b546a2
--- /dev/null
+++ b/regression-test/data/nereids_p0/sql_functions/table_function/posexplode.out
@@ -0,0 +1,166 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+0 zhangsan ["Chinese", "Math", "English"]
+1 lisi ["null"]
+2 wangwu ["88a", "90b", "96c"]
+3 lisi2 [null]
+4 amory \N
+
+-- !explode_sql --
+0 zhangsan ["Chinese", "Math", "English"] 0 Chinese
+0 zhangsan ["Chinese", "Math", "English"] 1 Math
+0 zhangsan ["Chinese", "Math", "English"] 2 English
+1 lisi ["null"] 0 null
+2 wangwu ["88a", "90b", "96c"] 0 88a
+2 wangwu ["88a", "90b", "96c"] 1 90b
+2 wangwu ["88a", "90b", "96c"] 2 96c
+3 lisi2 [null] 0 \N
+
+-- !explode_outer_sql --
+0 zhangsan ["Chinese", "Math", "English"] 0 Chinese
+0 zhangsan ["Chinese", "Math", "English"] 1 Math
+0 zhangsan ["Chinese", "Math", "English"] 2 English
+1 lisi ["null"] 0 null
+2 wangwu ["88a", "90b", "96c"] 0 88a
+2 wangwu ["88a", "90b", "96c"] 1 90b
+2 wangwu ["88a", "90b", "96c"] 2 96c
+3 lisi2 [null] 0 \N
+4 amory \N \N \N
+
+-- !explode_sql_multi --
+0 zhangsan ["Chinese", "Math", "English"] 0 Chinese 0 Chinese
+0 zhangsan ["Chinese", "Math", "English"] 0 Chinese 1 Math
+0 zhangsan ["Chinese", "Math", "English"] 0 Chinese 2 English
+0 zhangsan ["Chinese", "Math", "English"] 1 Math 0 Chinese
+0 zhangsan ["Chinese", "Math", "English"] 1 Math 1 Math +0 zhangsan ["Chinese", "Math", "English"] 1 Math 2 English +0 zhangsan ["Chinese", "Math", "English"] 2 English 0 Chinese +0 zhangsan ["Chinese", "Math", "English"] 2 English 1 Math +0 zhangsan ["Chinese", "Math", "English"] 2 English 2 English +1 lisi ["null"] 0 null 0 null +2 wangwu ["88a", "90b", "96c"] 0 88a 0 88a +2 wangwu ["88a", "90b", "96c"] 0 88a 1 90b +2 wangwu ["88a", "90b", "96c"] 0 88a 2 96c +2 wangwu ["88a", "90b", "96c"] 1 90b 0 88a +2 wangwu ["88a", "90b", "96c"] 1 90b 1 90b +2 wangwu ["88a", "90b", "96c"] 1 90b 2 96c +2 wangwu ["88a", "90b", "96c"] 2 96c 0 88a +2 wangwu ["88a", "90b", "96c"] 2 96c 1 90b +2 wangwu ["88a", "90b", "96c"] 2 96c 2 96c +3 lisi2 [null] 0 \N 0 \N + +-- !explode_sql_alias -- +0 zhangsan ["Chinese", "Math", "English"] 0 Chinese +0 zhangsan ["Chinese", "Math", "English"] 1 Math +0 zhangsan ["Chinese", "Math", "English"] 2 English +1 lisi ["null"] 0 null +2 wangwu ["88a", "90b", "96c"] 0 88a +2 wangwu ["88a", "90b", "96c"] 1 90b +2 wangwu ["88a", "90b", "96c"] 2 96c +3 lisi2 [null] 0 \N + +-- !explode_outer_sql_alias -- +0 zhangsan ["Chinese", "Math", "English"] 0 Chinese +0 zhangsan ["Chinese", "Math", "English"] 1 Math +0 zhangsan ["Chinese", "Math", "English"] 2 English +1 lisi ["null"] 0 null +2 wangwu ["88a", "90b", "96c"] 0 88a +2 wangwu ["88a", "90b", "96c"] 1 90b +2 wangwu ["88a", "90b", "96c"] 2 96c +3 lisi2 [null] 0 \N +4 amory \N \N \N + +-- !explode_sql_alias_multi -- +0 zhangsan ["Chinese", "Math", "English"] 0 Chinese 0 Chinese +0 zhangsan ["Chinese", "Math", "English"] 0 Chinese 1 Math +0 zhangsan ["Chinese", "Math", "English"] 0 Chinese 2 English +0 zhangsan ["Chinese", "Math", "English"] 1 Math 0 Chinese +0 zhangsan ["Chinese", "Math", "English"] 1 Math 1 Math +0 zhangsan ["Chinese", "Math", "English"] 1 Math 2 English +0 zhangsan ["Chinese", "Math", "English"] 2 English 0 Chinese +0 zhangsan ["Chinese", "Math", "English"] 2 English 1 Math +0 zhangsan 
["Chinese", "Math", "English"] 2 English 2 English +1 lisi ["null"] 0 null 0 null +2 wangwu ["88a", "90b", "96c"] 0 88a 0 88a +2 wangwu ["88a", "90b", "96c"] 0 88a 1 90b +2 wangwu ["88a", "90b", "96c"] 0 88a 2 96c +2 wangwu ["88a", "90b", "96c"] 1 90b 0 88a +2 wangwu ["88a", "90b", "96c"] 1 90b 1 90b +2 wangwu ["88a", "90b", "96c"] 1 90b 2 96c +2 wangwu ["88a", "90b", "96c"] 2 96c 0 88a +2 wangwu ["88a", "90b", "96c"] 2 96c 1 90b +2 wangwu ["88a", "90b", "96c"] 2 96c 2 96c +3 lisi2 [null] 0 \N 0 \N + +-- !sql -- +0 zhangsan ["Chinese", "Math", "English"] +1 lisi ["null"] +2 wangwu ["88a", "90b", "96c"] +3 lisi2 [null] +4 liuba [] + +-- !explode_sql_not -- +0 zhangsan ["Chinese", "Math", "English"] 0 Chinese +0 zhangsan ["Chinese", "Math", "English"] 1 Math +0 zhangsan ["Chinese", "Math", "English"] 2 English +1 lisi ["null"] 0 null +2 wangwu ["88a", "90b", "96c"] 0 88a +2 wangwu ["88a", "90b", "96c"] 1 90b +2 wangwu ["88a", "90b", "96c"] 2 96c +3 lisi2 [null] 0 \N + +-- !explode_outer_sql_not -- +0 zhangsan ["Chinese", "Math", "English"] 0 Chinese +0 zhangsan ["Chinese", "Math", "English"] 1 Math +0 zhangsan ["Chinese", "Math", "English"] 2 English +1 lisi ["null"] 0 null +2 wangwu ["88a", "90b", "96c"] 0 88a +2 wangwu ["88a", "90b", "96c"] 1 90b +2 wangwu ["88a", "90b", "96c"] 2 96c +3 lisi2 [null] 0 \N +4 liuba [] \N \N + +-- !explode_sql_alias_multi2 -- +0 zhangsan ["Chinese", "Math", "English"] {"pos":0, "col":"Chinese"} {"pos":0, "col":"Chinese"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":0, "col":"Chinese"} {"pos":1, "col":"Math"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":0, "col":"Chinese"} {"pos":2, "col":"English"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":1, "col":"Math"} {"pos":0, "col":"Chinese"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":1, "col":"Math"} {"pos":1, "col":"Math"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":1, "col":"Math"} {"pos":2, "col":"English"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":2, 
"col":"English"} {"pos":0, "col":"Chinese"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":2, "col":"English"} {"pos":1, "col":"Math"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":2, "col":"English"} {"pos":2, "col":"English"} +1 lisi ["null"] {"pos":0, "col":"null"} {"pos":0, "col":"null"} +2 wangwu ["88a", "90b", "96c"] {"pos":0, "col":"88a"} {"pos":0, "col":"88a"} +2 wangwu ["88a", "90b", "96c"] {"pos":0, "col":"88a"} {"pos":1, "col":"90b"} +2 wangwu ["88a", "90b", "96c"] {"pos":0, "col":"88a"} {"pos":2, "col":"96c"} +2 wangwu ["88a", "90b", "96c"] {"pos":1, "col":"90b"} {"pos":0, "col":"88a"} +2 wangwu ["88a", "90b", "96c"] {"pos":1, "col":"90b"} {"pos":1, "col":"90b"} +2 wangwu ["88a", "90b", "96c"] {"pos":1, "col":"90b"} {"pos":2, "col":"96c"} +2 wangwu ["88a", "90b", "96c"] {"pos":2, "col":"96c"} {"pos":0, "col":"88a"} +2 wangwu ["88a", "90b", "96c"] {"pos":2, "col":"96c"} {"pos":1, "col":"90b"} +2 wangwu ["88a", "90b", "96c"] {"pos":2, "col":"96c"} {"pos":2, "col":"96c"} +3 lisi2 [null] {"pos":0, "col":null} {"pos":0, "col":null} + +-- !explode_sql_alias_multi3 -- +0 zhangsan ["Chinese", "Math", "English"] {"pos":0, "col":"Chinese"} {"pos":0, "col":"Chinese"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":0, "col":"Chinese"} {"pos":1, "col":"Math"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":0, "col":"Chinese"} {"pos":2, "col":"English"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":1, "col":"Math"} {"pos":0, "col":"Chinese"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":1, "col":"Math"} {"pos":1, "col":"Math"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":1, "col":"Math"} {"pos":2, "col":"English"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":2, "col":"English"} {"pos":0, "col":"Chinese"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":2, "col":"English"} {"pos":1, "col":"Math"} +0 zhangsan ["Chinese", "Math", "English"] {"pos":2, "col":"English"} {"pos":2, "col":"English"} +1 lisi ["null"] {"pos":0, "col":"null"} 
{"pos":0, "col":"null"} +2 wangwu ["88a", "90b", "96c"] {"pos":0, "col":"88a"} {"pos":0, "col":"88a"} +2 wangwu ["88a", "90b", "96c"] {"pos":0, "col":"88a"} {"pos":1, "col":"90b"} +2 wangwu ["88a", "90b", "96c"] {"pos":0, "col":"88a"} {"pos":2, "col":"96c"} +2 wangwu ["88a", "90b", "96c"] {"pos":1, "col":"90b"} {"pos":0, "col":"88a"} +2 wangwu ["88a", "90b", "96c"] {"pos":1, "col":"90b"} {"pos":1, "col":"90b"} +2 wangwu ["88a", "90b", "96c"] {"pos":1, "col":"90b"} {"pos":2, "col":"96c"} +2 wangwu ["88a", "90b", "96c"] {"pos":2, "col":"96c"} {"pos":0, "col":"88a"} +2 wangwu ["88a", "90b", "96c"] {"pos":2, "col":"96c"} {"pos":1, "col":"90b"} +2 wangwu ["88a", "90b", "96c"] {"pos":2, "col":"96c"} {"pos":2, "col":"96c"} +3 lisi2 [null] {"pos":0, "col":null} {"pos":0, "col":null} + diff --git a/regression-test/suites/nereids_p0/sql_functions/table_function/posexplode.groovy b/regression-test/suites/nereids_p0/sql_functions/table_function/posexplode.groovy new file mode 100644 index 00000000000000..8320af92f48ff5 --- /dev/null +++ b/regression-test/suites/nereids_p0/sql_functions/table_function/posexplode.groovy @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+
+// Regression suite for the posexplode / posexplode_outer table functions on the Nereids
+// planner: nullable and non-nullable array columns, stacked lateral views, aliased and
+// single-struct outputs, and a run with batch_size = 1.
+suite("posexplode") {
+    sql "SET enable_nereids_planner=true"
+    sql "SET enable_fallback_to_original_planner=false"
+
+    // table whose array column is nullable
+    sql """ DROP TABLE IF EXISTS table_test """
+    sql """
+        CREATE TABLE IF NOT EXISTS `table_test`(
+            `id` INT NULL,
+            `name` TEXT NULL,
+            `score` array<string> NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES ("replication_allocation" = "tag.location.default: 1");
+    """
+
+    // insert values: plain strings, the string "null", a NULL element and a NULL array
+    sql """ insert into table_test values (0, "zhangsan", ["Chinese","Math","English"]); """
+    sql """ insert into table_test values (1, "lisi", ["null"]); """
+    sql """ insert into table_test values (2, "wangwu", ["88a","90b","96c"]); """
+    sql """ insert into table_test values (3, "lisi2", [null]); """
+    sql """ insert into table_test values (4, "amory", NULL); """
+
+    qt_sql """ select * from table_test order by id; """
+    order_qt_explode_sql """ select id,name,score, k,v from table_test lateral view posexplode(score) tmp as k,v order by id;"""
+    order_qt_explode_outer_sql """ select id,name,score, k,v from table_test lateral view posexplode_outer(score) tmp as k,v order by id; """
+
+    // multi lateral view
+    order_qt_explode_sql_multi """ select id,name,score, k,v,k1,v1 from table_test lateral view posexplode_outer(score) tmp as k,v lateral view posexplode(score) tmp2 as k1,v1 order by id;"""
+
+    // test with alias
+    order_qt_explode_sql_alias """ select id,name,score, tmp.k, tmp.v from table_test lateral view posexplode(score) tmp as k,v order by id;"""
+    order_qt_explode_outer_sql_alias """ select id,name,score, tmp.k, tmp.v from table_test lateral view posexplode_outer(score) tmp as k,v order by id; """
+
+    order_qt_explode_sql_alias_multi """ select id,name,score, tmp.k, tmp.v, tmp2.k, tmp2.v from table_test lateral view posexplode_outer(score) tmp as k,v lateral view posexplode(score) tmp2 as k,v order by id;"""
+
+    // table whose array column is NOT NULL
+    sql """ DROP TABLE IF EXISTS table_test_not """
+    sql """
+        CREATE TABLE IF NOT EXISTS `table_test_not`(
+            `id` INT NULL,
+            `name` TEXT NULL,
+            `score` array<string> not NULL
+        ) ENGINE=OLAP
+        DUPLICATE KEY(`id`)
+        COMMENT 'OLAP'
+        DISTRIBUTED BY HASH(`id`) BUCKETS 1
+        PROPERTIES ("replication_allocation" = "tag.location.default: 1");
+    """
+
+    // insert values: the last row is an empty array instead of a NULL array
+    sql """ insert into table_test_not values (0, "zhangsan", ["Chinese","Math","English"]); """
+    sql """ insert into table_test_not values (1, "lisi", ["null"]); """
+    sql """ insert into table_test_not values (2, "wangwu", ["88a","90b","96c"]); """
+    sql """ insert into table_test_not values (3, "lisi2", [null]); """
+    sql """ insert into table_test_not values (4, "liuba", []); """
+
+    qt_sql """ select * from table_test_not order by id; """
+    order_qt_explode_sql_not """ select id,name,score, k,v from table_test_not lateral view posexplode(score) tmp as k,v order by id;"""
+    order_qt_explode_outer_sql_not """ select id,name,score, k,v from table_test_not lateral view posexplode_outer(score) tmp as k,v order by id; """
+    // a single alias exposes the whole (pos, col) struct as one column
+    order_qt_explode_sql_alias_multi2 """ select * from table_test_not lateral view posexplode(score) tmp as e1 lateral view posexplode(score) tmp2 as e2 order by id;"""
+    // same query again with batch_size = 1
+    sql """ set batch_size = 1; """
+    order_qt_explode_sql_alias_multi3 """ select * from table_test_not lateral view posexplode(score) tmp as e1 lateral view posexplode(score) tmp2 as e2 order by id;"""
+
+}