Skip to content

Commit

Permalink
add test case
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangstar333 committed Apr 18, 2024
1 parent a1ea131 commit 7c91858
Show file tree
Hide file tree
Showing 26 changed files with 691 additions and 58 deletions.
1 change: 1 addition & 0 deletions be/src/pipeline/exec/table_function_operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ bool TableFunctionLocalState::_roll_table_functions(int last_eos_idx) {
bool TableFunctionLocalState::_is_inner_and_empty() {
for (int i = 0; i < _parent->cast<TableFunctionOperatorX>()._fn_num; i++) {
// if any table function is not outer and has empty result, go to next child row
// if it's outer function, will be insert into one row NULL
if (!_fns[i]->is_outer() && _fns[i]->current_empty()) {
return true;
}
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/exec/vtable_function_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ Status VTableFunctionNode::_prepare_output_slot_ids(const TPlanNode& tnode) {
bool VTableFunctionNode::_is_inner_and_empty() {
for (int i = 0; i < _fn_num; i++) {
// if any table function is not outer and has empty result, go to next child row
// if it's outer function, will be insert into NULL
if (!_fns[i]->is_outer() && _fns[i]->current_empty()) {
return true;
}
Expand Down
1 change: 1 addition & 0 deletions be/src/vec/exprs/table_function/table_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class TableFunction {
virtual int get_value(MutableColumnPtr& column, int max_step) {
max_step = std::max(1, std::min(max_step, (int)(_cur_size - _cur_offset)));
int i = 0;
// TODO: this for loop maybe could refactor, and call once get_value function, it's could insert into max_step value once
for (; i < max_step && !eos(); i++) {
get_value(column);
forward();
Expand Down
11 changes: 7 additions & 4 deletions be/src/vec/exprs/table_function/table_function_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,16 @@ const std::unordered_map<std::string, std::function<std::unique_ptr<TableFunctio
{"explode", TableFunctionCreator<VExplodeTableFunction> {}}};

Status TableFunctionFactory::get_fn(const TFunction& t_fn, ObjectPool* pool, TableFunction** fn) {
const std::string fn_name_raw = t_fn.name.function_name;
bool is_outer = match_suffix(t_fn.name.function_name, COMBINATOR_SUFFIX_OUTER);
if (t_fn.binary_type == TFunctionBinaryType::JAVA_UDF) {
*fn = pool->add(UDFTableFunction::create_unique(t_fn).release());
if (is_outer) {
(*fn)->set_outer();
}
return Status::OK();
} else {
bool is_outer = match_suffix(t_fn.name.function_name, COMBINATOR_SUFFIX_OUTER);
std::string fn_name_real =
const std::string& fn_name_raw = t_fn.name.function_name;
const std::string& fn_name_real =
is_outer ? remove_suffix(fn_name_raw, COMBINATOR_SUFFIX_OUTER) : fn_name_raw;

auto fn_iterator = _function_map.find(fn_name_real);
Expand All @@ -89,7 +92,7 @@ Status TableFunctionFactory::get_fn(const TFunction& t_fn, ObjectPool* pool, Tab
return Status::OK();
}
}
return Status::NotSupported("Table function {} is not support", fn_name_raw);
return Status::NotSupported("Table function {} is not support", t_fn.name.function_name);
}

} // namespace doris::vectorized
20 changes: 9 additions & 11 deletions be/src/vec/exprs/table_function/udf_table_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,21 +19,11 @@

#include <glog/logging.h>

#include <algorithm>
#include <iterator>
#include <memory>
#include <ostream>
#include <sstream>

#include "common/status.h"
#include "runtime/user_function_cache.h"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/column_string.h"
#include "vec/common/assert_cast.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_factory.hpp"
#include "vec/exec/jni_connector.h"
Expand All @@ -49,7 +39,9 @@ UDFTableFunction::UDFTableFunction(const TFunction& t_fn) : TableFunction(), _t_
_fn_name = _t_fn.name.function_name;
_return_type = DataTypeFactory::instance().create_data_type(
TypeDescriptor::from_thrift(t_fn.ret_type));
_return_type = std::make_shared<DataTypeArray>(make_nullable(_return_type));
// as the java-utdf function in java code is eg: ArrayList<String>
// so we need a array column to save the execute result, and make_nullable could help deal with nullmap
_return_type = make_nullable(std::make_shared<DataTypeArray>(make_nullable(_return_type)));
}

Status UDFTableFunction::open() {
Expand Down Expand Up @@ -133,6 +125,8 @@ Status UDFTableFunction::process_init(Block* block, RuntimeState* state) {
env->DeleteLocalRef(input_map);
env->DeleteLocalRef(output_map);
RETURN_IF_ERROR(JniConnector::fill_block(block, {_result_column_idx}, output_address));
LOG(INFO)<<"block dump: "<<block->dump_structure();
LOG(INFO)<<block->dump_data();
block->erase(_result_column_idx);
if (!extract_column_array_info(*_array_result_column, _array_column_detail)) {
return Status::NotSupported("column type {} not supported now",
Expand All @@ -148,6 +142,10 @@ void UDFTableFunction::process_row(size_t row_idx) {
_array_offset = (*_array_column_detail.offsets_ptr)[row_idx - 1];
_cur_size = (*_array_column_detail.offsets_ptr)[row_idx] - _array_offset;
}
// so when it's NULL of row_idx, will not update _cur_size
// it's will be _cur_size == 0, and means current_empty.
// if the fn is outer, will be continue insert_default
// if the fn is not outer function, will be not insert any value.
}

void UDFTableFunction::process_close() {
Expand Down
14 changes: 0 additions & 14 deletions be/src/vec/exprs/table_function/udf_table_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,13 @@

#pragma once

#include <stddef.h>
#include <stdint.h>

#include <string_view>
#include <vector>

#include "common/status.h"
#include "jni.h"
#include "util/jni-util.h"
#include "vec/columns/column.h"
#include "vec/common/string_ref.h"
#include "vec/data_types/data_type.h"
#include "vec/exprs/table_function/table_function.h"
#include "vec/functions/array/function_array_utils.h"
namespace doris {
namespace vectorized {
class Block;
class ColumnString;
} // namespace vectorized
} // namespace doris

namespace doris::vectorized {

Expand Down Expand Up @@ -82,7 +69,6 @@ class UDFTableFunction final : public TableFunction {
if (is_closed) {
return Status::OK();
}
VLOG_DEBUG << "Free resources for JniContext";
JNIEnv* env = nullptr;
Status status = JniUtil::GetJNIEnv(&env);
if (!status.ok() || env == nullptr) {
Expand Down
3 changes: 2 additions & 1 deletion be/src/vec/exprs/vectorized_fn_call.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ Status VectorizedFnCall::prepare(RuntimeState* state, const RowDescriptor& desc,
} else if (_fn.binary_type == TFunctionBinaryType::JAVA_UDF) {
if (config::enable_java_support) {
if (_fn.is_udtf_function) {
_function = FakeJavaUDTF::create(_fn, argument_template, _data_type);
// fake function. it's no use and can't execute.
_function = FunctionFake<UDTFImpl>::create();
} else {
_function = JavaFunctionCall::create(_fn, argument_template, _data_type);
}
Expand Down
27 changes: 1 addition & 26 deletions be/src/vec/functions/function_fake.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,35 +65,10 @@ class FunctionFake : public IFunction {

struct UDTFImpl {
static DataTypePtr get_return_type_impl(const DataTypes& arguments) {
DCHECK(false) << "get_return_type_impl not supported";
DCHECK(false) << "get_return_type_impl not supported, shouldn't into here.";
return nullptr;
}
static std::string get_error_msg() { return "Fake function do not support execute"; }
};

class FakeJavaUDTF : public FunctionFake<UDTFImpl> {
public:
FakeJavaUDTF(const TFunction& fn, const DataTypes& argument_types,
const DataTypePtr& return_type)
: _fn(fn), _argument_types(argument_types), _return_type(return_type) {}

static FunctionPtr create(const TFunction& fn, const ColumnsWithTypeAndName& argument_types,
const DataTypePtr& return_type) {
DataTypes data_types(argument_types.size());
for (size_t i = 0; i < argument_types.size(); ++i) {
data_types[i] = argument_types[i].type;
}
return std::make_shared<FakeJavaUDTF>(fn, data_types, return_type);
}
String get_name() const override { return _fn.name.function_name; }
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
return _return_type;
}

private:
const TFunction& _fn;
const DataTypes _argument_types;
const DataTypePtr _return_type;
};

} // namespace doris::vectorized
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,8 @@ private void analyzeTableFunction() throws AnalysisException {
function.setChecksum(checksum);
function.setNullableMode(returnNullMode);
function.setUDTFunction(true);
// Todo: maybe in create tables function, need register two function, one is
// normal and one is outer as those have different result when result is NULL.
}

private void analyzeUda() throws AnalysisException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1673,7 +1673,7 @@ && collectChildReturnTypes()[0].isDecimalV3()) {
fn = getTableFunction(fnName.getFunction(), matchFuncChildTypes,
Function.CompareMode.IS_NONSTRICT_SUPERTYPE_OF);
if (fn == null) {
throw new AnalysisException(getFunctionNotFoundError(argTypes));
throw new AnalysisException(getFunctionNotFoundError(argTypes) + " in table function");
}
// set param child types
fn.setReturnType(((ArrayType) childTypes[0]).getItemType());
Expand All @@ -1687,7 +1687,7 @@ && collectChildReturnTypes()[0].isDecimalV3()) {
if (fn != null) {
FunctionUtil.checkEnableJavaUdf();
if (!fn.isUDTFunction()) {
throw new AnalysisException(getFunctionNotFoundError(argTypes));
throw new AnalysisException(getFunctionNotFoundError(argTypes) + " in table function");
}
}
}
Expand Down
25 changes: 25 additions & 0 deletions regression-test/data/javaudf_p0/test_javaudtf_arrayint.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
1 2 2022-01-01 2022-01-01T11:11:11 a1b
2 4 2022-01-01 2022-01-01T11:11:11 a2b
3 6 2022-01-01 2022-01-01T11:11:11 a3b
4 8 2022-01-01 2022-01-01T11:11:11 a4b
5 10 2022-01-01 2022-01-01T11:11:11 a5b
6 12 2022-01-01 2022-01-01T11:11:11 a6b
7 14 2022-01-01 2022-01-01T11:11:11 a7b
8 16 2022-01-01 2022-01-01T11:11:11 a8b
9 18 2022-01-01 2022-01-01T11:11:11 a9b
10 20 2022-06-06 2022-01-01T12:12:12 a10b

-- !select1 --
1 a1b 1
2 a2b 2
3 a3b 3
4 a4b 4
5 a5b 5
6 a6b 6
7 a7b 7
8 a8b 8
9 a9b 9
10 a10b 10

15 changes: 15 additions & 0 deletions regression-test/data/javaudf_p0/test_javaudtf_decimal.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
111 11111.111110000 222222.333333300
112 1234556.111110000 222222.333333300
113 87654321.111110000 \N

-- !select1 --
111 11111.111110000 22222.222220000
112 1234556.111110000 2469112.222220000
113 87654321.111110000 175308642.222220000

-- !select2 --
111 222222.333333300 444444.666666600
112 222222.333333300 444444.666666600

15 changes: 15 additions & 0 deletions regression-test/data/javaudf_p0/test_javaudtf_double.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
111 11111.111 222222.33 1.234567834455677E7 1111112.0
112 1234556.1 222222.33 2.2222222233333334E8 4.444444444444556E12
113 8.765432E7 \N 6.666666666666667E9 \N

-- !select1 --
111 1.234567834455677E7 1.234567834455677E8
112 2.2222222233333334E8 2.2222222233333335E9
113 6.666666666666667E9 6.666666666666667E10

-- !select2 --
111 1111112.0 1.111112E7
112 4.444444444444556E12 4.4444444444445555E13

32 changes: 32 additions & 0 deletions regression-test/data/javaudf_p0/test_javaudtf_int.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
1 1 abc,defg poiuytre,abcdefg
2 2 abc,defg poiuytre,abcdefg
0 3 abc,defg poiuytre,abcdefg
1 4 abc,defg poiuytre,abcdefg
2 5 abc,defg poiuytre,abcdefg
0 6 abc,defg poiuytre,abcdefg
1 7 abc,defg poiuytre,abcdefg
2 8 abc,defg poiuytre,abcdefg
9 9 ab,cdefg poiuytreabcde,fg

-- !select1 --
1 abc,defg 1
1 abc,defg 1
1 abc,defg 1
2 abc,defg 2
2 abc,defg 2
2 abc,defg 2
2 abc,defg 2
2 abc,defg 2
2 abc,defg 2
9 ab,cdefg 9
9 ab,cdefg 9
9 ab,cdefg 9
9 ab,cdefg 9
9 ab,cdefg 9
9 ab,cdefg 9
9 ab,cdefg 9
9 ab,cdefg 9
9 ab,cdefg 9

34 changes: 34 additions & 0 deletions regression-test/data/javaudf_p0/test_javaudtf_string.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_default --
1 1 abc,defg poiuytre,abcdefg
2 2 abc,defg poiuytre,abcdefg
0 3 abc,defg poiuytre,abcdefg
1 4 abc,defg poiuytre,abcdefg
2 5 abc,defg poiuytre,abcdefg
0 6 abc,defg poiuytre,abcdefg
1 7 abc,defg poiuytre,abcdefg
2 8 abc,defg poiuytre,abcdefg
9 9 ab,cdefg poiuytreabcde,fg

-- !select1 --
0 abc,defg abc
0 abc,defg defg
0 abc,defg abc
0 abc,defg defg
1 abc,defg abc
1 abc,defg defg
1 abc,defg abc
1 abc,defg defg
1 abc,defg abc
1 abc,defg defg
2 abc,defg abc
2 abc,defg defg
2 abc,defg abc
2 abc,defg defg
2 abc,defg abc
2 abc,defg defg
9 ab,cdefg ab
9 ab,cdefg cdefg

-- !select2 --

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.udf;

import java.util.ArrayList;

public class UDTFArrayIntTest {
public ArrayList<Integer> evaluate(ArrayList<Integer> val) {
if (val == null) return null;
ArrayList<Integer> result = new ArrayList<>();
for (int i = 0; i < val.size(); i = i + 2) {
result.add(val.get(i));
}
return val;
}
}
Loading

0 comments on commit 7c91858

Please sign in to comment.