Skip to content

Commit

Permalink
Merge branch 'master' into 20240828_fix_allocator
Browse files Browse the repository at this point in the history
  • Loading branch information
xinyiZzz authored Sep 4, 2024
2 parents 680e5c8 + 5eab877 commit 43eb5af
Show file tree
Hide file tree
Showing 70 changed files with 2,440 additions and 4,806 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/code-checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
sh_checker_comment: true
sh_checker_exclude: .git .github ^docker ^thirdparty/src ^thirdparty/installed ^ui ^docs/node_modules ^tools/clickbench-tools ^extension ^output ^fs_brokers/apache_hdfs_broker/output (^|.*/)Dockerfile$ ^be/src/apache-orc ^be/src/clucene ^pytest
sh_checker_exclude: .git .github ^docker ^thirdparty/src ^thirdparty/installed ^ui ^docs/node_modules ^tools/clickbench-tools ^extension ^output ^fs_brokers/apache_hdfs_broker/output (^|.*/)Dockerfile$ ^be/src/apache-orc ^be/src/clucene ^pytest ^samples

preparation:
name: "Clang Tidy Preparation"
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/scope-label.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
github.event_name == 'pull_request_target' &&
(github.event.action == 'opened' ||
github.event.action == 'synchronize')
uses: actions/labeler@v5.5.0
uses: actions/labeler@2.2.0
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
configuration-path: .github/workflows/labeler/scope-label-conf.yml
Expand Down
9 changes: 5 additions & 4 deletions be/src/olap/rowset/segment_v2/segment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -553,7 +553,7 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) {
vectorized::PathInData path;
path.from_protobuf(spase_column_pb.column_path_info());
// Read from root column, so reader is nullptr
_sparse_column_tree[column.unique_id()].add(
_sparse_column_tree[unique_id].add(
path.copy_pop_front(),
SubcolumnReader {nullptr,
vectorized::DataTypeFactory::instance().create_data_type(
Expand Down Expand Up @@ -617,9 +617,10 @@ Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column,
const auto* node = tablet_column.has_path_info()
? _sub_column_tree[unique_id].find_exact(relative_path)
: nullptr;
const auto* sparse_node = tablet_column.has_path_info()
? _sparse_column_tree[unique_id].find_exact(relative_path)
: nullptr;
const auto* sparse_node =
tablet_column.has_path_info() && _sparse_column_tree.contains(unique_id)
? _sparse_column_tree[unique_id].find_exact(relative_path)
: nullptr;
// Currently only compaction and checksum need to read flat leaves
// They both use tablet_schema_with_merged_max_schema_version as read schema
auto type_to_read_flat_leaves = [](ReaderType type) {
Expand Down
1 change: 0 additions & 1 deletion be/src/vec/exprs/vexpr.h
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,6 @@ class VExpr {
uint32_t _index_unique_id = 0;
bool _can_fast_execute = false;
bool _enable_inverted_index_query = true;
uint32_t _in_list_value_count_threshold = 10;
};

} // namespace vectorized
Expand Down
27 changes: 24 additions & 3 deletions be/src/vec/exprs/vin_predicate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ Status VInPredicate::prepare(RuntimeState* state, const RowDescriptor& desc,

VExpr::register_function_context(state, context);
_prepare_finished = true;

if (state->query_options().__isset.in_list_value_count_threshold) {
_in_list_value_count_threshold = state->query_options().in_list_value_count_threshold;
}

const auto in_list_value_count = _children.size() - 1;
// When the number of values in the IN condition exceeds this threshold, fast_execute will not be used
_can_fast_execute = in_list_value_count <= _in_list_value_count_threshold;
return Status::OK();
}

Expand All @@ -94,10 +102,24 @@ Status VInPredicate::open(RuntimeState* state, VExprContext* context,
if (scope == FunctionContext::FRAGMENT_LOCAL) {
RETURN_IF_ERROR(VExpr::get_const_col(context, nullptr));
}

_is_args_all_constant = std::all_of(_children.begin() + 1, _children.end(),
[](const VExprSPtr& expr) { return expr->is_constant(); });
_open_finished = true;
return Status::OK();
}

// Returns how many leading children must be evaluated when the predicate runs.
//
// For an expression such as colA IN (1, 2, 3, 4) whose IN-list values are all
// constants, a hash set is created during open and the list is not accessed
// again during execute. In that case (and only when fast execute is disabled)
// just _children[0], i.e. colA, is needed, so the result is 1. Otherwise every
// child is needed and the result is _children.size().
size_t VInPredicate::skip_constant_args_size() const {
    const bool only_probe_column_needed = _is_args_all_constant && !_can_fast_execute;
    return only_probe_column_needed ? 1 : _children.size();
}

void VInPredicate::close(VExprContext* context, FunctionContext::FunctionStateScope scope) {
VExpr::close_function_context(context, scope, _function);
VExpr::close(context, scope);
Expand All @@ -116,9 +138,8 @@ Status VInPredicate::execute(VExprContext* context, Block* block, int* result_co
return Status::OK();
}
DCHECK(_open_finished || _getting_const_col);
// TODO: not execute const expr again, but use the const column in function context
doris::vectorized::ColumnNumbers arguments(_children.size());
for (int i = 0; i < _children.size(); ++i) {
doris::vectorized::ColumnNumbers arguments(skip_constant_args_size());
for (int i = 0; i < skip_constant_args_size(); ++i) {
int column_id = -1;
RETURN_IF_ERROR(_children[i]->execute(context, block, &column_id));
arguments[i] = column_id;
Expand Down
4 changes: 4 additions & 0 deletions be/src/vec/exprs/vin_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class VInPredicate final : public VExpr {

std::string debug_string() const override;

size_t skip_constant_args_size() const;

const FunctionBasePtr function() { return _function; }

bool is_not_in() const { return _is_not_in; };
Expand All @@ -62,5 +64,7 @@ class VInPredicate final : public VExpr {

const bool _is_not_in;
static const constexpr char* function_name = "in";
uint32_t _in_list_value_count_threshold = 10;
bool _is_args_all_constant = false;
};
} // namespace doris::vectorized
5 changes: 2 additions & 3 deletions be/src/vec/functions/function_collection_in.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,9 +112,8 @@ class FunctionCollectionIn : public IFunction {
// FE should make element type consistent and
// equalize the length of the elements in struct
const auto& const_column_ptr = context->get_constant_col(i);
if (const_column_ptr == nullptr) {
break;
}
// Types like struct, array, and map only support constant expressions.
DCHECK(const_column_ptr != nullptr);
const auto& [col, _] = unpack_if_const(const_column_ptr->column_ptr);
if (col->is_nullable()) {
auto* null_col = vectorized::check_and_get_column<vectorized::ColumnNullable>(col);
Expand Down
68 changes: 39 additions & 29 deletions be/src/vec/functions/functions_geo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include "vec/columns/column.h"
#include "vec/columns/column_nullable.h"
#include "vec/columns/columns_number.h"
#include "vec/common/assert_cast.h"
#include "vec/common/string_ref.h"
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
Expand Down Expand Up @@ -58,14 +59,16 @@ struct StPoint {
auto res = ColumnString::create();
auto null_map = ColumnUInt8::create(size, 0);
auto& null_map_data = null_map->get_data();
const auto* left_column_f64 = assert_cast<const ColumnFloat64*>(left_column.get());
const auto* right_column_f64 = assert_cast<const ColumnFloat64*>(right_column.get());
GeoPoint point;
std::string buf;
if (left_const) {
const_vector(left_column, right_column, res, null_map_data, size, point, buf);
const_vector(left_column_f64, right_column_f64, res, null_map_data, size, point, buf);
} else if (right_const) {
vector_const(left_column, right_column, res, null_map_data, size, point, buf);
vector_const(left_column_f64, right_column_f64, res, null_map_data, size, point, buf);
} else {
vector_vector(left_column, right_column, res, null_map_data, size, point, buf);
vector_vector(left_column_f64, right_column_f64, res, null_map_data, size, point, buf);
}

block.replace_by_position(result,
Expand All @@ -86,32 +89,32 @@ struct StPoint {
res->insert_data(buf.data(), buf.size());
}

// Handles the case where the left argument is constant: the first coordinate is
// read once from row 0 of left_column, while the second coordinate is read per
// row from right_column. Each from_coord() result is forwarded to loop_do()
// together with the output column, the null map, and the scratch point/buffer.
static void const_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
static void const_vector(const ColumnFloat64* left_column, const ColumnFloat64* right_column,
ColumnString::MutablePtr& res, NullMap& null_map, const size_t size,
GeoPoint& point, std::string& buf) {
double x = left_column->operator[](0).get<Float64>();
double x = left_column->get_element(0);
for (int row = 0; row < size; ++row) {
auto cur_res = point.from_coord(x, right_column->operator[](row).get<Float64>());
auto cur_res = point.from_coord(x, right_column->get_element(row));
loop_do(cur_res, res, null_map, row, point, buf);
}
}

static void vector_const(const ColumnPtr& left_column, const ColumnPtr& right_column,
static void vector_const(const ColumnFloat64* left_column, const ColumnFloat64* right_column,
ColumnString::MutablePtr& res, NullMap& null_map, const size_t size,
GeoPoint& point, std::string& buf) {
double y = right_column->operator[](0).get<Float64>();
double y = right_column->get_element(0);
for (int row = 0; row < size; ++row) {
auto cur_res = point.from_coord(right_column->operator[](row).get<Float64>(), y);
auto cur_res = point.from_coord(right_column->get_element(row), y);
loop_do(cur_res, res, null_map, row, point, buf);
}
}

// Handles the case where neither argument is constant: for every row both
// coordinates are read at that row from left_column and right_column, passed to
// from_coord(), and the result is forwarded to loop_do() together with the
// output column, the null map, and the scratch point/buffer.
static void vector_vector(const ColumnPtr& left_column, const ColumnPtr& right_column,
static void vector_vector(const ColumnFloat64* left_column, const ColumnFloat64* right_column,
ColumnString::MutablePtr& res, NullMap& null_map, const size_t size,
GeoPoint& point, std::string& buf) {
for (int row = 0; row < size; ++row) {
auto cur_res = point.from_coord(left_column->operator[](row).get<Float64>(),
right_column->operator[](row).get<Float64>());
auto cur_res =
point.from_coord(left_column->get_element(row), right_column->get_element(row));
loop_do(cur_res, res, null_map, row, point, buf);
}
}
Expand Down Expand Up @@ -246,22 +249,25 @@ struct StDistanceSphere {
DCHECK_EQ(arguments.size(), 4);
auto return_type = block.get_data_type(result);

auto x_lng = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
auto x_lat = block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
auto y_lng = block.get_by_position(arguments[2]).column->convert_to_full_column_if_const();
auto y_lat = block.get_by_position(arguments[3]).column->convert_to_full_column_if_const();

const auto* x_lng = check_and_get_column<ColumnFloat64>(
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const());
const auto* x_lat = check_and_get_column<ColumnFloat64>(
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const());
const auto* y_lng = check_and_get_column<ColumnFloat64>(
block.get_by_position(arguments[2]).column->convert_to_full_column_if_const());
const auto* y_lat = check_and_get_column<ColumnFloat64>(
block.get_by_position(arguments[3]).column->convert_to_full_column_if_const());
CHECK(x_lng && x_lat && y_lng && y_lat);
const auto size = x_lng->size();
auto res = ColumnFloat64::create();
res->reserve(size);
auto null_map = ColumnUInt8::create(size, 0);
auto& null_map_data = null_map->get_data();
for (int row = 0; row < size; ++row) {
double distance = 0;
if (!GeoPoint::ComputeDistance(x_lng->operator[](row).get<Float64>(),
x_lat->operator[](row).get<Float64>(),
y_lng->operator[](row).get<Float64>(),
y_lat->operator[](row).get<Float64>(), &distance)) {
if (!GeoPoint::ComputeDistance(x_lng->get_element(row), x_lat->get_element(row),
y_lng->get_element(row), y_lat->get_element(row),
&distance)) {
null_map_data[row] = 1;
res->insert_default();
continue;
Expand All @@ -284,10 +290,15 @@ struct StAngleSphere {
DCHECK_EQ(arguments.size(), 4);
auto return_type = block.get_data_type(result);

auto x_lng = block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
auto x_lat = block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
auto y_lng = block.get_by_position(arguments[2]).column->convert_to_full_column_if_const();
auto y_lat = block.get_by_position(arguments[3]).column->convert_to_full_column_if_const();
const auto* x_lng = check_and_get_column<ColumnFloat64>(
block.get_by_position(arguments[0]).column->convert_to_full_column_if_const());
const auto* x_lat = check_and_get_column<ColumnFloat64>(
block.get_by_position(arguments[1]).column->convert_to_full_column_if_const());
const auto* y_lng = check_and_get_column<ColumnFloat64>(
block.get_by_position(arguments[2]).column->convert_to_full_column_if_const());
const auto* y_lat = check_and_get_column<ColumnFloat64>(
block.get_by_position(arguments[3]).column->convert_to_full_column_if_const());
CHECK(x_lng && x_lat && y_lng && y_lat);

const auto size = x_lng->size();

Expand All @@ -298,10 +309,9 @@ struct StAngleSphere {

for (int row = 0; row < size; ++row) {
double angle = 0;
if (!GeoPoint::ComputeAngleSphere(x_lng->operator[](row).get<Float64>(),
x_lat->operator[](row).get<Float64>(),
y_lng->operator[](row).get<Float64>(),
y_lat->operator[](row).get<Float64>(), &angle)) {
if (!GeoPoint::ComputeAngleSphere(x_lng->get_element(row), x_lat->get_element(row),
y_lng->get_element(row), y_lat->get_element(row),
&angle)) {
null_map_data[row] = 1;
res->insert_default();
continue;
Expand Down
Loading

0 comments on commit 43eb5af

Please sign in to comment.