Skip to content

Commit

Permalink
[env](compile) open compile check for some files
Browse files (browse the repository at this point in the history)
  • Loading branch information
jacktengg committed Nov 8, 2024
1 parent c9757b5 commit 404f2ff
Show file tree
Hide file tree
Showing 15 changed files with 146 additions and 95 deletions.
37 changes: 21 additions & 16 deletions be/src/vec/functions/function_timestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <utility>
#include <vector>

#include "common/cast_set.h"
#include "common/status.h"
#include "runtime/decimalv2_value.h"
#include "runtime/define_primitive_type.h"
Expand Down Expand Up @@ -70,6 +71,7 @@
#include "vec/utils/util.hpp"

namespace doris::vectorized {
#include "common/compile_check_begin.h"

template <typename DateType>
struct StrToDate {
Expand Down Expand Up @@ -195,9 +197,9 @@ struct StrToDate {
const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
size_t r_str_size = roffsets[i] - roffsets[i - 1];
const StringRef format_str = rewrite_specific_format(r_raw_str, r_str_size);
_execute_inner_loop<DateValueType, NativeType>(l_raw_str, l_str_size, format_str.data,
format_str.size, context, res, null_map,
i);
_execute_inner_loop<DateValueType, NativeType>(
l_raw_str, cast_set<int>(l_str_size), format_str.data,
cast_set<int>(format_str.size), context, res, null_map, i);
}
}
template <typename ArgDateType,
Expand All @@ -210,18 +212,19 @@ struct StrToDate {
size_t size = loffsets.size();
res.resize(size);
const StringRef format_str = rewrite_specific_format(rdata.data, rdata.size);
int format_str_size = cast_set<int>(format_str.size);
for (size_t i = 0; i < size; ++i) {
const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
size_t l_str_size = loffsets[i] - loffsets[i - 1];
auto l_str_size = loffsets[i] - loffsets[i - 1];

_execute_inner_loop<DateValueType, NativeType>(l_raw_str, l_str_size, format_str.data,
format_str.size, context, res, null_map,
i);
_execute_inner_loop<DateValueType, NativeType>(l_raw_str, cast_set<int>(l_str_size),
format_str.data, format_str_size,
context, res, null_map, i);
}
}
template <typename DateValueType, typename NativeType>
static void _execute_inner_loop(const char* l_raw_str, size_t l_str_size, const char* r_raw_str,
size_t r_str_size, FunctionContext* context,
static void _execute_inner_loop(const char* l_raw_str, int l_str_size, const char* r_raw_str,
int r_str_size, FunctionContext* context,
PaddedPODArray<NativeType>& res, NullMap& null_map,
size_t index) {
auto& ts_val = *reinterpret_cast<DateValueType*>(&res[index]);
Expand Down Expand Up @@ -373,7 +376,7 @@ struct MakeDateImpl {
// l checked outside
if constexpr (std::is_same_v<DateValueType, VecDateTimeValue>) {
VecDateTimeValue ts_value = VecDateTimeValue();
ts_value.unchecked_set_time(l, 1, 1, 0, 0, 0);
ts_value.unchecked_set_time(l, 1U, 1U, 0U, 0U, 0U);

TimeInterval interval(DAY, r - 1, false);
res_val = ts_value;
Expand All @@ -383,7 +386,8 @@ struct MakeDateImpl {
}
res_val.cast_to_date();
} else {
res_val.unchecked_set_time(l, 1, 1, 0, 0, 0, 0);
// caller has checked range of l
res_val.unchecked_set_time(cast_set<uint16_t, int, false>(l), 1U, 1U, 0U, 0U, 0U, 0U);
TimeInterval interval(DAY, r - 1, false);
if (!res_val.template date_add_interval<DAY>(interval)) {
null_map[index] = 1;
Expand Down Expand Up @@ -577,7 +581,7 @@ struct UnixTimeStampImpl {
size_t input_rows_count) {
auto col_result = ColumnVector<Int32>::create();
col_result->resize(1);
col_result->get_data()[0] = context->state()->timestamp_ms() / 1000;
col_result->get_data()[0] = static_cast<int32>(context->state()->timestamp_ms() / 1000);
auto col_const = ColumnConst::create(std::move(col_result), input_rows_count);
block.replace_by_position(result, std::move(col_const));
return Status::OK();
Expand Down Expand Up @@ -713,7 +717,8 @@ struct UnixTimeStampStrImpl {
StringRef fmt = col_format->get_data_at(index_check_const(i, format_const));

DateV2Value<DateTimeV2ValueType> ts_value;
if (!ts_value.from_date_format_str(fmt.data, fmt.size, source.data, source.size)) {
if (!ts_value.from_date_format_str(fmt.data, cast_set<int>(fmt.size), source.data,
cast_set<int>(source.size))) {
null_map_data[i] = true;
continue;
}
Expand Down Expand Up @@ -916,7 +921,7 @@ struct LastDayImpl {
null_map[i] = 1;
continue;
}
int day = get_last_month_day(ts_value.year(), ts_value.month());
auto day = get_last_month_day(ts_value.year(), ts_value.month());
// day is definitely legal
if constexpr (date_cast::IsV1<DateType>()) {
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), day, 0, 0, 0);
Expand All @@ -941,7 +946,7 @@ struct LastDayImpl {
const auto& cur_data = data_col[i];
auto ts_value = binary_cast<NativeType, DateValueType>(cur_data);
DCHECK(ts_value.is_valid_date());
int day = get_last_month_day(ts_value.year(), ts_value.month());
auto day = get_last_month_day(ts_value.year(), ts_value.month());
ts_value.template unchecked_set_time_unit<TimeUnit::DAY>(day);

if constexpr (std::is_same_v<DateType, DataTypeDateV2>) {
Expand All @@ -954,7 +959,7 @@ struct LastDayImpl {
}
}

static int get_last_month_day(int year, int month) {
static uint8 get_last_month_day(int year, int month) {
bool is_leap_year = doris::is_leap(year);
if (month == 2) {
return is_leap_year ? 29 : 28;
Expand Down
4 changes: 3 additions & 1 deletion be/src/vec/functions/function_tokenize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@

#include "CLucene/StdHeader.h"
#include "CLucene/config/repl_wchar.h"
#include "common/cast_set.h"
#include "olap/inverted_index_parser.h"
#include "olap/rowset/segment_v2/inverted_index/analyzer/analyzer.h"
#include "olap/rowset/segment_v2/inverted_index_reader.h"
Expand All @@ -36,6 +37,7 @@
#include "vec/data_types/data_type_number.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"

Status parse(const std::string& str, std::map<std::string, std::string>& result) {
boost::regex pattern(
Expand Down Expand Up @@ -82,7 +84,7 @@ void FunctionTokenize::_do_tokenize(const ColumnString& src_column_string,
}
auto reader = doris::segment_v2::inverted_index::InvertedIndexAnalyzer::create_reader(
inverted_index_ctx.char_filter_map);
reader->init(tokenize_str.data, tokenize_str.size, true);
reader->init(tokenize_str.data, cast_set<int32_t>(tokenize_str.size), true);

std::vector<std::string> query_tokens =
doris::segment_v2::inverted_index::InvertedIndexAnalyzer::get_analyse_result(
Expand Down
14 changes: 10 additions & 4 deletions be/src/vec/functions/function_width_bucket.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@

#include <algorithm>
#include <boost/iterator/iterator_facade.hpp>
#include <limits>
#include <memory>
#include <utility>

#include "common/cast_set.h"
#include "common/status.h"
#include "vec/aggregate_functions/aggregate_function.h"
#include "vec/columns/column.h"
Expand All @@ -44,6 +46,7 @@ class FunctionContext;
} // namespace doris

namespace doris::vectorized {
#include "common/compile_check_begin.h"
class FunctionWidthBucket : public IFunction {
public:
static constexpr auto name = "width_bucket";
Expand Down Expand Up @@ -103,18 +106,21 @@ class FunctionWidthBucket : public IFunction {
for (size_t i = 0; i < input_rows_count; ++i) {
auto min_value = min_value_column_concrete.get_data()[i];
auto max_value = max_value_column_concrete.get_data()[i];
auto average_value = (max_value - min_value) / (1.0 * num_buckets);
auto range = max_value - min_value;
using arg_type = decltype(range);
auto average_value = static_cast<double>(range) / static_cast<double>(num_buckets);
if (expr_column_concrete.get_data()[i] < min_value) {
continue;
} else if (expr_column_concrete.get_data()[i] >= max_value) {
nested_column_concrete.get_data()[i] = num_buckets + 1;
} else {
if ((max_value - min_value) / num_buckets == 0) {
if (range / static_cast<arg_type>(num_buckets) == static_cast<arg_type>(0)) {
continue;
}
nested_column_concrete.get_data()[i] =
(int64_t)(1 +
(expr_column_concrete.get_data()[i] - min_value) / average_value);
(int64_t)(1 + static_cast<double>(expr_column_concrete.get_data()[i] -
min_value) /
average_value);
}
}
}
Expand Down
45 changes: 29 additions & 16 deletions be/src/vec/functions/int_div.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
#pragma once

#include <libdivide.h>
#include <string.h>

#include "vec/columns/column_vector.h"
#include "vec/core/types.h"
#include "vec/data_types/number_traits.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"

template <typename A, typename B>
struct DivideIntegralImpl {
Expand All @@ -38,30 +38,43 @@ struct DivideIntegralImpl {
static void apply(const typename Traits::ArrayA& a, B b,
typename ColumnVector<Result>::Container& c,
typename Traits::ArrayNull& null_map) {
size_t size = c.size();
UInt8 is_null = b == 0;
memset(null_map.data(), is_null, size);
if constexpr (!std::is_integral_v<A> || !std::is_integral_v<B>) {
throw doris::Exception(ErrorCode::RUNTIME_ERROR,
"DivideIntegralImpl only support integral types");
__builtin_unreachable();
} else {
size_t size = c.size();
UInt8 is_null = b == 0;
memset(null_map.data(), is_null, size);

if (!is_null) {
if constexpr (!std::is_floating_point_v<A> && !std::is_same_v<A, Int128> &&
!std::is_same_v<A, Int8> && !std::is_same_v<A, UInt8>) {
const auto divider = libdivide::divider<A>(A(b));
for (size_t i = 0; i < size; i++) {
c[i] = a[i] / divider;
}
} else {
for (size_t i = 0; i < size; i++) {
c[i] = Result(a[i] / b);
if (!is_null) {
if constexpr (!std::is_floating_point_v<A> && !std::is_same_v<A, Int128> &&
!std::is_same_v<A, Int8> && !std::is_same_v<A, UInt8>) {
const auto divider = libdivide::divider<A>(A(b));
for (size_t i = 0; i < size; i++) {
c[i] = a[i] / divider;
}
} else {
for (size_t i = 0; i < size; i++) {
c[i] = Result(a[i] / b);
}
}
}
}
}

template <typename Result = ResultType>
static inline Result apply(A a, B b, UInt8& is_null) {
is_null = b == 0;
return Result(a / (b + is_null));
if constexpr (!std::is_integral_v<A> || !std::is_integral_v<B>) {
throw doris::Exception(ErrorCode::RUNTIME_ERROR,
"DivideIntegralImpl only support integral types");
__builtin_unreachable();
} else {
is_null = b == 0;
return Result(a / (b + is_null));
}
}
};

} // namespace doris::vectorized
#include "common/compile_check_end.h"
27 changes: 16 additions & 11 deletions be/src/vec/functions/like.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <utility>
#include <vector>

#include "common/cast_set.h"
#include "common/logging.h"
#include "vec/columns/column.h"
#include "vec/columns/column_const.h"
Expand All @@ -37,6 +38,7 @@
#include "vec/functions/simple_function_factory.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"
// A regex to match any regex pattern is equivalent to a substring search.
static const RE2 SUBSTRING_RE(R"((?:\.\*)*([^\.\^\{\[\(\|\)\]\}\+\*\?\$\\]*)(?:\.\*)*)");

Expand Down Expand Up @@ -253,7 +255,8 @@ Status FunctionLikeBase::constant_starts_with_fn_scalar(LikeSearchState* state,
const StringRef& pattern,
unsigned char* result) {
*result = (val.size >= state->search_string_sv.size) &&
(state->search_string_sv == val.substring(0, state->search_string_sv.size));
(state->search_string_sv ==
val.substring(0, cast_set<int>(state->search_string_sv.size)));
return Status::OK();
}

Expand Down Expand Up @@ -287,8 +290,9 @@ Status FunctionLikeBase::constant_ends_with_fn_scalar(LikeSearchState* state, co
const StringRef& pattern,
unsigned char* result) {
*result = (val.size >= state->search_string_sv.size) &&
(state->search_string_sv == val.substring(val.size - state->search_string_sv.size,
state->search_string_sv.size));
(state->search_string_sv ==
val.substring(cast_set<int>(val.size - state->search_string_sv.size),
cast_set<int>(state->search_string_sv.size)));
return Status::OK();
}

Expand Down Expand Up @@ -384,7 +388,8 @@ Status FunctionLikeBase::vector_substring_fn(const ColumnString& vals,
Status FunctionLikeBase::constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val,
const StringRef& pattern, unsigned char* result) {
if (state->hs_database) { // use hyperscan
auto ret = hs_scan(state->hs_database.get(), val.data, val.size, 0, state->hs_scratch.get(),
auto ret = hs_scan(state->hs_database.get(), val.data, cast_set<unsigned int>(val.size), 0,
state->hs_scratch.get(),
doris::vectorized::LikeSearchState::hs_match_handler, (void*)result);
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
Expand Down Expand Up @@ -418,8 +423,8 @@ Status FunctionLikeBase::constant_regex_fn(LikeSearchState* state, const ColumnS
if (state->hs_database) { // use hyperscan
for (size_t i = 0; i < sz; i++) {
const auto& str_ref = val.get_data_at(i);
auto ret = hs_scan(state->hs_database.get(), str_ref.data, str_ref.size, 0,
state->hs_scratch.get(),
auto ret = hs_scan(state->hs_database.get(), str_ref.data,
cast_set<unsigned int>(str_ref.size), 0, state->hs_scratch.get(),
doris::vectorized::LikeSearchState::hs_match_handler,
(void*)(result.data() + i));
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
Expand Down Expand Up @@ -447,8 +452,8 @@ Status FunctionLikeBase::regexp_fn(LikeSearchState* state, const ColumnString& v
auto sz = val.size();
for (size_t i = 0; i < sz; i++) {
const auto& str_ref = val.get_data_at(i);
auto ret = hs_scan(database, str_ref.data, str_ref.size, 0, scratch,
doris::vectorized::LikeSearchState::hs_match_handler,
auto ret = hs_scan(database, str_ref.data, cast_set<unsigned int>(str_ref.size), 0,
scratch, doris::vectorized::LikeSearchState::hs_match_handler,
(void*)(result.data() + i));
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
Expand Down Expand Up @@ -770,8 +775,8 @@ void FunctionLike::convert_like_pattern(LikeSearchState* state, const std::strin
void FunctionLike::remove_escape_character(std::string* search_string) {
std::string tmp_search_string;
tmp_search_string.swap(*search_string);
int len = tmp_search_string.length();
for (int i = 0; i < len;) {
auto len = tmp_search_string.length();
for (std::string::size_type i = 0; i < len;) {
if (tmp_search_string[i] == '\\' && i + 1 < len &&
(tmp_search_string[i + 1] == '%' || tmp_search_string[i + 1] == '_' ||
tmp_search_string[i + 1] == '\\')) {
Expand All @@ -791,7 +796,7 @@ bool re2_full_match(const std::string& str, const RE2& re, std::vector<std::stri

std::vector<RE2::Arg> arguments;
std::vector<RE2::Arg*> arguments_ptrs;
std::size_t args_count = re.NumberOfCapturingGroups();
auto args_count = re.NumberOfCapturingGroups();
arguments.resize(args_count);
arguments_ptrs.resize(args_count);
results.resize(args_count);
Expand Down
Loading

0 comments on commit 404f2ff

Please sign in to comment.