Skip to content

Commit

Permalink
[env](compile) open compile check for some files
Browse files Browse the repository at this point in the history
  • Loading branch information
jacktengg committed Nov 6, 2024
1 parent e567ee0 commit a6f10d1
Show file tree
Hide file tree
Showing 15 changed files with 138 additions and 85 deletions.
33 changes: 20 additions & 13 deletions be/src/vec/functions/function_timestamp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,10 @@
#include "vec/functions/simple_function_factory.h"
#include "vec/runtime/vdatetime_value.h"
#include "vec/utils/util.hpp"
#include "common/cast_set.h"

namespace doris::vectorized {
// #include "common/compile_check_begin.h"

template <typename DateType>
struct StrToDate {
Expand Down Expand Up @@ -195,8 +197,8 @@ struct StrToDate {
const char* r_raw_str = reinterpret_cast<const char*>(&rdata[roffsets[i - 1]]);
size_t r_str_size = roffsets[i] - roffsets[i - 1];
const StringRef format_str = rewrite_specific_format(r_raw_str, r_str_size);
_execute_inner_loop<DateValueType, NativeType>(l_raw_str, l_str_size, format_str.data,
format_str.size, context, res, null_map,
_execute_inner_loop<DateValueType, NativeType>(l_raw_str, cast_set<int>(l_str_size), format_str.data,
cast_set<int>(format_str.size), context, res, null_map,
i);
}
}
Expand All @@ -210,18 +212,19 @@ struct StrToDate {
size_t size = loffsets.size();
res.resize(size);
const StringRef format_str = rewrite_specific_format(rdata.data, rdata.size);
int format_str_size = cast_set<int>(format_str.size);
for (size_t i = 0; i < size; ++i) {
const char* l_raw_str = reinterpret_cast<const char*>(&ldata[loffsets[i - 1]]);
size_t l_str_size = loffsets[i] - loffsets[i - 1];
auto l_str_size = loffsets[i] - loffsets[i - 1];

_execute_inner_loop<DateValueType, NativeType>(l_raw_str, l_str_size, format_str.data,
format_str.size, context, res, null_map,
_execute_inner_loop<DateValueType, NativeType>(l_raw_str, cast_set<int>(l_str_size), format_str.data,
format_str_size, context, res, null_map,
i);
}
}
template <typename DateValueType, typename NativeType>
static void _execute_inner_loop(const char* l_raw_str, size_t l_str_size, const char* r_raw_str,
size_t r_str_size, FunctionContext* context,
static void _execute_inner_loop(const char* l_raw_str, int l_str_size, const char* r_raw_str,
int r_str_size, FunctionContext* context,
PaddedPODArray<NativeType>& res, NullMap& null_map,
size_t index) {
auto& ts_val = *reinterpret_cast<DateValueType*>(&res[index]);
Expand Down Expand Up @@ -373,7 +376,7 @@ struct MakeDateImpl {
// The caller has already checked the range of l.
if constexpr (std::is_same_v<DateValueType, VecDateTimeValue>) {
VecDateTimeValue ts_value = VecDateTimeValue();
ts_value.unchecked_set_time(l, 1, 1, 0, 0, 0);
ts_value.unchecked_set_time(l, 1U, 1U, 0U, 0U, 0U);

TimeInterval interval(DAY, r - 1, false);
res_val = ts_value;
Expand All @@ -383,7 +386,8 @@ struct MakeDateImpl {
}
res_val.cast_to_date();
} else {
res_val.unchecked_set_time(l, 1, 1, 0, 0, 0, 0);
// caller has checked range of l
res_val.unchecked_set_time(cast_set<uint16_t, int, false>(l), 1U, 1U, 0U, 0U, 0U, 0U);
TimeInterval interval(DAY, r - 1, false);
if (!res_val.template date_add_interval<DAY>(interval)) {
null_map[index] = 1;
Expand Down Expand Up @@ -577,7 +581,8 @@ struct UnixTimeStampImpl {
size_t input_rows_count) {
auto col_result = ColumnVector<Int32>::create();
col_result->resize(1);
col_result->get_data()[0] = context->state()->timestamp_ms() / 1000;
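// The timestamp in seconds is currently a 10-digit value, which fits in Int32
// (max 2147483647, i.e. until 2038-01-19 for Unix time), so the narrowing cast is safe.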
col_result->get_data()[0] = static_cast<int32>(context->state()->timestamp_ms() / 1000);
auto col_const = ColumnConst::create(std::move(col_result), input_rows_count);
block.replace_by_position(result, std::move(col_const));
return Status::OK();
Expand Down Expand Up @@ -713,7 +718,7 @@ struct UnixTimeStampStrImpl {
StringRef fmt = col_format->get_data_at(index_check_const(i, format_const));

DateV2Value<DateTimeV2ValueType> ts_value;
if (!ts_value.from_date_format_str(fmt.data, fmt.size, source.data, source.size)) {
if (!ts_value.from_date_format_str(fmt.data, cast_set<int>(fmt.size), source.data, cast_set<int>(source.size))) {
null_map_data[i] = true;
continue;
}
Expand Down Expand Up @@ -927,7 +932,8 @@ struct LastDayImpl {
res_data[i] = binary_cast<DateValueType, UInt32>(ts_value);
} else {
ts_value.template unchecked_set_time_unit<TimeUnit::DAY>(day);
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), day, 0, 0, 0, 0);
// get_last_month_day guarantees day is legal
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), cast_set<uint8_t, int, false>(day), 0, 0, 0, 0);
UInt64 cast_value = binary_cast<DateValueType, UInt64>(ts_value);
DataTypeDateTimeV2::cast_to_date_v2(cast_value, res_data[i]);
}
Expand All @@ -947,7 +953,8 @@ struct LastDayImpl {
if constexpr (std::is_same_v<DateType, DataTypeDateV2>) {
res_data[i] = binary_cast<DateValueType, UInt32>(ts_value);
} else if constexpr (std::is_same_v<DateType, DataTypeDateTimeV2>) {
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), day, 0, 0, 0, 0);
// get_last_month_day guarantees day is legal
ts_value.unchecked_set_time(ts_value.year(), ts_value.month(), cast_set<uint8_t, int, false>(day), 0, 0, 0, 0);
UInt64 cast_value = binary_cast<DateValueType, UInt64>(ts_value);
DataTypeDateTimeV2::cast_to_date_v2(cast_value, res_data[i]);
}
Expand Down
4 changes: 3 additions & 1 deletion be/src/vec/functions/function_tokenize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@
#include "vec/core/column_with_type_and_name.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "common/cast_set.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"

Status parse(const std::string& str, std::map<std::string, std::string>& result) {
boost::regex pattern(
Expand Down Expand Up @@ -82,7 +84,7 @@ void FunctionTokenize::_do_tokenize(const ColumnString& src_column_string,
}
auto reader = doris::segment_v2::inverted_index::InvertedIndexAnalyzer::create_reader(
inverted_index_ctx.char_filter_map);
reader->init(tokenize_str.data, tokenize_str.size, true);
reader->init(tokenize_str.data, cast_set<int32_t>(tokenize_str.size), true);

std::vector<std::string> query_tokens =
doris::segment_v2::inverted_index::InvertedIndexAnalyzer::get_analyse_result(
Expand Down
10 changes: 7 additions & 3 deletions be/src/vec/functions/function_width_bucket.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,14 @@
#include "vec/data_types/data_type_number.h"
#include "vec/functions/function.h"
#include "vec/functions/simple_function_factory.h"
#include "common/cast_set.h"

namespace doris {
class FunctionContext;
} // namespace doris

namespace doris::vectorized {
#include "common/compile_check_begin.h"
class FunctionWidthBucket : public IFunction {
public:
static constexpr auto name = "width_bucket";
Expand Down Expand Up @@ -103,18 +105,20 @@ class FunctionWidthBucket : public IFunction {
for (size_t i = 0; i < input_rows_count; ++i) {
auto min_value = min_value_column_concrete.get_data()[i];
auto max_value = max_value_column_concrete.get_data()[i];
auto average_value = (max_value - min_value) / (1.0 * num_buckets);
using arg_type = decltype(min_value);
auto range = max_value - min_value;
auto average_value = static_cast<double>(range) / static_cast<double>(num_buckets);
if (expr_column_concrete.get_data()[i] < min_value) {
continue;
} else if (expr_column_concrete.get_data()[i] >= max_value) {
nested_column_concrete.get_data()[i] = num_buckets + 1;
} else {
if ((max_value - min_value) / num_buckets == 0) {
if (range / static_cast<arg_type>(num_buckets) == static_cast<arg_type>(0)) {
continue;
}
nested_column_concrete.get_data()[i] =
(int64_t)(1 +
(expr_column_concrete.get_data()[i] - min_value) / average_value);
static_cast<double>(expr_column_concrete.get_data()[i] - min_value) / average_value);
}
}
}
Expand Down
46 changes: 31 additions & 15 deletions be/src/vec/functions/int_div.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,15 @@

#include <libdivide.h>
#include <string.h>
#include <type_traits>

#include "common/status.h"
#include "vec/columns/column_vector.h"
#include "vec/core/types.h"
#include "vec/data_types/number_traits.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"

template <typename A, typename B>
struct DivideIntegralImpl {
Expand All @@ -38,30 +41,43 @@ struct DivideIntegralImpl {
static void apply(const typename Traits::ArrayA& a, B b,
typename ColumnVector<Result>::Container& c,
typename Traits::ArrayNull& null_map) {
size_t size = c.size();
UInt8 is_null = b == 0;
memset(null_map.data(), is_null, size);
if constexpr (!std::is_integral_v<A> || !std::is_integral_v<B>) {
throw doris::Exception(ErrorCode::RUNTIME_ERROR,
"DivideIntegralImpl only support integral types");
__builtin_unreachable();
} else {
size_t size = c.size();
UInt8 is_null = b == 0;
memset(null_map.data(), is_null, size);

if (!is_null) {
if constexpr (!std::is_floating_point_v<A> && !std::is_same_v<A, Int128> &&
!std::is_same_v<A, Int8> && !std::is_same_v<A, UInt8>) {
const auto divider = libdivide::divider<A>(A(b));
for (size_t i = 0; i < size; i++) {
c[i] = a[i] / divider;
}
} else {
for (size_t i = 0; i < size; i++) {
c[i] = Result(a[i] / b);
if (!is_null) {
if constexpr (!std::is_floating_point_v<A> && !std::is_same_v<A, Int128> &&
!std::is_same_v<A, Int8> && !std::is_same_v<A, UInt8>) {
const auto divider = libdivide::divider<A>(A(b));
for (size_t i = 0; i < size; i++) {
c[i] = a[i] / divider;
}
} else {
for (size_t i = 0; i < size; i++) {
c[i] = Result(a[i] / b);
}
}
}
}
}

template <typename Result = ResultType>
static inline Result apply(A a, B b, UInt8& is_null) {
is_null = b == 0;
return Result(a / (b + is_null));
if constexpr (!std::is_integral_v<A> || !std::is_integral_v<B>) {
throw doris::Exception(ErrorCode::RUNTIME_ERROR,
"DivideIntegralImpl only support integral types");
__builtin_unreachable();
} else {
is_null = b == 0;
return Result(a / (b + is_null));
}
}
};

} // namespace doris::vectorized
#include "common/compile_check_end.h"
20 changes: 11 additions & 9 deletions be/src/vec/functions/like.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@
#include "vec/core/block.h"
#include "vec/core/column_with_type_and_name.h"
#include "vec/functions/simple_function_factory.h"
#include "common/cast_set.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"
// A regex to match any regex pattern is equivalent to a substring search.
static const RE2 SUBSTRING_RE(R"((?:\.\*)*([^\.\^\{\[\(\|\)\]\}\+\*\?\$\\]*)(?:\.\*)*)");

Expand Down Expand Up @@ -253,7 +255,7 @@ Status FunctionLikeBase::constant_starts_with_fn_scalar(LikeSearchState* state,
const StringRef& pattern,
unsigned char* result) {
*result = (val.size >= state->search_string_sv.size) &&
(state->search_string_sv == val.substring(0, state->search_string_sv.size));
(state->search_string_sv == val.substring(0, cast_set<int>(state->search_string_sv.size)));
return Status::OK();
}

Expand Down Expand Up @@ -287,8 +289,8 @@ Status FunctionLikeBase::constant_ends_with_fn_scalar(LikeSearchState* state, co
const StringRef& pattern,
unsigned char* result) {
*result = (val.size >= state->search_string_sv.size) &&
(state->search_string_sv == val.substring(val.size - state->search_string_sv.size,
state->search_string_sv.size));
(state->search_string_sv == val.substring(cast_set<int>(val.size - state->search_string_sv.size),
cast_set<int>(state->search_string_sv.size)));
return Status::OK();
}

Expand Down Expand Up @@ -384,7 +386,7 @@ Status FunctionLikeBase::vector_substring_fn(const ColumnString& vals,
Status FunctionLikeBase::constant_regex_fn_scalar(LikeSearchState* state, const StringRef& val,
const StringRef& pattern, unsigned char* result) {
if (state->hs_database) { // use hyperscan
auto ret = hs_scan(state->hs_database.get(), val.data, val.size, 0, state->hs_scratch.get(),
auto ret = hs_scan(state->hs_database.get(), val.data, cast_set<unsigned int>(val.size), 0, state->hs_scratch.get(),
doris::vectorized::LikeSearchState::hs_match_handler, (void*)result);
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
return Status::RuntimeError(fmt::format("hyperscan error: {}", ret));
Expand Down Expand Up @@ -418,7 +420,7 @@ Status FunctionLikeBase::constant_regex_fn(LikeSearchState* state, const ColumnS
if (state->hs_database) { // use hyperscan
for (size_t i = 0; i < sz; i++) {
const auto& str_ref = val.get_data_at(i);
auto ret = hs_scan(state->hs_database.get(), str_ref.data, str_ref.size, 0,
auto ret = hs_scan(state->hs_database.get(), str_ref.data, cast_set<unsigned int>(str_ref.size), 0,
state->hs_scratch.get(),
doris::vectorized::LikeSearchState::hs_match_handler,
(void*)(result.data() + i));
Expand Down Expand Up @@ -447,7 +449,7 @@ Status FunctionLikeBase::regexp_fn(LikeSearchState* state, const ColumnString& v
auto sz = val.size();
for (size_t i = 0; i < sz; i++) {
const auto& str_ref = val.get_data_at(i);
auto ret = hs_scan(database, str_ref.data, str_ref.size, 0, scratch,
auto ret = hs_scan(database, str_ref.data, cast_set<unsigned int>(str_ref.size), 0, scratch,
doris::vectorized::LikeSearchState::hs_match_handler,
(void*)(result.data() + i));
if (ret != HS_SUCCESS && ret != HS_SCAN_TERMINATED) {
Expand Down Expand Up @@ -770,8 +772,8 @@ void FunctionLike::convert_like_pattern(LikeSearchState* state, const std::strin
void FunctionLike::remove_escape_character(std::string* search_string) {
std::string tmp_search_string;
tmp_search_string.swap(*search_string);
int len = tmp_search_string.length();
for (int i = 0; i < len;) {
auto len = tmp_search_string.length();
for (std::string::size_type i = 0; i < len;) {
if (tmp_search_string[i] == '\\' && i + 1 < len &&
(tmp_search_string[i + 1] == '%' || tmp_search_string[i + 1] == '_' ||
tmp_search_string[i + 1] == '\\')) {
Expand All @@ -791,7 +793,7 @@ bool re2_full_match(const std::string& str, const RE2& re, std::vector<std::stri

std::vector<RE2::Arg> arguments;
std::vector<RE2::Arg*> arguments_ptrs;
std::size_t args_count = re.NumberOfCapturingGroups();
auto args_count = re.NumberOfCapturingGroups();
arguments.resize(args_count);
arguments_ptrs.resize(args_count);
results.resize(args_count);
Expand Down
Loading

0 comments on commit a6f10d1

Please sign in to comment.