Skip to content

Commit

Permalink
[env](compile) open compile_check in some file (apache#45357)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mryange authored Dec 18, 2024
1 parent fe42ec9 commit b65de37
Show file tree
Hide file tree
Showing 6 changed files with 48 additions and 25 deletions.
7 changes: 5 additions & 2 deletions be/src/util/simd/vstring_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,11 @@ class VStringFunctions {
// is to say, counting bytes which do not match 10xx_xxxx pattern.
// All 0xxx_xxxx, 110x_xxxx, 1110_xxxx and 1111_0xxx are greater than 1011_1111 when using int8_t arithmetic,
// so just count bytes greater than 1011_1111 in a byte string as the result of utf8_length.
static inline size_t get_char_len(const char* src, size_t len) {
size_t char_len = 0;
// get_char_len returns the number of UTF-8 characters in a string (not its byte length).
// The return value will never exceed len.
template <typename T>
static inline T get_char_len(const char* src, T len) {
T char_len = 0;
const char* p = src;
const char* end = p + len;
#if defined(__SSE2__) || defined(__aarch64__)
Expand Down
2 changes: 1 addition & 1 deletion be/src/vec/functions/function_rpc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
#include "vec/data_types/serde/data_type_serde.h"

namespace doris::vectorized {

#include "common/compile_check_begin.h"
RPCFnImpl::RPCFnImpl(const TFunction& fn) : _fn(fn) {
_function_name = _fn.scalar_fn.symbol;
_server_addr = _fn.hdfs_location;
Expand Down
9 changes: 5 additions & 4 deletions be/src/vec/functions/function_split_by_regexp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include "vec/functions/simple_function_factory.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"

struct Match {
std::string::size_type offset;
Expand Down Expand Up @@ -194,7 +195,7 @@ struct ExecuteImpl {
unpack_if_const(block.get_by_position(arguments[1]).column);
const auto& [three_column, three_is_const] =
unpack_if_const(block.get_by_position(arguments[2]).column);
auto limit_value = assert_cast<const ColumnInt32&>(*three_column).get_int(0);
auto limit_value = assert_cast<const ColumnInt32&>(*three_column).get_element(0);
const auto& src_column = assert_cast<const ColumnString&>(*first_column);
const auto& pattern_column = assert_cast<const ColumnString&>(*second_column);

Expand Down Expand Up @@ -238,7 +239,7 @@ struct ExecuteImpl {
const StringRef& pattern_ref,
ColumnString& dest_column_string,
ColumnArray::Offsets64& dest_offsets,
NullMapType* dest_nested_null_map, Int64 limit_value,
NullMapType* dest_nested_null_map, Int32 limit_value,
size_t input_rows_count, RE2::Options* opts) {
const char* token_begin = nullptr;
const char* token_end = nullptr;
Expand Down Expand Up @@ -270,7 +271,7 @@ struct ExecuteImpl {
const ColumnString& pattern_column,
ColumnString& dest_column_string,
ColumnArray::Offsets64& dest_offsets,
NullMapType* dest_nested_null_map, Int64 limit_value,
NullMapType* dest_nested_null_map, Int32 limit_value,
size_t input_rows_count, RE2::Options* opts) {
const char* token_begin = nullptr;
const char* token_end = nullptr;
Expand Down Expand Up @@ -307,7 +308,7 @@ struct ExecuteImpl {
const ColumnString& pattern_column,
ColumnString& dest_column_string,
ColumnArray::Offsets64& dest_offsets,
NullMapType* dest_nested_null_map, Int64 limit_value,
NullMapType* dest_nested_null_map, Int32 limit_value,
size_t input_rows_count, RE2::Options* opts) {
const char* token_begin = nullptr;
const char* token_end = nullptr;
Expand Down
38 changes: 26 additions & 12 deletions be/src/vec/functions/function_string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include <cstddef>
#include <string_view>

#include "common/cast_set.h"
#include "common/status.h"
#include "runtime/string_search.hpp"
#include "util/url_coding.h"
Expand All @@ -38,6 +39,7 @@
#include "vec/functions/simple_function_factory.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"
/// Tag type that carries the compile-time name string "ascii".
struct NameStringASCII {
    /// Name exposed by this tag.
    static constexpr const char* name = "ascii";
};
Expand Down Expand Up @@ -68,7 +70,7 @@ struct NameQuoteImpl {
static Status vector(const ColumnString::Chars& data, const ColumnString::Offsets& offsets,
ColumnString::Chars& res_data, ColumnString::Offsets& res_offsets) {
size_t offset_size = offsets.size();
size_t pos = 0;
ColumnString::Offset pos = 0;
res_offsets.resize(offset_size);
res_data.resize(data.size() + offset_size * 2);
for (int i = 0; i < offset_size; i++) {
Expand Down Expand Up @@ -285,9 +287,9 @@ struct StringInStrImpl {

/// We check that the entry does not pass through the boundaries of strings.
if (pos + rdata.size <= begin + loffsets[i]) {
int loc = pos - begin - loffsets[i - 1];
int loc = (int)(pos - begin) - loffsets[i - 1];
int l_str_size = loffsets[i] - loffsets[i - 1];
size_t len = std::min(l_str_size, loc);
auto len = std::min(l_str_size, loc);
loc = simd::VStringFunctions::get_char_len((char*)(begin + loffsets[i - 1]), len);
res[i] = loc + 1;
}
Expand Down Expand Up @@ -332,7 +334,7 @@ struct StringInStrImpl {
// Hive returns positions starting from 1.
int loc = search.search(&strl);
if (loc > 0) {
size_t len = std::min((size_t)loc, strl.size);
int len = std::min(loc, (int)strl.size);
loc = simd::VStringFunctions::get_char_len(strl.data, len);
}

Expand Down Expand Up @@ -489,7 +491,16 @@ struct InitcapImpl {
if (!::isalnum(res_data[i])) {
need_capitalize = true;
} else if (need_capitalize) {
res_data[i] = ::toupper(res_data[i]);
/*
https://en.cppreference.com/w/cpp/string/byte/toupper
Like all other functions from <cctype>, the behavior of std::toupper is undefined if the argument's value is neither representable as unsigned char nor equal to EOF.
To use these functions safely with plain chars (or signed chars), the argument should first be converted to unsigned char:
char my_toupper(char ch)
{
return static_cast<char>(std::toupper(static_cast<unsigned char>(ch)));
}
*/
res_data[i] = static_cast<unsigned char>(::toupper(res_data[i]));
need_capitalize = false;
}
}
Expand Down Expand Up @@ -540,7 +551,8 @@ struct TrimUtil {
}

res_data.insert_assume_reserved(str_begin, str_end);
res_offsets[i] = res_data.size();
// The length of the result of the trim function will never exceed the length of the input.
res_offsets[i] = (ColumnString::Offset)res_data.size();
}
return Status::OK();
}
Expand Down Expand Up @@ -606,7 +618,8 @@ struct TrimInUtil {
}

res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
res_offsets[i] = res_data.size();
// The length of the result of the trim function will never exceed the length of the input.
res_offsets[i] = (ColumnString::Offset)res_data.size();
}

return Status::OK();
Expand Down Expand Up @@ -669,7 +682,8 @@ struct TrimInUtil {
}

res_data.insert_assume_reserved(left_trim_pos, right_trim_pos);
res_offsets[i] = res_data.size();
// The length of the result of the trim function will never exceed the length of the input.
res_offsets[i] = (ColumnString::Offset)res_data.size();
}
return Status::OK();
}
Expand Down Expand Up @@ -820,7 +834,7 @@ struct UnHexImpl {
return false;
}

static int hex_decode(const char* src_str, size_t src_len, char* dst_str) {
static int hex_decode(const char* src_str, ColumnString::Offset src_len, char* dst_str) {
// if str length is odd or 0, return empty string like mysql does.
if ((src_len & 1) != 0 or src_len == 0) {
return 0;
Expand Down Expand Up @@ -848,7 +862,7 @@ struct UnHexImpl {

for (int i = 0; i < rows_count; ++i) {
const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t srclen = offsets[i] - offsets[i - 1];
ColumnString::Offset srclen = offsets[i] - offsets[i - 1];

if (srclen == 0) {
StringOP::push_empty_string(i, dst_data, dst_offsets);
Expand Down Expand Up @@ -898,7 +912,7 @@ struct StringSpace {
for (size_t i = 0; i < input_size; ++i) {
if (data[i] > 0) [[likely]] {
res_data.resize_fill(res_data.size() + data[i], ' ');
res_offsets[i] = res_data.size();
cast_set(res_offsets[i], res_data.size());
} else {
StringOP::push_empty_string(i, res_data, res_offsets);
}
Expand Down Expand Up @@ -962,7 +976,7 @@ struct FromBase64Impl {
}

const auto* source = reinterpret_cast<const char*>(&data[offsets[i - 1]]);
size_t srclen = offsets[i] - offsets[i - 1];
ColumnString::Offset srclen = offsets[i] - offsets[i - 1];

if (srclen == 0) {
StringOP::push_empty_string(i, dst_data, dst_offsets);
Expand Down
6 changes: 4 additions & 2 deletions be/src/vec/functions/functions_comparison.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
//#include "olap/rowset/segment_v2/inverted_index_reader.h"

namespace doris::vectorized {
#include "common/compile_check_begin.h"

/** Comparison functions: ==, !=, <, >, <=, >=.
* The comparison functions always return 0 or 1 (UInt8).
Expand Down Expand Up @@ -441,7 +442,7 @@ class FunctionComparison : public IFunction {

if (c0_const_string) {
c0_const_chars = &c0_const_string->get_chars();
c0_const_size = c0_const_string->get_data_at(0).size;
c0_const_size = c0_const_string->get_offsets()[0];
} else {
return Status::NotSupported("Illegal columns {}, of argument of function {}",
c0->get_name(), name);
Expand All @@ -454,7 +455,7 @@ class FunctionComparison : public IFunction {

if (c1_const_string) {
c1_const_chars = &c1_const_string->get_chars();
c1_const_size = c1_const_string->get_data_at(0).size;
c1_const_size = c1_const_string->get_offsets()[0];
} else {
return Status::NotSupported("Illegal columns {}, of argument of function {}",
c1->get_name(), name);
Expand Down Expand Up @@ -714,4 +715,5 @@ class FunctionComparison : public IFunction {
}
};

#include "common/compile_check_end.h"
} // namespace doris::vectorized
11 changes: 7 additions & 4 deletions be/src/vec/functions/functions_multi_string_position.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include "vec/functions/simple_function_factory.h"

namespace doris {
#include "common/compile_check_begin.h"
class FunctionContext;
} // namespace doris

Expand Down Expand Up @@ -106,10 +107,11 @@ class FunctionMultiStringPosition : public IFunction {
const ColumnConst* col_needles_const =
check_and_get_column_const<ColumnArray>(needles_ptr.get());

if (!col_needles_const && !col_needles_vector)
if (!col_needles_const && !col_needles_vector) {
return Status::InvalidArgument(
"function '{}' encountered unsupported needles column, found {}", name,
needles_column->get_name());
}

if (col_haystack_const && col_needles_vector) {
return Status::InvalidArgument(
Expand Down Expand Up @@ -219,9 +221,9 @@ struct FunctionMultiSearchAllPositionsImpl {
const auto* haystack_end =
haystack - prev_haystack_offset + haystack_offsets[haystack_index];

auto ans_now = searcher.search(haystack, haystack_end);
const auto* ans_now = searcher.search(haystack, haystack_end);
vec_res[res_index] =
ans_now >= haystack_end ? 0 : std::distance(haystack, ans_now) + 1;
ans_now >= haystack_end ? 0 : (Int32)std::distance(haystack, ans_now) + 1;
prev_haystack_offset = haystack_offsets[haystack_index];
}
}
Expand Down Expand Up @@ -296,7 +298,7 @@ struct FunctionMultiSearchAllPositionsImpl {

auto ans_now = searcher.search(haystack, haystack_end);
vec_res[ans_row_begin + ans_slot_in_row] =
ans_now >= haystack_end ? 0 : std::distance(haystack, ans_now) + 1;
ans_now >= haystack_end ? 0 : (Int32)std::distance(haystack, ans_now) + 1;
}

prev_haystack_offset = haystack_offsets[haystack_index];
Expand All @@ -315,4 +317,5 @@ void register_function_multi_string_position(SimpleFunctionFactory& factory) {
factory.register_function<FunctionMultiSearchAllPositions>();
}

#include "common/compile_check_end.h"
} // namespace doris::vectorized

0 comments on commit b65de37

Please sign in to comment.