Skip to content

Commit

Permalink
[refactor](storage) Expressing the types of computation layer and sto…
Browse files Browse the repository at this point in the history
…rage layer in PrimitiveTypeTraits (apache#26191)
  • Loading branch information
mrhhsg authored Nov 15, 2023
1 parent 76d530e commit 3ad865f
Show file tree
Hide file tree
Showing 30 changed files with 1,184 additions and 395 deletions.
53 changes: 0 additions & 53 deletions be/src/exprs/bloom_filter_func.h
Original file line number Diff line number Diff line change
Expand Up @@ -416,65 +416,12 @@ struct FixedStringFindOp : public StringFindOp {
}
};

// Bloom-filter lookup operator for DATETIME columns whose cells are handed over
// in the storage-engine (olap) encoding.
struct DateTimeFindOp : public CommonFindOp<VecDateTimeValue> {
    // Decodes the 64-bit olap datetime located at `data` into a VecDateTimeValue
    // and tests the raw bytes of that value against `bloom_filter`.
    //
    // `data` must point to at least sizeof(uint64_t) readable bytes; it does not
    // have to be 8-byte aligned.
    //
    // Returns whatever `bloom_filter.test()` reports for the decoded value.
    bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
        // Copy the bytes out instead of dereferencing a casted pointer: a
        // `const void*` carries no alignment guarantee, and reading the cell
        // through a uint64_t lvalue would also violate the aliasing rules.
        uint64_t olap_datetime = 0;
        memcpy(&olap_datetime, data, sizeof(olap_datetime));

        VecDateTimeValue value;
        value.from_olap_datetime(olap_datetime);
        return bloom_filter.test(Slice(reinterpret_cast<char*>(&value), sizeof(VecDateTimeValue)));
    }
};

// avoid violating C/C++ aliasing rules.
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101684

// Bloom-filter lookup operator for DATE columns whose cells are handed over in
// the storage-engine (olap) encoding.
struct DateFindOp : public CommonFindOp<VecDateTimeValue> {
    // Reads a uint24_t olap date from `data`, converts it into a
    // VecDateTimeValue and tests the raw bytes of that value against
    // `bloom_filter`.
    bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
        uint24_t packed_date = *static_cast<const uint24_t*>(data);
        // Widen in two steps: uint24_t -> uint32_t -> uint64_t.
        const uint64_t olap_date = static_cast<uint32_t>(packed_date);

        VecDateTimeValue decoded;
        decoded.from_olap_date(olap_date);

        return bloom_filter.test(Slice(reinterpret_cast<char*>(&decoded), sizeof(decoded)));
    }
};

// Bloom-filter lookup operator for DECIMALV2 columns whose cells are handed over
// in the storage-engine (decimal12_t) encoding.
struct DecimalV2FindOp : public CommonFindOp<DecimalV2Value> {
    // Reads a decimal12_t from `data`, converts it into a DecimalV2Value and
    // tests the raw bytes of that value against `bloom_filter`.
    bool find_olap_engine(const BloomFilterAdaptor& bloom_filter, const void* data) const {
        auto stored = *static_cast<const decimal12_t*>(data);

        // Widen the stored parts into locals before handing them to the decoder.
        int64_t integer_part = stored.integer;
        int32_t fraction_part = stored.fraction;
        DecimalV2Value decoded;
        decoded.from_olap_decimal(integer_part, fraction_part);

        // The filter is probed with a byte-for-byte copy of the decoded value
        // held in a plain char buffer, not with the object itself.
        constexpr int kValueSize = sizeof(DecimalV2Value);
        char raw_bytes[kValueSize];
        memcpy(raw_bytes, &decoded, kValueSize);
        return bloom_filter.test(Slice(raw_bytes, kValueSize));
    }
};

// Default mapping from a PrimitiveType to its bloom-filter lookup operator.
//   T      - the C++ type PrimitiveTypeTraits associates with the primitive type.
//   FindOp - CommonFindOp instantiated for T.
template <PrimitiveType primitive_type>
struct BloomFilterTypeTraits {
    using T = typename PrimitiveTypeTraits<primitive_type>::CppType;
    using FindOp = CommonFindOp<T>;
};

// Explicit specialization: TYPE_DATE uses DateFindOp instead of the generic
// CommonFindOp<T>. Unlike the primary template, no `T` alias is exposed here.
template <>
struct BloomFilterTypeTraits<TYPE_DATE> {
using FindOp = DateFindOp;
};

// Explicit specialization: TYPE_DATETIME uses DateTimeFindOp instead of the
// generic CommonFindOp<T>. Unlike the primary template, no `T` alias is exposed here.
template <>
struct BloomFilterTypeTraits<TYPE_DATETIME> {
using FindOp = DateTimeFindOp;
};

// Explicit specialization: TYPE_DECIMALV2 uses DecimalV2FindOp instead of the
// generic CommonFindOp<T>. Unlike the primary template, no `T` alias is exposed here.
template <>
struct BloomFilterTypeTraits<TYPE_DECIMALV2> {
using FindOp = DecimalV2FindOp;
};

template <>
struct BloomFilterTypeTraits<TYPE_CHAR> {
using FindOp = FixedStringFindOp;
Expand Down
10 changes: 5 additions & 5 deletions be/src/olap/bitmap_filter_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,13 @@ class BitmapFilterColumnPredicate : public ColumnPredicate {
// no non-null values
return false;
} else {
max_value = *reinterpret_cast<const CppType*>(statistic.second->cell_ptr());
max_value = get_zone_map_value<T, CppType>(statistic.second->cell_ptr());
}

CppType min_value =
statistic.first->is_null() /* contains null values */
? 0
: *reinterpret_cast<const CppType*>(statistic.first->cell_ptr());
CppType min_value = statistic.first->is_null() /* contains null values */
? 0
: get_zone_map_value<T, CppType>(statistic.first->cell_ptr());
;
return _specific_filter->contains_any(min_value, max_value);
}

Expand Down
23 changes: 5 additions & 18 deletions be/src/olap/bloom_filter_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,20 +63,9 @@ class BloomFilterColumnPredicate : public ColumnPredicate {
DCHECK(null_map);
}

uint24_t tmp_uint24_value;
auto get_cell_value = [&tmp_uint24_value](auto& data) {
if constexpr (std::is_same_v<std::decay_t<decltype(data)>, uint32_t> &&
T == PrimitiveType::TYPE_DATE) {
memcpy((char*)(&tmp_uint24_value), (char*)(&data), sizeof(uint24_t));
return (const char*)&tmp_uint24_value;
} else {
return (const char*)&data;
}
};

uint16_t new_size = 0;
if (column.is_column_dictionary()) {
auto* dict_col = reinterpret_cast<const vectorized::ColumnDictI32*>(&column);
const auto* dict_col = reinterpret_cast<const vectorized::ColumnDictI32*>(&column);
if (_be_exec_version >= 2) {
for (uint16_t i = 0; i < size; i++) {
uint16_t idx = sel[i];
Expand Down Expand Up @@ -113,12 +102,11 @@ class BloomFilterColumnPredicate : public ColumnPredicate {
for (uint16_t i = 0; i < size; i++) {
uint16_t idx = is_dense_column ? i : sel[i];
if constexpr (is_nullable) {
if (!null_map[idx] &&
_specific_filter->find_crc32_hash(get_cell_value(pred_col_data[idx]))) {
if (!null_map[idx] && _specific_filter->find_crc32_hash(&pred_col_data[idx])) {
sel[new_size++] = idx;
}
} else {
if (_specific_filter->find_crc32_hash(get_cell_value(pred_col_data[idx]))) {
if (_specific_filter->find_crc32_hash(&pred_col_data[idx])) {
sel[new_size++] = idx;
}
}
Expand All @@ -140,9 +128,8 @@ class BloomFilterColumnPredicate : public ColumnPredicate {

auto pred_col_data = pred_col.data();
#define EVALUATE_WITH_NULL_IMPL(IDX) \
!null_map[IDX] && _specific_filter->find_olap_engine(get_cell_value(pred_col_data[IDX]))
#define EVALUATE_WITHOUT_NULL_IMPL(IDX) \
_specific_filter->find_olap_engine(get_cell_value(pred_col_data[IDX]))
!null_map[IDX] && _specific_filter->find_olap_engine(&pred_col_data[IDX])
#define EVALUATE_WITHOUT_NULL_IMPL(IDX) _specific_filter->find_olap_engine(&pred_col_data[IDX])
EVALUATE_BY_SELECTOR(EVALUATE_WITH_NULL_IMPL, EVALUATE_WITHOUT_NULL_IMPL)
#undef EVALUATE_WITH_NULL_IMPL
#undef EVALUATE_WITHOUT_NULL_IMPL
Expand Down
29 changes: 21 additions & 8 deletions be/src/olap/column_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,27 @@ enum class PredicateType {
MATCH = 13, // fulltext match
};

template <PrimitiveType primitive_type, typename ResultType>
ResultType get_zone_map_value(void* data_ptr) {
ResultType res;
// DecimalV2's storage value is different from predicate or compute value type
// need convert it to DecimalV2Value
if constexpr (primitive_type == PrimitiveType::TYPE_DECIMALV2) {
decimal12_t decimal_12_t_value;
memcpy((char*)(&decimal_12_t_value), data_ptr, sizeof(decimal12_t));
res.from_olap_decimal(decimal_12_t_value.integer, decimal_12_t_value.fraction);
} else if constexpr (primitive_type == PrimitiveType::TYPE_DATE) {
static_assert(std::is_same_v<ResultType, VecDateTimeValue>);
res.from_olap_date(*reinterpret_cast<uint24_t*>(data_ptr));
} else if constexpr (primitive_type == PrimitiveType::TYPE_DATETIME) {
static_assert(std::is_same_v<ResultType, VecDateTimeValue>);
res.from_olap_datetime(*reinterpret_cast<uint64_t*>(data_ptr));
} else {
memcpy(reinterpret_cast<void*>(&res), data_ptr, sizeof(ResultType));
}
return res;
}

inline std::string type_to_string(PredicateType type) {
switch (type) {
case PredicateType::UNKNOWN:
Expand Down Expand Up @@ -264,14 +285,6 @@ class ColumnPredicate {
}

protected:
// Loads a T from `data_ptr` with memcpy rather than through a casted pointer,
// so that an address which is not aligned for T does not cause a crash.
template <class T>
T _get_zone_map_value(void* data_ptr) const {
    T value;
    memcpy(&value, data_ptr, sizeof(value));
    return value;
}

virtual std::string _debug_string() const = 0;

uint32_t _column_id;
Expand Down
43 changes: 24 additions & 19 deletions be/src/olap/comparison_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ namespace doris {
template <PrimitiveType Type, PredicateType PT>
class ComparisonPredicateBase : public ColumnPredicate {
public:
using T = typename PredicatePrimitiveTypeTraits<Type>::PredicateFieldType;
using T = typename PrimitiveTypeTraits<Type>::CppType;
ComparisonPredicateBase(uint32_t column_id, const T& value, bool opposite = false)
: ColumnPredicate(column_id, opposite),
_cached_code(_InvalidateCodeValue),
Expand Down Expand Up @@ -67,7 +67,9 @@ class ComparisonPredicateBase : public ColumnPredicate {

roaring::Roaring roaring;
bool exact_match = false;
Status status = iterator->seek_dictionary(&_value, &exact_match);

auto&& value = PrimitiveTypeConvertor<Type>::to_storage_field_type(_value);
Status status = iterator->seek_dictionary(&value, &exact_match);
rowid_t seeked_ordinal = iterator->current_ordinal();

return _bitmap_compare(status, exact_match, ordinal_limit, seeked_ordinal, iterator,
Expand Down Expand Up @@ -107,7 +109,9 @@ class ComparisonPredicateBase : public ColumnPredicate {
}

roaring::Roaring roaring;
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &_value, query_type,

auto&& value = PrimitiveTypeConvertor<Type>::to_storage_field_type(_value);
RETURN_IF_ERROR(iterator->read_from_inverted_index(column_name, &value, query_type,
num_rows, &roaring));

// mask out null_bitmap, since NULL cmp VALUE will produce NULL
Expand Down Expand Up @@ -150,17 +154,13 @@ class ComparisonPredicateBase : public ColumnPredicate {
_evaluate_bit<true>(column, sel, size, flags);
}

using WarpperFieldType = std::conditional_t<Type == TYPE_DATE, uint24_t, T>;

bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const override {
if (statistic.first->is_null()) {
return true;
}

T tmp_min_value {};
T tmp_max_value {};
memcpy((char*)(&tmp_min_value), statistic.first->cell_ptr(), sizeof(WarpperFieldType));
memcpy((char*)(&tmp_max_value), statistic.second->cell_ptr(), sizeof(WarpperFieldType));
T tmp_min_value = get_zone_map_value<Type, T>(statistic.first->cell_ptr());
T tmp_max_value = get_zone_map_value<Type, T>(statistic.second->cell_ptr());

if constexpr (PT == PredicateType::EQ) {
return _operator(tmp_min_value <= _value && tmp_max_value >= _value, true);
Expand All @@ -183,10 +183,8 @@ class ComparisonPredicateBase : public ColumnPredicate {
<< " Type: " << Type << " sizeof(T): " << sizeof(T)
<< " statistic.first->size(): " << statistic.first->size();

T tmp_min_value {};
T tmp_max_value {};
memcpy((char*)(&tmp_min_value), statistic.first->cell_ptr(), sizeof(WarpperFieldType));
memcpy((char*)(&tmp_max_value), statistic.second->cell_ptr(), sizeof(WarpperFieldType));
T tmp_min_value = get_zone_map_value<Type, T>(statistic.first->cell_ptr());
T tmp_max_value = get_zone_map_value<Type, T>(statistic.second->cell_ptr());

if constexpr (PT == PredicateType::LT) {
return _value > tmp_max_value;
Expand All @@ -206,10 +204,8 @@ class ComparisonPredicateBase : public ColumnPredicate {
return false;
}

T tmp_min_value {};
T tmp_max_value {};
memcpy((char*)(&tmp_min_value), statistic.first->cell_ptr(), sizeof(WarpperFieldType));
memcpy((char*)(&tmp_max_value), statistic.second->cell_ptr(), sizeof(WarpperFieldType));
T tmp_min_value = get_zone_map_value<Type, T>(statistic.first->cell_ptr());
T tmp_max_value = get_zone_map_value<Type, T>(statistic.second->cell_ptr());

if constexpr (PT == PredicateType::EQ) {
return tmp_min_value == _value && tmp_max_value == _value;
Expand All @@ -232,8 +228,17 @@ class ComparisonPredicateBase : public ColumnPredicate {
if constexpr (std::is_same_v<T, StringRef>) {
return bf->test_bytes(_value.data, _value.size);
} else {
return bf->test_bytes(const_cast<char*>(reinterpret_cast<const char*>(&_value)),
sizeof(WarpperFieldType));
// DecimalV2 is stored as decimal12_t in the bloom filter, so the value must be converted to decimal12_t.
// DateV1/DatetimeV1 are stored as VecDateTimeValue in the bloom filter, so no conversion is needed.
if constexpr (Type == PrimitiveType::TYPE_DECIMALV2) {
decimal12_t decimal12_t_val(_value.int_value(), _value.frac_value());
return bf->test_bytes(
const_cast<char*>(reinterpret_cast<const char*>(&decimal12_t_val)),
sizeof(decimal12_t));
} else {
return bf->test_bytes(const_cast<char*>(reinterpret_cast<const char*>(&_value)),
sizeof(T));
}
}
} else {
LOG(FATAL) << "Bloom filter is not supported by predicate type.";
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/decimal12.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ struct decimal12_t {
return std::string(buf);
}

// Do not modify this structure: ZoneMap uses from_string and to_string
// to serialize DecimalV2 values to segment files.
Status from_string(const std::string& str) {
integer = 0;
fraction = 0;
Expand Down
Loading

0 comments on commit 3ad865f

Please sign in to comment.