From c96091c208db5bccac434fabcbb0adb8ae2e322b Mon Sep 17 00:00:00 2001 From: jacktengg <18241664+jacktengg@users.noreply.github.com> Date: Tue, 26 Sep 2023 17:29:11 +0800 Subject: [PATCH] [feature](decimal) support decimal256: compile ok --- be/src/common/consts.h | 1 + be/src/exec/olap_common.h | 2 + be/src/exec/schema_scanner.cpp | 6 + be/src/olap/olap_common.h | 3 +- be/src/runtime/define_primitive_type.h | 1 + be/src/runtime/primitive_type.h | 6 + be/src/runtime/runtime_predicate.cpp | 4 + be/src/runtime/runtime_predicate.h | 6 + be/src/runtime/type_limit.h | 8 + be/src/util/binary_cast.hpp | 6 +- be/src/util/string_parser.hpp | 5 +- be/src/vec/columns/column_decimal.cpp | 12 +- be/src/vec/columns/column_decimal.h | 4 +- be/src/vec/columns/columns_number.h | 1 + be/src/vec/common/arithmetic_overflow.h | 13 + be/src/vec/common/field_visitors.h | 2 + be/src/vec/common/int_exp.h | 92 ++ be/src/vec/core/accurate_comparison.h | 622 ++++---- be/src/vec/core/decimal_comparison.h | 16 +- be/src/vec/core/decomposed_float.h | 182 +++ be/src/vec/core/extended_types.h | 108 ++ be/src/vec/core/field.cpp | 1 + be/src/vec/core/field.h | 15 + be/src/vec/core/types.h | 362 ++++- be/src/vec/core/wide_integer.h | 290 ++++ be/src/vec/core/wide_integer_impl.h | 1357 +++++++++++++++++ be/src/vec/core/wide_integer_to_string.h | 58 + .../vec/data_types/convert_field_to_type.cpp | 3 + be/src/vec/data_types/data_type.cpp | 2 + be/src/vec/data_types/data_type.h | 4 +- be/src/vec/data_types/data_type_decimal.cpp | 11 +- be/src/vec/data_types/data_type_decimal.h | 34 +- be/src/vec/data_types/data_type_factory.cpp | 8 + be/src/vec/data_types/get_least_supertype.cpp | 7 +- .../serde/data_type_decimal_serde.cpp | 2 + .../serde/data_type_decimal_serde.h | 9 + be/src/vec/exec/format/orc/vorc_reader.cpp | 1 + .../parquet/byte_array_dict_decoder.cpp | 1 + .../parquet/byte_array_plain_decoder.cpp | 1 + .../parquet/fix_length_dict_decoder.hpp | 2 + .../parquet/fix_length_plain_decoder.cpp | 1 + be/src/vec/exec/jni_connector.cpp | 2 + be/src/vec/olap/olap_data_convertor.cpp | 3 + be/src/vec/runtime/vorc_transformer.cpp | 1 + be/src/vec/runtime/vparquet_transformer.cpp | 1 + be/src/vec/sink/vtablet_block_convertor.cpp | 4 + be/test/vec/data_types/decimal_test.cpp | 81 + gensrc/proto/types.proto | 1 + gensrc/thrift/Types.thrift | 3 +- 49 files changed, 3071 insertions(+), 294 deletions(-) create mode 100644 be/src/vec/core/decomposed_float.h create mode 100644 be/src/vec/core/extended_types.h create mode 100644 be/src/vec/core/wide_integer.h create mode 100644 be/src/vec/core/wide_integer_impl.h create mode 100644 be/src/vec/core/wide_integer_to_string.h create mode 100644 be/test/vec/data_types/decimal_test.cpp diff --git a/be/src/common/consts.h b/be/src/common/consts.h index 72942f75b22c99..90ed3a663b821c 100644 --- a/be/src/common/consts.h +++ b/be/src/common/consts.h @@ -33,5 +33,6 @@ const std::string DYNAMIC_COLUMN_NAME = "__DORIS_DYNAMIC_COL__"; constexpr int MAX_DECIMAL32_PRECISION = 9; constexpr int MAX_DECIMAL64_PRECISION = 18; constexpr int MAX_DECIMAL128_PRECISION = 38; +constexpr int MAX_DECIMAL256_PRECISION = 76; } // namespace BeConsts } // namespace doris diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index 7a58645b74cbbc..31d68b998fd0eb 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -55,6 +55,8 @@ std::string cast_to_string(T value, int scale) { return ((vectorized::Decimal)value).to_string(scale); } else if constexpr (primitive_type == TYPE_DECIMAL128I) { return ((vectorized::Decimal)value).to_string(scale); + } else if constexpr (primitive_type == TYPE_DECIMAL256) { + return ((vectorized::Decimal)value).to_string(scale); } else if constexpr (primitive_type == TYPE_TINYINT) { return std::to_string(static_cast(value)); } else if constexpr (primitive_type == TYPE_LARGEINT) { diff --git a/be/src/exec/schema_scanner.cpp b/be/src/exec/schema_scanner.cpp index 9733558284a8fd..3062b9d7be2cdc 100644 --- a/be/src/exec/schema_scanner.cpp +++ b/be/src/exec/schema_scanner.cpp @@ -287,6 +287,12 @@ Status SchemaScanner::fill_dest_column_for_range(vectorized::Block* block, size_ reinterpret_cast(&num), 0); break; } + // case TYPE_DECIMAL256: { + // const vectorized::Int256 num = (reinterpret_cast(data))->value; + // reinterpret_cast(col_ptr)->insert_data( + // reinterpret_cast(&num), 0); + // break; + // } case TYPE_DECIMAL32: { const int32_t num = *reinterpret_cast(data); diff --git a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index 130d65e7ef448d..b2a6ac49cd1c0b 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -143,7 +143,8 @@ enum class FieldType { OLAP_FIELD_TYPE_DECIMAL128I = 33, OLAP_FIELD_TYPE_JSONB = 34, OLAP_FIELD_TYPE_VARIANT = 35, - OLAP_FIELD_TYPE_AGG_STATE = 36 + OLAP_FIELD_TYPE_AGG_STATE = 36, + OLAP_FIELD_TYPE_DECIMAL256 = 37, }; // Define all aggregation methods supported by Field diff --git a/be/src/runtime/define_primitive_type.h b/be/src/runtime/define_primitive_type.h index 0ecacb92347c0b..7ac29fd4f8c38d 100644 --- a/be/src/runtime/define_primitive_type.h +++ b/be/src/runtime/define_primitive_type.h @@ -63,6 +63,7 @@ enum PrimitiveType : PrimitiveNative { TYPE_VARIANT, /* 32 */ TYPE_LAMBDA_FUNCTION, /* 33 */ TYPE_AGG_STATE, /* 34 */ + TYPE_DECIMAL256, /* 35 */ }; constexpr PrimitiveNative BEGIN_OF_PRIMITIVE_TYPE = INVALID_TYPE; diff --git a/be/src/runtime/primitive_type.h b/be/src/runtime/primitive_type.h index 07ef91f11cbca4..4b8d0d739aa5b7 100644 --- a/be/src/runtime/primitive_type.h +++ b/be/src/runtime/primitive_type.h @@ -59,6 +59,7 @@ constexpr bool is_enumeration_type(PrimitiveType type) { case TYPE_DECIMAL32: case TYPE_DECIMAL64: case TYPE_DECIMAL128I: + case TYPE_DECIMAL256: case TYPE_BOOLEAN: case TYPE_ARRAY: case TYPE_STRUCT: @@ -205,6 +206,11 @@ struct PrimitiveTypeTraits { using ColumnType = vectorized::ColumnDecimal; }; template <> +struct PrimitiveTypeTraits { + using CppType = vectorized::Decimal256; + using ColumnType = vectorized::ColumnDecimal; +}; +template <> struct PrimitiveTypeTraits { using CppType = __int128_t; using ColumnType = vectorized::ColumnInt128; diff --git a/be/src/runtime/runtime_predicate.cpp b/be/src/runtime/runtime_predicate.cpp index f053b842c7be17..2b949fb10e6c53 100644 --- a/be/src/runtime/runtime_predicate.cpp +++ b/be/src/runtime/runtime_predicate.cpp @@ -112,6 +112,10 @@ Status RuntimePredicate::init(const PrimitiveType type, const bool nulls_first) _get_value_fn = get_decimal128_value; break; } + case PrimitiveType::TYPE_DECIMAL256: { + _get_value_fn = get_decimal256_value; + break; + } default: return Status::InvalidArgument("unsupported runtime predicate type {}", type); } diff --git a/be/src/runtime/runtime_predicate.h b/be/src/runtime/runtime_predicate.h index 9dd48279acbfe6..b1d4dadf1a4fa8 100644 --- a/be/src/runtime/runtime_predicate.h +++ b/be/src/runtime/runtime_predicate.h @@ -173,6 +173,12 @@ class RuntimePredicate { auto v = field.get>(); return cast_to_string(v.get_value(), v.get_scale()); } + + static std::string get_decimal256_value(const Field& field) { + using ValueType = typename PrimitiveTypeTraits::CppType; + auto v = field.get>(); + return cast_to_string(v.get_value(), v.get_scale()); + } }; } // namespace vectorized diff --git a/be/src/runtime/type_limit.h b/be/src/runtime/type_limit.h index 4d9fd5e646a4c0..d23a9f1921656c 100644 --- a/be/src/runtime/type_limit.h +++ b/be/src/runtime/type_limit.h @@ -20,6 +20,7 @@ #include "runtime/datetime_value.h" #include "runtime/decimalv2_value.h" #include "vec/common/string_ref.h" +#include "vec/core/wide_integer.h" namespace doris { @@ -70,6 +71,13 @@ struct type_limit { } static vectorized::Decimal128 min() { return -max(); } }; +static Int256 MAX_DECIMAL256_INT({18446744073709551615ul, 8607968719199866879ul, + 532749306367912313ul, 1593091911132452277ul}); +template <> +struct type_limit { + static vectorized::Decimal256 max() { return vectorized::Decimal256(MAX_DECIMAL256_INT); } + static vectorized::Decimal256 min() { return vectorized::Decimal256(-MAX_DECIMAL256_INT); } +}; template <> struct type_limit { diff --git a/be/src/util/binary_cast.hpp b/be/src/util/binary_cast.hpp index ecba899ec220ee..43ea6486bd476c 100644 --- a/be/src/util/binary_cast.hpp +++ b/be/src/util/binary_cast.hpp @@ -24,6 +24,7 @@ #include "runtime/datetime_value.h" #include "runtime/decimalv2_value.h" #include "util/types.h" +#include "vec/core/wide_integer.h" #include "vec/runtime/vdatetime_value.h" namespace doris { union TypeConverter { @@ -79,6 +80,7 @@ To binary_cast(From from) { match_v; constexpr bool from_i128_to_decv2 = match_v; constexpr bool from_decv2_to_i128 = match_v; + constexpr bool from_decv2_to_i256 = match_v; constexpr bool from_ui32_to_date_v2 = match_v static T get_scale_multiplier(int scale) { static_assert(std::is_same_v || std::is_same_v || - std::is_same_v, + std::is_same_v || std::is_same_v, "You can only instantiate as int32_t, int64_t, __int128."); if constexpr (std::is_same_v) { return common::exp10_i32(scale); @@ -99,6 +100,8 @@ class StringParser { return common::exp10_i64(scale); } else if constexpr (std::is_same_v) { return common::exp10_i128(scale); + } else if constexpr (std::is_same_v) { + return common::exp10_i256(scale); } } diff --git a/be/src/vec/columns/column_decimal.cpp b/be/src/vec/columns/column_decimal.cpp index 7c9a8cf0849f79..5bba2b03df5589 100644 --- a/be/src/vec/columns/column_decimal.cpp +++ b/be/src/vec/columns/column_decimal.cpp @@ -122,8 +122,10 @@ template UInt64 ColumnDecimal::get64(size_t n) const { if constexpr (sizeof(T) > sizeof(UInt64)) { LOG(FATAL) << "Method get64 is not supported for " << get_family_name(); + return 0; + } else { + return static_cast(data[n]); } - return static_cast(data[n]); } template @@ -527,6 +529,13 @@ Decimal128I ColumnDecimal::get_scale_multiplier() const { return common::exp10_i128(scale); } +// duplicate with +// Decimal256 DataTypeDecimal::get_scale_multiplier(UInt32 scale) { +template <> +Decimal256 ColumnDecimal::get_scale_multiplier() const { + return Decimal256(common::exp10_i256(scale)); +} + template ColumnPtr ColumnDecimal::index(const IColumn& indexes, size_t limit) const { return select_index_impl(*this, indexes, limit); @@ -536,4 +545,5 @@ template class ColumnDecimal; template class ColumnDecimal; template class ColumnDecimal; template class ColumnDecimal; +template class ColumnDecimal; } // namespace doris::vectorized diff --git a/be/src/vec/columns/column_decimal.h b/be/src/vec/columns/column_decimal.h index 87a8cddc99e9cd..0b664dae68d64e 100644 --- a/be/src/vec/columns/column_decimal.h +++ b/be/src/vec/columns/column_decimal.h @@ -201,8 +201,8 @@ class ColumnDecimal final : public COWHelper(&data[n]), sizeof(data[n])); } void get(size_t n, Field& res) const override { res = (*this)[n]; } - bool get_bool(size_t n) const override { return bool(data[n]); } - Int64 get_int(size_t n) const override { return Int64(data[n] * scale); } + bool get_bool(size_t n) const override { return bool(data[n].value); } + Int64 get_int(size_t n) const override { return Int64(data[n].value * scale); } UInt64 get64(size_t n) const override; bool is_default_at(size_t n) const override { return data[n].value == 0; } diff --git a/be/src/vec/columns/columns_number.h b/be/src/vec/columns/columns_number.h index 1ce930e4cd1a66..a9d3c9ac16c785 100644 --- a/be/src/vec/columns/columns_number.h +++ b/be/src/vec/columns/columns_number.h @@ -53,6 +53,7 @@ using ColumnDecimal32 = ColumnDecimal; using ColumnDecimal64 = ColumnDecimal; using ColumnDecimal128 = ColumnDecimal; using ColumnDecimal128I = ColumnDecimal; +using ColumnDecimal256 = ColumnDecimal; template struct IsFixLenColumnType { diff --git a/be/src/vec/common/arithmetic_overflow.h b/be/src/vec/common/arithmetic_overflow.h index 0d0828a61bfa87..24abd225ada12d 100644 --- a/be/src/vec/common/arithmetic_overflow.h +++ b/be/src/vec/common/arithmetic_overflow.h @@ -20,6 +20,7 @@ #pragma once +#include "vec/core/wide_integer.h" namespace common { template inline bool add_overflow(T x, T y, T& res) { @@ -79,6 +80,12 @@ inline bool sub_overflow(__int128 x, __int128 y, __int128& res) { return (y < 0 && x > max_int128 + y) || (y > 0 && x < min_int128 + y); } +/// Multiply and ignore overflow. +template +inline auto mul_ignore_overflow(T1 x, T2 y) { + return x * y; +} + template inline bool mul_overflow(T x, T y, T& res) { return __builtin_mul_overflow(x, y, &res); @@ -109,4 +116,10 @@ inline bool mul_overflow(__int128 x, __int128 y, __int128& res) { unsigned __int128 b = (y > 0) ? y : -y; return (a * b) / b != a; } + +template <> +inline bool mul_overflow(wide::Int256 x, wide::Int256 y, wide::Int256& res) { + res = mul_ignore_overflow(x, y); + return false; +} } // namespace common diff --git a/be/src/vec/common/field_visitors.h b/be/src/vec/common/field_visitors.h index 68a85170d4519e..8434483b7721ea 100644 --- a/be/src/vec/common/field_visitors.h +++ b/be/src/vec/common/field_visitors.h @@ -63,6 +63,8 @@ typename std::decay_t::ResultType apply_visitor(Visitor&& visitor, F&& return visitor(field.template get>()); case Field::Types::Decimal128I: return visitor(field.template get>()); + case Field::Types::Decimal256: + return visitor(field.template get>()); default: LOG(FATAL) << "Bad type of Field"; return {}; diff --git a/be/src/vec/common/int_exp.h b/be/src/vec/common/int_exp.h index cac7f24f0404e4..81ca11bb11a10c 100644 --- a/be/src/vec/common/int_exp.h +++ b/be/src/vec/common/int_exp.h @@ -24,6 +24,8 @@ #include #include +#include "vec/core/wide_integer.h" + namespace exp_details { // compile-time exp(v, n) by linear recursion @@ -78,4 +80,94 @@ inline constexpr __int128 exp10_i128(int x) { return exp_details::get_exp<__int128, 10, 39>(x); } +using wide::Int256; +inline Int256 exp10_i256(int x) { + if (x < 0) return 0; + if (x > 76) return std::numeric_limits::max(); + + using Int256 = Int256; + static constexpr Int256 i10e18 {1000000000000000000ll}; + static const Int256 values[] = { + static_cast(1ll), + static_cast(10ll), + static_cast(100ll), + static_cast(1000ll), + static_cast(10000ll), + static_cast(100000ll), + static_cast(1000000ll), + static_cast(10000000ll), + static_cast(100000000ll), + static_cast(1000000000ll), + static_cast(10000000000ll), + static_cast(100000000000ll), + static_cast(1000000000000ll), + static_cast(10000000000000ll), + static_cast(100000000000000ll), + static_cast(1000000000000000ll), + static_cast(10000000000000000ll), + static_cast(100000000000000000ll), + i10e18, + i10e18 * 10ll, + i10e18 * 100ll, + i10e18 * 1000ll, + i10e18 * 10000ll, + i10e18 * 100000ll, + i10e18 * 1000000ll, + i10e18 * 10000000ll, + i10e18 * 100000000ll, + i10e18 * 1000000000ll, + i10e18 * 10000000000ll, + i10e18 * 100000000000ll, + i10e18 * 1000000000000ll, + i10e18 * 10000000000000ll, + i10e18 * 100000000000000ll, + i10e18 * 1000000000000000ll, + i10e18 * 10000000000000000ll, + i10e18 * 100000000000000000ll, + i10e18 * 100000000000000000ll * 10ll, + i10e18 * 100000000000000000ll * 100ll, + i10e18 * 100000000000000000ll * 1000ll, + i10e18 * 100000000000000000ll * 10000ll, + i10e18 * 100000000000000000ll * 100000ll, + i10e18 * 100000000000000000ll * 1000000ll, + i10e18 * 100000000000000000ll * 10000000ll, + i10e18 * 100000000000000000ll * 100000000ll, + i10e18 * 100000000000000000ll * 1000000000ll, + i10e18 * 100000000000000000ll * 10000000000ll, + i10e18 * 100000000000000000ll * 100000000000ll, + i10e18 * 100000000000000000ll * 1000000000000ll, + i10e18 * 100000000000000000ll * 10000000000000ll, + i10e18 * 100000000000000000ll * 100000000000000ll, + i10e18 * 100000000000000000ll * 1000000000000000ll, + i10e18 * 100000000000000000ll * 10000000000000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 10ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 1000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 10000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 1000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 10000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 1000000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 10000000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 1000000000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 10000000000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 1000000000000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 10000000000000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 10ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 100ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 1000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 10000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 100000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * 1000000ll, + i10e18 * 100000000000000000ll * 100000000000000000ll * 100000000000000000ll * + 10000000ll, + }; + return values[x]; +} + } // namespace common diff --git a/be/src/vec/core/accurate_comparison.h b/be/src/vec/core/accurate_comparison.h index 10e961311ca9c3..5d35f3d3e119c1 100644 --- a/be/src/vec/core/accurate_comparison.h +++ b/be/src/vec/core/accurate_comparison.h @@ -28,6 +28,8 @@ #include "vec/common/nan_utils.h" #include "vec/common/string_ref.h" #include "vec/common/uint128.h" +#include "vec/core/decomposed_float.h" +#include "vec/core/extended_types.h" #include "vec/core/types.h" #include "vec/runtime/vdatetime_value.h" /** Perceptually-correct number comparisons. @@ -159,311 +161,415 @@ inline bool_if_double_can_be_used equalsOpTmpl(TAFloat a, TAInt return static_cast(a) == static_cast(b); } -/* Final realizations */ - template -inline bool_if_not_safe_conversion greaterOp(A a, B b) { - return greaterOpTmpl(a, b); -} +bool lessOp(A a, B b) { + if constexpr (std::is_same_v) return a < b; -template -inline bool_if_safe_conversion greaterOp(A a, B b) { - return a > b; -} + /// float vs float + if constexpr (std::is_floating_point_v && std::is_floating_point_v) return a < b; -// Case 3b. 64-bit integers vs floats comparison. -// See hint at https://github.com/JuliaLang/julia/issues/257 (but it doesn't work properly for -2**63) + /// anything vs NaN + if (is_nan(a) || is_nan(b)) return false; -constexpr doris::vectorized::Int64 MAX_INT64_WITH_EXACT_FLOAT64_REPR = 9007199254740992LL; // 2^53 + /// int vs int + if constexpr (is_integer && is_integer) { + /// same signedness + if constexpr (is_signed_v == is_signed_v) return a < b; -template <> -inline bool greaterOp( - doris::vectorized::Float64 f, doris::vectorized::Int64 i) { - if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) { - return f > static_cast(i); - } + /// different signedness - return (f >= static_cast( - std::numeric_limits< - doris::vectorized::Int64>::max())) // rhs is 2**63 (not 2^63 - 1) - || (f > static_cast( - std::numeric_limits::min()) && - static_cast(f) > i); -} + // if constexpr (is_signed_v && !is_signed_v) + // return a < 0 || static_cast>(a) < b; -template <> -inline bool greaterOp( - doris::vectorized::Int64 i, doris::vectorized::Float64 f) { - if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) { - return f < static_cast(i); + // if constexpr (!is_signed_v && is_signed_v) + // return b >= 0 && a < static_cast>(b); } - return (f < static_cast( - std::numeric_limits::min())) || - (f < static_cast( - std::numeric_limits::max()) && - i > static_cast(f)); -} + /// int vs float + if constexpr (is_integer && std::is_floating_point_v) { + if constexpr (sizeof(A) <= 4) return static_cast(a) < static_cast(b); -template <> -inline bool greaterOp( - doris::vectorized::Float64 f, doris::vectorized::UInt64 u) { - if (u <= static_cast(MAX_INT64_WITH_EXACT_FLOAT64_REPR)) { - return f > static_cast(u); + return DecomposedFloat(b).greater(a); } - return (f >= static_cast( - std::numeric_limits::max())) || - (f >= 0 && static_cast(f) > u); -} + if constexpr (std::is_floating_point_v && is_integer) { + if constexpr (sizeof(B) <= 4) return static_cast(a) < static_cast(b); -template <> -inline bool greaterOp( - doris::vectorized::UInt64 u, doris::vectorized::Float64 f) { - if (u <= static_cast(MAX_INT64_WITH_EXACT_FLOAT64_REPR)) { - return static_cast(u) > f; + return DecomposedFloat(a).less(b); } - return (f < 0) || (f < static_cast( - std::numeric_limits::max()) && - u > static_cast(f)); -} - -// Case 3b for float32 -template <> -inline bool greaterOp( - doris::vectorized::Float32 f, doris::vectorized::Int64 i) { - return greaterOp(static_cast(f), i); -} - -template <> -inline bool greaterOp( - doris::vectorized::Int64 i, doris::vectorized::Float32 f) { - return greaterOp(i, static_cast(f)); -} - -template <> -inline bool greaterOp( - doris::vectorized::Float32 f, doris::vectorized::UInt64 u) { - return greaterOp(static_cast(f), u); -} - -template <> -inline bool greaterOp( - doris::vectorized::UInt64 u, doris::vectorized::Float32 f) { - return greaterOp(u, static_cast(f)); -} - -template <> -inline bool greaterOp( - doris::vectorized::Float64 f, doris::vectorized::UInt128 u) { - return u.low == 0 && greaterOp(f, u.high); + static_assert(is_integer || std::is_floating_point_v); + static_assert(is_integer || std::is_floating_point_v); + __builtin_unreachable(); } +/* Final realizations */ -template <> -inline bool greaterOp( - doris::vectorized::UInt128 u, doris::vectorized::Float64 f) { - return u.low != 0 || greaterOp(u.high, f); -} +// template +// inline bool_if_not_safe_conversion greaterOp(A a, B b) { +// return greaterOpTmpl(a, b); +// } +// +// template +// inline bool_if_safe_conversion greaterOp(A a, B b) { +// return a > b; +// } -template <> -inline bool greaterOp( - doris::vectorized::Float32 f, doris::vectorized::UInt128 u) { - return greaterOp(static_cast(f), u); -} +// Case 3b. 64-bit integers vs floats comparison. +// See hint at https://github.com/JuliaLang/julia/issues/257 (but it doesn't work properly for -2**63) -template <> -inline bool greaterOp( - doris::vectorized::UInt128 u, doris::vectorized::Float32 f) { - return greaterOp(u, static_cast(f)); -} +constexpr doris::vectorized::Int64 MAX_INT64_WITH_EXACT_FLOAT64_REPR = 9007199254740992LL; // 2^53 +// template <> +// inline bool greaterOp( +// doris::vectorized::Float64 f, doris::vectorized::Int64 i) { +// if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) { +// return f > static_cast(i); +// } +// +// return (f >= static_cast( +// std::numeric_limits< +// doris::vectorized::Int64>::max())) // rhs is 2**63 (not 2^63 - 1) +// || (f > static_cast( +// std::numeric_limits::min()) && +// static_cast(f) > i); +// } +// +// template <> +// inline bool greaterOp( +// doris::vectorized::Int64 i, doris::vectorized::Float64 f) { +// if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) { +// return f < static_cast(i); +// } +// +// return (f < static_cast( +// std::numeric_limits::min())) || +// (f < static_cast( +// std::numeric_limits::max()) && +// i > static_cast(f)); +// } +// +// template <> +// inline bool greaterOp( +// doris::vectorized::Float64 f, doris::vectorized::UInt64 u) { +// if (u <= static_cast(MAX_INT64_WITH_EXACT_FLOAT64_REPR)) { +// return f > static_cast(u); +// } +// +// return (f >= static_cast( +// std::numeric_limits::max())) || +// (f >= 0 && static_cast(f) > u); +// } +// +// template <> +// inline bool greaterOp( +// doris::vectorized::UInt64 u, doris::vectorized::Float64 f) { +// if (u <= static_cast(MAX_INT64_WITH_EXACT_FLOAT64_REPR)) { +// return static_cast(u) > f; +// } +// +// return (f < 0) || (f < static_cast( +// std::numeric_limits::max()) && +// u > static_cast(f)); +// } +// +// // Case 3b for float32 +// template <> +// inline bool greaterOp( +// doris::vectorized::Float32 f, doris::vectorized::Int64 i) { +// return greaterOp(static_cast(f), i); +// } +// +// template <> +// inline bool greaterOp( +// doris::vectorized::Int64 i, doris::vectorized::Float32 f) { +// return greaterOp(i, static_cast(f)); +// } +// +// template <> +// inline bool greaterOp( +// doris::vectorized::Float32 f, doris::vectorized::UInt64 u) { +// return greaterOp(static_cast(f), u); +// } +// +// template <> +// inline bool greaterOp( +// doris::vectorized::UInt64 u, doris::vectorized::Float32 f) { +// return greaterOp(u, static_cast(f)); +// } +// +// template <> +// inline bool greaterOp( +// doris::vectorized::Float64 f, doris::vectorized::UInt128 u) { +// return u.low == 0 && greaterOp(f, u.high); +// } +// +// template <> +// inline bool greaterOp( +// doris::vectorized::UInt128 u, doris::vectorized::Float64 f) { +// return u.low != 0 || greaterOp(u.high, f); +// } +// +// template <> +// inline bool greaterOp( +// doris::vectorized::Float32 f, doris::vectorized::UInt128 u) { +// return greaterOp(static_cast(f), u); +// } +// +// template <> +// inline bool greaterOp( +// doris::vectorized::UInt128 u, doris::vectorized::Float32 f) { +// return greaterOp(u, static_cast(f)); +// } +// template -inline bool_if_not_safe_conversion equalsOp(A a, B b) { - return equalsOpTmpl(a, b); +bool greaterOp(A a, B b) { + return lessOp(b, a); } template -inline bool_if_safe_conversion equalsOp(A a, B b) { - using LargestType = std::conditional_t= sizeof(B), A, B>; - return static_cast(a) == static_cast(b); -} +bool greaterOrEqualsOp(A a, B b) { + if (is_nan(a) || is_nan(b)) return false; -template <> -inline bool equalsOp( - doris::vectorized::Float64 f, doris::vectorized::UInt64 u) { - return static_cast(f) == u && - f == static_cast(u); -} - -template <> -inline bool equalsOp( - doris::vectorized::UInt64 u, doris::vectorized::Float64 f) { - return u == static_cast(f) && - static_cast(u) == f; + return !lessOp(a, b); } -template <> -inline bool equalsOp( - doris::vectorized::Float64 f, doris::vectorized::Int64 u) { - return static_cast(f) == u && - f == static_cast(u); -} - -template <> -inline bool equalsOp( - doris::vectorized::Int64 u, doris::vectorized::Float64 f) { - return u == static_cast(f) && - static_cast(u) == f; -} +template +bool lessOrEqualsOp(A a, B b) { + if (is_nan(a) || is_nan(b)) return false; -template <> -inline bool equalsOp( - doris::vectorized::Float32 f, doris::vectorized::UInt64 u) { - return static_cast(f) == u && - f == static_cast(u); + return !lessOp(b, a); } -template <> -inline bool equalsOp( - doris::vectorized::UInt64 u, doris::vectorized::Float32 f) { - return u == static_cast(f) && - static_cast(u) == f; -} +template +bool equalsOp(A a, B b) { + if constexpr (std::is_same_v) return a == b; -template <> -inline bool equalsOp( - doris::vectorized::Float32 f, doris::vectorized::Int64 u) { - return static_cast(f) == u && - f == static_cast(u); -} + /// float vs float + if constexpr (std::is_floating_point_v && std::is_floating_point_v) return a == b; -template <> -inline bool equalsOp( - doris::vectorized::Int64 u, doris::vectorized::Float32 f) { - return u == static_cast(f) && - static_cast(u) == f; -} + /// anything vs NaN + if (is_nan(a) || is_nan(b)) return false; -template <> -inline bool equalsOp( - doris::vectorized::UInt128 u, doris::vectorized::Float64 f) { - return u.low == 0 && equalsOp(static_cast(u.high), f); -} + /// int vs int + if constexpr (is_integer && is_integer) { + /// same signedness + if constexpr (is_signed_v == is_signed_v) return a == b; -template <> -inline bool equalsOp( - doris::vectorized::UInt128 u, doris::vectorized::Float32 f) { - return equalsOp(u, static_cast(f)); -} + /// different signedness -template <> -inline bool equalsOp( - doris::vectorized::Float64 f, doris::vectorized::UInt128 u) { - return equalsOp(u, f); -} + // if constexpr (is_signed_v && !is_signed_v) + // return a >= 0 && static_cast>(a) == b; -template <> -inline bool equalsOp( - doris::vectorized::Float32 f, doris::vectorized::UInt128 u) { - return equalsOp(static_cast(f), u); -} - -inline bool greaterOp(doris::vectorized::Int128 i, doris::vectorized::Float64 f) { - static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64; - static constexpr __int128 max_int128 = - (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll; - - if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) { - return static_cast(i) > f; + // if constexpr (!is_signed_v && is_signed_v) + // return b >= 0 && a == static_cast>(b); } - return (f < static_cast(min_int128)) || - (f < static_cast(max_int128) && - i > static_cast(f)); -} - -inline bool greaterOp(doris::vectorized::Float64 f, doris::vectorized::Int128 i) { - static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64; - static constexpr __int128 max_int128 = - (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll; + /// int vs float + if constexpr (is_integer && std::is_floating_point_v) { + if constexpr (sizeof(A) <= 4) return static_cast(a) == static_cast(b); - if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) { - return f > static_cast(i); + return DecomposedFloat(b).equals(a); } - return (f >= static_cast(max_int128)) || - (f > static_cast(min_int128) && - static_cast(f) > i); -} - -inline bool greaterOp(doris::vectorized::Int128 i, doris::vectorized::Float32 f) { - return greaterOp(i, static_cast(f)); -} -inline bool greaterOp(doris::vectorized::Float32 f, doris::vectorized::Int128 i) { - return greaterOp(static_cast(f), i); -} - -inline bool equalsOp(doris::vectorized::Int128 i, doris::vectorized::Float64 f) { - return i == static_cast(f) && - static_cast(i) == f; -} -inline bool equalsOp(doris::vectorized::Int128 i, doris::vectorized::Float32 f) { - return i == static_cast(f) && - static_cast(i) == f; -} -inline bool equalsOp(doris::vectorized::Float64 f, doris::vectorized::Int128 i) { - return equalsOp(i, f); -} -inline bool equalsOp(doris::vectorized::Float32 f, doris::vectorized::Int128 i) { - return equalsOp(i, f); -} - -template -inline bool_if_not_safe_conversion notEqualsOp(A a, B b) { - return !equalsOp(a, b); -} - -template -inline bool_if_safe_conversion notEqualsOp(A a, B b) { - return a != b; -} + if constexpr (std::is_floating_point_v && is_integer) { + if constexpr (sizeof(B) <= 4) return static_cast(a) == static_cast(b); -template -inline bool_if_not_safe_conversion lessOp(A a, B b) { - return greaterOp(b, a); -} - -template -inline bool_if_safe_conversion lessOp(A a, B b) { - return a < b; -} - -template -inline bool_if_not_safe_conversion lessOrEqualsOp(A a, B b) { - if (is_nan(a) || is_nan(b)) { - return false; + return DecomposedFloat(a).equals(b); } - return !greaterOp(a, b); -} - -template -inline bool_if_safe_conversion lessOrEqualsOp(A a, B b) { - return a <= b; -} + /// e.g comparing UUID with integer. + return false; +} +// // template +// // inline bool_if_not_safe_conversion equalsOp(A a, B b) { +// // return equalsOpTmpl(a, b); +// // } +// // +// // template +// // inline bool_if_safe_conversion equalsOp(A a, B b) { +// // using LargestType = std::conditional_t= sizeof(B), A, B>; +// // return static_cast(a) == static_cast(b); +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::Float64 f, doris::vectorized::UInt64 u) { +// // return static_cast(f) == u && +// // f == static_cast(u); +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::UInt64 u, doris::vectorized::Float64 f) { +// // return u == static_cast(f) && +// // static_cast(u) == f; +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::Float64 f, doris::vectorized::Int64 u) { +// // return static_cast(f) == u && +// // f == static_cast(u); +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::Int64 u, doris::vectorized::Float64 f) { +// // return u == static_cast(f) && +// // static_cast(u) == f; +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::Float32 f, doris::vectorized::UInt64 u) { +// // return static_cast(f) == u && +// // f == static_cast(u); +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::UInt64 u, doris::vectorized::Float32 f) { +// // return u == static_cast(f) && +// // static_cast(u) == f; +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::Float32 f, doris::vectorized::Int64 u) { +// // return static_cast(f) == u && +// // f == static_cast(u); +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::Int64 u, doris::vectorized::Float32 f) { +// // return u == static_cast(f) && +// // static_cast(u) == f; +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::UInt128 u, doris::vectorized::Float64 f) { +// // return u.low == 0 && equalsOp(static_cast(u.high), f); +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::UInt128 u, doris::vectorized::Float32 f) { +// // return equalsOp(u, static_cast(f)); +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::Float64 f, doris::vectorized::UInt128 u) { +// // return equalsOp(u, f); +// // } +// // +// // template <> +// // inline bool equalsOp( +// // doris::vectorized::Float32 f, doris::vectorized::UInt128 u) { +// // return equalsOp(static_cast(f), u); +// // } +// +// inline bool greaterOp(doris::vectorized::Int128 i, doris::vectorized::Float64 f) { +// static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64; +// static constexpr __int128 max_int128 = +// (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll; +// +// if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) { +// return static_cast(i) > f; +// } +// +// return (f < static_cast(min_int128)) || +// (f < static_cast(max_int128) && +// i > static_cast(f)); +// } +// +// inline bool greaterOp(doris::vectorized::Float64 f, doris::vectorized::Int128 i) { +// static constexpr __int128 min_int128 = __int128(0x8000000000000000ll) << 64; +// static constexpr __int128 max_int128 = +// (__int128(0x7fffffffffffffffll) << 64) + 0xffffffffffffffffll; +// +// if (-MAX_INT64_WITH_EXACT_FLOAT64_REPR <= i && i <= MAX_INT64_WITH_EXACT_FLOAT64_REPR) { +// return f > static_cast(i); +// } +// +// return (f >= static_cast(max_int128)) || +// (f > static_cast(min_int128) && +// static_cast(f) > i); +// } +// +// inline bool greaterOp(doris::vectorized::Int128 i, doris::vectorized::Float32 f) { +// return greaterOp(i, static_cast(f)); +// } +// inline bool greaterOp(doris::vectorized::Float32 f, doris::vectorized::Int128 i) { +// return greaterOp(static_cast(f), i); +// } + +// inline bool equalsOp(doris::vectorized::Int128 i, doris::vectorized::Float64 f) { +// return i == static_cast(f) && +// static_cast(i) == f; +// } +// inline bool equalsOp(doris::vectorized::Int128 i, doris::vectorized::Float32 f) { +// return i == static_cast(f) && +// static_cast(i) == f; +// } +// inline bool equalsOp(doris::vectorized::Float64 f, doris::vectorized::Int128 i) { +// return equalsOp(i, f); +// } +// inline bool equalsOp(doris::vectorized::Float32 f, doris::vectorized::Int128 i) { +// return equalsOp(i, f); +// } + +// template +// inline bool_if_not_safe_conversion notEqualsOp(A a, B b) { +// return !equalsOp(a, b); +// } +// +// template +// inline bool_if_safe_conversion notEqualsOp(A a, B b) { +// return a != b; +// } template -inline bool_if_not_safe_conversion greaterOrEqualsOp(A a, B b) { - if (is_nan(a) || is_nan(b)) { - return false; - } - return !greaterOp(b, a); +bool notEqualsOp(A a, B b) { + return !equalsOp(a, b); } -template -inline bool_if_safe_conversion greaterOrEqualsOp(A a, B b) { - return a >= b; -} +// template +// inline bool_if_not_safe_conversion lessOp(A a, B b) { +// return greaterOp(b, a); +// } +// +// template +// inline bool_if_safe_conversion lessOp(A a, B b) { +// return a < b; +// } + +// template +// inline bool_if_not_safe_conversion lessOrEqualsOp(A a, B b) { +// if (is_nan(a) || is_nan(b)) { +// return false; +// } +// return !greaterOp(a, b); +// } +// +// template +// inline bool_if_safe_conversion lessOrEqualsOp(A a, B b) { +// return a <= b; +// } + +// template +// inline bool_if_not_safe_conversion greaterOrEqualsOp(A a, B b) { +// if (is_nan(a) || is_nan(b)) { +// return false; +// } +// return !greaterOp(b, a); +// } +// +// template +// inline bool_if_safe_conversion greaterOrEqualsOp(A a, B b) { +// return a >= b; +// } /// Converts numeric to an equal numeric of other type. /// When `strict` is `true` check that result exactly same as input, otherwise just check overflow diff --git a/be/src/vec/core/decimal_comparison.h b/be/src/vec/core/decimal_comparison.h index 68a083dc159e3e..d7eb5cf9d19cb5 100644 --- a/be/src/vec/core/decimal_comparison.h +++ b/be/src/vec/core/decimal_comparison.h @@ -53,6 +53,10 @@ template <> struct ConstructDecInt<16> { using Type = Int128; }; +template <> +struct ConstructDecInt<32> { + using Type = Int256; +}; template struct DecCompareInt { @@ -105,12 +109,16 @@ class DecimalComparison { } Shift shift; - if (scale_a < scale_b) + if (scale_a < scale_b) { shift.a = DataTypeDecimal(max_decimal_precision(), scale_b) - .get_scale_multiplier(scale_b - scale_a); - if (scale_a > scale_b) + .get_scale_multiplier(scale_b - scale_a) + .value; + } + if (scale_a > scale_b) { shift.b = DataTypeDecimal(max_decimal_precision(), scale_a) - .get_scale_multiplier(scale_a - scale_b); + .get_scale_multiplier(scale_a - scale_b) + .value; + } return apply_with_scale(a, b, shift); } diff --git a/be/src/vec/core/decomposed_float.h b/be/src/vec/core/decomposed_float.h new file mode 100644 index 00000000000000..e09cb4548c594b --- /dev/null +++ b/be/src/vec/core/decomposed_float.h @@ -0,0 +1,182 @@ +#pragma once + +#include +#include +#include + +#include "extended_types.h" + +/// Allows to check the internals of IEEE-754 floating point number. + +template +struct FloatTraits; + +template <> +struct FloatTraits { + using UInt = uint32_t; + static constexpr size_t bits = 32; + static constexpr size_t exponent_bits = 8; + static constexpr size_t mantissa_bits = bits - exponent_bits - 1; +}; + +template <> +struct FloatTraits { + using UInt = uint64_t; + static constexpr size_t bits = 64; + static constexpr size_t exponent_bits = 11; + static constexpr size_t mantissa_bits = bits - exponent_bits - 1; +}; + +/// x = sign * (2 ^ normalized_exponent) * (1 + mantissa * 2 ^ -mantissa_bits) +/// x = sign * (2 ^ normalized_exponent + mantissa * 2 ^ (normalized_exponent - mantissa_bits)) +template +struct DecomposedFloat { + using Traits = FloatTraits; + + explicit DecomposedFloat(T x) { memcpy(&x_uint, &x, sizeof(x)); } + + typename Traits::UInt x_uint; + + bool isNegative() const { return x_uint >> (Traits::bits - 1); } + + /// Returns 0 for both +0. and -0. + int sign() const { return (exponent() == 0 && mantissa() == 0) ? 0 : (isNegative() ? -1 : 1); } + + uint16_t exponent() const { + return (x_uint >> (Traits::mantissa_bits)) & + (((1ull << (Traits::exponent_bits + 1)) - 1) >> 1); + } + + int16_t normalizedExponent() const { + return int16_t(exponent()) - ((1ull << (Traits::exponent_bits - 1)) - 1); + } + + uint64_t mantissa() const { return x_uint & ((1ull << Traits::mantissa_bits) - 1); } + + int64_t mantissaWithSign() const { return isNegative() ? -mantissa() : mantissa(); } + + /// NOTE Probably floating point instructions can be better. + bool isIntegerInRepresentableRange() const { + return x_uint == 0 || + (normalizedExponent() >= 0 /// The number is not less than one + /// The number is inside the range where every integer has exact representation in float + && normalizedExponent() <= static_cast(Traits::mantissa_bits) + /// After multiplying by 2^exp, the fractional part becomes zero, means the number is integer + && ((mantissa() & ((1ULL << (Traits::mantissa_bits - normalizedExponent())) - 1)) == + 0)); + } + + /// Compare float with integer of arbitrary width (both signed and unsigned are supported). Assuming two's complement arithmetic. + /// This function is generic, big integers (128, 256 bit) are supported as well. + /// Infinities are compared correctly. NaNs are treat similarly to infinities, so they can be less than all numbers. + /// (note that we need total order) + /// Returns -1, 0 or 1. + template + int compare(Int rhs) const { + if (rhs == 0) return sign(); + + /// Different signs + if (isNegative() && rhs > 0) return -1; + if (!isNegative() && rhs < 0) return 1; + + /// Fractional number with magnitude less than one + if (normalizedExponent() < 0) { + if (!isNegative()) + return rhs > 0 ? -1 : 1; + else + return rhs >= 0 ? -1 : 1; + } + + /// The case of the most negative integer + if constexpr (is_signed_v) { + if (rhs == std::numeric_limits::lowest()) { + assert(isNegative()); + + if (normalizedExponent() < static_cast(8 * sizeof(Int) - is_signed_v)) + return 1; + if (normalizedExponent() > static_cast(8 * sizeof(Int) - is_signed_v)) + return -1; + + if (mantissa() == 0) + return 0; + else + return -1; + } + } + + /// Too large number: abs(float) > abs(rhs). Also the case with infinities and NaN. + if (normalizedExponent() >= static_cast(8 * sizeof(Int) - is_signed_v)) + return isNegative() ? -1 : 1; + + using UInt = std::conditional_t<(sizeof(Int) > sizeof(typename Traits::UInt)), + make_unsigned_t, typename Traits::UInt>; + UInt uint_rhs = rhs < 0 ? -rhs : rhs; + + /// Smaller octave: abs(rhs) < abs(float) + /// FYI, TIL: octave is also called "binade", https://en.wikipedia.org/wiki/Binade + if (uint_rhs < (static_cast(1) << normalizedExponent())) return isNegative() ? -1 : 1; + + /// Larger octave: abs(rhs) > abs(float) + if (normalizedExponent() + 1 < static_cast(8 * sizeof(Int) - is_signed_v) && + uint_rhs >= (static_cast(1) << (normalizedExponent() + 1))) + return isNegative() ? 1 : -1; + + /// The same octave + /// uint_rhs == 2 ^ normalizedExponent + mantissa * 2 ^ (normalizedExponent - mantissa_bits) + + bool large_and_always_integer = + normalizedExponent() >= static_cast(Traits::mantissa_bits); + + UInt a = large_and_always_integer + ? static_cast(mantissa()) + << (normalizedExponent() - Traits::mantissa_bits) + : static_cast(mantissa()) >> + (Traits::mantissa_bits - normalizedExponent()); + + UInt b = uint_rhs - (static_cast(1) << normalizedExponent()); + + if (a < b) return isNegative() ? 1 : -1; + if (a > b) return isNegative() ? -1 : 1; + + /// Float has no fractional part means that the numbers are equal. + if (large_and_always_integer || + (mantissa() & ((1ULL << (Traits::mantissa_bits - normalizedExponent())) - 1)) == 0) + return 0; + else + /// Float has fractional part means its abs value is larger. + return isNegative() ? -1 : 1; + } + + template + bool equals(Int rhs) const { + return compare(rhs) == 0; + } + + template + bool notEquals(Int rhs) const { + return compare(rhs) != 0; + } + + template + bool less(Int rhs) const { + return compare(rhs) < 0; + } + + template + bool greater(Int rhs) const { + return compare(rhs) > 0; + } + + template + bool lessOrEquals(Int rhs) const { + return compare(rhs) <= 0; + } + + template + bool greaterOrEquals(Int rhs) const { + return compare(rhs) >= 0; + } +}; + +using DecomposedFloat64 = DecomposedFloat; +using DecomposedFloat32 = DecomposedFloat; diff --git a/be/src/vec/core/extended_types.h b/be/src/vec/core/extended_types.h new file mode 100644 index 00000000000000..362ecfea9c3cf1 --- /dev/null +++ b/be/src/vec/core/extended_types.h @@ -0,0 +1,108 @@ +#pragma once + +#include + +#include "wide_integer.h" + +// using Int128 = wide::integer<128, signed>; +// using UInt128 = wide::integer<128, unsigned>; +using Int256 = wide::integer<256, signed>; +// using UInt256 = wide::integer<256, unsigned>; + +static_assert(sizeof(Int256) == 32); +// static_assert(sizeof(UInt256) == 32); + +/// The standard library type traits, such as std::is_arithmetic, with one exception +/// (std::common_type), are "set in stone". Attempting to specialize them causes undefined behavior. +/// So instead of using the std type_traits, we use our own version which allows extension. +template +struct is_signed // NOLINT(readability-identifier-naming) +{ + static constexpr bool value = std::is_signed_v; +}; + +// template <> struct is_signed { static constexpr bool value = true; }; +template <> +struct is_signed { + static constexpr bool value = true; +}; + +template +inline constexpr bool is_signed_v = is_signed::value; + +template +struct is_unsigned // NOLINT(readability-identifier-naming) +{ + static constexpr bool value = std::is_unsigned_v; +}; + +// template <> struct is_unsigned { static constexpr bool value = true; }; +// template <> struct is_unsigned { static constexpr bool value = true; }; + +template +inline constexpr bool is_unsigned_v = is_unsigned::value; + +template +concept is_integer = std::is_integral_v + // || std::is_same_v + // || std::is_same_v + || std::is_same_v; +// || std::is_same_v; + +template +concept is_floating_point = std::is_floating_point_v; + +// template +// struct is_arithmetic // NOLINT(readability-identifier-naming) +// { +// static constexpr bool value = std::is_arithmetic_v; +// }; + +// template <> struct is_arithmetic { static constexpr bool value = true; }; +// template <> struct is_arithmetic { static constexpr bool value = true; }; +// template <> struct is_arithmetic { static constexpr bool value = true; }; +// template <> struct is_arithmetic { static constexpr bool value = true; }; + +// template +// inline constexpr bool is_arithmetic_v = is_arithmetic::value; +// +template +struct make_unsigned // NOLINT(readability-identifier-naming) +{ + using type = std::make_unsigned_t; +}; + +// template <> struct make_unsigned { using type = UInt128; }; +// template <> struct make_unsigned { using type = UInt128; }; +// template <> struct make_unsigned { using type = UInt256; }; +// template <> struct make_unsigned { using type = UInt256; }; + +template +using make_unsigned_t = typename make_unsigned::type; + +// template +// struct make_signed // NOLINT(readability-identifier-naming) +// { +// using type = std::make_signed_t; +// }; +// +// template <> struct make_signed { using type = Int128; }; +// template <> struct make_signed { using type = Int128; }; +// template <> struct make_signed { using type = Int256; }; +// template <> struct make_signed { using type = Int256; }; +// +// template using make_signed_t = typename make_signed::type; +// +// template +// struct is_big_int // NOLINT(readability-identifier-naming) +// { +// static constexpr bool value = false; +// }; +// +// template <> struct is_big_int { static constexpr bool value = true; }; +// template <> struct is_big_int { static constexpr bool value = true; }; +// template <> struct is_big_int { static constexpr bool value = true; }; +// template <> struct is_big_int { static constexpr bool value = true; }; +// +// template +// inline constexpr bool is_big_int_v = is_big_int::value; diff --git a/be/src/vec/core/field.cpp b/be/src/vec/core/field.cpp index 9970b284ceb4b0..337c2c395f201d 100644 --- a/be/src/vec/core/field.cpp +++ b/be/src/vec/core/field.cpp @@ -170,6 +170,7 @@ bool dec_less_or_equal(T x, T y, UInt32 x_scale, UInt32 y_scale) { DECLARE_DECIMAL_COMPARISON(Decimal32) DECLARE_DECIMAL_COMPARISON(Decimal64) DECLARE_DECIMAL_COMPARISON(Decimal128) +DECLARE_DECIMAL_COMPARISON(Decimal256) template <> bool decimal_equal(Decimal128I x, Decimal128I y, UInt32 xs, UInt32 ys) { diff --git a/be/src/vec/core/field.h b/be/src/vec/core/field.h index 9aadfe2a0ae2d8..b997d9ddc22245 100644 --- a/be/src/vec/core/field.h +++ b/be/src/vec/core/field.h @@ -319,6 +319,7 @@ class Field { Bitmap = 27, HyperLogLog = 28, QuantileState = 29, + Decimal256 = 30, }; static const int MIN_NON_POD = 16; @@ -355,6 +356,8 @@ class Field { return "Decimal128"; case Decimal128I: return "Decimal128I"; + case Decimal256: + return "Decimal256"; case FixedLengthObject: return "FixedLengthObject"; case VariantMap: @@ -753,6 +756,10 @@ struct TypeId> { static constexpr const TypeIndex value = TypeIndex::Decimal128I; }; template <> +struct TypeId> { + static constexpr const TypeIndex value = TypeIndex::Decimal256; +}; +template <> struct Field::TypeToEnum { static constexpr Types::Which value = Types::Null; }; @@ -813,6 +820,10 @@ struct Field::TypeToEnum> { static constexpr Types::Which value = Types::Decimal128I; }; template <> +struct Field::TypeToEnum> { + static constexpr Types::Which value = Types::Decimal256; +}; +template <> struct Field::TypeToEnum { static constexpr Types::Which value = Types::VariantMap; }; @@ -993,6 +1004,10 @@ struct NearestFieldTypeImpl { using Type = DecimalField; }; template <> +struct NearestFieldTypeImpl { + using Type = DecimalField; +}; +template <> struct NearestFieldTypeImpl> { using Type = DecimalField; }; diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h index 51c41f8662ccd2..1d7a597f6d493b 100644 --- a/be/src/vec/core/types.h +++ b/be/src/vec/core/types.h @@ -28,6 +28,10 @@ #include "common/consts.h" #include "util/binary_cast.hpp" #include "vec/common/int_exp.h" +#include "vec/core/wide_integer.h" +#include "vec/core/wide_integer_to_string.h" + +using wide::Int256; namespace doris { @@ -91,7 +95,8 @@ enum class TypeIndex { VARIANT = 41, QuantileState = 42, Time = 43, - AggState + AggState = 44, + Decimal256 }; struct Consted { @@ -299,6 +304,10 @@ template <> inline constexpr Int128 decimal_scale_multiplier(UInt32 scale) { return common::exp10_i128(scale); } +template <> +inline constexpr Int256 decimal_scale_multiplier(UInt32 scale) { + return common::exp10_i256(scale); +} /// Own FieldType for Decimal. /// It is only a "storage" for decimal. To perform operations, you also have to provide a scale (number of digits after point). @@ -313,6 +322,7 @@ struct Decimal { #define DECLARE_NUMERIC_CTOR(TYPE) \ Decimal(const TYPE& value_) : value(value_) {} + DECLARE_NUMERIC_CTOR(Int256) DECLARE_NUMERIC_CTOR(Int128) DECLARE_NUMERIC_CTOR(Int32) DECLARE_NUMERIC_CTOR(Int64) @@ -392,11 +402,17 @@ struct Decimal { std::string to_string(UInt32 scale) const { if (value == std::numeric_limits::min()) { - fmt::memory_buffer buffer; - fmt::format_to(buffer, "{}", value); - std::string res {buffer.data(), buffer.size()}; - res.insert(res.size() - scale, "."); - return res; + if constexpr (std::is_same_v) { + std::string res {wide::to_string(value)}; + res.insert(res.size() - scale, "."); + return res; + } else { + fmt::memory_buffer buffer; + fmt::format_to(buffer, "{}", value); + std::string res {buffer.data(), buffer.size()}; + res.insert(res.size() - scale, "."); + return res; + } } static constexpr auto precision = @@ -424,14 +440,18 @@ struct Decimal { whole_part = abs_value / decimal_scale_multiplier(scale); frac_part = abs_value % decimal_scale_multiplier(scale); } - auto end = fmt::format_to(str.data() + pos, "{}", whole_part); - pos = end - str.data(); + if constexpr (std::is_same_v) { + std::string res {wide::to_string(whole_part)}; + } else { + auto end = fmt::format_to(str.data() + pos, "{}", whole_part); + pos = end - str.data(); + } if (scale) { str[pos++] = '.'; for (auto end_pos = pos + scale - 1; end_pos >= pos && frac_part > 0; --end_pos, frac_part /= 10) { - str[end_pos] += frac_part % 10; + str[end_pos] += (int)(frac_part % 10); } } @@ -449,8 +469,11 @@ struct Decimal { __attribute__((always_inline)) size_t to_string(char* dst, UInt32 scale, const T& scale_multiplier) const { if (UNLIKELY(value == std::numeric_limits::min())) { - auto end = fmt::format_to(dst, "{}", value); - return end - dst; + if constexpr (std::is_same_v) { + } else { + auto end = fmt::format_to(dst, "{}", value); + return end - dst; + } } bool is_negative = value < 0; @@ -468,8 +491,11 @@ struct Decimal { whole_part = abs_value / scale_multiplier; frac_part = abs_value % scale_multiplier; } - auto end = fmt::format_to(dst + pos, "{}", whole_part); - pos = end - dst; + if constexpr (std::is_same_v) { + } else { + auto end = fmt::format_to(dst + pos, "{}", whole_part); + pos = end - dst; + } if (LIKELY(scale)) { int low_scale = 0; @@ -489,8 +515,11 @@ struct Decimal { pos += scale - low_scale; } if (frac_part) { - end = fmt::format_to(&dst[pos], "{}", frac_part); - pos = end - dst; + if constexpr (std::is_same_v) { + } else { + auto end = fmt::format_to(&dst[pos], "{}", frac_part); + pos = end - dst; + } } } @@ -521,9 +550,284 @@ struct Decimal128I : public Decimal { } }; +template <> +struct Decimal { + using T = Int256; + using NativeType = Int256; + + Decimal() = default; + Decimal(Decimal&&) = default; + Decimal(const Decimal&) = default; + +#define DECLARE_NUMERIC_CTOR(TYPE) \ + explicit Decimal(const TYPE& value_) : value(value_) {} + + DECLARE_NUMERIC_CTOR(Int256) + // DECLARE_NUMERIC_CTOR(Int128) + DECLARE_NUMERIC_CTOR(Int32) + DECLARE_NUMERIC_CTOR(Int64) + DECLARE_NUMERIC_CTOR(UInt32) + DECLARE_NUMERIC_CTOR(UInt64) + +#undef DECLARE_NUMERIC_CTOR + + explicit Decimal(const Float32& value_) : value(value_) { + if constexpr (std::is_integral::value) { + value = round(value_); + } + } + explicit Decimal(const Float64& value_) : value(value_) { + if constexpr (std::is_integral::value) { + value = round(value_); + } + } + + static Decimal double_to_decimal(double value_) { + DecimalV2Value decimal_value; + decimal_value.assign_from_double(value_); + return Decimal(binary_cast(decimal_value)); + } + + template + explicit Decimal(const Decimal& x) { + value = x; + } + + constexpr Decimal& operator=(Decimal&&) = default; + constexpr Decimal& operator=(const Decimal&) = default; + + operator T() const { return value; } + + const Decimal& operator++() { + value++; + return *this; + } + const Decimal& operator--() { + value--; + return *this; + } + + const Decimal& operator+=(const T& x) { + value += x; + return *this; + } + const Decimal& operator-=(const T& x) { + value -= x; + return *this; + } + const Decimal& operator*=(const T& x) { + value *= x; + return *this; + } + const Decimal& operator/=(const T& x) { + value /= x; + return *this; + } + const Decimal& operator%=(const T& x) { + value %= x; + return *this; + } + + auto operator<=>(const Decimal& x) const { return value <=> x.value; } + + static constexpr int max_string_length() { + constexpr auto precision = + std::is_same_v + ? BeConsts::MAX_DECIMAL32_PRECISION + : (std::is_same_v ? BeConsts::MAX_DECIMAL64_PRECISION + : BeConsts::MAX_DECIMAL128_PRECISION); + return precision + 1 // Add a space for decimal place + + 1 // Add a space for leading 0 + + 1; // Add a space for negative sign + } + + std::string to_string(UInt32 scale) const { + if (value == std::numeric_limits::min()) { + if constexpr (std::is_same_v) { + std::string res {wide::to_string(value)}; + res.insert(res.size() - scale, "."); + return res; + } else { + fmt::memory_buffer buffer; + fmt::format_to(buffer, "{}", value); + std::string res {buffer.data(), buffer.size()}; + res.insert(res.size() - scale, "."); + return res; + } + } + + // TODO: decimal256 + static constexpr auto precision = + std::is_same_v + ? BeConsts::MAX_DECIMAL32_PRECISION + : (std::is_same_v + ? BeConsts::MAX_DECIMAL64_PRECISION + : (std::is_same_v + ? BeConsts::MAX_DECIMAL128_PRECISION + : BeConsts::MAX_DECIMAL256_PRECISION)); + bool is_nagetive = value < 0; + int max_result_length = precision + (scale > 0) // Add a space for decimal place + + (scale == precision) // Add a space for leading 0 + + (is_nagetive); // Add a space for negative sign + std::string str = std::string(max_result_length, '0'); + + T abs_value = value; + int pos = 0; + + if (is_nagetive) { + abs_value = -value; + str[pos++] = '-'; + } + + T whole_part = abs_value; + T frac_part; + if (scale) { + whole_part = abs_value / decimal_scale_multiplier(scale); + frac_part = abs_value % decimal_scale_multiplier(scale); + } + if constexpr (std::is_same_v) { + std::string dec_str {wide::to_string(whole_part)}; + auto end = fmt::format_to(str.data() + pos, "{}", dec_str); + pos = end - str.data(); + } else { + auto end = fmt::format_to(str.data() + pos, "{}", whole_part); + pos = end - str.data(); + } + + if (scale) { + str[pos++] = '.'; + for (auto end_pos = pos + scale - 1; end_pos >= pos && frac_part > 0; + --end_pos, frac_part /= 10) { + str[end_pos] += (int)(frac_part % 10); + } + } + + str.resize(pos + scale); + return str; + } + + /** + * Got the string representation of a decimal. + * @param dst Store the result, should be pre-allocated. + * @param scale Decimal's scale. + * @param scale_multiplier Decimal's scale multiplier. + * @return The length of string. + */ + __attribute__((always_inline)) size_t to_string(char* dst, UInt32 scale, + const T& scale_multiplier) const { + if (UNLIKELY(value == std::numeric_limits::min())) { + if constexpr (std::is_same_v) { + std::string dec_str {wide::to_string(value)}; + auto end = fmt::format_to(dst, "{}", dec_str); + return end - dst; + } else { + auto end = fmt::format_to(dst, "{}", value); + return end - dst; + } + } + + bool is_negative = value < 0; + T abs_value = value; + int pos = 0; + + if (is_negative) { + abs_value = -value; + dst[pos++] = '-'; + } + + T whole_part = abs_value; + T frac_part; + if (LIKELY(scale)) { + whole_part = abs_value / scale_multiplier; + frac_part = abs_value % scale_multiplier; + } + if constexpr (std::is_same_v) { + std::string dec_str {wide::to_string(whole_part)}; + auto end = fmt::format_to(dst + pos, "{}", dec_str); + pos = end - dst; + } else { + auto end = fmt::format_to(dst + pos, "{}", whole_part); + pos = end - dst; + } + + if (LIKELY(scale)) { + int low_scale = 0; + int high_scale = scale; + while (low_scale < high_scale) { + int mid_scale = (high_scale + low_scale) >> 1; + const auto mid_scale_factor = decimal_scale_multiplier(mid_scale); + if (mid_scale_factor <= frac_part) { + low_scale = mid_scale + 1; + } else { + high_scale = mid_scale; + } + } + dst[pos++] = '.'; + if (low_scale < scale) { + memset(&dst[pos], '0', scale - low_scale); + pos += scale - low_scale; + } + if (frac_part) { + if constexpr (std::is_same_v) { + std::string dec_str {wide::to_string(frac_part)}; + auto end = fmt::format_to(dst + pos, "{}", dec_str); + pos = end - dst; + } else { + auto end = fmt::format_to(&dst[pos], "{}", frac_part); + pos = end - dst; + } + } + } + + return pos; + } + + T value; +}; + using Decimal32 = Decimal; using Decimal64 = Decimal; using Decimal128 = Decimal; +using Decimal256 = Decimal; +template +inline Decimal operator-(const Decimal& x) { + return -x.value; +} + +inline Decimal256 operator+(const Decimal256& x, const Decimal256& y) { + return Decimal256(x.value + y.value); +} +inline Decimal256 operator-(const Decimal256& x, const Decimal256& y) { + return Decimal256(x.value - y.value); +} +inline Decimal256 operator/(const Decimal256& x, const Decimal256& y) { + return Decimal256(x.value / y.value); +} +inline Decimal256 operator%(const Decimal256& x, const Decimal256& y) { + return Decimal256(x.value % y.value); +} +inline Decimal256 operator-(const Decimal256& x) { + return Decimal256(-x.value); +} + +inline bool operator<(const Decimal256& x, const Decimal256& y) { + return x.value < y.value; +} +inline bool operator>(const Decimal256& x, const Decimal256& y) { + return x.value > y.value; +} +inline bool operator<=(const Decimal256& x, const Decimal256& y) { + return x.value <= y.value; +} +inline bool operator>=(const Decimal256& x, const Decimal256& y) { + return x.value >= y.value; +} +inline bool operator==(const Decimal256& x, const Decimal256& y) { + return x.value == y.value; +} +inline bool operator!=(const Decimal256& x, const Decimal256& y) { + return x.value != y.value; +} template <> struct TypeName { @@ -542,6 +846,11 @@ struct TypeName { static const char* get() { return "Decimal128I"; } }; +template <> +struct TypeName { + static const char* get() { return "Decimal256"; } +}; + template <> struct TypeId { static constexpr const TypeIndex value = TypeIndex::Decimal32; @@ -558,6 +867,10 @@ template <> struct TypeId { static constexpr const TypeIndex value = TypeIndex::Decimal128I; }; +template <> +struct TypeId { + static constexpr const TypeIndex value = TypeIndex::Decimal256; +}; template constexpr bool IsDecimalNumber = false; @@ -569,6 +882,8 @@ template <> inline constexpr bool IsDecimalNumber = true; template <> inline constexpr bool IsDecimalNumber = true; +template <> +inline constexpr bool IsDecimalNumber = true; template constexpr bool IsDecimal128 = false; @@ -583,6 +898,7 @@ inline constexpr bool IsDecimal128I = true; template constexpr bool IsDecimalV2 = IsDecimal128 && !IsDecimal128I; +// TODO: decimal256? template using DisposeDecimal = std::conditional_t, Decimal128, std::conditional_t, Decimal128I, U>>; @@ -614,6 +930,10 @@ template <> struct NativeType { using Type = Int128; }; +template <> +struct NativeType { + using Type = Int256; +}; inline const char* getTypeName(TypeIndex idx) { switch (idx) { @@ -669,6 +989,8 @@ inline const char* getTypeName(TypeIndex idx) { return TypeName::get(); case TypeIndex::Decimal128I: return TypeName::get(); + case TypeIndex::Decimal256: + return TypeName::get(); case TypeIndex::UUID: return "UUID"; case TypeIndex::Array: @@ -739,6 +1061,16 @@ struct std::hash { } }; +// TODO: +template <> +struct std::hash { + size_t operator()(const doris::vectorized::Decimal256& x) const { + return std::hash()(x.value >> 192) ^ std::hash()(x.value >> 128) ^ + std::hash()(x.value >> 64) ^ + std::hash()(x.value & std::numeric_limits::max()); + } +}; + constexpr bool typeindex_is_int(doris::vectorized::TypeIndex index) { using TypeIndex = doris::vectorized::TypeIndex; switch (index) { diff --git a/be/src/vec/core/wide_integer.h b/be/src/vec/core/wide_integer.h new file mode 100644 index 00000000000000..c13e0092d47c72 --- /dev/null +++ b/be/src/vec/core/wide_integer.h @@ -0,0 +1,290 @@ +#pragma once + +/////////////////////////////////////////////////////////////// +// Distributed under the Boost Software License, Version 1.0. +// (See at http://www.boost.org/LICENSE_1_0.txt) +/////////////////////////////////////////////////////////////// + +/* Divide and multiply + * + * + * Copyright (c) 2008 + * Evan Teran + * + * Permission to use, copy, modify, and distribute this software and its + * documentation for any purpose and without fee is hereby granted, provided + * that the above copyright notice appears in all copies and that both the + * copyright notice and this permission notice appear in supporting + * documentation, and that the same name not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. We make no representations about the + * suitability this software for any purpose. It is provided "as is" + * without express or implied warranty. + */ + +#include +#include +#include +#include + +// NOLINTBEGIN(*) + +namespace wide { +template +class integer; +} + +namespace std { + +template +struct common_type, wide::integer>; + +template +struct common_type, Arithmetic>; + +template +struct common_type>; + +} // namespace std + +namespace wide { + +template +class integer { +public: + using base_type = uint64_t; + using signed_base_type = int64_t; + + // ctors + constexpr integer() noexcept = default; + + template + constexpr integer(T rhs) noexcept; + + template + constexpr integer(std::initializer_list il) noexcept; + + // assignment + template + constexpr integer& operator=(const integer& rhs) noexcept; + + template + constexpr integer& operator=(Arithmetic rhs) noexcept; + + template + constexpr integer& operator*=(const Arithmetic& rhs); + + template + constexpr integer& operator/=(const Arithmetic& rhs); + + template + constexpr integer& operator+=(const Arithmetic& rhs) noexcept( + std::is_same_v); + + template + constexpr integer& operator-=(const Arithmetic& rhs) noexcept( + std::is_same_v); + + template + constexpr integer& operator%=(const Integral& rhs); + + template + constexpr integer& operator&=(const Integral& rhs) noexcept; + + template + constexpr integer& operator|=(const Integral& rhs) noexcept; + + template + constexpr integer& operator^=(const Integral& rhs) noexcept; + + constexpr integer& operator<<=(int n) noexcept; + constexpr integer& operator>>=(int n) noexcept; + + constexpr integer& operator++() noexcept(std::is_same_v); + constexpr integer operator++(int) noexcept(std::is_same_v); + constexpr integer& operator--() noexcept(std::is_same_v); + constexpr integer operator--(int) noexcept(std::is_same_v); + + // observers + + constexpr explicit operator bool() const noexcept; + + template , T>> + constexpr operator T() const noexcept; + + constexpr operator long double() const noexcept; + constexpr operator double() const noexcept; + constexpr operator float() const noexcept; + + struct _impl; + + base_type items[_impl::item_count]; + +private: + template + friend class integer; + + friend class std::numeric_limits>; + friend class std::numeric_limits>; +}; + +using Int256 = integer<256, signed>; + +template +static constexpr bool ArithmeticConcept() noexcept; + +template +using _only_arithmetic = + typename std::enable_if() && ArithmeticConcept()>::type; + +template +static constexpr bool IntegralConcept() noexcept; + +template +using _only_integer = typename std::enable_if() && IntegralConcept()>::type; + +// Unary operators +template +constexpr integer operator~(const integer& lhs) noexcept; + +template +constexpr integer operator-(const integer& lhs) noexcept( + std::is_same_v); + +template +constexpr integer operator+(const integer& lhs) noexcept( + std::is_same_v); + +// Binary operators +template +std::common_type_t, integer> constexpr operator*( + const integer& lhs, const integer& rhs); +template > +std::common_type_t constexpr operator*(const Arithmetic& rhs, + const Arithmetic2& lhs); + +template +std::common_type_t, integer> constexpr operator/( + const integer& lhs, const integer& rhs); +template > +std::common_type_t constexpr operator/(const Arithmetic& rhs, + const Arithmetic2& lhs); + +template +std::common_type_t, integer> constexpr operator+( + const integer& lhs, const integer& rhs); +template > +std::common_type_t constexpr operator+(const Arithmetic& rhs, + const Arithmetic2& lhs); + +template +std::common_type_t, integer> constexpr operator-( + const integer& lhs, const integer& rhs); +template > +std::common_type_t constexpr operator-(const Arithmetic& rhs, + const Arithmetic2& lhs); + +template +std::common_type_t, integer> constexpr operator%( + const integer& lhs, const integer& rhs); +template > +std::common_type_t constexpr operator%(const Integral& rhs, + const Integral2& lhs); + +template +std::common_type_t, integer> constexpr operator&( + const integer& lhs, const integer& rhs); +template > +std::common_type_t constexpr operator&(const Integral& rhs, + const Integral2& lhs); + +template +std::common_type_t, integer> constexpr operator|( + const integer& lhs, const integer& rhs); +template > +std::common_type_t constexpr operator|(const Integral& rhs, + const Integral2& lhs); + +template +std::common_type_t, integer> constexpr operator^( + const integer& lhs, const integer& rhs); +template > +std::common_type_t constexpr operator^(const Integral& rhs, + const Integral2& lhs); + +// TODO: Integral +template +constexpr integer operator<<(const integer& lhs, int n) noexcept; + +template +constexpr integer operator>>(const integer& lhs, int n) noexcept; + +template >> +constexpr integer operator<<(const integer& lhs, Int n) noexcept { + return lhs << int(n); +} +template >> +constexpr integer operator>>(const integer& lhs, Int n) noexcept { + return lhs >> int(n); +} + +template +constexpr bool operator<(const integer& lhs, const integer& rhs); +template > +constexpr bool operator<(const Arithmetic& rhs, const Arithmetic2& lhs); + +template +constexpr bool operator>(const integer& lhs, const integer& rhs); +template > +constexpr bool operator>(const Arithmetic& rhs, const Arithmetic2& lhs); + +template +constexpr bool operator<=(const integer& lhs, const integer& rhs); +template > +constexpr bool operator<=(const Arithmetic& rhs, const Arithmetic2& lhs); + +template +constexpr bool operator>=(const integer& lhs, const integer& rhs); +template > +constexpr bool operator>=(const Arithmetic& rhs, const Arithmetic2& lhs); + +template +constexpr bool operator==(const integer& lhs, const integer& rhs); +template > +constexpr bool operator==(const Arithmetic& rhs, const Arithmetic2& lhs); + +template +constexpr bool operator!=(const integer& lhs, const integer& rhs); +template > +constexpr bool operator!=(const Arithmetic& rhs, const Arithmetic2& lhs); + +template +constexpr auto operator<=>(const integer& lhs, const integer& rhs); +template > +constexpr auto operator<=>(const Arithmetic& rhs, const Arithmetic2& lhs); + +} // namespace wide + +namespace std { + +template +struct hash>; + +} + +// NOLINTEND(*) + +#include "wide_integer_impl.h" diff --git a/be/src/vec/core/wide_integer_impl.h b/be/src/vec/core/wide_integer_impl.h new file mode 100644 index 00000000000000..b3f1198d89ed50 --- /dev/null +++ b/be/src/vec/core/wide_integer_impl.h @@ -0,0 +1,1357 @@ +#pragma once + +/// Original is here https://github.com/cerevra/int +/// Distributed under the Boost Software License, Version 1.0. +/// (See at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include +#include +#include +#include +#include + +#include "common/exception.h" + +// NOLINTBEGIN(*) + +/// Use same extended double for all platforms +#if (LDBL_MANT_DIG == 64) +#define CONSTEXPR_FROM_DOUBLE constexpr +using FromDoubleIntermediateType = long double; +#else +#include +/// `wide_integer_from_builtin` can't be constexpr with non-literal `cpp_bin_float_double_extended` +#define CONSTEXPR_FROM_DOUBLE +using FromDoubleIntermediateType = boost::multiprecision::cpp_bin_float_double_extended; +#endif + +namespace CityHash_v1_0_2 { +struct uint128; +} + +namespace wide { + +template +struct IsWideInteger { + static const constexpr bool value = false; +}; + +template +struct IsWideInteger> { + static const constexpr bool value = true; +}; + +template +static constexpr bool ArithmeticConcept() noexcept { + return std::is_arithmetic_v || IsWideInteger::value; +} + +template +static constexpr bool IntegralConcept() noexcept { + return std::is_integral_v || IsWideInteger::value; +} + +template +class IsTupleLike { + template + static auto check(U* p) -> decltype(std::tuple_size::value, int()); + template + static void check(...); + +public: + static constexpr const bool value = !std::is_void(nullptr))>::value; +}; + +} // namespace wide + +namespace std { + +// numeric limits +template +class numeric_limits> { +public: + static constexpr bool is_specialized = true; + static constexpr bool is_signed = is_same::value; + static constexpr bool is_integer = true; + static constexpr bool is_exact = true; + static constexpr bool has_infinity = false; + static constexpr bool has_quiet_NaN = false; + static constexpr bool has_signaling_NaN = true; + static constexpr std::float_denorm_style has_denorm = std::denorm_absent; + static constexpr bool has_denorm_loss = false; + static constexpr std::float_round_style round_style = std::round_toward_zero; + static constexpr bool is_iec559 = false; + static constexpr bool is_bounded = true; + static constexpr bool is_modulo = true; + static constexpr int digits = Bits - (is_same::value ? 1 : 0); + static constexpr int digits10 = digits * 0.30103 /*std::log10(2)*/; + static constexpr int max_digits10 = 0; + static constexpr int radix = 2; + static constexpr int min_exponent = 0; + static constexpr int min_exponent10 = 0; + static constexpr int max_exponent = 0; + static constexpr int max_exponent10 = 0; + static constexpr bool traps = true; + static constexpr bool tinyness_before = false; + + static constexpr wide::integer min() noexcept { + if (is_same::value) { + using T = wide::integer; + T res {}; + res.items[T::_impl::big(0)] = std::numeric_limits< + typename wide::integer::signed_base_type>::min(); + return res; + } + return wide::integer(0); + } + + static constexpr wide::integer max() noexcept { + using T = wide::integer; + T res {}; + res.items[T::_impl::big(0)] = + is_same::value + ? std::numeric_limits< + typename wide::integer::signed_base_type>::max() + : std::numeric_limits< + typename wide::integer::base_type>::max(); + for (unsigned i = 1; i < wide::integer::_impl::item_count; ++i) { + res.items[T::_impl::big(i)] = + std::numeric_limits::base_type>::max(); + } + return res; + } + + static constexpr wide::integer lowest() noexcept { return min(); } + static constexpr wide::integer epsilon() noexcept { return 0; } + static constexpr wide::integer round_error() noexcept { return 0; } + static constexpr wide::integer infinity() noexcept { return 0; } + static constexpr wide::integer quiet_NaN() noexcept { return 0; } + static constexpr wide::integer signaling_NaN() noexcept { return 0; } + static constexpr wide::integer denorm_min() noexcept { return 0; } +}; + +// type traits +template +struct common_type, wide::integer> { + using type = std::conditional_t < Bits == Bits2, + wide::integer && + std::is_same_v), + signed, unsigned>>, + std::conditional_t< + Bits2, wide::integer>>; +}; + +template +struct common_type, Arithmetic> { + static_assert(wide::ArithmeticConcept()); + + using type = std::conditional_t < std::is_floating_point_v, Arithmetic, + std::conditional_t, + std::conditional_t || + std::is_signed_v), + Arithmetic, wide::integer>>>>; +}; + +template +struct common_type> + : common_type, Arithmetic> {}; + +} // namespace std + +namespace wide { + +template +struct integer::_impl { + static constexpr size_t _bits = Bits; + static constexpr const unsigned byte_count = Bits / 8; + static constexpr const unsigned item_count = byte_count / sizeof(base_type); + static constexpr const unsigned base_bits = sizeof(base_type) * 8; + + static_assert(Bits % base_bits == 0); + + /// Simple iteration in both directions + static constexpr unsigned little(unsigned idx) { + if constexpr (std::endian::native == std::endian::little) + return idx; + else + return item_count - 1 - idx; + } + static constexpr unsigned big(unsigned idx) { + if constexpr (std::endian::native == std::endian::little) + return item_count - 1 - idx; + else + return idx; + } + static constexpr unsigned any(unsigned idx) { return idx; } + + template + constexpr static bool is_negative(const T& n) noexcept { + if constexpr (std::is_signed_v) + return n < 0; + else + return false; + } + + template + constexpr static bool is_negative(const integer& n) noexcept { + if constexpr (std::is_same_v) + return static_cast(n.items[integer::_impl::big(0)]) < 0; + else + return false; + } + + template + constexpr static auto make_positive(const T& n) noexcept { + if constexpr (std::is_signed_v) + return n < 0 ? -n : n; + else + return n; + } + + template + constexpr static integer make_positive(const integer& n) noexcept { + return is_negative(n) ? integer(operator_unary_minus(n)) : n; + } + + template + __attribute__((no_sanitize("undefined"))) constexpr static auto to_Integral(T f) noexcept { + /// NOTE: this can be called with DB::Decimal, and in this case, result + /// will be wrong + if constexpr (std::is_signed_v) + return static_cast(f); + else + return static_cast(f); + } + + template + constexpr static void wide_integer_from_builtin(integer& self, + Integral rhs) noexcept { + static_assert(sizeof(Integral) <= sizeof(base_type)); + + self.items[little(0)] = _impl::to_Integral(rhs); + + if constexpr (std::is_signed_v) { + if (rhs < 0) { + for (unsigned i = 1; i < item_count; ++i) self.items[little(i)] = -1; + return; + } + } + + for (unsigned i = 1; i < item_count; ++i) self.items[little(i)] = 0; + } + + template + constexpr static void wide_integer_from_tuple_like(integer& self, + const TupleLike& tuple) noexcept { + if constexpr (i < item_count) { + if constexpr (i < std::tuple_size_v) + self.items[i] = std::get(tuple); + else + self.items[i] = 0; + wide_integer_from_tuple_like(self, tuple); + } + } + + template + constexpr static void wide_integer_from_cityhash_uint128( + integer& self, const CityHashUInt128& value) noexcept { + static_assert(sizeof(item_count) >= 2); + + if constexpr (std::endian::native == std::endian::little) + wide_integer_from_tuple_like(self, std::make_pair(value.low64, value.high64)); + else + wide_integer_from_tuple_like(self, std::make_pair(value.high64, value.low64)); + } + + /** + * N.B. t is constructed from double, so max(t) = max(double) ~ 2^310 + * the recursive call happens when t / 2^64 > 2^64, so there won't be more than 5 of them. + * + * t = a1 * max_int + b1, a1 > max_int, b1 < max_int + * a1 = a2 * max_int + b2, a2 > max_int, b2 < max_int + * a_(n - 1) = a_n * max_int + b2, a_n <= max_int <- base case. + */ + template + constexpr static void set_multiplier(integer& self, T t) noexcept { + constexpr uint64_t max_int = std::numeric_limits::max(); + static_assert(std::is_same_v || std::is_same_v); + /// Implementation specific behaviour on overflow (if we don't check here, stack overflow will triggered in bigint_cast). + if constexpr (std::is_same_v) { + if (!std::isfinite(t)) { + self = 0; + return; + } + } else { + if (!boost::math::isfinite(t)) { + self = 0; + return; + } + } + + const T alpha = t / static_cast(max_int); + + /** Here we have to use strict comparison. + * The max_int is 2^64 - 1. + * When casted to floating point type, it will be rounded to the closest representable number, + * which is 2^64. + * But 2^64 is not representable in uint64_t, + * so the maximum representable number will be strictly less. + */ + if (alpha < static_cast(max_int)) + self = static_cast(alpha); + else // max(double) / 2^64 will surely contain less than 52 precision bits, so speed up computations. + set_multiplier(self, static_cast(alpha)); + + self *= max_int; + self += static_cast(t - floor(alpha) * static_cast(max_int)); // += b_i + } + + CONSTEXPR_FROM_DOUBLE static void wide_integer_from_builtin(integer& self, + double rhs) noexcept { + constexpr int64_t max_int = std::numeric_limits::max(); + constexpr int64_t min_int = std::numeric_limits::lowest(); + + /// There are values in int64 that have more than 53 significant bits (in terms of double + /// representation). Such values, being promoted to double, are rounded up or down. If they are rounded up, + /// the result may not fit in 64 bits. + /// The example of such a number is 9.22337e+18. + /// As to_Integral does a static_cast to int64_t, it may result in UB. + /// The necessary check here is that FromDoubleIntermediateType has enough significant (mantissa) bits to store the + /// int64_t max value precisely. + + if (rhs > static_cast(min_int) && + rhs < static_cast(max_int)) { + self = static_cast(rhs); + return; + } + + const FromDoubleIntermediateType rhs_long_double = + (static_cast(rhs) < 0) + ? -static_cast(rhs) + : rhs; + + set_multiplier(self, rhs_long_double); + + if (rhs < 0) self = -self; + } + + template + constexpr static void wide_integer_from_wide_integer( + integer& self, const integer& rhs) noexcept { + constexpr const unsigned min_bits = (Bits < Bits2) ? Bits : Bits2; + constexpr const unsigned to_copy = min_bits / base_bits; + + for (unsigned i = 0; i < to_copy; ++i) + self.items[little(i)] = rhs.items[integer::_impl::little(i)]; + + if constexpr (Bits > Bits2) { + if constexpr (std::is_signed_v) { + if (rhs < 0) { + for (unsigned i = to_copy; i < item_count; ++i) self.items[little(i)] = -1; + return; + } + } + + for (unsigned i = to_copy; i < item_count; ++i) self.items[little(i)] = 0; + } + } + + template + constexpr static bool should_keep_size() { + return sizeof(T) <= byte_count; + } + + constexpr static integer shift_left(const integer& rhs, + unsigned n) noexcept { + integer lhs; + unsigned items_shift = n / base_bits; + + if (unsigned bit_shift = n % base_bits) { + unsigned overflow_shift = base_bits - bit_shift; + + lhs.items[big(0)] = rhs.items[big(items_shift)] << bit_shift; + for (unsigned i = 1; i < item_count - items_shift; ++i) { + lhs.items[big(i - 1)] |= rhs.items[big(items_shift + i)] >> overflow_shift; + lhs.items[big(i)] = rhs.items[big(items_shift + i)] << bit_shift; + } + } else { + for (unsigned i = 0; i < item_count - items_shift; ++i) + lhs.items[big(i)] = rhs.items[big(items_shift + i)]; + } + + for (unsigned i = 0; i < items_shift; ++i) lhs.items[little(i)] = 0; + return lhs; + } + + constexpr static integer shift_right(const integer& rhs, + unsigned n) noexcept { + integer lhs; + unsigned items_shift = n / base_bits; + unsigned bit_shift = n % base_bits; + + if (bit_shift) { + unsigned overflow_shift = base_bits - bit_shift; + + lhs.items[little(0)] = rhs.items[little(items_shift)] >> bit_shift; + for (unsigned i = 1; i < item_count - items_shift; ++i) { + lhs.items[little(i - 1)] |= rhs.items[little(items_shift + i)] << overflow_shift; + lhs.items[little(i)] = rhs.items[little(items_shift + i)] >> bit_shift; + } + } else { + for (unsigned i = 0; i < item_count - items_shift; ++i) + lhs.items[little(i)] = rhs.items[little(items_shift + i)]; + } + + if (is_negative(rhs)) { + if (bit_shift) + lhs.items[big(items_shift)] |= std::numeric_limits::max() + << (base_bits - bit_shift); + + for (unsigned i = 0; i < items_shift; ++i) + lhs.items[big(i)] = std::numeric_limits::max(); + } else { + for (unsigned i = 0; i < items_shift; ++i) lhs.items[big(i)] = 0; + } + + return lhs; + } + +private: + template + constexpr static base_type get_item(const T& x, unsigned idx) { + if constexpr (IsWideInteger::value) { + if (idx < T::_impl::item_count) return x.items[idx]; + return 0; + } else { + if constexpr (sizeof(T) <= sizeof(base_type)) { + if (little(0) == idx) return static_cast(x); + } else if (idx * sizeof(base_type) < sizeof(T)) + return x >> (idx * base_bits); // & std::numeric_limits::max() + return 0; + } + } + + template + constexpr static integer minus(const integer& lhs, T rhs) { + constexpr const unsigned rhs_items = + (sizeof(T) > sizeof(base_type)) ? (sizeof(T) / sizeof(base_type)) : 1; + constexpr const unsigned op_items = (item_count < rhs_items) ? item_count : rhs_items; + + integer res(lhs); + bool underflows[item_count] = {}; + + for (unsigned i = 0; i < op_items; ++i) { + base_type rhs_item = get_item(rhs, little(i)); + base_type& res_item = res.items[little(i)]; + + underflows[i] = res_item < rhs_item; + res_item -= rhs_item; + } + + for (unsigned i = 1; i < item_count; ++i) { + if (underflows[i - 1]) { + base_type& res_item = res.items[little(i)]; + if (res_item == 0) underflows[i] = true; + --res_item; + } + } + + return res; + } + + template + constexpr static integer plus(const integer& lhs, T rhs) { + constexpr const unsigned rhs_items = + (sizeof(T) > sizeof(base_type)) ? (sizeof(T) / sizeof(base_type)) : 1; + constexpr const unsigned op_items = (item_count < rhs_items) ? item_count : rhs_items; + + integer res(lhs); + bool overflows[item_count] = {}; + + for (unsigned i = 0; i < op_items; ++i) { + base_type rhs_item = get_item(rhs, little(i)); + base_type& res_item = res.items[little(i)]; + + res_item += rhs_item; + overflows[i] = res_item < rhs_item; + } + + for (unsigned i = 1; i < item_count; ++i) { + if (overflows[i - 1]) { + base_type& res_item = res.items[little(i)]; + ++res_item; + if (res_item == 0) overflows[i] = true; + } + } + + return res; + } + + template + constexpr static integer multiply(const integer& lhs, + const T& rhs) { + if constexpr (Bits == 256 && sizeof(base_type) == 8) { + /// @sa https://github.com/abseil/abseil-cpp/blob/master/absl/numeric/int128.h + using HalfType = unsigned __int128; + + HalfType a01 = (HalfType(lhs.items[little(1)]) << 64) + lhs.items[little(0)]; + HalfType a23 = (HalfType(lhs.items[little(3)]) << 64) + lhs.items[little(2)]; + HalfType a0 = lhs.items[little(0)]; + HalfType a1 = lhs.items[little(1)]; + + HalfType b01 = rhs; + uint64_t b0 = b01; + uint64_t b1 = 0; + HalfType b23 = 0; + if constexpr (sizeof(T) > 8) b1 = b01 >> 64; + if constexpr (sizeof(T) > 16) + b23 = (HalfType(rhs.items[little(3)]) << 64) + rhs.items[little(2)]; + + HalfType r23 = a23 * b01 + a01 * b23 + a1 * b1; + HalfType r01 = a0 * b0; + HalfType r12 = (r01 >> 64) + (r23 << 64); + HalfType r12_x = a1 * b0; + + integer res; + res.items[little(0)] = r01; + res.items[little(3)] = r23 >> 64; + + if constexpr (sizeof(T) > 8) { + HalfType r12_y = a0 * b1; + r12_x += r12_y; + if (r12_x < r12_y) ++res.items[little(3)]; + } + + r12 += r12_x; + if (r12 < r12_x) ++res.items[little(3)]; + + res.items[little(1)] = r12; + res.items[little(2)] = r12 >> 64; + return res; + } else if constexpr (Bits == 128 && sizeof(base_type) == 8) { + using CompilerUInt128 = unsigned __int128; + CompilerUInt128 a = + (CompilerUInt128(lhs.items[little(1)]) << 64) + + lhs.items[little( + 0)]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult) + CompilerUInt128 b = + (CompilerUInt128(rhs.items[little(1)]) << 64) + + rhs.items[little( + 0)]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult) + CompilerUInt128 c = a * b; + integer res; + res.items[little(0)] = c; + res.items[little(1)] = c >> 64; + return res; + } else { + integer res {}; +#if 1 + integer lhs2 = plus(lhs, shift_left(lhs, 1)); + integer lhs3 = plus(lhs2, shift_left(lhs, 2)); +#endif + for (unsigned i = 0; i < item_count; ++i) { + base_type rhs_item = get_item(rhs, little(i)); + unsigned pos = i * base_bits; + + while (rhs_item) { +#if 1 /// optimization + if ((rhs_item & 0x7) == 0x7) { + res = plus(res, shift_left(lhs3, pos)); + rhs_item >>= 3; + pos += 3; + continue; + } + + if ((rhs_item & 0x3) == 0x3) { + res = plus(res, shift_left(lhs2, pos)); + rhs_item >>= 2; + pos += 2; + continue; + } +#endif + if (rhs_item & 1) res = plus(res, shift_left(lhs, pos)); + + rhs_item >>= 1; + ++pos; + } + } + + return res; + } + } + +public: + constexpr static integer operator_unary_tilda( + const integer& lhs) noexcept { + integer res; + + for (unsigned i = 0; i < item_count; ++i) res.items[any(i)] = ~lhs.items[any(i)]; + return res; + } + + constexpr static integer operator_unary_minus( + const integer& lhs) noexcept(std::is_same_v) { + return plus(operator_unary_tilda(lhs), 1); + } + + template + constexpr static auto operator_plus(const integer& lhs, + const T& rhs) noexcept(std::is_same_v) { + if constexpr (should_keep_size()) { + if (is_negative(rhs)) + return minus(lhs, -rhs); + else + return plus(lhs, rhs); + } else { + static_assert(IsWideInteger::value); + return std::common_type_t, integer>:: + _impl::operator_plus(integer(lhs), rhs); + } + } + + template + constexpr static auto operator_minus(const integer& lhs, + const T& rhs) noexcept(std::is_same_v) { + if constexpr (should_keep_size()) { + if (is_negative(rhs)) + return plus(lhs, -rhs); + else + return minus(lhs, rhs); + } else { + static_assert(IsWideInteger::value); + return std::common_type_t, integer>:: + _impl::operator_minus(integer(lhs), rhs); + } + } + + template + constexpr static auto operator_star(const integer& lhs, const T& rhs) { + if constexpr (should_keep_size()) { + integer res; + + if constexpr (std::is_signed_v) { + res = multiply((is_negative(lhs) ? make_positive(lhs) : lhs), + (is_negative(rhs) ? make_positive(rhs) : rhs)); + } else { + res = multiply(lhs, (is_negative(rhs) ? make_positive(rhs) : rhs)); + } + + if (std::is_same_v && is_negative(lhs) != is_negative(rhs)) + res = operator_unary_minus(res); + + return res; + } else { + static_assert(IsWideInteger::value); + return std::common_type_t, T>::_impl::operator_star(T(lhs), rhs); + } + } + + template + constexpr static bool operator_greater(const integer& lhs, + const T& rhs) noexcept { + if constexpr (should_keep_size()) { + if (std::numeric_limits::is_signed && (is_negative(lhs) != is_negative(rhs))) + return is_negative(rhs); + + integer t = rhs; + for (unsigned i = 0; i < item_count; ++i) { + base_type rhs_item = get_item(t, big(i)); + + if (lhs.items[big(i)] != rhs_item) return lhs.items[big(i)] > rhs_item; + } + + return false; + } else { + static_assert(IsWideInteger::value); + return std::common_type_t, T>::_impl::operator_greater(T(lhs), + rhs); + } + } + + template + constexpr static bool operator_less(const integer& lhs, const T& rhs) noexcept { + if constexpr (should_keep_size()) { + if (std::numeric_limits::is_signed && (is_negative(lhs) != is_negative(rhs))) + return is_negative(lhs); + + integer t = rhs; + for (unsigned i = 0; i < item_count; ++i) { + base_type rhs_item = get_item(t, big(i)); + + if (lhs.items[big(i)] != rhs_item) return lhs.items[big(i)] < rhs_item; + } + + return false; + } else { + static_assert(IsWideInteger::value); + return std::common_type_t, T>::_impl::operator_less(T(lhs), rhs); + } + } + + template + constexpr static bool operator_eq(const integer& lhs, const T& rhs) noexcept { + if constexpr (should_keep_size()) { + integer t = rhs; + for (unsigned i = 0; i < item_count; ++i) { + base_type rhs_item = get_item(t, any(i)); + + if (lhs.items[any(i)] != rhs_item) return false; + } + + return true; + } else { + static_assert(IsWideInteger::value); + return std::common_type_t, T>::_impl::operator_eq(T(lhs), rhs); + } + } + + template + constexpr static auto operator_pipe(const integer& lhs, const T& rhs) noexcept { + if constexpr (should_keep_size()) { + integer res; + + for (unsigned i = 0; i < item_count; ++i) + res.items[little(i)] = lhs.items[little(i)] | get_item(rhs, little(i)); + return res; + } else { + static_assert(IsWideInteger::value); + return std::common_type_t, T>::_impl::operator_pipe(T(lhs), rhs); + } + } + + template + constexpr static auto operator_amp(const integer& lhs, const T& rhs) noexcept { + if constexpr (should_keep_size()) { + integer res; + + for (unsigned i = 0; i < item_count; ++i) + res.items[little(i)] = lhs.items[little(i)] & get_item(rhs, little(i)); + return res; + } else { + static_assert(IsWideInteger::value); + return std::common_type_t, T>::_impl::operator_amp(T(lhs), rhs); + } + } + + template + constexpr static bool is_zero(const T& x) { + bool is_zero = true; + for (auto item : x.items) { + if (item != 0) { + is_zero = false; + break; + } + } + return is_zero; + } + + /// returns quotient as result and remainder in numerator. + template + constexpr static integer divide(integer& numerator, + integer denominator) { + static_assert(std::is_unsigned_v); + + if constexpr (Bits == 128 && sizeof(base_type) == 8) { + using CompilerUInt128 = unsigned __int128; + + CompilerUInt128 a = + (CompilerUInt128(numerator.items[little(1)]) << 64) + + numerator.items[little( + 0)]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult) + CompilerUInt128 b = + (CompilerUInt128(denominator.items[little(1)]) << 64) + + denominator.items[little( + 0)]; // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult) + CompilerUInt128 c = a / b; // NOLINT + + integer res; + res.items[little(0)] = c; + res.items[little(1)] = c >> 64; + + CompilerUInt128 remainder = a - b * c; + numerator.items[little(0)] = remainder; + numerator.items[little(1)] = remainder >> 64; + + return res; + } + + if (is_zero(denominator)) + throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, "Division by zero"); + + integer x = 1; + integer quotient = 0; + + while (!operator_greater(denominator, numerator) && + is_zero(operator_amp(shift_right(denominator, Bits2 - 1), 1))) { + x = shift_left(x, 1); + denominator = shift_left(denominator, 1); + } + + while (!is_zero(x)) { + if (!operator_greater(denominator, numerator)) { + numerator = operator_minus(numerator, denominator); + quotient = operator_pipe(quotient, x); + } + + x = shift_right(x, 1); + denominator = shift_right(denominator, 1); + } + + return quotient; + } + + template + constexpr static auto operator_slash(const integer& lhs, const T& rhs) { + if constexpr (should_keep_size()) { + integer numerator = make_positive(lhs); + integer denominator = make_positive(integer(rhs)); + integer quotient = + integer::_impl::divide(numerator, std::move(denominator)); + + if (std::is_same_v && is_negative(rhs) != is_negative(lhs)) + quotient = operator_unary_minus(quotient); + return quotient; + } else { + static_assert(IsWideInteger::value); + return std::common_type_t, + integer>::operator_slash(T(lhs), + rhs); + } + } + + template + constexpr static auto operator_percent(const integer& lhs, const T& rhs) { + if constexpr (should_keep_size()) { + integer remainder = make_positive(lhs); + integer denominator = make_positive(integer(rhs)); + integer::_impl::divide(remainder, std::move(denominator)); + + if (std::is_same_v && is_negative(lhs)) + remainder = operator_unary_minus(remainder); + return remainder; + } else { + static_assert(IsWideInteger::value); + return std::common_type_t, + integer>::operator_percent(T(lhs), + rhs); + } + } + + // ^ + template + constexpr static auto operator_circumflex(const integer& lhs, + const T& rhs) noexcept { + if constexpr (should_keep_size()) { + integer t(rhs); + integer res = lhs; + + for (unsigned i = 0; i < item_count; ++i) res.items[any(i)] ^= t.items[any(i)]; + return res; + } else { + static_assert(IsWideInteger::value); + return T::operator_circumflex(T(lhs), rhs); + } + } + + constexpr static integer from_str(const char* c) { + integer res = 0; + + bool is_neg = std::is_same_v && *c == '-'; + if (is_neg) ++c; + + if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) { // hex + ++c; + ++c; + while (*c) { + if (*c >= '0' && *c <= '9') { + res = multiply(res, 16U); + res = plus(res, *c - '0'); + ++c; + } else if (*c >= 'a' && *c <= 'f') { + res = multiply(res, 16U); + res = plus(res, *c - 'a' + 10U); + ++c; + } else if (*c >= 'A' && + *c <= 'F') { // tolower must be used, but it is not constexpr + res = multiply(res, 16U); + res = plus(res, *c - 'A' + 10U); + ++c; + } else + throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, "Invalid char from"); + } + } else { // dec + while (*c) { + if (*c < '0' || *c > '9') + throw doris::Exception(doris::ErrorCode::INVALID_ARGUMENT, "Invalid char from"); + + res = multiply(res, 10U); + res = plus(res, *c - '0'); + ++c; + } + } + + if (is_neg) res = operator_unary_minus(res); + + return res; + } +}; + +// Members + +template +template +constexpr integer::integer(T rhs) noexcept : items {} { + if constexpr (IsWideInteger::value) + _impl::wide_integer_from_wide_integer(*this, rhs); + else if constexpr (IsTupleLike::value) + _impl::wide_integer_from_tuple_like(*this, rhs); + else if constexpr (std::is_same_v, CityHash_v1_0_2::uint128>) + _impl::wide_integer_from_cityhash_uint128(*this, rhs); + else + _impl::wide_integer_from_builtin(*this, rhs); +} + +template +template +constexpr integer::integer(std::initializer_list il) noexcept : items {} { + if (il.size() == 1) { + if constexpr (IsWideInteger::value) + _impl::wide_integer_from_wide_integer(*this, *il.begin()); + else if constexpr (IsTupleLike::value) + _impl::wide_integer_from_tuple_like(*this, *il.begin()); + else if constexpr (std::is_same_v, CityHash_v1_0_2::uint128>) + _impl::wide_integer_from_cityhash_uint128(*this, *il.begin()); + else + _impl::wide_integer_from_builtin(*this, *il.begin()); + } else if (il.size() == 0) { + _impl::wide_integer_from_builtin(*this, 0); + } else { + auto it = il.begin(); + for (unsigned i = 0; i < _impl::item_count; ++i) { + if (it < il.end()) { + items[_impl::little(i)] = *it; + ++it; + } else + items[_impl::little(i)] = 0; + } + } +} + +template +template +constexpr integer& integer::operator=( + const integer& rhs) noexcept { + _impl::wide_integer_from_wide_integer(*this, rhs); + return *this; +} + +template +template +constexpr integer& integer::operator=(T rhs) noexcept { + if constexpr (IsTupleLike::value) + _impl::wide_integer_from_tuple_like(*this, rhs); + else if constexpr (std::is_same_v, CityHash_v1_0_2::uint128>) + _impl::wide_integer_from_cityhash_uint128(*this, rhs); + else + _impl::wide_integer_from_builtin(*this, rhs); + return *this; +} + +template +template +constexpr integer& integer::operator*=(const T& rhs) { + *this = *this * rhs; + return *this; +} + +template +template +constexpr integer& integer::operator/=(const T& rhs) { + *this = *this / rhs; + return *this; +} + +template +template +constexpr integer& integer::operator+=(const T& rhs) noexcept( + std::is_same_v) { + *this = *this + rhs; + return *this; +} + +template +template +constexpr integer& integer::operator-=(const T& rhs) noexcept( + std::is_same_v) { + *this = *this - rhs; + return *this; +} + +template +template +constexpr integer& integer::operator%=(const T& rhs) { + *this = *this % rhs; + return *this; +} + +template +template +constexpr integer& integer::operator&=(const T& rhs) noexcept { + *this = *this & rhs; + return *this; +} + +template +template +constexpr integer& integer::operator|=(const T& rhs) noexcept { + *this = *this | rhs; + return *this; +} + +template +template +constexpr integer& integer::operator^=(const T& rhs) noexcept { + *this = *this ^ rhs; + return *this; +} + +template +constexpr integer& integer::operator<<=(int n) noexcept { + if (static_cast(n) >= Bits) + *this = 0; + else if (n > 0) + *this = _impl::shift_left(*this, n); + return *this; +} + +template +constexpr integer& integer::operator>>=(int n) noexcept { + if (static_cast(n) >= Bits) { + if (_impl::is_negative(*this)) + *this = -1; + else + *this = 0; + } else if (n > 0) + *this = _impl::shift_right(*this, n); + return *this; +} + +template +constexpr integer& integer::operator++() noexcept( + std::is_same_v) { + *this = _impl::operator_plus(*this, 1); + return *this; +} + +template +constexpr integer integer::operator++(int) noexcept( + std::is_same_v) { + auto tmp = *this; + *this = _impl::operator_plus(*this, 1); + return tmp; +} + +template +constexpr integer& integer::operator--() noexcept( + std::is_same_v) { + *this = _impl::operator_minus(*this, 1); + return *this; +} + +template +constexpr integer integer::operator--(int) noexcept( + std::is_same_v) { + auto tmp = *this; + *this = _impl::operator_minus(*this, 1); + return tmp; +} + +template +constexpr integer::operator bool() const noexcept { + return !_impl::operator_eq(*this, 0); +} + +template +template +constexpr integer::operator T() const noexcept { + static_assert(std::numeric_limits::is_integer); + + /// NOTE: memcpy will suffice, but unfortunately, this function is constexpr. + + using UnsignedT = std::make_unsigned_t; + + UnsignedT res {}; + for (unsigned i = 0; + i < _impl::item_count && i < (sizeof(T) + sizeof(base_type) - 1) / sizeof(base_type); ++i) + res += UnsignedT(items[_impl::little(i)]) + << (sizeof(base_type) * 8 * + i); // NOLINT(clang-analyzer-core.UndefinedBinaryOperatorResult) + + return res; +} + +template +constexpr integer::operator long double() const noexcept { + if (_impl::operator_eq(*this, 0)) return 0; + + integer tmp = *this; + if (_impl::is_negative(*this)) tmp = -tmp; + + long double res = 0; + for (unsigned i = 0; i < _impl::item_count; ++i) { + long double t = res; + res *= static_cast(std::numeric_limits::max()); + res += t; + res += tmp.items[_impl::big(i)]; + } + + if (_impl::is_negative(*this)) res = -res; + + return res; +} + +template +constexpr integer::operator double() const noexcept { + return static_cast(static_cast(*this)); +} + +template +constexpr integer::operator float() const noexcept { + return static_cast(static_cast(*this)); +} + +// Unary operators +template +constexpr integer operator~(const integer& lhs) noexcept { + return integer::_impl::operator_unary_tilda(lhs); +} + +template +constexpr integer operator-(const integer& lhs) noexcept( + std::is_same_v) { + return integer::_impl::operator_unary_minus(lhs); +} + +template +constexpr integer operator+(const integer& lhs) noexcept( + std::is_same_v) { + return lhs; +} + +#define CT(x) \ + std::common_type_t, std::decay_t> { \ + x \ + } + +// Binary operators +template +std::common_type_t, integer> constexpr operator*( + const integer& lhs, const integer& rhs) { + return std::common_type_t, integer>::_impl::operator_star( + lhs, rhs); +} + +template +std::common_type_t constexpr operator*(const Arithmetic& lhs, + const Arithmetic2& rhs) { + return CT(lhs) * CT(rhs); +} + +template +std::common_type_t, integer> constexpr operator/( + const integer& lhs, const integer& rhs) { + return std::common_type_t, + integer>::_impl::operator_slash(lhs, rhs); +} +template +std::common_type_t constexpr operator/(const Arithmetic& lhs, + const Arithmetic2& rhs) { + return CT(lhs) / CT(rhs); +} + +template +std::common_type_t, integer> constexpr operator+( + const integer& lhs, const integer& rhs) { + return std::common_type_t, integer>::_impl::operator_plus( + lhs, rhs); +} +template +std::common_type_t constexpr operator+(const Arithmetic& lhs, + const Arithmetic2& rhs) { + return CT(lhs) + CT(rhs); +} + +template +std::common_type_t, integer> constexpr operator-( + const integer& lhs, const integer& rhs) { + return std::common_type_t, + integer>::_impl::operator_minus(lhs, rhs); +} +template +std::common_type_t constexpr operator-(const Arithmetic& lhs, + const Arithmetic2& rhs) { + return CT(lhs) - CT(rhs); +} + +template +std::common_type_t, integer> constexpr operator%( + const integer& lhs, const integer& rhs) { + return std::common_type_t, + integer>::_impl::operator_percent(lhs, rhs); +} +template +std::common_type_t constexpr operator%(const Integral& lhs, + const Integral2& rhs) { + return CT(lhs) % CT(rhs); +} + +template +std::common_type_t, integer> constexpr operator&( + const integer& lhs, const integer& rhs) { + return std::common_type_t, integer>::_impl::operator_amp( + lhs, rhs); +} +template +std::common_type_t constexpr operator&(const Integral& lhs, + const Integral2& rhs) { + return CT(lhs) & CT(rhs); +} + +template +std::common_type_t, integer> constexpr operator|( + const integer& lhs, const integer& rhs) { + return std::common_type_t, integer>::_impl::operator_pipe( + lhs, rhs); +} +template +std::common_type_t constexpr operator|(const Integral& lhs, + const Integral2& rhs) { + return CT(lhs) | CT(rhs); +} + +template +std::common_type_t, integer> constexpr operator^( + const integer& lhs, const integer& rhs) { + return std::common_type_t, + integer>::_impl::operator_circumflex(lhs, rhs); +} +template +std::common_type_t constexpr operator^(const Integral& lhs, + const Integral2& rhs) { + return CT(lhs) ^ CT(rhs); +} + +template +constexpr integer operator<<(const integer& lhs, int n) noexcept { + if (static_cast(n) >= Bits) return integer(0); + if (n <= 0) return lhs; + return integer::_impl::shift_left(lhs, n); +} +template +constexpr integer operator>>(const integer& lhs, int n) noexcept { + if (static_cast(n) >= Bits) return integer(0); + if (n <= 0) return lhs; + return integer::_impl::shift_right(lhs, n); +} + +template +constexpr bool operator<(const integer& lhs, const integer& rhs) { + return std::common_type_t, integer>::_impl::operator_less( + lhs, rhs); +} +template +constexpr bool operator<(const Arithmetic& lhs, const Arithmetic2& rhs) { + return CT(lhs) < CT(rhs); +} + +template +constexpr bool operator>(const integer& lhs, const integer& rhs) { + return std::common_type_t, + integer>::_impl::operator_greater(lhs, rhs); +} +template +constexpr bool operator>(const Arithmetic& lhs, const Arithmetic2& rhs) { + return CT(lhs) > CT(rhs); +} + +template +constexpr bool operator<=(const integer& lhs, const integer& rhs) { + return std::common_type_t, integer>::_impl::operator_less( + lhs, rhs) || + std::common_type_t, integer>::_impl::operator_eq( + lhs, rhs); +} +template +constexpr bool operator<=(const Arithmetic& lhs, const Arithmetic2& rhs) { + return CT(lhs) <= CT(rhs); +} + +template +constexpr bool operator>=(const integer& lhs, const integer& rhs) { + return std::common_type_t, + integer>::_impl::operator_greater(lhs, rhs) || + std::common_type_t, integer>::_impl::operator_eq( + lhs, rhs); +} +template +constexpr bool operator>=(const Arithmetic& lhs, const Arithmetic2& rhs) { + return CT(lhs) >= CT(rhs); +} + +template +constexpr bool operator==(const integer& lhs, const integer& rhs) { + return std::common_type_t, integer>::_impl::operator_eq( + lhs, rhs); +} +template +constexpr bool operator==(const Arithmetic& lhs, const Arithmetic2& rhs) { + return CT(lhs) == CT(rhs); +} + +template +constexpr bool operator!=(const integer& lhs, const integer& rhs) { + return !std::common_type_t, integer>::_impl::operator_eq( + lhs, rhs); +} +template +constexpr bool operator!=(const Arithmetic& lhs, const Arithmetic2& rhs) { + return CT(lhs) != CT(rhs); +} + +template +constexpr auto operator<=>(const integer& lhs, const integer& rhs) { + return std::strong_ordering::equivalent; +} +template +constexpr auto operator<=>(const Arithmetic& lhs, const Arithmetic2& rhs) { + return std::strong_ordering::equivalent; +} + +#undef CT + +} // namespace wide + +namespace std { + +template +struct hash> { + std::size_t operator()(const wide::integer& lhs) const { + static_assert(Bits % (sizeof(size_t) * 8) == 0); + + const auto* ptr = reinterpret_cast(lhs.items); + unsigned count = Bits / (sizeof(size_t) * 8); + + size_t res = 0; + for (unsigned i = 0; i < count; ++i) res ^= ptr[i]; + return res; + } +}; + +} // namespace std + +// NOLINTEND(*) diff --git a/be/src/vec/core/wide_integer_to_string.h b/be/src/vec/core/wide_integer_to_string.h new file mode 100644 index 00000000000000..d83dee45ce149c --- /dev/null +++ b/be/src/vec/core/wide_integer_to_string.h @@ -0,0 +1,58 @@ +#pragma once + +#include + +#include +#include + +#include "wide_integer.h" + +namespace wide { + +template +inline std::string to_string(const integer& n) { + std::string res; + if (integer::_impl::operator_eq(n, 0U)) return "0"; + + integer t; + bool is_neg = integer::_impl::is_negative(n); + if (is_neg) + t = integer::_impl::operator_unary_minus(n); + else + t = n; + + while (!integer::_impl::operator_eq(t, 0U)) { + res.insert(res.begin(), + '0' + char(integer::_impl::operator_percent(t, 10U))); + t = integer::_impl::operator_slash(t, 10U); + } + + if (is_neg) res.insert(res.begin(), '-'); + return res; +} + +} // namespace wide + +template +std::ostream& operator<<(std::ostream& out, const wide::integer& value) { + return out << to_string(value); +} + +/// See https://fmt.dev/latest/api.html#formatting-user-defined-types +template +struct fmt::formatter> { + constexpr auto parse(format_parse_context& ctx) { + const auto* it = ctx.begin(); + const auto* end = ctx.end(); + + /// Only support {}. + if (it != end && *it != '}') throw format_error("invalid format"); + + return it; + } + + template + auto format(const wide::integer& value, FormatContext& ctx) { + return fmt::format_to(ctx.out(), "{}", to_string(value)); + } +}; diff --git a/be/src/vec/data_types/convert_field_to_type.cpp b/be/src/vec/data_types/convert_field_to_type.cpp index ba49257898051d..b5c4263181eb49 100644 --- a/be/src/vec/data_types/convert_field_to_type.cpp +++ b/be/src/vec/data_types/convert_field_to_type.cpp @@ -82,6 +82,9 @@ class FieldVisitorToStringSimple : public StaticVisitor { [[noreturn]] String operator()(const DecimalField& x) const { LOG(FATAL) << "not implemeted"; } + [[noreturn]] String operator()(const DecimalField& x) const { + LOG(FATAL) << "not implemeted"; + } }; namespace { diff --git a/be/src/vec/data_types/data_type.cpp b/be/src/vec/data_types/data_type.cpp index 8b7a094dcf5b40..48d37b38c397a2 100644 --- a/be/src/vec/data_types/data_type.cpp +++ b/be/src/vec/data_types/data_type.cpp @@ -139,6 +139,8 @@ PGenericType_TypeId IDataType::get_pdata_type(const IDataType* data_type) { return PGenericType::DECIMAL128; case TypeIndex::Decimal128I: return PGenericType::DECIMAL128I; + case TypeIndex::Decimal256: + return PGenericType::DECIMAL256; case TypeIndex::String: return PGenericType::STRING; case TypeIndex::Date: diff --git a/be/src/vec/data_types/data_type.h b/be/src/vec/data_types/data_type.h index 2aee6fdb1e47ab..fdfc0fde82a92f 100644 --- a/be/src/vec/data_types/data_type.h +++ b/be/src/vec/data_types/data_type.h @@ -286,8 +286,10 @@ struct WhichDataType { bool is_decimal64() const { return idx == TypeIndex::Decimal64; } bool is_decimal128() const { return idx == TypeIndex::Decimal128; } bool is_decimal128i() const { return idx == TypeIndex::Decimal128I; } + bool is_decimal256() const { return idx == TypeIndex::Decimal256; } bool is_decimal() const { - return is_decimal32() || is_decimal64() || is_decimal128() || is_decimal128i(); + return is_decimal32() || is_decimal64() || is_decimal128() || is_decimal128i() || + is_decimal256(); } bool is_float32() const { return idx == TypeIndex::Float32; } diff --git a/be/src/vec/data_types/data_type_decimal.cpp b/be/src/vec/data_types/data_type_decimal.cpp index cfc62bfbc2d0bb..f5751a7f1f0161 100644 --- a/be/src/vec/data_types/data_type_decimal.cpp +++ b/be/src/vec/data_types/data_type_decimal.cpp @@ -35,6 +35,7 @@ #include "vec/common/int_exp.h" #include "vec/common/string_buffer.hpp" #include "vec/common/typeid_cast.h" +#include "vec/core/types.h" #include "vec/io/io_helper.h" #include "vec/io/reader_buffer.h" @@ -188,8 +189,10 @@ DataTypePtr create_decimal(UInt64 precision_value, UInt64 scale_value, bool use_ return std::make_shared>(precision_value, scale_value); } else if (precision_value <= max_decimal_precision()) { return std::make_shared>(precision_value, scale_value); + } else if (precision_value <= max_decimal_precision()) { + return std::make_shared>(precision_value, scale_value); } - return std::make_shared>(precision_value, scale_value); + return std::make_shared>(precision_value, scale_value); } template <> @@ -212,10 +215,16 @@ Decimal128I DataTypeDecimal::get_scale_multiplier(UInt32 scale) { return common::exp10_i128(scale); } +template <> +Decimal256 DataTypeDecimal::get_scale_multiplier(UInt32 scale) { + return Decimal256(common::exp10_i256(scale)); +} + /// Explicit template instantiations. template class DataTypeDecimal; template class DataTypeDecimal; template class DataTypeDecimal; template class DataTypeDecimal; +template class DataTypeDecimal; } // namespace doris::vectorized diff --git a/be/src/vec/data_types/data_type_decimal.h b/be/src/vec/data_types/data_type_decimal.h index c704c90365e009..0034655886309f 100644 --- a/be/src/vec/data_types/data_type_decimal.h +++ b/be/src/vec/data_types/data_type_decimal.h @@ -34,6 +34,7 @@ // IWYU pragma: no_include #include "common/compiler_util.h" // IWYU pragma: keep +#include "common/consts.h" #include "common/logging.h" #include "common/status.h" #include "olap/olap_common.h" @@ -74,19 +75,23 @@ constexpr size_t max_decimal_precision() { } template <> constexpr size_t max_decimal_precision() { - return 9; + return BeConsts::MAX_DECIMAL32_PRECISION; } template <> constexpr size_t max_decimal_precision() { - return 18; + return BeConsts::MAX_DECIMAL64_PRECISION; } template <> constexpr size_t max_decimal_precision() { - return 38; + return BeConsts::MAX_DECIMAL128_PRECISION; } template <> constexpr size_t max_decimal_precision() { - return 38; + return BeConsts::MAX_DECIMAL128_PRECISION; +} +template <> +constexpr size_t max_decimal_precision() { + return BeConsts::MAX_DECIMAL256_PRECISION; } DataTypePtr create_decimal(UInt64 precision, UInt64 scale, bool use_v2); @@ -155,6 +160,9 @@ class DataTypeDecimal final : public IDataType { if constexpr (std::is_same_v, TypeId>) { return TYPE_DECIMAL128I; } + // if constexpr (std::is_same_v, TypeId>) { + // return TYPE_DECIMAL256; + // } return TYPE_DECIMALV2; } @@ -168,6 +176,9 @@ class DataTypeDecimal final : public IDataType { if constexpr (std::is_same_v, TypeId>) { return TPrimitiveType::DECIMAL128I; } + // if constexpr (std::is_same_v, TypeId>) { + // return TPrimitiveType::DECIMAL256; + // } LOG(FATAL) << "__builtin_unreachable"; __builtin_unreachable(); } @@ -254,7 +265,7 @@ class DataTypeDecimal final : public IDataType { return x % get_scale_multiplier(); } - T max_whole_value() const { return get_scale_multiplier(max_precision() - scale) - 1; } + T max_whole_value() const { return get_scale_multiplier(max_precision() - scale) - T(1); } bool can_store_whole(T x) const { T max = max_whole_value(); @@ -309,6 +320,7 @@ class DataTypeDecimal final : public IDataType { const UInt32 scale; }; +// TODO template DataTypePtr decimal_result_type(const DataTypeDecimal& tx, const DataTypeDecimal& ty, bool is_multiply, bool is_divide, bool is_plus_minus) { @@ -355,6 +367,9 @@ inline UInt32 get_decimal_scale(const IDataType& data_type, UInt32 default_value if (auto* decimal_type = check_decimal(data_type)) { return decimal_type->get_scale(); } + if (auto* decimal_type = check_decimal(data_type)) { + return decimal_type->get_scale(); + } return default_value; } @@ -370,6 +385,8 @@ template <> inline constexpr bool IsDataTypeDecimal> = true; template <> inline constexpr bool IsDataTypeDecimal> = true; +template <> +inline constexpr bool IsDataTypeDecimal> = true; template constexpr bool IsDataTypeDecimalV2 = false; @@ -381,6 +398,11 @@ constexpr bool IsDataTypeDecimal128I = false; template <> inline constexpr bool IsDataTypeDecimal128I> = true; +template +constexpr bool IsDataTypeDecimal256 = false; +template <> +inline constexpr bool IsDataTypeDecimal256> = true; + template constexpr bool IsDataTypeDecimalOrNumber = IsDataTypeDecimal || IsDataTypeNumber; @@ -392,6 +414,7 @@ ToDataType::FieldType convert_decimals(const typename FromDataType::FieldType& v UInt8* overflow_flag = nullptr) { using FromFieldType = typename FromDataType::FieldType; using ToFieldType = typename ToDataType::FieldType; + // TODO: decimal256 using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) == sizeof(ToFieldType)) && (std::is_same_v || @@ -443,6 +466,7 @@ void convert_decimal_cols( UInt8* overflow_flag = nullptr) { using FromFieldType = typename FromDataType::FieldType; using ToFieldType = typename ToDataType::FieldType; + // TODO: decimal256 using MaxFieldType = std::conditional_t<(sizeof(FromFieldType) == sizeof(ToFieldType)) && (std::is_same_v || diff --git a/be/src/vec/data_types/data_type_factory.cpp b/be/src/vec/data_types/data_type_factory.cpp index 4ab836141b80aa..479b1261de72bf 100644 --- a/be/src/vec/data_types/data_type_factory.cpp +++ b/be/src/vec/data_types/data_type_factory.cpp @@ -302,6 +302,10 @@ DataTypePtr DataTypeFactory::create_data_type(const TypeIndex& type_index, bool nested = std::make_shared>(BeConsts::MAX_DECIMAL128_PRECISION, 0); break; + // case TypeIndex::Decimal256: + // nested = std::make_shared>(BeConsts::MAX_DECIMAL256_PRECISION, + // 0); + // break; case TypeIndex::JSONB: nested = std::make_shared(); break; @@ -479,6 +483,10 @@ DataTypePtr DataTypeFactory::create_data_type(const PColumnMeta& pcolumn) { nested = std::make_shared>(pcolumn.decimal_param().precision(), pcolumn.decimal_param().scale()); break; + // case PGenericType::DECIMAL256: + // nested = std::make_shared>(pcolumn.decimal_param().precision(), + // pcolumn.decimal_param().scale()); + // break; case PGenericType::BITMAP: nested = std::make_shared(); break; diff --git a/be/src/vec/data_types/get_least_supertype.cpp b/be/src/vec/data_types/get_least_supertype.cpp index be9dd5c05c4f1c..a45d5443892add 100644 --- a/be/src/vec/data_types/get_least_supertype.cpp +++ b/be/src/vec/data_types/get_least_supertype.cpp @@ -358,10 +358,12 @@ void get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compati UInt32 have_decimal64 = type_ids.count(TypeIndex::Decimal64); UInt32 have_decimal128 = type_ids.count(TypeIndex::Decimal128); UInt32 have_decimal128i = type_ids.count(TypeIndex::Decimal128I); + UInt32 have_decimal256 = type_ids.count(TypeIndex::Decimal256); - if (have_decimal32 || have_decimal64 || have_decimal128 || have_decimal128i) { + if (have_decimal32 || have_decimal64 || have_decimal128 || have_decimal128i || + have_decimal256) { UInt32 num_supported = - have_decimal32 + have_decimal64 + have_decimal128 + have_decimal128i; + have_decimal32 + have_decimal64 + have_decimal128 + have_decimal128i; // TODO std::vector int_ids = { TypeIndex::Int8, TypeIndex::UInt8, TypeIndex::Int16, TypeIndex::UInt16, @@ -412,6 +414,7 @@ void get_least_supertype(const DataTypes& types, DataTypePtr* type, bool compati doris::ErrorCode::INVALID_ARGUMENT); } + // TODO: decimal256 if (have_decimal128 || min_precision > DataTypeDecimal::max_precision()) { *type = std::make_shared>( DataTypeDecimal::max_precision(), max_scale); diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index e70e5d4d2caf16..5155ff19507809 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -108,6 +108,7 @@ void DataTypeDecimalSerDe::write_column_to_arrow(const IColumn& column, const checkArrowStatus(builder.Append(value), column.get_name(), array_builder->type()->name()); } + // TODO: decimal256 } else if constexpr (std::is_same_v) { std::shared_ptr s_decimal_ptr = std::make_shared(38, col.get_scale()); @@ -241,5 +242,6 @@ template class DataTypeDecimalSerDe; template class DataTypeDecimalSerDe; template class DataTypeDecimalSerDe; template class DataTypeDecimalSerDe; +template class DataTypeDecimalSerDe; } // namespace vectorized } // namespace doris diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h b/be/src/vec/data_types/serde/data_type_decimal_serde.h index d20ff4c4d56420..3adaac4ccf5887 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.h +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h @@ -28,6 +28,7 @@ #include "common/status.h" #include "data_type_serde.h" #include "olap/olap_common.h" +#include "runtime/define_primitive_type.h" #include "util/jsonb_document.h" #include "util/jsonb_writer.h" #include "vec/columns/column.h" @@ -60,6 +61,9 @@ class DataTypeDecimalSerDe : public DataTypeSerDe { if constexpr (std::is_same_v, TypeId>) { return TYPE_DECIMALV2; } + if constexpr (std::is_same_v, TypeId>) { + return TYPE_DECIMAL256; + } LOG(FATAL) << "__builtin_unreachable"; __builtin_unreachable(); } @@ -123,6 +127,8 @@ Status DataTypeDecimalSerDe::write_column_to_pb(const IColumn& column, PValue ptype->set_id(PGenericType::DECIMAL128); } else if constexpr (std::is_same_v) { ptype->set_id(PGenericType::DECIMAL128I); + } else if constexpr (std::is_same_v) { + ptype->set_id(PGenericType::DECIMAL256); } else if constexpr (std::is_same_v>) { ptype->set_id(PGenericType::INT32); } else if constexpr (std::is_same_v>) { @@ -138,6 +144,7 @@ Status DataTypeDecimalSerDe::write_column_to_pb(const IColumn& column, PValue return Status::OK(); } +// TODO: decimal256 template Status DataTypeDecimalSerDe::read_column_from_pb(IColumn& column, const PValues& arg) const { if constexpr (std::is_same_v> || std::is_same_v || @@ -159,6 +166,7 @@ void DataTypeDecimalSerDe::write_one_cell_to_jsonb(const IColumn& column, Jso int row_num) const { StringRef data_ref = column.get_data_at(row_num); result.writeKey(col_id); + // TODO: decimal256 if constexpr (std::is_same_v>) { Decimal128::NativeType val = *reinterpret_cast(data_ref.data); @@ -183,6 +191,7 @@ template void DataTypeDecimalSerDe::read_one_cell_from_jsonb(IColumn& column, const JsonbValue* arg) const { auto& col = reinterpret_cast&>(column); + // TODO: decimal256 if constexpr (std::is_same_v>) { col.insert_value(static_cast(arg)->val()); } else if constexpr (std::is_same_v) { diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 06f41a2edcd769..bea9559f914b10 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -1268,6 +1268,7 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, case TypeIndex::Decimal64: return _decode_decimal_column(col_name, data_column, data_type, cvb, num_values); + // TODO: decimal256 case TypeIndex::Decimal128: return _decode_decimal_column(col_name, data_column, data_type, cvb, num_values); diff --git a/be/src/vec/exec/format/parquet/byte_array_dict_decoder.cpp b/be/src/vec/exec/format/parquet/byte_array_dict_decoder.cpp index 1e09890a9807fd..6f5f36a33a972d 100644 --- a/be/src/vec/exec/format/parquet/byte_array_dict_decoder.cpp +++ b/be/src/vec/exec/format/parquet/byte_array_dict_decoder.cpp @@ -169,6 +169,7 @@ Status ByteArrayDictDecoder::_decode_values(MutableColumnPtr& doris_column, Data return _decode_binary_decimal(doris_column, data_type, select_vector); case TypeIndex::Decimal128I: return _decode_binary_decimal(doris_column, data_type, select_vector); + // TODO: decimal256 default: break; } diff --git a/be/src/vec/exec/format/parquet/byte_array_plain_decoder.cpp b/be/src/vec/exec/format/parquet/byte_array_plain_decoder.cpp index 9a032b540b3757..e91f9f1db94ce2 100644 --- a/be/src/vec/exec/format/parquet/byte_array_plain_decoder.cpp +++ b/be/src/vec/exec/format/parquet/byte_array_plain_decoder.cpp @@ -118,6 +118,7 @@ Status ByteArrayPlainDecoder::_decode_values(MutableColumnPtr& doris_column, Dat return _decode_binary_decimal(doris_column, data_type, select_vector); case TypeIndex::Decimal128I: return _decode_binary_decimal(doris_column, data_type, select_vector); + // TODO: decimal256 default: break; } diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp index 10474148996fff..b99ebab574a68e 100644 --- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp +++ b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp @@ -149,6 +149,7 @@ class FixLengthDictDecoder final : public BaseDictDecoder { select_vector); } break; + // TODO: decimal256 case TypeIndex::String: [[fallthrough]]; case TypeIndex::FixedString: @@ -448,6 +449,7 @@ class FixLengthDictDecoder final : public BaseDictDecoder { select_vector); } break; + // TODO: decimal256 case TypeIndex::String: [[fallthrough]]; case TypeIndex::FixedString: diff --git a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp index e94948ad402528..2078fd5d075204 100644 --- a/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp +++ b/be/src/vec/exec/format/parquet/fix_length_plain_decoder.cpp @@ -173,6 +173,7 @@ Status FixLengthPlainDecoder::_decode_values(MutableColumnPtr& doris_column, Dat select_vector); } break; + // TODO: decimal256 case TypeIndex::String: [[fallthrough]]; case TypeIndex::FixedString: diff --git a/be/src/vec/exec/jni_connector.cpp b/be/src/vec/exec/jni_connector.cpp index bad7f52cc3b547..1af66620e6d01d 100644 --- a/be/src/vec/exec/jni_connector.cpp +++ b/be/src/vec/exec/jni_connector.cpp @@ -280,6 +280,7 @@ Status JniConnector::_fill_column(ColumnPtr& doris_column, DataTypePtr& data_typ data_column, reinterpret_cast(_next_meta_as_ptr()), num_rows); FOR_LOGICAL_NUMERIC_TYPES(DISPATCH) #undef DISPATCH + // TODO: decimal256 case TypeIndex::Decimal128: [[fallthrough]]; case TypeIndex::Decimal128I: @@ -449,6 +450,7 @@ Status JniConnector::generate_meta_info(Block* block, std::unique_ptr& m } FOR_LOGICAL_NUMERIC_TYPES(DISPATCH) #undef DISPATCH + // TODO: decimal256 case TypeIndex::Decimal128: [[fallthrough]]; case TypeIndex::Decimal128I: { diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index 9f3fe2b7ac4828..99304e69376627 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -131,6 +131,9 @@ OlapBlockDataConvertor::create_olap_column_data_convertor(const TabletColumn& co case FieldType::OLAP_FIELD_TYPE_DECIMAL128I: { return std::make_unique>(); } + // case FieldType::OLAP_FIELD_TYPE_DECIMAL256: { + // return std::make_unique>(); + // } case FieldType::OLAP_FIELD_TYPE_JSONB: { return std::make_unique(true); } diff --git a/be/src/vec/runtime/vorc_transformer.cpp b/be/src/vec/runtime/vorc_transformer.cpp index 2acede5d4be29c..0aa905355cda03 100644 --- a/be/src/vec/runtime/vorc_transformer.cpp +++ b/be/src/vec/runtime/vorc_transformer.cpp @@ -594,6 +594,7 @@ Status VOrcTransformer::_write_one_col(const TypeDescriptor& type_descriptor, SET_NUM_ELEMENTS break; } + // TODO: decimal256 case TYPE_DECIMAL128I: { orc::Decimal128VectorBatch* cur_batch = dynamic_cast(orc_col_batch); diff --git a/be/src/vec/runtime/vparquet_transformer.cpp b/be/src/vec/runtime/vparquet_transformer.cpp index 7d1ceed404284b..afcb8d4181408e 100644 --- a/be/src/vec/runtime/vparquet_transformer.cpp +++ b/be/src/vec/runtime/vparquet_transformer.cpp @@ -856,6 +856,7 @@ Status VParquetTransformer::write(const Block& block) { } break; } + // TODO: decimal256 case TYPE_DECIMAL128I: { parquet::RowGroupWriter* rgWriter = get_rg_writer(); parquet::FixedLenByteArrayWriter* col_writer = diff --git a/be/src/vec/sink/vtablet_block_convertor.cpp b/be/src/vec/sink/vtablet_block_convertor.cpp index d05a1a9257af1e..36f256cf6971ec 100644 --- a/be/src/vec/sink/vtablet_block_convertor.cpp +++ b/be/src/vec/sink/vtablet_block_convertor.cpp @@ -329,6 +329,10 @@ Status OlapTableBlockConvertor::_validate_column(RuntimeState* state, const Type CHECK_VALIDATION_FOR_DECIMALV3(vectorized::Decimal128I); break; } + // case TYPE_DECIMAL256: { + // CHECK_VALIDATION_FOR_DECIMALV3(vectorized::Decimal256); + // break; + // } #undef CHECK_VALIDATION_FOR_DECIMALV3 case TYPE_ARRAY: { const auto column_array = diff --git a/be/test/vec/data_types/decimal_test.cpp b/be/test/vec/data_types/decimal_test.cpp new file mode 100644 index 00000000000000..74a65dd2b246ad --- /dev/null +++ b/be/test/vec/data_types/decimal_test.cpp @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include + +#include "gtest/gtest_pred_impl.h" +#include "runtime/type_limit.h" +#include "vec/core/types.h" +namespace doris::vectorized { + +TEST(DecimalTest, Decimal256) { + // 9999999999999999999999999999999999999999999999999999999999999999999999999999 + Decimal256 dec1(type_limit::max()); + auto des_str = dec1.to_string(10); + EXPECT_EQ(des_str, + "999999999999999999999999999999999999999999999999999999999999999999.9999999999"); + des_str = dec1.to_string(0); + EXPECT_EQ(des_str, + "9999999999999999999999999999999999999999999999999999999999999999999999999999"); + des_str = dec1.to_string(76); + EXPECT_EQ(des_str, + "0.9999999999999999999999999999999999999999999999999999999999999999999999999999"); + + auto dec2 = type_limit::min(); + des_str = dec2.to_string(10); + EXPECT_EQ(des_str, + "-999999999999999999999999999999999999999999999999999999999999999999.9999999999"); + des_str = dec2.to_string(0); + EXPECT_EQ(des_str, + "-9999999999999999999999999999999999999999999999999999999999999999999999999999"); + des_str = dec2.to_string(76); + EXPECT_EQ(des_str, + "-0.9999999999999999999999999999999999999999999999999999999999999999999999999999"); + + // plus + Decimal256 dec3 = dec1 + dec2; + des_str = dec3.to_string(10); + EXPECT_EQ(des_str, "0.0000000000"); + des_str = dec3.to_string(0); + EXPECT_EQ(des_str, "0"); + des_str = dec3.to_string(76); + EXPECT_EQ(des_str, + "0.0000000000000000000000000000000000000000000000000000000000000000000000000000"); + + // minus + dec2 = type_limit::max(); + dec3 = dec1 - dec2; + des_str = dec3.to_string(10); + EXPECT_EQ(des_str, "0.0000000000"); + + // multiply + + // divide + dec1 = type_limit::max(); + dec2 = vectorized::Decimal256(10); + dec3 = dec1 / dec2; + des_str = dec3.to_string(1); + EXPECT_EQ(des_str, + "99999999999999999999999999999999999999999999999999999999999999999999999999.9"); + + // overflow +} +} // namespace doris::vectorized \ No newline at end of file diff --git a/gensrc/proto/types.proto b/gensrc/proto/types.proto index 0bc9f46fa18f59..240b68c89d8cd3 100644 --- a/gensrc/proto/types.proto +++ b/gensrc/proto/types.proto @@ -112,6 +112,7 @@ message PGenericType { TIME = 35; AGG_STATE = 36; TIMEV2 = 37; + DECIMAL256 = 38; UNKNOWN = 999; } required TypeId id = 2; diff --git a/gensrc/thrift/Types.thrift b/gensrc/thrift/Types.thrift index baca98b228bc8a..a803e373e258b4 100644 --- a/gensrc/thrift/Types.thrift +++ b/gensrc/thrift/Types.thrift @@ -94,7 +94,8 @@ enum TPrimitiveType { UNSUPPORTED, VARIANT, LAMBDA_FUNCTION, - AGG_STATE + AGG_STATE, + DECIMAL256 } enum TTypeNodeType {