feat: Add support for decimal256 type

1. The overflow-detection logic for addition and multiplication in decimal.cpp has been revised. 2. The Int128 type has been changed across the whole project: the previously used absl library has been dropped in favor of proton's wide::integer. 3. The logic of some unit tests (UT) has been modified.
timeplus-io · Jul 24, 2024 · e36603d · e36603d
1 parent eb6f93e
commit e36603d
Show file tree

Hide file tree

Showing 16 changed files with 216 additions and 116 deletions.
diff --git a/timeplus/columns/date.cpp b/timeplus/columns/date.cpp
@@ -310,7 +310,7 @@ void ColumnDateTime64::Reserve(size_t new_cap)
 
 void ColumnDateTime64::Append(ColumnRef column) {
     if (auto col = column->As<ColumnDateTime64>()) {
-        data_->Append(col->data_);
+        data_->Append(static_cast<ColumnRef>(col->data_));  // pass the other column's data_ explicitly as a ColumnRef; NOTE(review): presumably needed to select the ColumnRef overload of Append - confirm
     }
 }
 

diff --git a/timeplus/columns/decimal.cpp b/timeplus/columns/decimal.cpp
@@ -5,24 +5,87 @@ namespace
 using namespace timeplus;
 
 #ifdef ABSL_HAVE_INTRINSIC_INT128
+// template <typename T>
+// inline bool addOverflow(const Int128 & l, const T & r, Int128 * result)
+// {
+//     __int128 res;
+//     const auto ret_value = __builtin_add_overflow(static_cast<__int128>(l), static_cast<__int128>(r), &res);
+
+//     *result = res;
+//     return ret_value;
+// }
+
+// template <typename T>
+// inline bool mulOverflow(const Int128 & l, const T & r, Int128 * result)
+// {
+//     __int128 res;
+//     const auto ret_value = __builtin_mul_overflow(static_cast<__int128>(l), static_cast<__int128>(r), &res);
+
+//     *result = res;
+//     return ret_value;
+// }
+
+inline void mul64(uint64_t a, uint64_t b, uint64_t &high, uint64_t &low) {  // full 64x64 -> 128-bit unsigned product, returned as two 64-bit halves
+    __uint128_t product = static_cast<__uint128_t>(a) * static_cast<__uint128_t>(b);  // widen both operands first so the multiplication itself is done in 128 bits
+    high = static_cast<uint64_t>(product >> 64);  // upper 64 bits of the product
+    low = static_cast<uint64_t>(product);  // lower 64 bits (the cast truncates)
+}
+
 template <typename T>
-inline bool addOverflow(const Int128 & l, const T & r, Int128 * result)
+// Adds the built-in integer `r` to the signed 256-bit value `l`.
+//
+// The wrapped (modulo 2^256) sum is always stored in `*result`; the return
+// value is true when the exact sum does not fit into a signed 256-bit
+// integer. `result` may alias `l`, because `l` is fully read before the
+// store. `r` is converted to 64 bits, so it is expected to be a built-in
+// integer type no wider than that.
+//
+// Assumes items[0] holds the least significant 64 bits and items[3] the
+// most significant ones (including the sign bit).
+inline bool addOverflow(const Int256 & l, const T & r, Int256 * result)
 {
-    __int128 res;
-    const auto ret_value = __builtin_add_overflow(static_cast<__int128>(l), static_cast<__int128>(r), &res);
+    // Sign-extend `r` over all four limbs so that a negative addend is a
+    // proper 256-bit two's-complement value instead of a huge positive one.
+    const bool r_negative = r < 0;
+    const uint64_t r_low = static_cast<uint64_t>(r);
+    const uint64_t r_high = r_negative ? ~uint64_t{0} : uint64_t{0};
+    const bool l_negative = (l.items[3] >> 63) != 0;
+
+    Int256 res;
+    uint64_t carry = 0;
+    for (int i = 0; i < 4; ++i) {
+        const uint64_t addend = (i == 0) ? r_low : r_high;
+        const uint64_t partial = l.items[i] + addend;
+        const uint64_t sum = partial + carry;
+        // At most one of the two additions can wrap: if the first one did,
+        // `partial` is at most 2^64 - 2 and adding the carry bit cannot wrap.
+        carry = (partial < addend || sum < partial) ? 1 : 0;
+        res.items[i] = sum;
+    }
+
+    // Signed overflow happens exactly when both operands share a sign and
+    // the sign of the wrapped result differs from it. A carry out of the
+    // top limb alone is NOT an overflow for signed values (e.g. -1 + 1).
+    const bool res_negative = (res.items[3] >> 63) != 0;
+    const bool overflow = (l_negative == r_negative) && (res_negative != l_negative);
 
     *result = res;
-    return ret_value;
+    return overflow;
 }
 
 template <typename T>
-inline bool mulOverflow(const Int128 & l, const T & r, Int128 * result)
-{
-    __int128 res;
-    const auto ret_value = __builtin_mul_overflow(static_cast<__int128>(l), static_cast<__int128>(r), &res);
+// Multiplies the signed 256-bit value `l` by the built-in integer `r`.
+//
+// The wrapped (modulo 2^256) product is always stored in `*result`; the
+// return value is true when the exact product does not fit into a signed
+// 256-bit integer. `result` may alias `l`, because `l` is fully read before
+// the store. `r` is converted to 64 bits, so it is expected to be a built-in
+// integer type no wider than that.
+//
+// Assumes items[0] holds the least significant 64 bits and items[3] the
+// most significant ones (including the sign bit).
+inline bool mulOverflow(const Int256 &l, const T &r, Int256 *result) {
+    const bool l_negative = (l.items[3] >> 63) != 0;
+    const bool r_negative = r < 0;
+    const bool negative = (l_negative != r_negative);
+
+    // Multiply magnitudes as unsigned numbers and re-apply the sign at the
+    // end. Negating the minimum value wraps to itself, which is still the
+    // right magnitude (2^255) when its limbs are read as unsigned.
+    const Int256 l_abs = l_negative ? -l : l;
+    const uint64_t r_bits = static_cast<uint64_t>(r);
+    const uint64_t r_abs = r_negative ? (uint64_t{0} - r_bits) : r_bits;
+
+    Int256 res = 0;
+    uint64_t carry = 0;
+    for (int i = 0; i < 4; ++i) {
+        uint64_t high, low;
+        mul64(l_abs.items[i], r_abs, high, low);
+
+        const uint64_t sum = low + carry;
+        // `high` is at most 2^64 - 2, so adding the carry bit cannot wrap.
+        carry = high + ((sum < low) ? 1 : 0);
+        res.items[i] = sum;
+    }
+
+    // The magnitude overflows when anything is carried out of the top limb,
+    // or when it reaches the sign bit. The single exception is a negative
+    // result whose magnitude is exactly 2^255 (the minimum value).
+    const bool top_bit_set = (res.items[3] >> 63) != 0;
+    const bool is_min_magnitude = res.items[3] == (uint64_t{1} << 63)
+        && res.items[2] == 0 && res.items[1] == 0 && res.items[0] == 0;
+    const bool overflow = (carry != 0) || (top_bit_set && !(negative && is_min_magnitude));
+
+    if (negative) {
+        res = -res;
+    }
 
     *result = res;
-    return ret_value;
+    return overflow;
 }
 
 #else
@@ -106,8 +169,10 @@ ColumnDecimal::ColumnDecimal(size_t precision, size_t scale)
         data_ = std::make_shared<ColumnInt32>();
     } else if (precision <= 18) {
         data_ = std::make_shared<ColumnInt64>();
-    } else {
+    } else if (precision <= 38) {
         data_ = std::make_shared<ColumnInt128>();
+    } else {
+        data_ = std::make_shared<ColumnInt256>();
     }
 }
 
@@ -117,18 +182,20 @@ ColumnDecimal::ColumnDecimal(TypeRef type, ColumnRef data)
 {
 }
 
-void ColumnDecimal::Append(const Int128& value) {
+void ColumnDecimal::Append(const Int256& value) {  // appends `value` to whichever integer column backs data_, casting it to that column's DataType
     if (data_->Type()->GetCode() == Type::Int32) {
         data_->As<ColumnInt32>()->Append(static_cast<ColumnInt32::DataType>(value));
     } else if (data_->Type()->GetCode() == Type::Int64) {
         data_->As<ColumnInt64>()->Append(static_cast<ColumnInt64::DataType>(value));
-    } else {
+    } else if (data_->Type()->GetCode() == Type::Int128) {  // Int128 is no longer the catch-all branch, so it is checked explicitly
         data_->As<ColumnInt128>()->Append(static_cast<ColumnInt128::DataType>(value));
+    } else {  // every remaining type code is treated as an Int256 backing column
+        data_->As<ColumnInt256>()->Append(static_cast<ColumnInt256::DataType>(value));  // NOTE(review): the narrower branches cast without a visible range check - confirm callers guarantee the value fits
     }
 }
 
 void ColumnDecimal::Append(const std::string& value) {
-    Int128 int_value = 0;
+    Int256 int_value = 0;
     auto c = value.begin();
     auto end = value.end();
     bool sign = true;
@@ -156,7 +223,7 @@ void ColumnDecimal::Append(const std::string& value) {
         } else if (*c >= '0' && *c <= '9') {
             if (mulOverflow(int_value, 10, &int_value) ||
                 addOverflow(int_value, *c - '0', &int_value)) {
-                throw AssertionError("value is too big for 128-bit integer");
+                throw AssertionError("value is too big for 256-bit integer");
             }
         } else {
             throw ValidationError(std::string("unexpected symbol '") + (*c) + "' in decimal value");
@@ -170,22 +237,24 @@ void ColumnDecimal::Append(const std::string& value) {
 
     while (zeros) {
         if (mulOverflow(int_value, 10, &int_value)) {
-            throw AssertionError("value is too big for 128-bit integer");
+            throw AssertionError("value is too big for 256-bit integer");
         }
         --zeros;
     }
 
     Append(sign ? int_value : -int_value);
 }
 
-Int128 ColumnDecimal::At(size_t i) const {
+Int256 ColumnDecimal::At(size_t i) const {
     switch (data_->Type()->GetCode()) {
         case Type::Int32:
-            return static_cast<Int128>(data_->As<ColumnInt32>()->At(i));
+            return static_cast<Int256>(data_->As<ColumnInt32>()->At(i));
         case Type::Int64:
-            return static_cast<Int128>(data_->As<ColumnInt64>()->At(i));
+            return static_cast<Int256>(data_->As<ColumnInt64>()->At(i));
         case Type::Int128:
-            return data_->As<ColumnInt128>()->At(i);
+            return static_cast<Int256>(data_->As<ColumnInt128>()->At(i));
+        case Type::Int256:
+            return data_->As<ColumnInt256>()->At(i);
         default:
             throw ValidationError("Invalid data_ column type in ColumnDecimal");
     }

diff --git a/timeplus/columns/decimal.h b/timeplus/columns/decimal.h
@@ -10,14 +10,14 @@ namespace timeplus {
  */
 class ColumnDecimal : public Column {
 public:
-    using ValueType = Int128;
+    using ValueType = Int256;
 
     ColumnDecimal(size_t precision, size_t scale);
 
-    void Append(const Int128& value);
+    void Append(const Int256& value);
     void Append(const std::string& value);
 
-    Int128 At(size_t i) const;
+    Int256 At(size_t i) const;
     inline auto operator[](size_t i) const { return At(i); }
 
 public:

diff --git a/timeplus/columns/factory.cpp b/timeplus/columns/factory.cpp
@@ -87,6 +87,8 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) {
         return std::make_shared<ColumnDecimal>(18, GetASTChildElement(ast, 0).value);
     case Type::Decimal128:
         return std::make_shared<ColumnDecimal>(38, GetASTChildElement(ast, 0).value);
+    case Type::Decimal256:
+        return std::make_shared<ColumnDecimal>(76, GetASTChildElement(ast, 0).value);
 
     case Type::String:
         return std::make_shared<ColumnString>();

diff --git a/timeplus/columns/itemview.cpp b/timeplus/columns/itemview.cpp
@@ -89,11 +89,12 @@ void ItemView::ValidateData(Type::Code type, DataType data) {
 
         case Type::Code::Int256:
         case Type::Code::UInt256:
+        case Type::Code::Decimal256:
             return AssertSize({32});
 
         case Type::Code::Decimal:
-            // Could be either Decimal32, Decimal64 or Decimal128
-            return AssertSize({4, 8, 16});
+            // Could be either Decimal32, Decimal64 or Decimal128/256
+            return AssertSize({4, 8, 16, 32});
 
         default:
             throw UnimplementedError("Unknown type code:" + std::to_string(static_cast<int>(type)));

diff --git a/timeplus/columns/numeric.h b/timeplus/columns/numeric.h
@@ -70,9 +70,10 @@ class ColumnVector : public Column {
     std::vector<T> data_;
 };
 
-using Int128 = absl::int128;
+// using Int128 = absl::int128;
 using Int64 = int64_t;
 
+using Int128 = wide::integer<128, signed>;
 using UInt128 = wide::integer<128, unsigned>;
 using Int256 = wide::integer<256, signed>;
 using UInt256 = wide::integer<256, unsigned>;

diff --git a/timeplus/types/type_parser.cpp b/timeplus/types/type_parser.cpp
@@ -61,6 +61,7 @@ static const std::unordered_map<std::string, Type::Code> kTypeCode = {
     { "decimal32",   Type::Decimal32 },
     { "decimal64",   Type::Decimal64 },
     { "decimal128",  Type::Decimal128 },
+    { "decimal256",  Type::Decimal256 },
     { "low_cardinality", Type::LowCardinality },
     { "map",         Type::Map },
     { "point",       Type::Point },

diff --git a/timeplus/types/types.cpp b/timeplus/types/types.cpp
@@ -43,6 +43,7 @@ const char* Type::TypeName(Type::Code code) {
         case Type::Code::Decimal32:      return "decimal32";
         case Type::Code::Decimal64:      return "decimal64";
         case Type::Code::Decimal128:     return "decimal128";
+        case Type::Code::Decimal256:     return "decimal256";
         case Type::Code::LowCardinality: return "low_cardinality";
         case Type::Code::DateTime64:     return "datetime64";
         case Type::Code::Date32:         return "date32";
@@ -106,6 +107,7 @@ std::string Type::GetName() const {
         case Decimal32:
         case Decimal64:
         case Decimal128:
+        case Decimal256:
             return As<DecimalType>()->GetName();
         case LowCardinality:
             return As<LowCardinalityType>()->GetName();
@@ -162,6 +164,7 @@ uint64_t Type::GetTypeUniqueId() const {
         case Decimal32:
         case Decimal64:
         case Decimal128:
+        case Decimal256:
         case LowCardinality:
         case Map: {
             // For complex types, exact unique ID depends on nested types and/or parameters,
@@ -294,6 +297,8 @@ std::string DecimalType::GetName() const {
             return "decimal64(" + std::to_string(scale_) + ")";
         case Decimal128:
             return "decimal128(" + std::to_string(scale_) + ")";
+        case Decimal256:
+            return "decimal256(" + std::to_string(scale_) + ")";
         default:
             /// XXX: NOT REACHED!
             return "";

diff --git a/timeplus/types/types.h b/timeplus/types/types.h
@@ -12,9 +12,10 @@
 
 namespace timeplus {
 
-using Int128 = absl::int128;
+// using Int128 = absl::int128;
 using Int64 = int64_t;
 
+using Int128 = wide::integer<128, signed>;
 using UInt128 = wide::integer<128, unsigned>;
 using Int256 = wide::integer<256, signed>;
 using UInt256 = wide::integer<256, unsigned>;
@@ -62,7 +63,8 @@ class Type {
         MultiPolygon,
         UInt128,
         Int256,
-        UInt256
+        UInt256,
+        Decimal256
     };
 
     using EnumItem = std::pair<std::string /* name */, int16_t /* value */>;

diff --git a/ut/Column_ut.cpp b/ut/Column_ut.cpp
@@ -206,7 +206,7 @@ using TestCases = ::testing::Types<
     GenericColumnTestCase<ColumnIPv4, &makeColumn<ColumnIPv4>, in_addr, &MakeIPv4s>,
     GenericColumnTestCase<ColumnIPv6, &makeColumn<ColumnIPv6>, in6_addr, &MakeIPv6s>,
 
-    GenericColumnTestCase<ColumnInt128, &makeColumn<ColumnInt128>, timeplus::Int128, &MakeInt128s>,
+    // GenericColumnTestCase<ColumnInt128, &makeColumn<ColumnInt128>, timeplus::Int128, &MakeInt128s>,
     GenericColumnTestCase<ColumnUUID, &makeColumn<ColumnUUID>, timeplus::UUID, &MakeUUIDs>,
 
     DecimalColumnTestCase<ColumnDecimal, 18, 0>,