From a453a6a23ab1b6ceaca561bd88110df9940b221a Mon Sep 17 00:00:00 2001 From: dovgopoly <69435717+dovgopoly@users.noreply.github.com> Date: Wed, 11 Dec 2024 13:39:17 +0200 Subject: [PATCH] Feat/affine (#123) * init affine * fixed & opt * 2.7.14 * rm debug * opt & fixed warning * fix comments * removed unused functions * added quad * opt 8.1kk * fixed comment & rm debug * remove unused functions * rollback version --------- Co-authored-by: Artem Chystiakov --- contracts/libs/crypto/ECDSA384.sol | 467 ++++++++++++----------------- 1 file changed, 190 insertions(+), 277 deletions(-) diff --git a/contracts/libs/crypto/ECDSA384.sol b/contracts/libs/crypto/ECDSA384.sol index 940718ff..7fa6e9fb 100644 --- a/contracts/libs/crypto/ECDSA384.sol +++ b/contracts/libs/crypto/ECDSA384.sol @@ -7,9 +7,12 @@ import {MemoryUtils} from "../utils/MemoryUtils.sol"; * @notice Cryptography module * * This library provides functionality for ECDSA verification over any 384-bit curve. Currently, - * this is the most efficient implementation out there, consuming ~9 million gas per call. + * this is the most efficient implementation out there, consuming ~8.1 million gas per call. * - * The approach is Strauss-Shamir double scalar multiplication with 4 bits of precompute + projective points. + * The approach is Strauss-Shamir double scalar multiplication with 4 bits of precompute + affine coordinates. + * For reference, a naive implementation uses ~400 billion gas, which is roughly 48000 times more expensive. + * + * We also tried using projective coordinates; however, the gas consumption rose to ~9 million gas. */ library ECDSA384 { using MemoryUtils for *; @@ -117,7 +120,7 @@ library ECDSA384 { uint256 three = U384.init(3); /// We use 4-bit masks where the first 2 bits refer to `scalar1` and the last 2 bits refer to `scalar2`. 
- uint256[3][16] memory points_ = _precomputePointsTable( + uint256[2][16] memory points_ = _precomputePointsTable( call, params_.p, three, @@ -128,7 +131,7 @@ library ECDSA384 { inputs_.y ); - (scalar1, , scalar2) = _doubleScalarMultiplication( + (scalar1, ) = _doubleScalarMultiplication( call, params_.p, three, @@ -139,7 +142,7 @@ library ECDSA384 { ); } - return U384.eq(U384.moddiv(call, scalar1, scalar2, params_.p), inputs_.r); + return U384.eq(scalar1, inputs_.r); } } @@ -182,10 +185,10 @@ library ECDSA384 { uint256 p, uint256 three, uint256 a, - uint256[3][16] memory points, + uint256[2][16] memory points, uint256 scalar1, uint256 scalar2 - ) private view returns (uint256 x, uint256 y, uint256 z) { + ) private view returns (uint256 x, uint256 y) { unchecked { uint256 mask_; uint256 scalar1Bits_; @@ -196,33 +199,15 @@ library ECDSA384 { scalar2Bits_ := mload(scalar2) } - x = U384.init(0); - y = U384.init(0); - z = U384.init(1); - for (uint256 word = 2; word <= 184; word += 2) { - (x, y, z) = _twiceProj(call, p, three, a, x, y, z); - (x, y, z) = _twiceProj(call, p, three, a, x, y, z); + (x, y) = _qaudAffine(call, p, three, a, x, y); mask_ = (((scalar1Bits_ >> (184 - word)) & 0x03) << 2) | ((scalar2Bits_ >> (184 - word)) & 0x03); if (mask_ != 0) { - uint256[3] memory maskedPoints_ = points[mask_]; - - (x, y, z) = _addProj( - call, - p, - three, - a, - maskedPoints_[0], - maskedPoints_[1], - maskedPoints_[2], - x, - y, - z - ); + (x, y) = _addAffine(call, p, points[mask_][0], points[mask_][1], x, y); } } @@ -232,177 +217,150 @@ library ECDSA384 { } for (uint256 word = 2; word <= 256; word += 2) { - (x, y, z) = _twiceProj(call, p, three, a, x, y, z); - (x, y, z) = _twiceProj(call, p, three, a, x, y, z); + (x, y) = _qaudAffine(call, p, three, a, x, y); mask_ = (((scalar1Bits_ >> (256 - word)) & 0x03) << 2) | ((scalar2Bits_ >> (256 - word)) & 0x03); if (mask_ != 0) { - uint256[3] memory maskedPoints_ = points[mask_]; - - (x, y, z) = _addProj( - call, - p, - three, - 
a, - maskedPoints_[0], - maskedPoints_[1], - maskedPoints_[2], - x, - y, - z - ); + (x, y) = _addAffine(call, p, points[mask_][0], points[mask_][1], x, y); } } - - return (x, y, z); } } /** - * @dev Double an elliptic curve point in projective coordinates. See - * https://www.nayuki.io/page/elliptic-curve-point-addition-in-projective-coordinates + * @dev Double an elliptic curve point in affine coordinates. */ - function _twiceProj( + function _twiceAffine( uint256 call, uint256 p, uint256 three, uint256 a, - uint256 x0, - uint256 y0, - uint256 z0 - ) private view returns (uint256 x1, uint256 y1, uint256 z1) { + uint256 x1, + uint256 y1 + ) private view returns (uint256 x2, uint256 y2) { unchecked { - if (U384.eqInteger(x0, 0) && U384.eqInteger(y0, 0)) { - return (U384.init(0), U384.init(0), U384.init(1)); // zero proj + if (x1 == 0) { + return (0, 0); } - uint256 u = U384.modmul(call, y0, z0); - U384.modshl1Assign(u, p); - - x1 = U384.modmul(call, u, x0); - U384.modmulAssign(call, x1, y0); - U384.modshl1Assign(x1, p); - - x0 = U384.modexp(call, x0, 2); - - y1 = U384.modmul(call, x0, three); - - z0 = U384.modexp(call, z0, 2); - U384.modmulAssign(call, z0, a); - U384.modaddAssign(y1, z0, p); - - z1 = U384.modexp(call, y1, 2); - U384.modshl1AssignTo(x0, x1, p); - - uint256 diff = U384.sub(p, x0); - U384.modaddAssign(z1, diff, p); - - U384.subAssignTo(diff, p, z1); - U384.modaddAssignTo(x0, x1, diff, p); - U384.modmulAssign(call, x0, y1); + if (U384.eqInteger(y1, 0)) { + return (0, 0); + } - y0 = U384.modmul(call, y0, u); - U384.modexpAssign(call, y0, 2); - U384.modshl1Assign(y0, p); + uint256 m1 = U384.modexp(call, x1, 2); + U384.modmulAssign(call, m1, three); + U384.modaddAssign(m1, a, p); - U384.subAssignTo(diff, p, y0); - U384.modaddAssignTo(y1, x0, diff, p); + uint256 m2 = U384.modshl1(y1, p); + U384.moddivAssign(call, m1, m2); - U384.modmulAssignTo(call, x1, u, z1); + x2 = U384.modexp(call, m1, 2); + U384.modsubAssign(x2, x1, p); + U384.modsubAssign(x2, x1, p); 
- U384.modexpAssignTo(call, z1, u, 2); - U384.modmulAssign(call, z1, u); + y2 = U384.modsub(x1, x2, p); + U384.modmulAssign(call, y2, m1); + U384.modsubAssign(y2, y1, p); } } /** - * @dev Add two elliptic curve points in projective coordinates. See - * https://www.nayuki.io/page/elliptic-curve-point-addition-in-projective-coordinates + * @dev Quadruples an elliptic curve point (computes 4P via two consecutive doublings) in affine coordinates. */ - function _addProj( + function _qaudAffine( uint256 call, uint256 p, uint256 three, uint256 a, - uint256 x0, - uint256 y0, - uint256 z0, uint256 x1, - uint256 y1, - uint256 z1 - ) private view returns (uint256 x2, uint256 y2, uint256 z2) { + uint256 y1 + ) private view returns (uint256 x2, uint256 y2) { unchecked { - if (U384.eqInteger(x0, 0) && U384.eqInteger(y0, 0)) { - return (x1.copy(), y1.copy(), z1.copy()); - } else if (U384.eqInteger(x1, 0) && U384.eqInteger(y1, 0)) { - return (x0.copy(), y0.copy(), z0.copy()); + if (x1 == 0) { + return (0, 0); } - x2 = U384.modmul(call, y0, z1); - y2 = U384.modmul(call, y1, z0); - z2 = U384.modmul(call, x0, z1); - y1 = U384.modmul(call, x1, z0); + if (U384.eqInteger(y1, 0)) { + return (0, 0); + } - if (U384.eq(z2, y1)) { - if (U384.eq(x2, y2)) { - return _twiceProj(call, p, three, a, x0, y0, z0); - } else { - return (U384.init(0), U384.init(0), U384.init(1)); // zero proj - } + uint256 m1 = U384.modexp(call, x1, 2); + U384.modmulAssign(call, m1, three); + U384.modaddAssign(m1, a, p); + + uint256 m2 = U384.modshl1(y1, p); + U384.moddivAssign(call, m1, m2); + + x2 = U384.modexp(call, m1, 2); + U384.modsubAssign(x2, x1, p); + U384.modsubAssign(x2, x1, p); + + y2 = U384.modsub(x1, x2, p); + U384.modmulAssign(call, y2, m1); + U384.modsubAssign(y2, y1, p); + + if (U384.eqInteger(y2, 0)) { + return (0, 0); } - a = U384.modmul(call, z0, z1); + U384.modexpAssignTo(call, m1, x2, 2); + U384.modmulAssign(call, m1, three); + U384.modaddAssign(m1, a, p); + + U384.modshl1AssignTo(m2, y2, p); + U384.moddivAssign(call, m1, m2); + + 
U384.modexpAssignTo(call, x1, m1, 2); + U384.modsubAssign(x1, x2, p); + U384.modsubAssign(x1, x2, p); + + U384.modsubAssignTo(y1, x2, x1, p); + U384.modmulAssign(call, y1, m1); + U384.modsubAssign(y1, y2, p); - return _addProj2(call, a, z2, p, y1, y2, x2); + return (x1, y1); } } /** - * @dev Helper function that splits addProj to avoid too many local variables. + * @dev Add two elliptic curve points in affine coordinates. */ - function _addProj2( + function _addAffine( uint256 call, - uint256 v, - uint256 u0, uint256 p, - uint256 u1, - uint256 t1, - uint256 t0 - ) private view returns (uint256 x2, uint256 y2, uint256 z2) { + uint256 x1, + uint256 y1, + uint256 x2, + uint256 y2 + ) private view returns (uint256 x3, uint256 y3) { unchecked { - uint256 diff = U384.sub(p, t1); - y2 = U384.modadd(t0, diff, p); - - U384.subAssignTo(diff, p, u1); - x2 = U384.modadd(u0, diff, p); - uint256 u2 = U384.modexp(call, x2, 2); - - z2 = U384.modexp(call, y2, 2); - - U384.modmulAssign(call, z2, v); - u1 = U384.modadd(u1, u0, p); - U384.modmulAssign(call, u1, u2); - U384.subAssignTo(diff, p, u1); - U384.modaddAssign(z2, diff, p); + if (x1 == 0 || x2 == 0) { + if (x1 == 0 && x2 == 0) { + return (0, 0); + } - uint256 u3 = U384.modmul(call, u2, x2); + return x1 == 0 ? 
(x2.copy(), y2.copy()) : (x1.copy(), y1.copy()); + } - U384.modmulAssign(call, x2, z2); + if (U384.eq(x1, x2)) { + return (0, 0); + } - u0 = U384.modmul(call, u0, u2); + uint256 m1 = U384.modsub(y1, y2, p); + uint256 m2 = U384.modsub(x1, x2, p); - U384.subAssignTo(diff, p, z2); - U384.modaddAssign(u0, diff, p); - U384.modmulAssign(call, y2, u0); - t0 = U384.modmul(call, t0, u3); + U384.moddivAssign(call, m1, m2); - U384.subAssignTo(diff, p, t0); - U384.modaddAssign(y2, diff, p); + x3 = U384.modexp(call, m1, 2); + U384.modsubAssign(x3, x1, p); + U384.modsubAssign(x3, x2, p); - U384.modmulAssignTo(call, z2, u3, v); + y3 = U384.modsub(x1, x3, p); + U384.modmulAssign(call, y3, m1); + U384.modsubAssign(y3, y1, p); } } @@ -415,181 +373,127 @@ library ECDSA384 { uint256 gy, uint256 hx, uint256 hy - ) private view returns (uint256[3][16] memory points_) { + ) private view returns (uint256[2][16] memory points_) { /// 0b0100: 1G + 0H - (points_[0x04][0], points_[0x04][1], points_[0x04][2]) = ( - gx.copy(), - gy.copy(), - U384.init(1) - ); + (points_[0x04][0], points_[0x04][1]) = (gx.copy(), gy.copy()); /// 0b1000: 2G + 0H - (points_[0x08][0], points_[0x08][1], points_[0x08][2]) = _twiceProj( + (points_[0x08][0], points_[0x08][1]) = _twiceAffine( call, p, three, a, points_[0x04][0], - points_[0x04][1], - points_[0x04][2] + points_[0x04][1] ); /// 0b1100: 3G + 0H - (points_[0x0C][0], points_[0x0C][1], points_[0x0C][2]) = _addProj( + (points_[0x0C][0], points_[0x0C][1]) = _addAffine( call, p, - three, - a, points_[0x04][0], points_[0x04][1], - points_[0x04][2], points_[0x08][0], - points_[0x08][1], - points_[0x08][2] + points_[0x08][1] ); /// 0b0001: 0G + 1H - (points_[0x01][0], points_[0x01][1], points_[0x01][2]) = ( - hx.copy(), - hy.copy(), - U384.init(1) - ); + (points_[0x01][0], points_[0x01][1]) = (hx.copy(), hy.copy()); /// 0b0010: 0G + 2H - (points_[0x02][0], points_[0x02][1], points_[0x02][2]) = _twiceProj( + (points_[0x02][0], points_[0x02][1]) = _twiceAffine( call, 
p, three, a, points_[0x01][0], - points_[0x01][1], - points_[0x01][2] + points_[0x01][1] ); /// 0b0011: 0G + 3H - (points_[0x03][0], points_[0x03][1], points_[0x03][2]) = _addProj( + (points_[0x03][0], points_[0x03][1]) = _addAffine( call, p, - three, - a, points_[0x01][0], points_[0x01][1], - points_[0x01][2], points_[0x02][0], - points_[0x02][1], - points_[0x02][2] + points_[0x02][1] ); /// 0b0101: 1G + 1H - (points_[0x05][0], points_[0x05][1], points_[0x05][2]) = _addProj( + (points_[0x05][0], points_[0x05][1]) = _addAffine( call, p, - three, - a, points_[0x04][0], points_[0x04][1], - points_[0x04][2], points_[0x01][0], - points_[0x01][1], - points_[0x01][2] + points_[0x01][1] ); /// 0b0110: 1G + 2H - (points_[0x06][0], points_[0x06][1], points_[0x06][2]) = _addProj( + (points_[0x06][0], points_[0x06][1]) = _addAffine( call, p, - three, - a, points_[0x04][0], points_[0x04][1], - points_[0x04][2], points_[0x02][0], - points_[0x02][1], - points_[0x02][2] + points_[0x02][1] ); /// 0b0111: 1G + 3H - (points_[0x07][0], points_[0x07][1], points_[0x07][2]) = _addProj( + (points_[0x07][0], points_[0x07][1]) = _addAffine( call, p, - three, - a, points_[0x04][0], points_[0x04][1], - points_[0x04][2], points_[0x03][0], - points_[0x03][1], - points_[0x03][2] + points_[0x03][1] ); /// 0b1001: 2G + 1H - (points_[0x09][0], points_[0x09][1], points_[0x09][2]) = _addProj( + (points_[0x09][0], points_[0x09][1]) = _addAffine( call, p, - three, - a, points_[0x08][0], points_[0x08][1], - points_[0x08][2], points_[0x01][0], - points_[0x01][1], - points_[0x01][2] + points_[0x01][1] ); /// 0b1010: 2G + 2H - (points_[0x0A][0], points_[0x0A][1], points_[0x0A][2]) = _addProj( + (points_[0x0A][0], points_[0x0A][1]) = _addAffine( call, p, - three, - a, points_[0x08][0], points_[0x08][1], - points_[0x08][2], points_[0x02][0], - points_[0x02][1], - points_[0x02][2] + points_[0x02][1] ); /// 0b1011: 2G + 3H - (points_[0x0B][0], points_[0x0B][1], points_[0x0B][2]) = _addProj( + 
(points_[0x0B][0], points_[0x0B][1]) = _addAffine( call, p, - three, - a, points_[0x08][0], points_[0x08][1], - points_[0x08][2], points_[0x03][0], - points_[0x03][1], - points_[0x03][2] + points_[0x03][1] ); /// 0b1101: 3G + 1H - (points_[0x0D][0], points_[0x0D][1], points_[0x0D][2]) = _addProj( + (points_[0x0D][0], points_[0x0D][1]) = _addAffine( call, p, - three, - a, points_[0x0C][0], points_[0x0C][1], - points_[0x0C][2], points_[0x01][0], - points_[0x01][1], - points_[0x01][2] + points_[0x01][1] ); /// 0b1110: 3G + 2H - (points_[0x0E][0], points_[0x0E][1], points_[0x0E][2]) = _addProj( + (points_[0x0E][0], points_[0x0E][1]) = _addAffine( call, p, - three, - a, points_[0x0C][0], points_[0x0C][1], - points_[0x0C][2], points_[0x02][0], - points_[0x02][1], - points_[0x02][2] + points_[0x02][1] ); /// 0b1111: 3G + 3H - (points_[0x0F][0], points_[0x0F][1], points_[0x0F][2]) = _addProj( + (points_[0x0F][0], points_[0x0F][1]) = _addAffine( call, p, - three, - a, points_[0x0C][0], points_[0x0C][1], - points_[0x0C][2], points_[0x03][0], - points_[0x03][1], - points_[0x03][2] + points_[0x03][1] ); } } @@ -601,12 +505,12 @@ library ECDSA384 { */ library U384 { uint256 private constant SHORT_ALLOCATION = 64; - uint256 private constant LONG_ALLOCATION = 96; - uint256 private constant CALL_ALLOCATION = 3 * 288; + uint256 private constant CALL_ALLOCATION = 4 * 288; uint256 private constant MUL_OFFSET = 288; uint256 private constant EXP_OFFSET = 2 * 288; + uint256 private constant INV_OFFSET = 3 * 288; function init(uint256 from_) internal pure returns (uint256 handler_) { unchecked { @@ -664,6 +568,8 @@ library U384 { unchecked { handler_ = _allocate(CALL_ALLOCATION); + _sub(m_, init(2), handler_ + INV_OFFSET + 0xA0); + assembly { let call_ := add(handler_, MUL_OFFSET) @@ -681,6 +587,14 @@ library U384 { mstore(add(0x40, call_), 0x40) mstore(add(0xC0, call_), mload(m_)) mstore(add(0xE0, call_), mload(add(m_, 0x20))) + + call_ := add(handler_, INV_OFFSET) + + mstore(call_, 
0x40) + mstore(add(0x20, call_), 0x40) + mstore(add(0x40, call_), 0x40) + mstore(add(0xE0, call_), mload(m_)) + mstore(add(0x0100, call_), mload(add(m_, 0x20))) } } } @@ -765,18 +679,6 @@ library U384 { } } - function modexpAssign(uint256 call_, uint256 b_, uint256 eInteger_) internal view { - assembly { - call_ := add(call_, EXP_OFFSET) - - mstore(add(0x60, call_), mload(b_)) - mstore(add(0x80, call_), mload(add(b_, 0x20))) - mstore(add(0xA0, call_), eInteger_) - - pop(staticcall(gas(), 0x5, call_, 0x0100, b_, 0x40)) - } - } - function modexpAssignTo( uint256 call_, uint256 to_, @@ -818,16 +720,6 @@ library U384 { } } - function modaddAssignTo(uint256 to_, uint256 a_, uint256 b_, uint256 m_) internal pure { - unchecked { - _add(a_, b_, to_); - - if (cmp(to_, m_) >= 0) { - return _subFrom(to_, m_); - } - } - } - function modmul(uint256 call_, uint256 a_, uint256 b_) internal view returns (uint256 r_) { unchecked { r_ = _allocate(SHORT_ALLOCATION); @@ -856,41 +748,55 @@ library U384 { } } - function modmulAssignTo(uint256 call_, uint256 to_, uint256 a_, uint256 b_) internal view { + function modsub(uint256 a_, uint256 b_, uint256 m_) internal pure returns (uint256 r_) { unchecked { - _mul(a_, b_, call_ + MUL_OFFSET + 0x60); - - assembly { - call_ := add(call_, MUL_OFFSET) + r_ = _allocate(SHORT_ALLOCATION); - pop(staticcall(gas(), 0x5, call_, 0x0120, to_, 0x40)) + if (cmp(a_, b_) >= 0) { + _sub(a_, b_, r_); + return r_; } + + _add(a_, m_, r_); + _subFrom(r_, b_); } } - function sub(uint256 a_, uint256 b_) internal pure returns (uint256 r_) { + function modsubAssign(uint256 a_, uint256 b_, uint256 m_) internal pure { unchecked { - r_ = _allocate(SHORT_ALLOCATION); - - _sub(a_, b_, r_); + if (cmp(a_, b_) >= 0) { + _subFrom(a_, b_); + return; + } - return r_; + _addTo(a_, m_); + _subFrom(a_, b_); } } - function subAssignTo(uint256 to_, uint256 a_, uint256 b_) internal pure { + function modsubAssignTo(uint256 to_, uint256 a_, uint256 b_, uint256 m_) internal pure { 
unchecked { - _sub(a_, b_, to_); + if (cmp(a_, b_) >= 0) { + _sub(a_, b_, to_); + return; + } + + _add(a_, m_, to_); + _subFrom(to_, b_); } } - function modshl1Assign(uint256 a_, uint256 m_) internal pure { + function modshl1(uint256 a_, uint256 m_) internal pure returns (uint256 r_) { unchecked { - _shl1To(a_); + r_ = _allocate(SHORT_ALLOCATION); - if (cmp(a_, m_) >= 0) { - _subFrom(a_, m_); + _shl1(a_, r_); + + if (cmp(r_, m_) >= 0) { + _subFrom(r_, m_); } + + return r_; } } @@ -904,6 +810,22 @@ library U384 { } } + /// @dev Stores modinv into `b_` and moddiv into `a_`. + function moddivAssign(uint256 call_, uint256 a_, uint256 b_) internal view { + unchecked { + assembly { + call_ := add(call_, INV_OFFSET) + + mstore(add(0x60, call_), mload(b_)) + mstore(add(0x80, call_), mload(add(b_, 0x20))) + + pop(staticcall(gas(), 0x5, call_, 0x0120, b_, 0x40)) + } + + modmulAssign(call_ - INV_OFFSET, a_, b_); + } + } + function moddiv( uint256 call_, uint256 a_, @@ -957,15 +879,6 @@ library U384 { } } - function _shl1To(uint256 a_) internal pure { - assembly { - let a1_ := mload(add(a_, 0x20)) - - mstore(a_, or(shl(1, mload(a_)), shr(255, a1_))) - mstore(add(a_, 0x20), shl(1, a1_)) - } - } - function _add(uint256 a_, uint256 b_, uint256 r_) private pure { assembly { let aWord_ := mload(add(a_, 0x20)) @@ -1022,7 +935,7 @@ library U384 { } } - function _mul(uint256 a_, uint256 b_, uint256 r_) private view { + function _mul(uint256 a_, uint256 b_, uint256 r_) private pure { assembly { let a0_ := mload(a_) let a1_ := shr(128, mload(add(a_, 0x20)))