diff --git a/doc/components/adder.md b/doc/components/adder.md index 0c162052f..4e593b5ea 100644 --- a/doc/components/adder.md +++ b/doc/components/adder.md @@ -4,7 +4,7 @@ ROHD-HCL provides a set of adder modules to get the sum from a pair of Logic. So - [Ripple Carry Adder](#ripple-carry-adder) - [Parallel Prefix Adder](#parallel-prefix-adder) -- [One's Complement Adder Subtractor](#ones-complement-adder-subtractor) +- [Ones' Complement Adder Subtractor](#ones-complement-adder-subtractor) - [Sign Magnitude Adder](#sign-magnitude-adder) - [Compound Adder](#compound-adder) @@ -50,7 +50,7 @@ Here is an example of instantiating a [ParallelPrefixAdder](https://intel.github ## Ones' Complement Adder Subtractor -A ones-complement adder (and subtractor) is useful in efficient arithmetic operations as the +A ones'-complement adder (and subtractor) is useful in efficient arithmetic operations as the end-around carry can be bypassed and used later. The [OnesComplementAdder](https://intel.github.io/rohd-hcl/rohd_hcl/OnesComplementAdder-class.html) can take a subtraction command as either a `Logic` `subtractIn` or a boolean `subtract` (the Logic overrides the boolean). If Logic `carry` is provided, the end-around carry is output on `carry` and the value will be one less than expected when `carry` is high. An `adderGen` adder function can be provided that generates your favorite internal adder (such as a parallel prefix adder). @@ -76,7 +76,7 @@ Here is an example of instantiating a [OnesComplementAdder](https://intel.githu ## Sign Magnitude Adder -A sign magnitude adder is useful in situations where the sign of the addends is separated from their magnitude (e.g., not 2s complement), such as in floating point multipliers. The [SignMagnitudeAdder](https://intel.github.io/rohd-hcl/rohd_hcl/SignMagnitudeAdder-class.html) inherits from `Adder` but adds the `Logic` inputs for the two operands. 
+A sign magnitude adder is useful in situations where the sign of the addends is separated from their magnitude (e.g., not two's complement), such as in floating point multipliers. The [SignMagnitudeAdder](https://intel.github.io/rohd-hcl/rohd_hcl/SignMagnitudeAdder-class.html) inherits from `Adder` but adds the `Logic` inputs for the two operands. If you can supply the largest magnitude number first, then you can disable a comparator generation inside by declaring the `largestMagnitudeFirst` option as true. @@ -137,3 +137,23 @@ final sum1 = rippleCarryAdder.sum1; final rippleCarryAdder4BitBlock = CarrySelectCompoundAdder(a, b, widthGen: CarrySelectCompoundAdder.splitSelectAdderAlgorithm4Bit); ``` + +## Native Adder + +As logic synthesis can replace a '+' in RTL with a wide variety of adder architectures on its own, we have a [NativeAdder] wrapper class that allows you to use the native '+' with any component that exposes an [Adder] functor as a parameter: + +```dart +// API definition: FloatingPointAdderRound(super.a, super.b, +// {Logic? subtract, +// super.clk, +// super.reset, +// super.enable, +// Adder Function(Logic, Logic, {Logic? carryIn}) adderGen = +// ParallelPrefixAdder.new, +// ParallelPrefix Function(List, Logic Function(Logic, Logic)) +// ppTree = KoggeStone.new, +// super.name = 'floating_point_adder_round'}) + +// Instantiate with a NativeAdder as the internal adder +final adder = FloatingPointAdderRound(a, b, adderGen: NativeAdder.new); +``` diff --git a/lib/src/arithmetic/adder.dart b/lib/src/arithmetic/adder.dart index 01d94c08a..2c4143a37 100644 --- a/lib/src/arithmetic/adder.dart +++ b/lib/src/arithmetic/adder.dart @@ -67,3 +67,19 @@ class FullAdder extends Adder { sum <= [carryIn! & (a ^ b) | a & b, (a ^ b) ^ carryIn!].swizzle(); } } + +/// A class which wraps the native '+' operator so that it can be passed +/// into other modules as a parameter for using the native operation. 
+class NativeAdder extends Adder { + /// Adds [a] and [b], plus the optional [carryIn], using the native '+'. + /// Both operands are zero-extended by one bit before the addition so the + /// carry-out is kept in the result driven onto [sum]. + /// + /// The width of input [a] and [b] must be the same; a [RohdHclException] + /// is thrown otherwise. + NativeAdder(super.a, super.b, {super.carryIn, super.name = 'native_adder'}) { + if (a.width != b.width) { + throw RohdHclException('inputs of a and b should have same width.'); + } + // Widen both operands by one bit so the carry-out is not truncated. + final pre = a.zeroExtend(a.width + 1) + b.zeroExtend(b.width + 1); + if (carryIn != null) { + // Only add the carry when one was supplied, widened to match the sum. + sum <= pre + carryIn!.zeroExtend(a.width + 1); + } else { + sum <= pre; + } + } +} diff --git a/lib/src/arithmetic/float_to_fixed.dart b/lib/src/arithmetic/float_to_fixed.dart index 4f3721ac0..a683f93fd 100644 --- a/lib/src/arithmetic/float_to_fixed.dart +++ b/lib/src/arithmetic/float_to_fixed.dart @@ -42,7 +42,7 @@ class FloatToFixed extends Module { n = bias + float.mantissa.width - 1; final outputWidth = m + n + 1; - final jBit = Logic(name: 'jBit')..gets(float.isNormal()); + final jBit = Logic(name: 'jBit')..gets(float.isNormal); final shift = Logic(name: 'shift', width: float.exponent.width) ..gets( mux(jBit, float.exponent - 1, Const(0, width: float.exponent.width))); diff --git a/lib/src/arithmetic/floating_point/floating_point.dart b/lib/src/arithmetic/floating_point/floating_point.dart index 3b850d9b6..ac6f36b0b 100644 --- a/lib/src/arithmetic/floating_point/floating_point.dart +++ b/lib/src/arithmetic/floating_point/floating_point.dart @@ -1,5 +1,8 @@ // Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: BSD-3-Clause +export 'floating_point_adder.dart'; export 'floating_point_adder_round.dart'; export 'floating_point_adder_simple.dart'; +export 'floating_point_multiplier.dart'; +export 'floating_point_multiplier_simple.dart'; diff --git a/lib/src/arithmetic/floating_point/floating_point_adder.dart b/lib/src/arithmetic/floating_point/floating_point_adder.dart new file mode 100644 index 000000000..89e9967e2 --- /dev/null +++ b/lib/src/arithmetic/floating_point/floating_point_adder.dart @@ -0,0 +1,73 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: 
BSD-3-Clause +// +// floating_point_adder.dart +// An abstract base class defining the API for floating-point adders. +// +// 2025 January 3 +// Author: Desmond A Kirkpatrick - (clk == null) - ? d - : flop( - clk, - d, - en: en, - reset: reset, - resetValue: resetValue, - ); - /// An adder module for variable FloatingPoint type with rounding. // This is a Seidel/Even adder, dual-path implementation. -class FloatingPointAdderRound extends Module { - /// Must be greater than 0. - final int exponentWidth; - - /// Must be greater than 0. - final int mantissaWidth; - - /// The [clk]: if a valid clock signal is passed in, a pipestage is added to - /// the adder to help optimize frequency. - Logic? clk; - - /// Optional [reset], used only if a [clk] is not null to reset the pipeline - /// flops. - Logic? reset; - - /// Optional [enable], used only if a [clk] is not null to enable the pipeline - /// flops. - Logic? enable; - - /// Output [FloatingPoint] representing the sum of two input [FloatingPoint]s - late final FloatingPoint sum = - FloatingPoint(exponentWidth: exponentWidth, mantissaWidth: mantissaWidth) - ..gets(output('sum')); - - /// The result of [FloatingPoint] addition - @protected - late final FloatingPoint _sum = - FloatingPoint(exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); - +class FloatingPointAdderRound extends FloatingPointAdder { /// Swapping two FloatingPoint structures based on a conditional static (FloatingPoint, FloatingPoint) _swap( Logic swap, (FloatingPoint, FloatingPoint) toSwap) => @@ -87,34 +28,34 @@ class FloatingPointAdderRound extends Module { /// functions. /// [ppTree] is an ParallelPrefix generator for use in increment /decrement /// functions. - FloatingPointAdderRound(FloatingPoint a, FloatingPoint b, + FloatingPointAdderRound(super.a, super.b, {Logic? subtract, - this.clk, - this.reset, - this.enable, + super.clk, + super.reset, + super.enable, Adder Function(Logic, Logic, {Logic? 
carryIn}) adderGen = ParallelPrefixAdder.new, ParallelPrefix Function(List, Logic Function(Logic, Logic)) ppTree = KoggeStone.new, super.name = 'floating_point_adder_round'}) - : exponentWidth = a.exponent.width, - mantissaWidth = a.mantissa.width { - if (b.exponent.width != exponentWidth || - b.mantissa.width != mantissaWidth) { - throw RohdHclException('FloatingPoint widths must match'); - } - if (clk != null) { - clk = addInput('clk', clk!); - } - if (reset != null) { - reset = addInput('reset', reset!); - } - if (enable != null) { - enable = addInput('enable', enable!); - } - a = a.clone()..gets(addInput('a', a, width: a.width)); - b = b.clone()..gets(addInput('b', b, width: b.width)); - addOutput('sum', width: _sum.width) <= _sum; + : super() { + final outSum = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + addOutput('sum', width: outSum.width) <= outSum; + + // Ensure that the larger number is wired as 'a' + final doSwap = ia.exponent.lt(ib.exponent) | + (ia.exponent.eq(ib.exponent) & ia.mantissa.lt(ib.mantissa)) | + ((ia.exponent.eq(ib.exponent) & ia.mantissa.eq(ib.mantissa)) & ia.sign); + final FloatingPoint a; + final FloatingPoint b; + (a, b) = _swap(doSwap, (ia, ib)); + + // Seidel: S.EFF = effectiveSubtraction + final effectiveSubtraction = a.sign ^ b.sign ^ (subtract ?? 
Const(0)); + final isNaN = + a.isNaN | b.isNaN | a.isInfinity & b.isInfinity & effectiveSubtraction; + final isInf = a.isInfinity | b.isInfinity; final exponentSubtractor = OnesComplementAdder(a.exponent, b.exponent, subtract: true, adderGen: adderGen, name: 'exponent_sub'); @@ -126,17 +67,14 @@ class FloatingPointAdderRound extends Module { final (larger, smaller) = _swap(signDelta, (a, b)); final fl = mux( - larger.isNormal(), - [larger.isNormal(), larger.mantissa].swizzle(), + larger.isNormal, + [larger.isNormal, larger.mantissa].swizzle(), [larger.mantissa, Const(0)].swizzle()); final fs = mux( - smaller.isNormal(), - [smaller.isNormal(), smaller.mantissa].swizzle(), + smaller.isNormal, + [smaller.isNormal, smaller.mantissa].swizzle(), [smaller.mantissa, Const(0)].swizzle()); - // Seidel: S.EFF = effectiveSubtraction - final effectiveSubtraction = a.sign ^ b.sign ^ (subtract ?? Const(0)); - // Seidel: flp larger preshift, normally in [2,4) final sigWidth = fl.width + 1; final largeShift = mux(effectiveSubtraction, fl.zeroExtend(sigWidth) << 1, @@ -163,9 +101,9 @@ class FloatingPointAdderRound extends Module { /// R Pipestage here: final aIsNormalLatched = - condFlop(clk, a.isNormal(), en: enable, reset: reset); + condFlop(clk, a.isNormal, en: enable, reset: reset); final bIsNormalLatched = - condFlop(clk, b.isNormal(), en: enable, reset: reset); + condFlop(clk, b.isNormal, en: enable, reset: reset); final effectiveSubtractionLatched = condFlop(clk, effectiveSubtraction, en: enable, reset: reset); final largeOperandLatched = @@ -177,6 +115,8 @@ class FloatingPointAdderRound extends Module { final largerExpLatched = condFlop(clk, larger.exponent, en: enable, reset: reset); final deltaLatched = condFlop(clk, delta, en: enable, reset: reset); + final isInfLatched = condFlop(clk, isInf, en: enable, reset: reset); + final isNaNLatched = condFlop(clk, isNaN, en: enable, reset: reset); final carryRPath = Logic(); final significandAdderRPath = OnesComplementAdder( @@ 
-278,13 +218,21 @@ class FloatingPointAdderRound extends Module { final significandNPath = significandSubtractorNPath.sum.slice(smallOperandNPath.width - 1, 0); - final leadOneNPath = mux( - significandNPath.or(), - ParallelPrefixPriorityEncoder(significandNPath.reversed, - ppGen: ppTree, name: 'npath_leadingOne') - .out - .zeroExtend(exponentWidth), - Const(15, width: exponentWidth)); + final validLeadOneNPath = Logic(); + final leadOneNPathPre = ParallelPrefixPriorityEncoder( + significandNPath.reversed, + ppGen: ppTree, + valid: validLeadOneNPath, + name: 'npath_leadingOne') + .out; + // Limit leadOne to exponent range and match widths + final leadOneNPath = (leadOneNPathPre.width > exponentWidth) + ? mux( + leadOneNPathPre + .gte(a.inf().exponent.zeroExtend(leadOneNPathPre.width)), + a.inf().exponent, + leadOneNPathPre.getRange(0, exponentWidth)) + : leadOneNPathPre.zeroExtend(exponentWidth); // N pipestage here: final significandNPathLatched = @@ -294,6 +242,8 @@ class FloatingPointAdderRound extends Module { en: enable, reset: reset); final leadOneNPathLatched = condFlop(clk, leadOneNPath, en: enable, reset: reset); + final validLeadOneNPathLatched = + condFlop(clk, validLeadOneNPath, en: enable, reset: reset); final largerSignLatched = condFlop(clk, larger.sign, en: enable, reset: reset); final smallerSignLatched = @@ -307,7 +257,8 @@ class FloatingPointAdderRound extends Module { final preExpNPath = expCalcNPath.sum.slice(exponentWidth - 1, 0); - final posExpNPath = preExpNPath.or() & ~expCalcNPath.sign; + final posExpNPath = + preExpNPath.or() & ~expCalcNPath.sign & validLeadOneNPathLatched; final exponentNPath = mux(posExpNPath, preExpNPath, zeroExp); @@ -330,14 +281,29 @@ class FloatingPointAdderRound extends Module { final isR = deltaLatched.gte(Const(2, width: delta.width)) | ~effectiveSubtractionLatched; - _sum <= - mux( - isR, - [ - largerSignLatched, - exponentRPath, - mantissaRPath.slice(mantissaRPath.width - 2, 1) - ].swizzle(), - [signNPath, 
exponentNPath, finalSignificandNPath].swizzle()); + + Combinational([ + If(isNaNLatched, then: [ + outSum < outSum.nan, + ], orElse: [ + If(isInfLatched, then: [ + // ROHD 0.6.0 trace error if we use the following + // outSum < outSum.inf(inSign: largerSignLatched), + outSum.sign < largerSignLatched, + outSum.exponent < outSum.nan.exponent, + outSum.mantissa < Const(0, width: mantissaWidth, fill: true), + ], orElse: [ + If(isR, then: [ + outSum.sign < largerSignLatched, + outSum.exponent < exponentRPath, + outSum.mantissa < mantissaRPath.slice(mantissaRPath.width - 2, 1), + ], orElse: [ + outSum.sign < signNPath, + outSum.exponent < exponentNPath, + outSum.mantissa < finalSignificandNPath, + ]) + ]) + ]) + ]); } } diff --git a/lib/src/arithmetic/floating_point/floating_point_adder_simple.dart b/lib/src/arithmetic/floating_point/floating_point_adder_simple.dart index 4051e219c..4646be3c5 100644 --- a/lib/src/arithmetic/floating_point/floating_point_adder_simple.dart +++ b/lib/src/arithmetic/floating_point/floating_point_adder_simple.dart @@ -7,28 +7,12 @@ // 2024 August 30 // Author: Desmond A Kirkpatrick @@ -37,68 +21,111 @@ class FloatingPointAdderSimple extends Module { toSwap.$2.clone()..gets(mux(swap, toSwap.$1, toSwap.$2)) ); - /// Add two floating point numbers [a] and [b], returning result in [sum] - FloatingPointAdderSimple(FloatingPoint a, FloatingPoint b, - {ParallelPrefix Function(List, Logic Function(Logic, Logic)) - ppGen = KoggeStone.new, - super.name = 'floatingpoint_adder_simple'}) - : exponentWidth = a.exponent.width, - mantissaWidth = a.mantissa.width { - if (b.exponent.width != exponentWidth || - b.mantissa.width != mantissaWidth) { - throw RohdHclException('FloatingPoint widths must match'); - } - a = a.clone()..gets(addInput('a', a, width: a.width)); - b = b.clone()..gets(addInput('b', b, width: b.width)); - addOutput('sum', width: _sum.width) <= _sum; + /// Add two floating point numbers [a] and [b], returning result in [sum]. 
+ /// - [ppGen] is an adder generator to be used in the primary adder + /// functions. + FloatingPointAdderSimple(super.a, super.b, + {super.clk, + super.reset, + super.enable, + ParallelPrefix Function(List, Logic Function(Logic, Logic)) ppGen = + KoggeStone.new, + super.name = 'floatingpoint_adder_simple2'}) + : super() { + final outputSum = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + addOutput('sum', width: outputSum.width) <= outputSum; // Ensure that the larger number is wired as 'a' - final doSwap = a.exponent.lt(b.exponent) | - (a.exponent.eq(b.exponent) & a.mantissa.lt(b.mantissa)) | - ((a.exponent.eq(b.exponent) & a.mantissa.eq(b.mantissa)) & b.sign); - - (a, b) = _swap(doSwap, (a, b)); + final doSwap = ia.exponent.lt(ib.exponent) | + (ia.exponent.eq(ib.exponent) & ia.mantissa.lt(ib.mantissa)) | + ((ia.exponent.eq(ib.exponent) & ia.mantissa.eq(ib.mantissa)) & ib.sign); + final FloatingPoint a; + final FloatingPoint b; + (a, b) = _swap(doSwap, (ia, ib)); - final aExp = - a.exponent + mux(a.isNormal(), a.zeroExponent(), a.oneExponent()); - final bExp = - b.exponent + mux(b.isNormal(), b.zeroExponent(), b.oneExponent()); + final isInf = a.isInfinity | b.isInfinity; + final isNaN = + a.isNaN | b.isNaN | (a.isInfinity & b.isInfinity & (a.sign ^ b.sign)); // Align and add mantissas - final expDiff = aExp - bExp; + final expDiff = a.exponent - b.exponent; + final aMantissa = mux( + a.isNormal, + [Const(1), a.mantissa, Const(0, width: mantissaWidth + 1)].swizzle(), + [a.mantissa, Const(0, width: mantissaWidth + 2)].swizzle()); + final bMantissa = mux( + b.isNormal, + [Const(1), b.mantissa, Const(0, width: mantissaWidth + 1)].swizzle(), + [b.mantissa, Const(0, width: mantissaWidth + 2)].swizzle()); + final adder = SignMagnitudeAdder( a.sign, - [a.isNormal(), a.mantissa].swizzle(), + aMantissa, b.sign, - [b.isNormal(), b.mantissa].swizzle() >>> expDiff, + bMantissa >>> expDiff, (a, b, {carryIn}) => ParallelPrefixAdder(a, b, 
carryIn: carryIn, ppGen: ppGen)); - final sum = adder.sum.slice(adder.sum.width - 2, 0); - final leadOneE = - ParallelPrefixPriorityEncoder(sum.reversed, ppGen: ppGen).out; - final leadOne = leadOneE.zeroExtend(exponentWidth); + final intSum = adder.sum.slice(adder.sum.width - 1, 0); + + final aSignLatched = condFlop(clk, a.sign, en: enable, reset: reset); + final aExpLatched = condFlop(clk, a.exponent, en: enable, reset: reset); + final sumLatched = condFlop(clk, intSum, en: enable, reset: reset); + final isInfLatched = condFlop(clk, isInf, en: enable, reset: reset); + final isNaNLatched = condFlop(clk, isNaN, en: enable, reset: reset); + + final mantissa = + sumLatched.reversed.getRange(0, min(intSum.width, intSum.width)); + final leadOneValid = Logic(); + final leadOnePre = ParallelPrefixPriorityEncoder(mantissa, + ppGen: ppGen, valid: leadOneValid) + .out; + // Limit leadOne to exponent range and match widths + // ROHD 0.6.0 trace error if we use this as well + // final infExponent = outputSum.inf(inSign: aSignLatched).exponent; + // We use hardcoding isntead + final infExponent = Const(1, width: exponentWidth, fill: true); + final leadOne = (leadOnePre.width > exponentWidth) + ? 
mux(leadOnePre.gte(infExponent.zeroExtend(leadOnePre.width)), + infExponent, leadOnePre.getRange(0, exponentWidth)) + : leadOnePre.zeroExtend(exponentWidth); + + final leadOneDominates = leadOne.gt(aExpLatched) | ~leadOneValid; + final outExp = + mux(leadOneDominates, a.zeroExponent, aExpLatched - leadOne + 1); + + // ROHD 0.6.0 trace error if we use either of the following: + // (I think trace is not able to figure out this dependency) + // final realIsInf = isInfLatched | outExp.eq(a.inf().exponent); + // final realIsInf = isInfLatched | outExp.eq(outputSum.inf().exponent); + final realIsInf = isInfLatched | outExp.eq(infExponent); - // Assemble the output FloatingPoint - _sum.sign <= adder.sign; Combinational([ If.block([ - Iff(adder.sum[-1] & a.sign.eq(b.sign), [ - _sum.mantissa < (sum >> 1).slice(mantissaWidth - 1, 0), - _sum.exponent < a.exponent + 1 + Iff(isNaNLatched, [ + outputSum < outputSum.nan, ]), - ElseIf(a.exponent.gt(leadOne) & sum.or(), [ - _sum.mantissa < (sum << leadOne).slice(mantissaWidth - 1, 0), - _sum.exponent < a.exponent - leadOne + ElseIf(realIsInf, [ + // ROHD 0.6.0 trace error if we use the following + // outputSum < outputSum.inf(inSign: aSignLatched), + outputSum.sign < aSignLatched, + outputSum.exponent < infExponent, + outputSum.mantissa < Const(0, width: mantissaWidth, fill: true), ]), - ElseIf(leadOne.eq(0) & sum.or(), [ - _sum.mantissa < (sum << leadOne).slice(mantissaWidth - 1, 0), - _sum.exponent < a.exponent - leadOne + 1 + ElseIf(leadOneDominates, [ + outputSum.sign < aSignLatched, + outputSum.exponent < a.zeroExponent, + outputSum.mantissa < + (sumLatched << aExpLatched + 1) + .getRange(intSum.width - mantissaWidth, intSum.width), ]), Else([ - // subnormal result - _sum.mantissa < sum.slice(mantissaWidth - 1, 0), - _sum.exponent < _sum.zeroExponent() + outputSum.sign < aSignLatched, + outputSum.exponent < aExpLatched - leadOne + 1, + outputSum.mantissa < + (sumLatched << leadOne + 1) + .getRange(intSum.width - mantissaWidth, 
intSum.width), ]) ]) ]); diff --git a/lib/src/arithmetic/floating_point/floating_point_multiplier.dart b/lib/src/arithmetic/floating_point/floating_point_multiplier.dart new file mode 100644 index 000000000..1b3866b6e --- /dev/null +++ b/lib/src/arithmetic/floating_point/floating_point_multiplier.dart @@ -0,0 +1,79 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause +// +// floating_point_multiplier_simple.dart +// Implementation of non-rounding floating-point multiplier +// +// 2025 January 3 +// Author: Desmond A Kirkpatrick , Logic Function(Logic, Logic)) ppGen = + KoggeStone.new, + super.name = 'floating_point_multiplier'}) + : exponentWidth = a.exponent.width, + mantissaWidth = a.mantissa.width { + if (b.exponent.width != exponentWidth || + b.mantissa.width != mantissaWidth) { + throw RohdHclException('FloatingPoint widths must match'); + } + if (clk != null) { + clk = addInput('clk', clk!); + } + if (reset != null) { + reset = addInput('reset', reset!); + } + if (enable != null) { + enable = addInput('enable', enable!); + } + ia = a.clone()..gets(addInput('a', a, width: a.width)); + ib = b.clone()..gets(addInput('b', b, width: b.width)); + // output 'product' must be constructed in the sub-class + } +} diff --git a/lib/src/arithmetic/floating_point/floating_point_multiplier_simple.dart b/lib/src/arithmetic/floating_point/floating_point_multiplier_simple.dart new file mode 100644 index 000000000..931f24d78 --- /dev/null +++ b/lib/src/arithmetic/floating_point/floating_point_multiplier_simple.dart @@ -0,0 +1,104 @@ +// Copyright (C) 2024-2025 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause +// +// floating_point_multiplier_simple.dart +// Implementation of non-rounding floating-point multiplier +// +// 2024 December 30 +// Author: Desmond A Kirkpatrick , Logic Function(Logic, Logic)) + ppTree = KoggeStone.new, + super.name}) { + final product = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: 
mantissaWidth); + addOutput('product', width: product.width) <= product; + final a = ia; + final b = ib; + + final aMantissa = mux(a.isNormal, [a.isNormal, a.mantissa].swizzle(), + [a.mantissa, Const(0)].swizzle()); + final bMantissa = mux(b.isNormal, [b.isNormal, b.mantissa].swizzle(), + [b.mantissa, Const(0)].swizzle()); + + final productExp = a.exponent.zeroExtend(exponentWidth + 2) + + b.exponent.zeroExtend(exponentWidth + 2) - + a.bias.zeroExtend(exponentWidth + 2); + + final pp = PartialProductGeneratorCompactRectSignExtension( + aMantissa, bMantissa, RadixEncoder(radix)); + final compressor = + ColumnCompressor(pp, clk: clk, reset: reset, enable: enable) + ..compress(); + final adder = ParallelPrefixAdder( + compressor.extractRow(0), compressor.extractRow(1), + ppGen: ppTree); + // Input mantissas have implicit lead: product mantissa width is (mw+1)*2) + final mantissa = adder.sum.getRange(0, (mantissaWidth + 1) * 2); + + final isInf = a.isInfinity | b.isInfinity; + final isNaN = a.isNaN | + b.isNaN | + ((a.isInfinity | b.isInfinity) & (a.isZero | b.isZero)); + + final productExpLatch = condFlop(clk, productExp, en: enable, reset: reset); + final aSignLatch = condFlop(clk, a.sign, en: enable, reset: reset); + final bSignLatch = condFlop(clk, b.sign, en: enable, reset: reset); + final isInfLatch = condFlop(clk, isInf, en: enable, reset: reset); + final isNaNLatch = condFlop(clk, isNaN, en: enable, reset: reset); + + final leadingOnePos = ParallelPrefixPriorityEncoder(mantissa.reversed, + ppGen: ppTree, name: 'leading_one_encoder') + .out + .zeroExtend(exponentWidth + 2); + + final shifter = SignedShifter( + mantissa, + mux(productExpLatch[-1] | productExpLatch.lt(leadingOnePos), + productExpLatch, leadingOnePos), + name: 'mantissa_shifter'); + + final remainingExp = productExpLatch - leadingOnePos + 1; + + final overFlow = isInfLatch | + (~remainingExp[-1] & + remainingExp.abs().gte(Const(1, width: exponentWidth, fill: true) + .zeroExtend(exponentWidth + 
2))); + + Combinational([ + If(isNaNLatch, then: [ + product < product.nan, + ], orElse: [ + If(overFlow, then: [ + // product < product.inf(inSign: aSignLatch ^ bSignLatch), + product.sign < aSignLatch ^ bSignLatch, + product.exponent < product.nan.exponent, + product.mantissa < Const(0, width: mantissaWidth, fill: true), + ], orElse: [ + product.sign < aSignLatch ^ bSignLatch, + If(remainingExp[-1], then: [ + product.exponent < Const(0, width: exponentWidth) + ], orElse: [ + product.exponent < remainingExp.getRange(0, exponentWidth), + ]), + // Remove the leading one for implicit representation + product.mantissa < + shifter.shifted.getRange(-mantissaWidth - 1, mantissa.width - 1) + ]) + ]) + ]); + } +} diff --git a/lib/src/arithmetic/ones_complement_adder.dart b/lib/src/arithmetic/ones_complement_adder.dart index deec89ff6..71ee888d0 100644 --- a/lib/src/arithmetic/ones_complement_adder.dart +++ b/lib/src/arithmetic/ones_complement_adder.dart @@ -25,16 +25,18 @@ class OnesComplementAdder extends Adder { @protected Logic _sign = Logic(); - /// [OnesComplementAdder] constructor with an adder functor [adderGen] - /// Either a Logic [subtractIn] or a boolean [subtract] can enable - /// subtraction, with [subtractIn] overriding [subtract]. If Logic [carryOut] - /// is provided as not null, then the end-around carry is not performed and is - /// left to the caller via the output [carryOut]. + /// [OnesComplementAdder] constructor with an adder functor [adderGen]. + /// - Either a Logic [subtractIn] or a boolean [subtract] can enable + /// subtraction, with [subtractIn] overriding [subtract]. + /// - If Logic [carryOut] is provided as not null, then the end-around carry + /// is not performed and is provided as value on [carryOut]. + /// - [carryIn] allows for another adder to chain into this one. OnesComplementAdder(super.a, super.b, {Adder Function(Logic, Logic, {Logic? carryIn}) adderGen = ParallelPrefixAdder.new, Logic? subtractIn, Logic? carryOut, + Logic? 
carryIn, bool subtract = false, super.name = 'ones_complement_adder'}) { if (subtractIn != null) { @@ -55,12 +57,13 @@ class OnesComplementAdder extends Adder { final ax = a.zeroExtend(a.width); final bx = b.zeroExtend(b.width); - final adder = adderGen(ax, mux(doSubtract, ~bx, bx)); + final adder = + adderGen(ax, mux(doSubtract, ~bx, bx), carryIn: carryIn ?? Const(0)); if (this.carryOut != null) { this.carryOut! <= adder.sum[-1]; } - final endAround = mux(doSubtract, adder.sum[-1], Const(0)); + final endAround = adder.sum[-1]; final magnitude = adder.sum.slice(a.width - 1, 0); sum <= diff --git a/lib/src/arithmetic/parallel_prefix_operations.dart b/lib/src/arithmetic/parallel_prefix_operations.dart index bbbcdf0ee..2906285ea 100644 --- a/lib/src/arithmetic/parallel_prefix_operations.dart +++ b/lib/src/arithmetic/parallel_prefix_operations.dart @@ -190,19 +190,41 @@ class ParallelPrefixPriorityFinder extends Module { /// Priority Encoder based on ParallelPrefix tree class ParallelPrefixPriorityEncoder extends Module { - /// Output [out] is the bit position of the first '1' in the Logic input - /// Search is counted from the LSB + /// Output [out] is the bit position of the first '1' in the Logic input. + /// Search starts from the LSB. Logic get out => output('out'); + /// Optional output that says the encoded position is valid. + Logic? get valid => tryOutput('valid'); + /// PriorityEncoder constructor + /// - [ppGen] is the type of [ParallelPrefix] tree to use + /// - [valid] is an optional Logic output raised when a '1' is found + /// + /// If there is a '1' in the [inp], the [ParallelPrefixPriorityEncoder] + /// sets [out] to the index of the position of the first '1' starting from + /// the LSb (and optionally sets [valid] to true). + /// + /// If there is no '1' in the [inp], it sets [out] to [inp].width + 1, + /// as well as setting optional [valid] to false. 
ParallelPrefixPriorityEncoder(Logic inp, {ParallelPrefix Function(List, Logic Function(Logic, Logic)) ppGen = KoggeStone.new, + Logic? valid, super.name = 'parallel_prefix_encoder'}) { inp = addInput('inp', inp, width: inp.width); - addOutput('out', width: log2Ceil(inp.width)); + final sz = log2Ceil(inp.width + 1); + addOutput('out', width: sz); + if (valid != null) { + addOutput('valid'); + valid <= this.valid!; + } final u = ParallelPrefixPriorityFinder(inp, ppGen: ppGen); - out <= OneHotToBinary(u.out).binary; + final pos = OneHotToBinary(u.out).binary.zeroExtend(sz); + if (this.valid != null) { + this.valid! <= pos.or() | inp[0]; + } + out <= mux(pos.or() | inp[0], pos, Const(inp.width + 1, width: sz)); } } diff --git a/lib/src/arithmetic/signals/floating_point_logic.dart b/lib/src/arithmetic/signals/floating_point_logic.dart index 1d9d4f6cc..bf32d906f 100644 --- a/lib/src/arithmetic/signals/floating_point_logic.dart +++ b/lib/src/arithmetic/signals/floating_point_logic.dart @@ -47,13 +47,49 @@ class FloatingPoint extends LogicStructure { /// Return a Logic true if this FloatingPoint contains a normal number, /// defined as having mantissa in the range [1,2) - Logic isNormal() => exponent.neq(LogicValue.zero.zeroExtend(exponent.width)); + Logic get isNormal => + exponent.neq(LogicValue.zero.zeroExtend(exponent.width)); + + /// Return a Logic true if this FloatingPoint is Not a Number (NaN) + /// by having its exponent field set to the NaN value (typically all + /// ones) and a non-zero mantissa. + Logic get isNaN => + exponent.eq(floatingPointValue.nan.exponent) & mantissa.or().eq(Const(1)); + + /// Return a Logic true if this FloatingPoint is an infinity + /// by having its exponent field set to the NaN value (typically all + /// ones) and a zero mantissa. 
+ Logic get isInfinity => + exponent.eq(floatingPointValue.infinity.exponent) & + mantissa.or().eq(Const(0)); + + /// Return a Logic true if this FloatingPoint is a zero + /// by having its exponent field set to the zero exponent value and + /// a zero mantissa. + Logic get isZero => + exponent.eq(floatingPointValue.zero.exponent) & + mantissa.or().eq(Const(0)); /// Return the zero exponent representation for this type of FloatingPoint - Logic zeroExponent() => Const(LogicValue.zero).zeroExtend(exponent.width); + Logic get zeroExponent => Const(LogicValue.zero).zeroExtend(exponent.width); /// Return the one exponent representation for this type of FloatingPoint - Logic oneExponent() => Const(LogicValue.one).zeroExtend(exponent.width); + Logic get oneExponent => Const(LogicValue.one).zeroExtend(exponent.width); + + /// Return the exponent Logic value representing the true zero exponent + /// 2^0 = 1 often termed [bias] or the offset of the stored exponent. + Logic get bias => Const((1 << exponent.width - 1) - 1, width: exponent.width); + + /// Construct a FloatingPoint that represents infinity for this FP type. + FloatingPoint inf({Logic? inSign, bool sign = false}) => FloatingPoint.inf( + exponentWidth: exponent.width, + mantissaWidth: mantissa.width, + inSign: inSign, + sign: sign); + + /// Construct a FloatingPoint that represents NaN for this FP type. + FloatingPoint get nan => FloatingPoint.nan( + exponentWidth: exponent.width, mantissaWidth: mantissa.width); @override void put(dynamic val, {bool fill = false}) { @@ -63,6 +99,28 @@ class FloatingPoint extends LogicStructure { super.put(val, fill: fill); } } + + /// Construct a FloatingPoint that represents infinity. + factory FloatingPoint.inf( + {required int exponentWidth, + required int mantissaWidth, + Logic? inSign, + bool sign = false}) { + final signLogic = mux(inSign ?? Const(sign), Const(1), Const(0)); + // final signLogic = inSign ?? 
Const(sign); + final exponent = Const(1, width: exponentWidth, fill: true); + final mantissa = Const(0, width: mantissaWidth, fill: true); + return FloatingPoint._(signLogic, exponent, mantissa); + } + + /// Construct a FloatingPoint that represents NaN. + factory FloatingPoint.nan( + {required int exponentWidth, required int mantissaWidth}) { + final signLogic = Const(0); + final exponent = Const(1, width: exponentWidth, fill: true); + final mantissa = Const(1, width: mantissaWidth); + return FloatingPoint._(signLogic, exponent, mantissa); + } } /// Single floating point representation diff --git a/lib/src/arithmetic/values/floating_point_values/floating_point_8_value.dart b/lib/src/arithmetic/values/floating_point_values/floating_point_8_value.dart index 2d61f6d59..9b5d7b611 100644 --- a/lib/src/arithmetic/values/floating_point_values/floating_point_8_value.dart +++ b/lib/src/arithmetic/values/floating_point_values/floating_point_8_value.dart @@ -32,10 +32,15 @@ class FloatingPoint8E4M3Value extends FloatingPointValue { int get constrainedMantissaWidth => mantissaWidth; /// The maximum value representable by the E4M3 format - static double get maxValue => 448.toDouble(); + static double get maxValue => + FloatingPoint8E4M3Value.getFloatingPointConstant( + FloatingPointConstants.largestNormal) + .toDouble(); /// The minimum value representable by the E4M3 format - static double get minValue => pow(2, -9).toDouble(); + static double get minValue => FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.smallestPositiveSubnormal, 4, 3) + .toDouble(); /// Constructor for a double precision floating point value FloatingPoint8E4M3Value( @@ -70,6 +75,22 @@ class FloatingPoint8E4M3Value extends FloatingPointValue { : super.ofInts( exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + /// Inf is not representable in this format + @override + bool isAnInfinity() => false; + + @override + bool isNaN() => (exponent.toInt() == 15) && (mantissa.toInt() 
== 7); + + /// Override the toDouble to avoid NaN + @override + double toDouble() { + if (exponent.toInt() == 15) { + return 448; + } + return super.toDouble(); + } + /// Numeric conversion of a [FloatingPoint8E4M3Value] from a host double factory FloatingPoint8E4M3Value.ofDouble(double inDouble) { if ((inDouble.abs() > maxValue) | @@ -86,6 +107,28 @@ class FloatingPoint8E4M3Value extends FloatingPointValue { factory FloatingPoint8E4M3Value.ofLogicValue(LogicValue val) => FloatingPointValue.buildOfLogicValue( FloatingPoint8E4M3Value.new, exponentWidth, mantissaWidth, val); + + /// Return the [FloatingPointValue] representing the constant specified. + /// Special case for 8E4M3 type. + factory FloatingPoint8E4M3Value.getFloatingPointConstant( + FloatingPointConstants constantFloatingPoint) { + switch (constantFloatingPoint) { + /// Largest positive number, most positive exponent, full mantissa + case FloatingPointConstants.largestNormal: + return FloatingPoint8E4M3Value.ofBinaryStrings( + '0', '1' * exponentWidth, '${'1' * (mantissaWidth - 1)}0'); + case FloatingPointConstants.nan: + return FloatingPoint8E4M3Value.ofBinaryStrings( + '0', '${'1' * (exponentWidth - 1)}1', '1' * mantissaWidth); + case FloatingPointConstants.infinity: + case FloatingPointConstants.negativeInfinity: + throw RohdHclException('Infinity is not representable in this format'); + case _: + return FloatingPointValue.getFloatingPointConstant( + constantFloatingPoint, exponentWidth, mantissaWidth) + as FloatingPoint8E4M3Value; + } + } } /// The E5M2 representation of a 8-bit floating point value as defined in diff --git a/lib/src/arithmetic/values/floating_point_values/floating_point_value.dart b/lib/src/arithmetic/values/floating_point_values/floating_point_value.dart index 825758b34..0a388a588 100644 --- a/lib/src/arithmetic/values/floating_point_values/floating_point_value.dart +++ b/lib/src/arithmetic/values/floating_point_values/floating_point_value.dart @@ -48,6 +48,9 @@ enum 
FloatingPointConstants { /// Largest possible number infinity, + + /// Not a Number, denoted by all 1s in exponent and any 1 in mantissa + nan, } /// IEEE Floating Point Rounding Modes @@ -299,6 +302,32 @@ class FloatingPointValue implements Comparable { mantissa: val.slice(mantissaWidth - 1, 0)); } + /// Abbreviation Functions for common constants + + /// Return the Infinity value for this FloatingPointValue size. + FloatingPointValue get infinity => + FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.infinity, exponent.width, mantissa.width); + + /// Return the Negative Infinity value for this FloatingPointValue size. + FloatingPointValue get negativeInfinity => + FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.negativeInfinity, + exponent.width, + mantissa.width); + + /// Return the NaN (Not a Number) value for this FloatingPointValue size. + FloatingPointValue get nan => FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.nan, exponent.width, mantissa.width); + + /// Return the value one for this FloatingPointValue size. + FloatingPointValue get one => FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.one, exponent.width, mantissa.width); + + /// Return the positive zero value for this FloatingPointValue size. 
+ FloatingPointValue get zero => FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.positiveZero, exponent.width, mantissa.width); + /// Return the [FloatingPointValue] representing the constant specified factory FloatingPointValue.getFloatingPointConstant( FloatingPointConstants constantFloatingPoint, @@ -353,12 +382,17 @@ class FloatingPointValue implements Comparable { /// Largest positive number, most positive exponent, full mantissa case FloatingPointConstants.largestNormal: return FloatingPointValue.ofBinaryStrings( - '0', '0' * exponentWidth, '1' * mantissaWidth); + '0', '${'1' * (exponentWidth - 1)}0', '1' * mantissaWidth); /// Largest possible number case FloatingPointConstants.infinity: return FloatingPointValue.ofBinaryStrings( '0', '1' * exponentWidth, '0' * mantissaWidth); + + /// Not a Number (NaN) + case FloatingPointConstants.nan: + return FloatingPointValue.ofBinaryStrings( + '0', '1' * exponentWidth, '${'0' * (mantissaWidth - 1)}1'); } } @@ -375,6 +409,19 @@ class FloatingPointValue implements Comparable { return FloatingPoint64Value.ofDouble(inDouble); } + if (inDouble.isNaN) { + return FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.nan, exponentWidth, mantissaWidth); + } + if (inDouble.isInfinite) { + return FloatingPointValue.getFloatingPointConstant( + inDouble < 0.0 + ? 
FloatingPointConstants.negativeInfinity + : FloatingPointConstants.infinity, + exponentWidth, + mantissaWidth); + } + if (roundingMode != FloatingPointRoundingMode.roundNearestEven && roundingMode != FloatingPointRoundingMode.truncate) { throw UnimplementedError( @@ -457,16 +504,12 @@ class FloatingPointValue implements Comparable { } else if ((exponentWidth == 11) && (mantissaWidth == 52)) { return FloatingPoint64Value.ofDouble(inDouble); } - - var doubleVal = inDouble; if (inDouble.isNaN) { - return FloatingPointValue( - exponent: - LogicValue.ofInt(pow(2, exponentWidth).toInt() - 1, exponentWidth), - mantissa: LogicValue.zero, - sign: LogicValue.zero, - ); + return FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.nan, exponentWidth, mantissaWidth); } + + var doubleVal = inDouble; LogicValue sign; if (inDouble < 0.0) { doubleVal = -doubleVal; @@ -474,6 +517,14 @@ class FloatingPointValue implements Comparable { } else { sign = LogicValue.zero; } + if (inDouble.isInfinite) { + return FloatingPointValue.getFloatingPointConstant( + sign.toBool() + ? FloatingPointConstants.negativeInfinity + : FloatingPointConstants.infinity, + exponentWidth, + mantissaWidth); + } // If we are dealing with a really small number we need to scale it up var scaleToWhole = (doubleVal != 0) ? (-log(doubleVal) / log(2)).ceil() : 0; @@ -512,6 +563,15 @@ class FloatingPointValue implements Comparable { ? fullLength - mantissaWidth - scaleToWhole : FloatingPointValue.computeMinExponent(exponentWidth); + if (e > FloatingPointValue.computeMaxExponent(exponentWidth) + 1) { + return FloatingPointValue.getFloatingPointConstant( + sign.toBool() + ? 
FloatingPointConstants.negativeInfinity + : FloatingPointConstants.infinity, + exponentWidth, + mantissaWidth); + } + if (e <= -FloatingPointValue.computeBias(exponentWidth)) { fullValue = fullValue >>> (scaleToWhole - FloatingPointValue.computeBias(exponentWidth)); @@ -533,10 +593,7 @@ class FloatingPointValue implements Comparable { .reversed; return FloatingPointValue( - exponent: exponent, - mantissa: mantissa, - sign: sign, - ); + exponent: exponent, mantissa: mantissa, sign: sign); } @override @@ -566,47 +623,69 @@ class FloatingPointValue implements Comparable { return 0; } - /// Return the bias of this FP format - // int bias() => FloatingPointValue.computeBias(exponent.width); - @override bool operator ==(Object other) { if (other is! FloatingPointValue) { return false; } - if ((exponent.width != other.exponent.width) | (mantissa.width != other.mantissa.width)) { return false; } + if (isNaN() != other.isNaN()) { + return false; + } + if (isAnInfinity() != other.isAnInfinity()) { + return false; + } + if (isAnInfinity()) { + return sign == other.sign; + } // IEEE 754: -0 an +0 are considered equal if ((exponent.isZero && mantissa.isZero) && (other.exponent.isZero && other.mantissa.isZero)) { return true; } - return (sign == other.sign) & (exponent == other.exponent) & (mantissa == other.mantissa); } - // TODO(desmonddak): figure out the difference with Infinity /// Return true if the represented floating point number is considered - /// NaN or 'Not a Number' due to overflow - bool isNaN() { - if ((exponent.width == 4) & (mantissa.width == 3)) { - // FP8 E4M3 does not support infinities - final cond1 = (1 + exponent.toInt()) == pow(2, exponent.width).toInt(); - final cond2 = (1 + mantissa.toInt()) == pow(2, mantissa.width).toInt(); - return cond1 & cond2; - } else { - return exponent.toInt() == - computeMaxExponent(exponent.width) + computeBias(exponent.width) + 1; - } - } + /// NaN or 'Not a Number' + bool isNaN() => + (exponent.toInt() == + 
computeMaxExponent(exponent.width) + + computeBias(exponent.width) + + 1) & + !mantissa.or().isZero; + + /// Return true if the represented floating point number is considered + /// infinity or negative infinity + bool isAnInfinity() => + (exponent.toInt() == + computeMaxExponent(exponent.width) + + computeBias(exponent.width) + + 1) & + mantissa.or().isZero; + + /// Return true if the represented floating point number is zero. Note + /// that the equality operator will treat + /// [FloatingPointConstants.positiveZero] + /// and [FloatingPointConstants.negativeZero] as equal. + bool isZero() => + this == + FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.positiveZero, exponent.width, mantissa.width); /// Return the value of the floating point number in a Dart [double] type. double toDouble() { + if (isNaN()) { + return double.nan; + } + if (isAnInfinity()) { + return sign.isZero ? double.infinity : double.negativeInfinity; + } var doubleVal = double.nan; if (value.isValid) { if (exponent.toInt() == 0) { @@ -658,26 +737,84 @@ class FloatingPointValue implements Comparable { throw RohdHclException('FloatingPointValue: ' 'multiplicand must have the same mantissa and exponent widths'); } + if (isNaN() | other.isNaN()) { + return FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.nan, exponent.width, mantissa.width); + } return FloatingPointValue.ofDouble(op(toDouble(), other.toDouble()), mantissaWidth: mantissa.width, exponentWidth: exponent.width); } /// Multiply operation for [FloatingPointValue] - FloatingPointValue operator *(FloatingPointValue multiplicand) => - _performOp(multiplicand, (a, b) => a * b); + FloatingPointValue operator *(FloatingPointValue multiplicand) { + if (isAnInfinity()) { + if (multiplicand.isAnInfinity()) { + return sign != multiplicand.sign ? 
negativeInfinity : infinity; + } else if (multiplicand.isZero()) { + return nan; + } else { + return this; + } + } else if (multiplicand.isAnInfinity()) { + if (isZero()) { + return nan; + } else { + return multiplicand; + } + } + return _performOp(multiplicand, (a, b) => a * b); + } /// Addition operation for [FloatingPointValue] - FloatingPointValue operator +(FloatingPointValue addend) => - _performOp(addend, (a, b) => a + b); + FloatingPointValue operator +(FloatingPointValue addend) { + if (isAnInfinity()) { + if (addend.isAnInfinity()) { + if (sign != addend.sign) { + return nan; + } else { + return sign.toBool() ? negativeInfinity : infinity; + } + } else { + return this; + } + } else if (addend.isAnInfinity()) { + return addend; + } + return _performOp(addend, (a, b) => a + b); + } /// Divide operation for [FloatingPointValue] - FloatingPointValue operator /(FloatingPointValue divisor) => - _performOp(divisor, (a, b) => a / b); + FloatingPointValue operator /(FloatingPointValue divisor) { + if (isAnInfinity()) { + if (divisor.isAnInfinity() | divisor.isZero()) { + return nan; + } else { + return this; + } + } else { + if (divisor.isZero()) { + return sign != divisor.sign ? 
negativeInfinity : infinity; + } + } + return _performOp(divisor, (a, b) => a / b); + } /// Subtract operation for [FloatingPointValue] - FloatingPointValue operator -(FloatingPointValue subend) => - _performOp(subend, (a, b) => a - b); + FloatingPointValue operator -(FloatingPointValue subend) { + if (isAnInfinity() & subend.isAnInfinity()) { + if (sign == subend.sign) { + return nan; + } else { + return this; + } + } else if (subend.isAnInfinity()) { + return subend.negate(); + } else if (isAnInfinity()) { + return this; + } + return _performOp(subend, (a, b) => a - b); + } /// Negate operation for [FloatingPointValue] FloatingPointValue negate() => FloatingPointValue( diff --git a/lib/src/component_config/components/component_registry.dart b/lib/src/component_config/components/component_registry.dart index 81064fb50..4acff8005 100644 --- a/lib/src/component_config/components/component_registry.dart +++ b/lib/src/component_config/components/component_registry.dart @@ -26,6 +26,7 @@ List get componentRegistry => [ EdgeDetectorConfigurator(), FindConfigurator(), FloatingPointAdderRoundConfigurator(), + FloatingPointMultiplierSimpleConfigurator(), ParallelPrefixAdderConfigurator(), CompressionTreeMultiplierConfigurator(), ExtremaConfigurator(), diff --git a/lib/src/component_config/components/components.dart b/lib/src/component_config/components/components.dart index 22a438669..94275d505 100644 --- a/lib/src/component_config/components/components.dart +++ b/lib/src/component_config/components/components.dart @@ -13,6 +13,7 @@ export 'config_fixed_to_float.dart'; export 'config_float8_to_fixed.dart'; export 'config_float_to_fixed.dart'; export 'config_floating_point_adder_round.dart'; +export 'config_floating_point_multiplier_simple.dart'; export 'config_one_hot.dart'; export 'config_parallel_prefix_adder.dart'; export 'config_priority_arbiter.dart'; diff --git a/lib/src/component_config/components/config_floating_point_adder_round.dart 
b/lib/src/component_config/components/config_floating_point_adder_round.dart index 2ca5a5283..7a48aae53 100644 --- a/lib/src/component_config/components/config_floating_point_adder_round.dart +++ b/lib/src/component_config/components/config_floating_point_adder_round.dart @@ -1,8 +1,8 @@ // Copyright (C) 2023-2024 Intel Corporation // SPDX-License-Identifier: BSD-3-Clause // -// config_floating_point_adder.dart -// Configurator for a Floating-Point Adder. +// config_floating_point_adder_round.dart +// Configurator for a rounding Floating-Point adder. // // 2024 October 11 // Author: Desmond Kirkpatrick diff --git a/lib/src/component_config/components/config_floating_point_multiplier_simple.dart b/lib/src/component_config/components/config_floating_point_multiplier_simple.dart new file mode 100644 index 000000000..83085988e --- /dev/null +++ b/lib/src/component_config/components/config_floating_point_multiplier_simple.dart @@ -0,0 +1,62 @@ +// Copyright (C) 2023-2024 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause +// +// config_floating_point_multiplier_simple.dart +// Configurator for a simple Floating-Point multiplier. +// +// 2025 January 6 +// Author: Desmond Kirkpatrick + +import 'dart:collection'; + +import 'package:rohd/rohd.dart'; +import 'package:rohd_hcl/rohd_hcl.dart'; + +/// A [Configurator] for [FloatingPointMultiplierSimple]s. +class FloatingPointMultiplierSimpleConfigurator extends Configurator { + /// Map from Type to Function for Parallel Prefix generator + static Map, Logic Function(Logic, Logic))> + treeGeneratorMap = { + Ripple: Ripple.new, + Sklansky: Sklansky.new, + KoggeStone: KoggeStone.new, + BrentKung: BrentKung.new + }; + + /// Controls the type of [ParallelPrefix] tree used in the internal functions. + final prefixTreeKnob = + ChoiceConfigKnob(treeGeneratorMap.keys.toList(), value: KoggeStone); + + /// Controls the width of the exponent. 
+ final IntConfigKnob exponentWidthKnob = IntConfigKnob(value: 4); + + /// Controls the width of the mantissa. + final IntConfigKnob mantissaWidthKnob = IntConfigKnob(value: 5); + + /// Controls whether the multiplier is pipelined + final ToggleConfigKnob pipelinedKnob = ToggleConfigKnob(value: false); + + @override + Module createModule() => FloatingPointMultiplierSimple( + clk: pipelinedKnob.value ? Logic() : null, + FloatingPoint( + exponentWidth: exponentWidthKnob.value, + mantissaWidth: mantissaWidthKnob.value, + ), + FloatingPoint( + exponentWidth: exponentWidthKnob.value, + mantissaWidth: mantissaWidthKnob.value), + ppTree: treeGeneratorMap[prefixTreeKnob.value]!); + + @override + late final Map> knobs = UnmodifiableMapView({ + 'Prefix tree type': prefixTreeKnob, + 'Exponent width': exponentWidthKnob, + 'Mantissa width': mantissaWidthKnob, + 'Pipelined': pipelinedKnob, + }); + + @override + final String name = 'Floating-Point Simple Multiplier'; +} diff --git a/lib/src/utils.dart b/lib/src/utils.dart index 102149e64..1cce8e60b 100644 --- a/lib/src/utils.dart +++ b/lib/src/utils.dart @@ -63,3 +63,50 @@ extension SignedBigInt on BigInt { ? BigInt.from(value).toSigned(width) : BigInt.from(value).toUnsigned(width); } + +/// Conditionally constructs a positive edge triggered flip-flop on [clk]. +/// +/// Returns [FlipFlop.q] if [clk] is non-null, or [d] itself if [clk] is null. +/// +/// When the optional [en] is provided, an additional input will be created for +/// the flip-flop. If optional [en] is high or not provided, output will vary as per +/// input [d]. For low [en], output remains frozen irrespective of input [d]. +/// +/// When the optional [reset] is provided, the flip-flop will be reset +/// (active-high). +/// If no [resetValue] is provided, the reset value is always `0`. Otherwise, +/// it will reset to the provided [resetValue]. +Logic condFlop( + Logic? clk, + Logic d, { + Logic? en, + Logic? reset, + dynamic resetValue, +}) => + (clk == null) + ? 
d + : flop( + clk, + d, + en: en, + reset: reset, + resetValue: resetValue, + ); + +/// A bit shifter that takes a positive or negative shift amount +class SignedShifter extends Module { + /// The output [shifted] bits + Logic get shifted => output('shifted'); + + /// Create a [SignedShifter] that treats shift as signed + /// - [bits] is the input to be shifted + /// - [shift] is the signed amount to be shifted + + SignedShifter(Logic bits, Logic shift, {super.name = 'shifter'}) { + bits = addInput('bits', bits, width: bits.width); + shift = addInput('shift', shift, width: shift.width); + + addOutput('shifted', width: bits.width); + shifted <= mux(shift[-1], bits >>> shift.abs(), bits << shift); + } +} diff --git a/test/arithmetic/adder_test.dart b/test/arithmetic/adder_test.dart index 3aad74554..bc7937aca 100644 --- a/test/arithmetic/adder_test.dart +++ b/test/arithmetic/adder_test.dart @@ -311,6 +311,22 @@ void main() { } }); + test('ones complement subtractor', () { + const width = 5; + final a = Logic(width: width); + final b = Logic(width: width); + + const subtract = true; + const av = 1; + const bv = 6; + + a.put(av); + b.put(bv); + final adder = OnesComplementAdder(a, b, subtract: subtract); + expect(adder.sum.value.toInt(), equals(bv - av)); + expect(adder.sign.value, LogicValue.one); + }); + test('ones complement with Logic subtract', () { const width = 2; final a = Logic(width: width); diff --git a/test/arithmetic/fixed_to_float_test.dart b/test/arithmetic/fixed_to_float_test.dart index 3f5c73fcf..45648e13a 100644 --- a/test/arithmetic/fixed_to_float_test.dart +++ b/test/arithmetic/fixed_to_float_test.dart @@ -139,8 +139,8 @@ void main() async { } }); - // Test is skipped as FloatingPointValue.ofDouble does not handle infinities. - // TODO(desmonddak): + // TODO(desmonddak): complete this test as now + // FloatingPointValue.ofDouble handles infinities. 
test('Signed Q7.0 to E3M2', () async { final fixed = FixedPoint(signed: true, m: 7, n: 0); final dut = FixedToFloat(fixed, exponentWidth: 3, mantissaWidth: 2); diff --git a/test/arithmetic/float_to_fixed_test.dart b/test/arithmetic/float_to_fixed_test.dart index 239147fb5..76de8afc3 100644 --- a/test/arithmetic/float_to_fixed_test.dart +++ b/test/arithmetic/float_to_fixed_test.dart @@ -20,7 +20,7 @@ void main() async { for (var val = 0; val < pow(2, 8); val++) { final fpv = FloatingPointValue.ofLogicValue( 5, 2, LogicValue.ofInt(val, float.width)); - if (!fpv.isNaN()) { + if (!fpv.isAnInfinity() & !fpv.isNaN()) { float.put(fpv); final fxp = dut.fixed; final fxpExp = FixedPointValue.ofDouble(fpv.toDouble(), @@ -41,7 +41,7 @@ void main() async { for (var val = 0; val < pow(2, 8); val++) { final fp8 = FloatingPointValue.ofLogicValue( 4, 3, LogicValue.ofInt(val, float.width)); - if (!fp8.isNaN()) { + if (!fp8.isNaN() & !fp8.isAnInfinity()) { float.put(fp8.value); final fx8 = FixedPointValue.ofDouble(fp8.toDouble(), signed: true, m: 23, n: 9); @@ -55,7 +55,7 @@ void main() async { for (var val = 0; val < pow(2, 8); val++) { final fp8 = FloatingPointValue.ofLogicValue( 5, 2, LogicValue.ofInt(val, float.width)); - if (!fp8.isNaN()) { + if (!fp8.isNaN() & !fp8.isAnInfinity()) { float.put(fp8.value); final fx8 = FixedPointValue.ofDouble(fp8.toDouble(), signed: true, m: 16, n: 16); diff --git a/test/arithmetic/floating_point/floating_point_adder_round_test.dart b/test/arithmetic/floating_point/floating_point_adder_round_test.dart index e454a4ae6..66983a297 100644 --- a/test/arithmetic/floating_point/floating_point_adder_round_test.dart +++ b/test/arithmetic/floating_point/floating_point_adder_round_test.dart @@ -17,78 +17,77 @@ void main() { tearDown(() async { await Simulator.reset(); }); - test('FP: singleton N path', () async { - final clk = SimpleClockGenerator(10).clk; - - const eWidth = 4; - const mWidth = 5; - final fa = FloatingPoint(exponentWidth: eWidth, 
mantissaWidth: mWidth); - final fb = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - - final fva = FloatingPointValue.ofInts(14, 31, - exponentWidth: eWidth, mantissaWidth: mWidth); - final fvb = FloatingPointValue.ofInts(13, 7, - exponentWidth: eWidth, mantissaWidth: mWidth, sign: true); - - fa.put(fva); - fb.put(fvb); + test('FP: rounding adder singleton N path', () async { + const exponentWidth = 4; + const mantissawidth = 5; + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissawidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissawidth); + + final fv1 = FloatingPointValue.ofInts(14, 31, + exponentWidth: exponentWidth, mantissaWidth: mantissawidth); + final fv2 = FloatingPointValue.ofInts(13, 7, + exponentWidth: exponentWidth, mantissaWidth: mantissawidth, sign: true); + + fp1.put(fv1); + fp2.put(fv2); final expectedNoRound = FloatingPointValue.ofDoubleUnrounded( - fva.toDouble() + fvb.toDouble(), - exponentWidth: eWidth, - mantissaWidth: mWidth); + fv1.toDouble() + fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissawidth); final expected = expectedNoRound; - final adder = FloatingPointAdderRound(fa, fb, clk: clk); + final adder = FloatingPointAdderRound(fp1, fp2); unawaited(Simulator.run()); - await clk.nextNegedge; - fa.put(0); - fb.put(0); final computed = adder.sum.floatingPointValue; - expect(computed.isNaN(), equals(expected.isNaN())); expect(computed, equals(expected)); await Simulator.endSimulation(); }); - test('FP: N path, subtraction, delta < 2', () async { - const eWidth = 3; - const mWidth = 5; + test('FP: rounding adder N path, subtraction, delta < 2', () async { + const exponentWidth = 3; + const mantissaWidth = 5; final one = FloatingPointValue.getFloatingPointConstant( - FloatingPointConstants.one, eWidth, mWidth); - final fa = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - final fb = FloatingPoint(exponentWidth: eWidth, 
mantissaWidth: mWidth); - fa.put(one); - fb.put(one); - final adder = FloatingPointAdderRound(fa, fb); + FloatingPointConstants.one, exponentWidth, mantissaWidth); + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(one); + fp2.put(one); + final adder = FloatingPointAdderRound(fp1, fp2); await adder.build(); unawaited(Simulator.run()); - final largestExponent = FloatingPointValue.computeBias(eWidth) + - FloatingPointValue.computeMaxExponent(eWidth); - final largestMantissa = pow(2, mWidth).toInt() - 1; - for (var i = 0; i <= largestExponent; i++) { - for (var j = 0; j <= largestExponent; j++) { - if ((i - j).abs() < 2) { - for (var ii = 0; ii <= largestMantissa; ii++) { - for (var jj = 0; jj <= largestMantissa; jj++) { - final fva = FloatingPointValue.ofInts(i, ii, - exponentWidth: eWidth, mantissaWidth: mWidth); - final fvb = FloatingPointValue.ofInts(j, jj, - exponentWidth: eWidth, mantissaWidth: mWidth, sign: true); - - fa.put(fva); - fb.put(fvb); + final largestExponent = FloatingPointValue.computeBias(exponentWidth) + + FloatingPointValue.computeMaxExponent(exponentWidth); + final largestMantissa = pow(2, mantissaWidth).toInt() - 1; + for (var e1 = 0; e1 <= largestExponent; e1++) { + for (var e2 = 0; e2 <= largestExponent; e2++) { + if ((e1 - e2).abs() < 2) { + for (var m1 = 0; m1 <= largestMantissa; m1++) { + final fv1 = FloatingPointValue.ofInts(e1, m1, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + for (var m2 = 0; m2 <= largestMantissa; m2++) { + final fv2 = FloatingPointValue.ofInts(e2, m2, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth, + sign: true); + + fp1.put(fv1); + fp2.put(fv2); // No rounding final expected = FloatingPointValue.ofDoubleUnrounded( - fva.toDouble() + fvb.toDouble(), - exponentWidth: eWidth, - mantissaWidth: mWidth); + fv1.toDouble() + fv2.toDouble(), + 
exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); final computed = adder.sum.floatingPointValue; - expect(computed.isNaN(), equals(expected.isNaN())); expect(computed, equals(expected)); } } @@ -98,31 +97,33 @@ void main() { await Simulator.endSimulation(); }); - test('FP: singleton R path', () async { + test('FP: rounding adder singleton R path', () async { final clk = SimpleClockGenerator(10).clk; - const eWidth = 4; - const mWidth = 5; - final fa = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - final fb = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - fa.put(0); - fb.put(0); - - final fva = FloatingPointValue.ofInts(3, 11, - exponentWidth: eWidth, mantissaWidth: mWidth); - final fvb = FloatingPointValue.ofInts(11, 25, - exponentWidth: eWidth, mantissaWidth: mWidth, sign: true); - - fa.put(fva); - fb.put(fvb); - - final expected = fva + fvb; - final adder = FloatingPointAdderRound(clk: clk, fa, fb); + const exponentWidth = 4; + const mantissaWidth = 5; + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(0); + fp2.put(0); + + final fv1 = FloatingPointValue.ofInts(3, 11, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fv2 = FloatingPointValue.ofInts(11, 25, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth, sign: true); + + fp1.put(fv1); + fp2.put(fv2); + + final expected = fv1 + fv2; + final adder = FloatingPointAdderRound(clk: clk, fp1, fp2); await adder.build(); unawaited(Simulator.run()); await clk.nextNegedge; - fa.put(0); - fb.put(0); + fp1.put(0); + fp2.put(0); final computed = adder.sum.floatingPointValue; expect(computed.isNaN(), equals(expected.isNaN())); @@ -130,35 +131,38 @@ void main() { await Simulator.endSimulation(); }); - test('FP: R path, strict subnormal', () async { - const eWidth = 4; - const mWidth = 5; - - final fa = 
FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - final fb = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - fa.put(0); - fb.put(0); - final adder = FloatingPointAdderRound(fa, fb); + test('FP: rounding adder R path, strict subnormal', () async { + const exponentWidth = 4; + const mantissaWidth = 5; + + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(0); + fp2.put(0); + final adder = FloatingPointAdderRound(fp1, fp2); await adder.build(); unawaited(Simulator.run()); - final largestMantissa = pow(2, mWidth).toInt() - 1; - for (final sign in [false]) { - for (var i = 0; i <= 1; i++) { - for (var j = 0; j <= 1; j++) { - if (!sign || (i - j).abs() >= 2) { - for (var ii = 0; ii <= largestMantissa; ii++) { - for (var jj = 0; jj <= largestMantissa; jj++) { - final fva = FloatingPointValue.ofInts(i, ii, - exponentWidth: eWidth, mantissaWidth: mWidth); - final fvb = FloatingPointValue.ofInts(j, jj, - exponentWidth: eWidth, mantissaWidth: mWidth, sign: sign); - - fa.put(fva); - fb.put(fvb); - final expected = fva + fvb; + final largestMantissa = pow(2, mantissaWidth).toInt() - 1; + for (final sign in [false, true]) { + for (var e1 = 0; e1 <= 1; e1++) { + for (var e2 = 0; e2 <= 1; e2++) { + if (!sign || (e1 - e2).abs() >= 2) { + for (var m1 = 0; m1 <= largestMantissa; m1++) { + final fv1 = FloatingPointValue.ofInts(e1, m1, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + for (var m2 = 0; m2 <= largestMantissa; m2++) { + final fv2 = FloatingPointValue.ofInts(e2, m2, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth, + sign: sign); + + fp1.put(fv1); + fp2.put(fv2); + final expected = fv1 + fv2; final computed = adder.sum.floatingPointValue; - expect(computed.isNaN(), equals(expected.isNaN())); expect(computed, equals(expected)); } } @@ -169,44 +173,264 @@ void main() { await 
Simulator.endSimulation(); }); - test('FP: R path, full random', () async { + test('FP: rounding adder R path, full random', () async { final clk = SimpleClockGenerator(10).clk; - const eWidth = 3; - const mWidth = 5; + const exponentWidth = 3; + const mantissaWidth = 5; - final fa = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - final fb = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - fa.put(0); - fb.put(0); - final adder = FloatingPointAdderRound(clk: clk, fa, fb); + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(0); + fp2.put(0); + final adder = FloatingPointAdderRound(clk: clk, fp1, fp2); await adder.build(); unawaited(Simulator.run()); final value = Random(47); var cnt = 200; while (cnt > 0) { - final fva = FloatingPointValue.random(value, - exponentWidth: eWidth, mantissaWidth: mWidth); - final fvb = FloatingPointValue.random(value, - exponentWidth: eWidth, mantissaWidth: mWidth); - fa.put(fva); - fb.put(fvb); - if ((fva.exponent.toInt() - fvb.exponent.toInt()).abs() >= 2) { + final fv1 = FloatingPointValue.random(value, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fv2 = FloatingPointValue.random(value, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(fv1); + fp2.put(fv2); + if ((fv1.exponent.toInt() - fv2.exponent.toInt()).abs() >= 2) { cnt--; - final expected = fva + fvb; + final expected = fv1 + fv2; await clk.nextNegedge; - fa.put(0); - fb.put(0); + fp1.put(0); + fp2.put(0); final computed = adder.sum.floatingPointValue; - expect(computed.isNaN(), equals(expected.isNaN())); expect(computed, equals(expected)); } } await Simulator.endSimulation(); }); - test('FP: singleton merged path', () async { + test('FP: rounding adder singleton merged pipelined path', () async { + final clk = SimpleClockGenerator(10).clk; + + const 
exponentWidth = 3; + const mantissaWidth = 5; + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(0); + fp2.put(0); + final fv1 = FloatingPointValue.ofInts(14, 31, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fv2 = FloatingPointValue.ofInts(13, 7, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth, sign: true); + fp1.put(fv1); + fp2.put(fv2); + + final expectedNoRound = FloatingPointValue.ofDoubleUnrounded( + fv1.toDouble() + fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + final FloatingPointValue expected; + final expectedRound = fv1 + fv2; + if (((fv1.exponent.toInt() - fv2.exponent.toInt()).abs() < 2) & + (fv1.sign.toInt() != fv2.sign.toInt())) { + expected = expectedNoRound; + } else { + expected = expectedRound; + } + final adder = FloatingPointAdderRound(clk: clk, fp1, fp2); + await adder.build(); + unawaited(Simulator.run()); + await clk.nextNegedge; + fp1.put(0); + fp2.put(0); + + final computed = adder.sum.floatingPointValue; + expect(computed, equals(expected)); + await Simulator.endSimulation(); + }); + + test('FP: rounding adder full random wide', () async { + const exponentWidth = 11; + const mantissaWidth = 52; + + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(0); + fp2.put(0); + final adder = FloatingPointAdderRound(fp1, fp2); + await adder.build(); + unawaited(Simulator.run()); + final value = Random(51); + + var cnt = 100; + while (cnt > 0) { + final fv1 = FloatingPointValue.random(value, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fv2 = FloatingPointValue.random(value, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(fv1); + fp2.put(fv2); + 
final expected = fv1 + fv2; + final computed = adder.sum.floatingPointValue; + expect(computed.isNaN(), equals(expected.isNaN())); + expect(computed, equals(expected)); + cnt--; + } + await Simulator.endSimulation(); + }); + + test('FP: rounding adder singleton merged path', () async { + const exponentWidth = 3; + const mantissaWidth = 5; + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(0); + fp2.put(0); + final fv1 = FloatingPointValue.ofInts(14, 31, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fv2 = FloatingPointValue.ofInts(13, 7, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth, sign: true); + fp1.put(fv1); + fp2.put(fv2); + + final expectedNoRound = FloatingPointValue.ofDoubleUnrounded( + fv1.toDouble() + fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + final FloatingPointValue expected; + final expectedRound = fv1 + fv2; + if (((fv1.exponent.toInt() - fv2.exponent.toInt()).abs() < 2) & + (fv1.sign.toInt() != fv2.sign.toInt())) { + expected = expectedNoRound; + } else { + expected = expectedRound; + } + final adder = FloatingPointAdderRound(fp1, fp2); + + final computed = adder.sum.floatingPointValue; + expect(computed, equals(expected)); + }); + + test('FP: rounding adder singleton', () async { + const exponentWidth = 4; + const mantissaWidth = 4; + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(0); + fp2.put(0); + final fv1 = FloatingPointValue.ofBinaryStrings('0', '1100', '0000'); + final fv2 = FloatingPointValue.ofBinaryStrings('1', '1100', '0000'); + + fp1.put(fv1); + fp2.put(fv2); + + final expectedNoRound = FloatingPointValue.ofDoubleUnrounded( + fv1.toDouble() + fv2.toDouble(), + exponentWidth: 
exponentWidth, + mantissaWidth: mantissaWidth); + + final FloatingPointValue expected; + final expectedRound = fv1 + fv2; + if (((fv1.exponent.toInt() - fv2.exponent.toInt()).abs() < 2) & + (fv1.sign.toInt() != fv2.sign.toInt())) { + expected = expectedNoRound; + } else { + expected = expectedRound; + } + final adder = FloatingPointAdderRound(fp1, fp2); + + final computed = adder.sum.floatingPointValue; + + expect(computed, equals(expected)); + }); + + test('FP: rounding adder exhaustive', () { + const exponentWidth = 4; + const mantissaWidth = 4; + + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final adder = FloatingPointAdderRound(fp1, fp2); + + final expLimit = pow(2, exponentWidth); + final mantLimit = pow(2, mantissaWidth); + for (final subtract in [0, 1]) { + for (var e1 = 0; e1 < expLimit; e1++) { + for (var m1 = 0; m1 < mantLimit; m1++) { + final fv1 = FloatingPointValue.ofInts(e1, m1, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + for (var e2 = 0; e2 < expLimit; e2++) { + for (var m2 = 0; m2 < mantLimit; m2++) { + final fv2 = FloatingPointValue.ofInts(e2, m2, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth, + sign: subtract == 1); + + fp1.put(fv1.value); + fp2.put(fv2.value); + final computed = adder.sum.floatingPointValue; + final expectedDouble = fv1.toDouble() + fv2.toDouble(); + + final FloatingPointValue expected; + if ((subtract == 1) & + ((fv1.exponent.toInt() - fv2.exponent.toInt()).abs() < 2)) { + expected = FloatingPointValue.ofDoubleUnrounded(expectedDouble, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + } else { + expected = FloatingPointValue.ofDouble(expectedDouble, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + } + + expect(computed, equals(expected), + reason: '\t$fv1 (${fv1.toDouble()})\n' + '\t$fv2 (${fv2.toDouble()}) =\n' + 
'\t$computed (${computed.toDouble()}) computed\n' + '\t$expected (${expected.toDouble()}) expected'); + } + } + } + } + } + }); + test('FP: rounding adder general singleton test', () { + FloatingPointValue ofString(String s) => + FloatingPointValue.ofSpacedBinaryString(s); + + final fv1 = ofString('0 001 111111'); + final fv2 = ofString('1 010 000000'); + + final fp1 = FloatingPoint( + exponentWidth: fv1.exponent.width, mantissaWidth: fv1.mantissa.width); + final fp2 = FloatingPoint( + exponentWidth: fv2.exponent.width, mantissaWidth: fv2.mantissa.width); + fp1.put(fv1); + fp2.put(fv2); + final adder = FloatingPointAdderRound(fp1, fp2); + final exponentWidth = adder.sum.exponent.width; + final mantissaWidth = adder.sum.mantissa.width; + + final expectedDouble = + fp1.floatingPointValue.toDouble() + fp2.floatingPointValue.toDouble(); + + final expectedNoRound = FloatingPointValue.ofDoubleUnrounded(expectedDouble, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + expect(adder.sum.floatingPointValue, equals(expectedNoRound)); + }); + test('FP: rounding with native adder', () async { final clk = SimpleClockGenerator(10).clk; const eWidth = 3; @@ -235,7 +459,8 @@ void main() { } else { expected = expectedRound; } - final adder = FloatingPointAdderRound(clk: clk, fa, fb); + final adder = + FloatingPointAdderRound(clk: clk, fa, fb, adderGen: NativeAdder.new); await adder.build(); unawaited(Simulator.run()); await clk.nextNegedge; @@ -247,34 +472,4 @@ void main() { expect(computed, equals(expected)); await Simulator.endSimulation(); }); - - test('FP: full random wide', () async { - const eWidth = 11; - const mWidth = 52; - - final fa = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - final fb = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - fa.put(0); - fb.put(0); - final adder = FloatingPointAdderRound(fa, fb); - await adder.build(); - unawaited(Simulator.run()); - final value = Random(51); - - var cnt = 100; - while (cnt > 0) 
{ - final fva = FloatingPointValue.random(value, - exponentWidth: eWidth, mantissaWidth: mWidth); - final fvb = FloatingPointValue.random(value, - exponentWidth: eWidth, mantissaWidth: mWidth); - fa.put(fva); - fb.put(fvb); - final expected = fva + fvb; - final computed = adder.sum.floatingPointValue; - expect(computed.isNaN(), equals(expected.isNaN())); - expect(computed, equals(expected)); - cnt--; - } - await Simulator.endSimulation(); - }); } diff --git a/test/arithmetic/floating_point/floating_point_adder_simple_test.dart b/test/arithmetic/floating_point/floating_point_adder_simple_test.dart index c5b0f17ee..d5092b1a0 100644 --- a/test/arithmetic/floating_point/floating_point_adder_simple_test.dart +++ b/test/arithmetic/floating_point/floating_point_adder_simple_test.dart @@ -1,7 +1,7 @@ // Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: BSD-3-Clause // -// floating_point_smple test.dart +// floating_point_simple test.dart // Tests of FloatingPointAdderSimple -- non-rounding FP adder // // 2024 April 1 @@ -10,309 +10,390 @@ // Desmond A Kirkpatrick + FloatingPointValue.ofSpacedBinaryString(s); + + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + test('FP: simple adder narrow corner tests', () { + final testCases = [ + (ofString('0 0001 0000'), ofString('0 0000 0000')), + // subnormal from ae=1 s1=1, chop + (ofString('0 0000 0001'), ofString('1 0001 0000')), + // ae=0, l1=0 -- don't chop the leading digit + (ofString('0 0000 0000'), ofString('1 0000 1000')), + // requires unrounded comparison + (ofString('0 0000 0001'), ofString('1 0010 0010')), + // fix for shifting by l1 + (ofString('0 0000 0010'), ofString('1 0010 0000')), + // circle back ae=1 l1=1, shift, do not chop + (ofString('0 0000 0001'), ofString('1 0001 0000')), + // Large exponent difference requires rounding? 
+ (ofString('0 0000 0001'), ofString('1 0111 0000')), + // This one wants no rounding + (ofString('0 0000 0001'), ofString('1 0011 0000')), + // wants rounding + (ofString('0 0000 0001'), ofString('1 0101 0000')), + // here a=7, l1=0, we need to add 1 + (ofString('0 0111 0000'), ofString('0 0111 0000')), + // Needs a shift of 1 when ae = 0 and l1 > ae and subnormal + (ofString('0 0000 0000'), ofString('0 0000 0001')), + // needs to shift 1 more and add to exponent a = 0 l1=0 when adding + (ofString('0 0000 0010'), ofString('0 0000 1110')), + // counterexample to adding 1 to exponent a = 0 l1=14 + (ofString('0 0000 0000'), ofString('0 0000 0000')), + //another counterexample: adding 1 to many to exp + (ofString('0 0000 0001'), ofString('0 0000 0001')), + // catastrophic cancellation + (ofString('0 1100 0000'), ofString('0 1100 0000')), + ]; + final adder = FloatingPointAdderSimple(fp1, fp2); - test('FP: addersmall numbers test', () { - final val = FloatingPoint32Value.getFloatingPointConstant( - FloatingPointConstants.smallestPositiveSubnormal) - .toDouble(); - final fp1 = FloatingPoint32() - ..put(FloatingPoint32Value.getFloatingPointConstant( - FloatingPointConstants.smallestPositiveSubnormal) - .value); - final fp2 = FloatingPoint32() - ..put(FloatingPoint32Value.getFloatingPointConstant( - FloatingPointConstants.smallestPositiveSubnormal) - .negate() - .value); - final out = FloatingPoint32Value.ofDouble(val - val); + for (final test in testCases) { + final fv1 = test.$1; + final fv2 = test.$2; + fp1.put(fv1.value); + fp2.put(fv2.value); + final expectedDouble = fp1.floatingPointValue.toDouble() + + fp2.floatingPointValue.toDouble(); + + final expectedRound = FloatingPointValue.ofDouble(expectedDouble, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + + final expectedNoRound = FloatingPointValue.ofDoubleUnrounded( + expectedDouble, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + final expected = expectedNoRound; + + final 
computed = adder.sum.floatingPointValue; + if ((computed != expectedNoRound) && (computed != expectedRound)) { + expect(computed, equals(expected), + reason: '\t$fv1 (${fv1.toDouble()})\n' + '\t$fv2 (${fv2.toDouble()}) =\n' + '\t$computed (${computed.toDouble()}) computed\n' + '\t$expected (${expected.toDouble()}) expected'); + } + } + }); + test('FP: simple adder narrow singleton test', () { + fp1.put(ofString('0 1100 0000')); + fp2.put(ofString('1 1100 0000')); + final adder = FloatingPointAdderSimple(fp1, fp2); - final adder = FloatingPointAdderSimple(fp1, fp2); + final expectedDouble = + fp1.floatingPointValue.toDouble() + fp2.floatingPointValue.toDouble(); + + final expectedNoRound = FloatingPointValue.ofDoubleUnrounded( + expectedDouble, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + expect(adder.sum.floatingPointValue, equals(expectedNoRound)); + }); + test('FP: simple adder singleton pipelined path', () async { + final clk = SimpleClockGenerator(10).clk; + fp1.put(ofString('0 0000 0000')); + fp2.put(ofString('0 0001 0000')); + + final expectedDouble = + fp1.floatingPointValue.toDouble() + fp2.floatingPointValue.toDouble(); + + final expectedNoRound = FloatingPointValue.ofDoubleUnrounded( + expectedDouble, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + final FloatingPointValue expected; + expected = expectedNoRound; + final adder = FloatingPointAdderSimple(clk: clk, fp1, fp2); + await adder.build(); + unawaited(Simulator.run()); + await clk.nextNegedge; + fp1.put(0); + fp2.put(0); - final fpSuper = adder.sum.floatingPointValue; - final fpStr = fpSuper.toDouble().abs().toStringAsPrecision(7); - final valStr = out.toDouble().toStringAsPrecision(7); - expect(fpStr, valStr); + final computed = adder.sum.floatingPointValue; + expect(computed, equals(expected)); + await Simulator.endSimulation(); + }); + + test('FP: adder simple pipeline random', () async { + final clk = SimpleClockGenerator(10).clk; + + final adder = 
FloatingPointAdderSimple(clk: clk, fp1, fp2); + await adder.build(); + unawaited(Simulator.run()); + + final value = Random(513); + + for (var i = 0; i < 500; i++) { + final fv1 = FloatingPointValue.random(value, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth, + normal: true); + final fv2 = FloatingPointValue.random(value, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth, + normal: true); + + fp1.put(fv1.value); + fp2.put(fv2.value); + await clk.nextNegedge; + fp1.put(0); + fp2.put(0); + + final computed = adder.sum.floatingPointValue; + + final expectedRound = FloatingPointValue.ofDouble( + fv1.toDouble() + fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + final expectedNoRound = FloatingPointValue.ofDoubleUnrounded( + fv1.toDouble() + fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + if ((computed != expectedNoRound) & (computed != expectedRound)) { + expect(computed, equals(expectedNoRound), + reason: '\t$fv1 (${fv1.toDouble()})\n' + '\t$fv2 (${fv2.toDouble()}) =\n' + '\t$computed (${computed.toDouble()}) computed\n' + '\t$expectedNoRound ' + '(${expectedNoRound.toDouble()}) expected'); + } + } + await Simulator.endSimulation(); + }); }); - test('FP: adder carry numbers test', () { - final val = pow(2.5, -12).toDouble(); - final fp1 = FloatingPoint32() - ..put(FloatingPoint32Value.ofDouble(pow(2.5, -12).toDouble()).value); - final fp2 = FloatingPoint32() - ..put(FloatingPoint32Value.ofDouble(pow(2.5, -12).toDouble()).value); - final out = FloatingPoint32Value.ofDouble(val + val); + test('FP: adder simple wide mantissa random', () async { + const exponentWidth = 2; + const mantissaWidth = 20; + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); final adder = FloatingPointAdderSimple(fp1, fp2); + await adder.build(); + 
unawaited(Simulator.run()); - final fpSuper = adder.sum.floatingPointValue; - final fpStr = fpSuper.toDouble().toStringAsPrecision(7); - final valStr = out.toDouble().toStringAsPrecision(7); - expect(fpStr, valStr); - }); - - test('FP: adder basic loop test', () { - final input = [(3.25, 1.5), (4.5, 3.75)]; - - for (final pair in input) { - final fp1 = FloatingPoint32() - ..put(FloatingPoint32Value.ofDouble(pair.$1).value); - final fp2 = FloatingPoint32() - ..put(FloatingPoint32Value.ofDouble(pair.$2).value); - final out = FloatingPoint32Value.ofDouble(pair.$1 + pair.$2); - - final adder = FloatingPointAdderSimple(fp1, fp2); - - final fpSuper = adder.sum.floatingPointValue; - final fpStr = fpSuper.toDouble().toStringAsPrecision(7); - final valStr = out.toDouble().toStringAsPrecision(7); - expect(fpStr, valStr); - } - }); + final value = Random(513); -// if you name two tests the same they get run together -// RippleCarryAdder: cannot access inputs from outside -- super.a issue - test('FP: adder basic loop test - negative numbers', () { - final input = [(4.5, 3.75), (9.0, -3.75), (-9.0, 3.9375), (-3.9375, 9.0)]; + for (var i = 0; i < 500; i++) { + final fv1 = FloatingPointValue.random(value, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fv2 = FloatingPointValue.random(value, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); - for (final pair in input) { - final fp1 = FloatingPoint32() - ..put(FloatingPoint32Value.ofDouble(pair.$1).value); - final fp2 = FloatingPoint32() - ..put(FloatingPoint32Value.ofDouble(pair.$2).value); - final out = FloatingPoint32Value.ofDouble(pair.$1 + pair.$2); + fp1.put(fv1.value); + fp2.put(fv2.value); - final adder = FloatingPointAdderSimple(fp1, fp2); + final computed = adder.sum.floatingPointValue; - final fpSuper = adder.sum.floatingPointValue; - final fpStr = fpSuper.toDouble().toStringAsPrecision(7); - final valStr = out.toDouble().toStringAsPrecision(7); - expect(fpStr, valStr); + final 
expectedRound = FloatingPointValue.ofDouble( + fv1.toDouble() + fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + final expectedNoRound = FloatingPointValue.ofDoubleUnrounded( + fv1.toDouble() + fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + if ((computed != expectedNoRound) & (computed != expectedRound)) { + expect(computed, equals(expectedNoRound), + reason: '\t$fv1 (${fv1.toDouble()})\n' + '\t$fv2 (${fv2.toDouble()}) =\n' + '\t$computed (${computed.toDouble()}) computed\n' + '\t$expectedNoRound ' + '(${expectedNoRound.toDouble()}) expected'); + } } }); - test('FP: adder basic subnormal test', () { - final fp1 = FloatingPoint32() - ..put(FloatingPoint32Value.getFloatingPointConstant( - FloatingPointConstants.smallestPositiveNormal) - .value); - final fp2 = FloatingPoint32() - ..put(FloatingPoint32Value.getFloatingPointConstant( - FloatingPointConstants.smallestPositiveSubnormal) - .negate() - .value); - - final out = FloatingPoint32Value.ofDouble( - fp1.floatingPointValue.toDouble() + fp2.floatingPointValue.toDouble()); - final adder = FloatingPointAdderSimple(fp1, fp2); - - final fpSuper = adder.sum.floatingPointValue; - final fpStr = fpSuper.toDouble().toStringAsPrecision(7); - final valStr = out.toDouble().toStringAsPrecision(7); - expect(fpStr, valStr); - }); - - test('FP: tiny subnormal test', () { - const ew = 4; - const mw = 4; - final fp1 = FloatingPoint(exponentWidth: ew, mantissaWidth: mw) - ..put(FloatingPointValue.getFloatingPointConstant( - FloatingPointConstants.smallestPositiveNormal, ew, mw) - .value); - final fp2 = FloatingPoint(exponentWidth: ew, mantissaWidth: mw) - ..put(FloatingPointValue.getFloatingPointConstant( - FloatingPointConstants.smallestPositiveSubnormal, ew, mw) - .negate() - .value); - - final outDouble = - fp1.floatingPointValue.toDouble() + fp2.floatingPointValue.toDouble(); - final out = FloatingPointValue.ofDoubleUnrounded(outDouble, - exponentWidth: ew, 
mantissaWidth: mw); - final adder = FloatingPointAdderSimple(fp1, fp2); - - expect(adder.sum.floatingPointValue.compareTo(out), 0); - }); + test('FP: adder simple wide exponent random', () async { + const exponentWidth = 10; + const mantissaWidth = 2; + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); - test('FP: addernegative number requiring a carryOut', () { - const pair = (9.0, -3.75); - const ew = 3; - const mw = 5; - - final fp1 = FloatingPoint(exponentWidth: ew, mantissaWidth: mw) - ..put(FloatingPointValue.ofDouble(pair.$1, - exponentWidth: ew, mantissaWidth: mw) - .value); - final fp2 = FloatingPoint(exponentWidth: ew, mantissaWidth: mw) - ..put(FloatingPointValue.ofDouble(pair.$2, - exponentWidth: ew, mantissaWidth: mw) - .value); - - final out = FloatingPointValue.ofDouble(pair.$1 + pair.$2, - exponentWidth: ew, mantissaWidth: mw); final adder = FloatingPointAdderSimple(fp1, fp2); + await adder.build(); - expect(adder.sum.floatingPointValue.compareTo(out), 0); - }); - - test('FP: adder subnormal cancellation', () { - const ew = 4; - const mw = 4; - final fp1 = FloatingPoint(exponentWidth: ew, mantissaWidth: mw) - ..put(FloatingPointValue.getFloatingPointConstant( - FloatingPointConstants.smallestPositiveSubnormal, ew, mw) - .negate() - .value); - final fp2 = FloatingPoint(exponentWidth: ew, mantissaWidth: mw) - ..put(FloatingPointValue.getFloatingPointConstant( - FloatingPointConstants.smallestPositiveSubnormal, ew, mw) - .value); - - final out = fp2.floatingPointValue + fp1.floatingPointValue; - - final adder = FloatingPointAdderSimple(fp1, fp2); - expect(adder.sum.floatingPointValue.abs().compareTo(out), 0); - }); + final value = Random(513); - test('FP: adder adder basic loop adder test2', () { - final input = [(4.5, 3.75), (9.0, -3.75), (-9.0, 3.9375), (-3.9375, 9.0)]; + for (var i = 0; i < 500; i++) { + final fv1 = 
FloatingPointValue.random(value, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fv2 = FloatingPointValue.random(value, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); - for (final pair in input) { - final fp1 = FloatingPoint32() - ..put(FloatingPoint32Value.ofDouble(pair.$1).value); - final fp2 = FloatingPoint32() - ..put(FloatingPoint32Value.ofDouble(pair.$2).value); - final out = FloatingPoint32Value.ofDouble(pair.$1 + pair.$2); + fp1.put(fv1.value); + fp2.put(fv2.value); - final adder = FloatingPointAdderSimple(fp1, fp2); + final computed = adder.sum.floatingPointValue; - final fpSuper = adder.sum.floatingPointValue; - final fpStr = fpSuper.toDouble().toStringAsPrecision(7); - final valStr = out.toDouble().toStringAsPrecision(7); - expect(fpStr, valStr); + final expectedRound = FloatingPointValue.ofDouble( + fv1.toDouble() + fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + final expectedNoRound = FloatingPointValue.ofDoubleUnrounded( + fv1.toDouble() + fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + if ((computed != expectedNoRound) & (computed != expectedRound)) { + expect(computed, equals(expectedNoRound), + reason: '\t$fv1 (${fv1.toDouble()})\n' + '\t$fv2 (${fv2.toDouble()}) =\n' + '\t$computed (${computed.toDouble()}) computed\n' + '\t$expectedNoRound ' + '(${expectedNoRound.toDouble()}) expected'); + } } }); - test('FP: adder singleton', () { - const pair = (9.0, -3.75); - { - final fp1 = FloatingPoint32() - ..put(FloatingPoint32Value.ofDouble(pair.$1).value); - final fp2 = FloatingPoint32() - ..put(FloatingPoint32Value.ofDouble(pair.$2).value); - final out = FloatingPoint32Value.ofDouble(pair.$1 + pair.$2); + test('FP: simple adder general singleton test', () { + FloatingPointValue ofString(String s) => + FloatingPointValue.ofSpacedBinaryString(s); + + final fv1 = ofString('0 001 111111'); + final fv2 = ofString('1 010 000000'); + + final fp1 = 
FloatingPoint( + exponentWidth: fv1.exponent.width, mantissaWidth: fv1.mantissa.width); + final fp2 = FloatingPoint( + exponentWidth: fv2.exponent.width, mantissaWidth: fv2.mantissa.width); + fp1.put(fv1); + fp2.put(fv2); + final adder = FloatingPointAdderSimple(fp1, fp2); + final exponentWidth = adder.sum.exponent.width; + final mantissaWidth = adder.sum.mantissa.width; - final adder = FloatingPointAdderSimple(fp1, fp2); + final expectedDouble = + fp1.floatingPointValue.toDouble() + fp2.floatingPointValue.toDouble(); - final fpSuper = adder.sum.floatingPointValue; - final fpStr = fpSuper.toDouble().toStringAsPrecision(7); - final valStr = out.toDouble().toStringAsPrecision(7); - expect(fpStr, valStr); - } - }); - test('FP: adder random', () { - const eWidth = 5; - const mWidth = 20; - - final fa = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - final fb = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); - final fpv = FloatingPointValue.ofInts(0, 0, - exponentWidth: eWidth, mantissaWidth: mWidth); - final smallest = FloatingPointValue.getFloatingPointConstant( - FloatingPointConstants.smallestPositiveNormal, eWidth, mWidth); - fa.put(0); - fb.put(0); - final adder = FloatingPointAdderSimple(fa, fb); - final value = Random(513); - for (var i = 0; i < 50; i++) { - final fva = FloatingPointValue.random(value, - exponentWidth: eWidth, mantissaWidth: mWidth, normal: true); - final fvb = FloatingPointValue.random(value, - exponentWidth: eWidth, mantissaWidth: mWidth, normal: true); - fa.put(fva); - fb.put(fvb); - // fromDoubleIter does not round like '+' would - final expected = FloatingPointValue.ofDoubleUnrounded( - fva.toDouble() + fvb.toDouble(), - exponentWidth: fpv.exponent.width, - mantissaWidth: fpv.mantissa.width); - final computed = adder.sum.floatingPointValue; - final ulp = FloatingPointValue.ofInts( - max(expected.exponent.toInt(), 1), 1, - exponentWidth: eWidth, mantissaWidth: mWidth); - final diff = (expected.toDouble() - 
computed.toDouble()).abs(); - if (expected.isNormal()) { - expect(expected.isNaN(), equals(computed.isNaN())); - if (!expected.isNaN()) { - expect(diff, lessThan(ulp.toDouble() * smallest.toDouble())); - } - } - } + final expectedNoRound = FloatingPointValue.ofDoubleUnrounded(expectedDouble, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + expect(adder.sum.floatingPointValue, equals(expectedNoRound)); }); } diff --git a/test/arithmetic/floating_point/floating_point_adder_test.dart b/test/arithmetic/floating_point/floating_point_adder_test.dart new file mode 100644 index 000000000..2c5edfbc6 --- /dev/null +++ b/test/arithmetic/floating_point/floating_point_adder_test.dart @@ -0,0 +1,86 @@ +// Copyright (C) 2025 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause +// +// floating_point_adder_test.dart +// Basic tests for all floating-point adders. +// +// 2025 January 3 +// Author: Desmond A Kirkpatrick + FloatingPointValue.ofSpacedBinaryString(s); + final testCases = [ + (ofString('0 0001 0000'), ofString('0 0000 0000')), + (ofString('0 0111 0010'), ofString('0 1110 1111')), + (ofString('0 1010 0000'), ofString('0 1011 0100')), + (fv.infinity, fv.infinity), + (fv.negativeInfinity, fv.negativeInfinity), + (fv.infinity, fv.negativeInfinity), + (fv.infinity, fv.zero), + (fv.negativeInfinity, fv.zero), + (fv.infinity, fv.one), + (fv.zero, fv.one), + (fv.negativeInfinity, fv.one), + ]; + + for (final test in testCases) { + final fv1 = test.$1; + final fv2 = test.$2; + + final expected = FloatingPointValue.ofDoubleUnrounded( + fv1.toDouble() * fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + fp1.put(fv1.value); + fp2.put(fv2.value); + final multiply = FloatingPointMultiplierSimple(fp1, fp2); + final computed = multiply.product.floatingPointValue; + + expect(computed, equals(expected), + reason: '\t$fv1 (${fv1.toDouble()})\n' + '\t$fv2 (${fv2.toDouble()}) =\n' + '\t$computed (${computed.toDouble()}) computed\n' + 
'\t$expected (${expected.toDouble()}) expected'); + } + }); + + test('FP: simple multiplier exhaustive', () { + const exponentWidth = 3; + const mantissaWidth = 3; + + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(0); + fp2.put(0); + final multiply = FloatingPointMultiplierSimple(fp1, fp2); + + final expLimit = pow(2, exponentWidth) - 1; + final mantLimit = pow(2, mantissaWidth); + for (final subtract in [0, 1]) { + for (var e1 = 0; e1 < expLimit; e1++) { + for (var m1 = 0; m1 < mantLimit; m1++) { + final fv1 = FloatingPointValue.ofInts(e1, m1, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + for (var e2 = 0; e2 < expLimit; e2++) { + for (var m2 = 0; m2 < mantLimit; m2++) { + final fv2 = FloatingPointValue.ofInts(e2, m2, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth, + sign: subtract == 1); + + final expected = FloatingPointValue.ofDoubleUnrounded( + fv1.toDouble() * fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + fp1.put(fv1.value); + fp2.put(fv2.value); + final computed = multiply.product.floatingPointValue; + + expect(computed, equals(expected), + reason: '\t$fv1 (${fv1.toDouble()})\n' + '\t$fv2 (${fv2.toDouble()}) =\n' + '\t$computed (${computed.toDouble()}) computed\n' + '\t$expected (${expected.toDouble()}) expected'); + } + } + } + } + } + }); + + test('FP: simple multiplier full random', () async { + const exponentWidth = 4; + const mantissaWidth = 4; + + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(0); + fp2.put(0); + final multiplier = FloatingPointMultiplierSimple(fp1, fp2); + final value = Random(51); + + var cnt = 1000; + while (cnt > 0) { + final fv1 = FloatingPointValue.random(value, + 
exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fv2 = FloatingPointValue.random(value, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + fp1.put(fv1); + fp2.put(fv2); + + final expected = FloatingPointValue.ofDoubleUnrounded( + fv1.toDouble() * fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + final computed = multiplier.product.floatingPointValue; + + expect(computed, equals(expected), + reason: '\t fa=$fv1 (${fv1.toDouble()}) \n' + '\t* fb=$fv2 (${fv2.toDouble()}) = \n' + '\t prd=$computed (${computed.toDouble()})'); + cnt--; + } + }); + + test('FP: simple multiplier singleton', () { + const exponentWidth = 4; + const mantissaWidth = 4; + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fv1 = FloatingPointValue.ofBinaryStrings('1', '1100', '0111'); + + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fv2 = FloatingPointValue.ofBinaryStrings('0', '1100', '0000'); + + final doubleProduct = fv1.toDouble() * fv2.toDouble(); + final expected = FloatingPointValue.ofDoubleUnrounded(doubleProduct, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + + fp1.put(fv1.value); + fp2.put(fv2.value); + + final multiply = FloatingPointMultiplierSimple(fp1, fp2); + final computed = multiply.product.floatingPointValue; + + expect(computed, equals(expected), + reason: '\t fp1=$fv1 (${fv1.toDouble()}) \n' + '\t* fp2=$fv2 (${fv2.toDouble()}) = \n' + '\t prd=$computed (${computed.toDouble()})'); + }); + }); + test('FP: simple multiplier singleton pipelined', () async { + final clk = SimpleClockGenerator(10).clk; + + const exponentWidth = 4; + const mantissaWidth = 4; + final fp1 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final fv1 = FloatingPointValue.ofBinaryStrings('0', '0111', '0000'); + + final fp2 = FloatingPoint( + exponentWidth: exponentWidth, mantissaWidth: 
mantissaWidth); + final fv2 = FloatingPointValue.ofBinaryStrings('0', '1101', '0101'); + + final expected = FloatingPointValue.ofDoubleUnrounded( + fv1.toDouble() * fv2.toDouble(), + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + + fp1.put(fv1.value); + fp2.put(fv2.value); + + final multiply = FloatingPointMultiplierSimple(fp1, fp2, clk: clk); + + unawaited(Simulator.run()); + await clk.nextNegedge; + fp1.put(0); + fp2.put(0); + final computed = multiply.product.floatingPointValue; + + expect(computed, equals(expected), + reason: '\t fp1=$fv1 (${fv1.toDouble()}) \n' + '\t* fp2=$fv2 (${fv2.toDouble()}) = \n' + '\t prd=$computed (${computed.toDouble()})'); + await Simulator.endSimulation(); + }); +} diff --git a/test/arithmetic/floating_point/floating_point_value_test.dart b/test/arithmetic/floating_point/floating_point_value_test.dart index 36933ce81..d6af44825 100644 --- a/test/arithmetic/floating_point/floating_point_value_test.dart +++ b/test/arithmetic/floating_point/floating_point_value_test.dart @@ -17,27 +17,29 @@ import 'package:test/test.dart'; void main() { test('FPV: exhaustive round-trip', () { - const signStr = '0'; const exponentWidth = 4; const mantissaWidth = 4; - var exponent = LogicValue.zero.zeroExtend(exponentWidth); - var mantissa = LogicValue.zero.zeroExtend(mantissaWidth); - for (var k = 0; k < pow(2.0, exponentWidth).toInt() - 1; k++) { - final expStr = exponent.bitString; - for (var i = 0; i < pow(2.0, mantissaWidth).toInt(); i++) { - final mantStr = mantissa.bitString; - final fp = FloatingPointValue.ofBinaryStrings(signStr, expStr, mantStr); - final dbl = fp.toDouble(); - final fp2 = FloatingPointValue.ofDouble(dbl, - exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); - if (fp != fp2) { - if (fp.isNaN() != fp2.isNaN()) { - expect(fp, equals(fp2)); + for (final signStr in ['0', '1']) { + var exponent = LogicValue.zero.zeroExtend(exponentWidth); + var mantissa = LogicValue.zero.zeroExtend(mantissaWidth); + for 
(var k = 0; k < pow(2.0, exponentWidth).toInt() - 1; k++) { + final expStr = exponent.bitString; + for (var i = 0; i < pow(2.0, mantissaWidth).toInt(); i++) { + final mantStr = mantissa.bitString; + final fp = + FloatingPointValue.ofBinaryStrings(signStr, expStr, mantStr); + final dbl = fp.toDouble(); + final fp2 = FloatingPointValue.ofDouble(dbl, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + if (fp != fp2) { + if (fp.isNaN() != fp2.isNaN()) { + expect(fp, equals(fp2)); + } } + mantissa = mantissa + 1; } - mantissa = mantissa + 1; + exponent = exponent + 1; } - exponent = exponent + 1; } }); @@ -164,10 +166,7 @@ void main() { for (var c = 0; c < corners.length; c++) { final val = corners[c][1] as double; final str = corners[c][0] as String; - final fp = - FloatingPointValue.ofDouble(val, exponentWidth: 4, mantissaWidth: 3); - expect(val, fp.toDouble()); - expect(str, fp.toString()); + final fp8 = FloatingPoint8E4M3Value.ofDouble(val); expect(val, fp8.toDouble()); expect(str, fp8.toString()); @@ -313,4 +312,105 @@ void main() { fp2.compareTo(FloatingPointValue.ofSpacedBinaryString('0 0000 0000')), equals(0)); }); + test('FPV: infinity/NaN conversion tests', () async { + const exponentWidth = 4; + const mantissaWidth = 4; + final infinity = FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.infinity, exponentWidth, mantissaWidth); + final negativeInfinity = FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.negativeInfinity, exponentWidth, mantissaWidth); + + final tooLargeNumber = FloatingPointValue.ofDouble(257, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + + expect(infinity.toDouble(), equals(double.infinity)); + expect(negativeInfinity.toDouble(), equals(double.negativeInfinity)); + + expect(tooLargeNumber.toDouble(), equals(double.infinity)); + + expect(tooLargeNumber.negate().toDouble(), equals(double.negativeInfinity)); + + expect( + FloatingPointValue.getFloatingPointConstant( + 
FloatingPointConstants.nan, exponentWidth, mantissaWidth) + .toDouble() + .isNaN, + equals(true)); + }); + test('FPV: infinity/NaN unrounded conversion tests', () async { + const exponentWidth = 4; + const mantissaWidth = 4; + final infinity = FloatingPointValue.ofDoubleUnrounded(double.infinity, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + final negativeInfinity = FloatingPointValue.ofDoubleUnrounded( + double.negativeInfinity, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth); + final tooLargeNumber = FloatingPointValue.ofDoubleUnrounded(257, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + expect(tooLargeNumber.toDouble(), equals(double.infinity)); + expect(infinity.toDouble(), equals(double.infinity)); + expect(tooLargeNumber.negate().toDouble(), equals(double.negativeInfinity)); + expect(negativeInfinity.toDouble(), equals(double.negativeInfinity)); + }); + + test('FPV: infinity operation tests', () { + const exponentWidth = 4; + const mantissaWidth = 4; + final one = FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.one, exponentWidth, mantissaWidth); + final zero = FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.positiveZero, exponentWidth, mantissaWidth); + final infinity = FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.infinity, exponentWidth, mantissaWidth); + final negativeInfinity = FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.negativeInfinity, exponentWidth, mantissaWidth); + + for (final f in [infinity, negativeInfinity]) { + for (final s in [infinity, negativeInfinity]) { + // Addition + if (f == s) { + expect((f + s).toDouble(), equals(f.toDouble() + s.toDouble())); + } else { + expect((f + s).toDouble().isNaN, + equals((f.toDouble() + s.toDouble()).isNaN)); + } + // Subtraction + if (f != s) { + expect((f - s).toDouble(), equals(f.toDouble())); + } else { + expect((f - s).toDouble().isNaN, + 
equals((f.toDouble() - s.toDouble()).isNaN)); + } + // Multiplication + expect((f * s).toDouble(), equals(f.toDouble() * s.toDouble())); + // Division + expect((f / s).toDouble().isNaN, + equals((f.toDouble() / s.toDouble()).isNaN)); + } + } + for (final f in [infinity, negativeInfinity]) { + for (final s in [zero, one]) { + // Addition + expect((f + s).toDouble(), equals(f.toDouble() + s.toDouble())); + // Subtraction + expect((f - s).toDouble(), equals(f.toDouble())); + expect((s - f).toDouble(), equals(-f.toDouble())); + // Multiplication + if (s == zero) { + expect((f * s).toDouble().isNaN, + equals((f.toDouble() * s.toDouble()).isNaN)); + } else { + expect((f * s).toDouble(), equals(f.toDouble())); + } + // Division + if (s == zero) { + expect((f / s).toDouble().isNaN, + equals((f.toDouble() * s.toDouble()).isNaN)); + } else { + expect((f / s).toDouble(), equals(f.toDouble())); + } + } + } + }); } diff --git a/test/arithmetic/parallel_prefix_operations_test.dart b/test/arithmetic/parallel_prefix_operations_test.dart index b42365f90..b37247466 100644 --- a/test/arithmetic/parallel_prefix_operations_test.dart +++ b/test/arithmetic/parallel_prefix_operations_test.dart @@ -89,7 +89,7 @@ void testPriorityEncoder( // put/expect testing - for (var j = 0; j < (1 << n); ++j) { + for (var j = 1; j < (1 << n); ++j) { final golden = computePriorityEncoding(j); inp.put(j); final result = mod.out.value.toInt(); @@ -202,6 +202,22 @@ void main() { expect(ParallelPrefixPriorityEncoder(val).out.value.toInt(), equals(0)); expect(ParallelPrefixPriorityEncoder(val.reversed).out.value.toInt(), equals(3)); + + final valid = Logic(); + ParallelPrefixPriorityEncoder(val, valid: valid); + expect(valid.value.toBool(), equals(true)); + }); + test('priority encoder return beyond width if zero', () { + final val = Logic(width: 5); + // ignore: cascade_invocations + val.put(0); + expect(ParallelPrefixPriorityEncoder(val).out.value.toInt(), + equals(val.width + 1)); + 
expect(ParallelPrefixPriorityEncoder(val.reversed).out.value.toInt(), + equals(val.width + 1)); + final valid = Logic(); + ParallelPrefixPriorityEncoder(val, valid: valid); + expect(valid.value.toBool(), equals(false)); }); // Note: all ParallelPrefixAdders are tested in adder_test.dart