From 5c5043485bed1b7c50f4fceb32b67d0dc89c9309 Mon Sep 17 00:00:00 2001 From: "Desmond A. Kirkpatrick" Date: Fri, 20 Sep 2024 08:01:26 -0700 Subject: [PATCH 01/19] first floating_point components --- lib/src/arithmetic/arithmetic.dart | 1 + .../floating_point/floating_point.dart | 7 + .../floating_point/floating_point_adder.dart | 117 +++ .../floating_point/floating_point_logic.dart | 84 ++ .../floating_point_multiplier.dart | 98 +++ .../floating_point/floating_point_value.dart | 761 ++++++++++++++++++ .../floating_point_adder_test.dart | 280 +++++++ .../floating_point_multiplier_test.dart | 142 ++++ .../floating_point_value_test.dart | 211 +++++ 9 files changed, 1701 insertions(+) create mode 100644 lib/src/arithmetic/floating_point/floating_point.dart create mode 100644 lib/src/arithmetic/floating_point/floating_point_adder.dart create mode 100644 lib/src/arithmetic/floating_point/floating_point_logic.dart create mode 100644 lib/src/arithmetic/floating_point/floating_point_multiplier.dart create mode 100644 lib/src/arithmetic/floating_point/floating_point_value.dart create mode 100644 test/arithmetic/floating_point/floating_point_adder_test.dart create mode 100644 test/arithmetic/floating_point/floating_point_multiplier_test.dart create mode 100644 test/arithmetic/floating_point/floating_point_value_test.dart diff --git a/lib/src/arithmetic/arithmetic.dart b/lib/src/arithmetic/arithmetic.dart index bf52d17b1..549d843ec 100644 --- a/lib/src/arithmetic/arithmetic.dart +++ b/lib/src/arithmetic/arithmetic.dart @@ -4,6 +4,7 @@ export 'adder.dart'; export 'carry_save_mutiplier.dart'; export 'divider.dart'; +export 'floating_point/floating_point.dart'; export 'multiplier.dart'; export 'multiplier_lib.dart'; export 'ones_complement_adder.dart'; diff --git a/lib/src/arithmetic/floating_point/floating_point.dart b/lib/src/arithmetic/floating_point/floating_point.dart new file mode 100644 index 000000000..ec65e4045 --- /dev/null +++ 
b/lib/src/arithmetic/floating_point/floating_point.dart @@ -0,0 +1,7 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause + +export 'floating_point_adder.dart'; +export 'floating_point_logic.dart'; +export 'floating_point_multiplier.dart'; +export 'floating_point_value.dart'; diff --git a/lib/src/arithmetic/floating_point/floating_point_adder.dart b/lib/src/arithmetic/floating_point/floating_point_adder.dart new file mode 100644 index 000000000..f7a760c2b --- /dev/null +++ b/lib/src/arithmetic/floating_point/floating_point_adder.dart @@ -0,0 +1,117 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause +// +// floating_point_test.dart +// Tests of Floating Point stuff +// +// 2024 August 30 +// Author: Desmond A Kirkpatrick + ( + toSwap.$1.clone()..gets(mux(swap, toSwap.$2, toSwap.$1)), + toSwap.$2.clone()..gets(mux(swap, toSwap.$1, toSwap.$2)) + ); + + /// Add two floating point numbers [a] and [b], returning result in [sum] + FloatingPointAdder(FloatingPoint a, FloatingPoint b, + ParallelPrefix Function(List, Logic Function(Logic, Logic)) ppGen, + {super.name}) + : exponentWidth = a.exponent.width, + mantissaWidth = a.mantissa.width { + if (b.exponent.width != exponentWidth || + b.mantissa.width != mantissaWidth) { + throw RohdHclException('FloatingPoint widths must match'); + } + a = a.clone()..gets(addInput('a', a, width: a.width)); + b = b.clone()..gets(addInput('b', b, width: b.width)); + addOutput('sum', width: _sum.width) <= _sum; + + // Ensure that the larger number is wired as 'a' + final doSwap = a.exponent.lt(b.exponent) | + (a.exponent.eq(b.exponent) & a.mantissa.lt(b.mantissa)) | + ((a.exponent.eq(b.exponent) & a.mantissa.eq(b.mantissa)) & b.sign); + + (a, b) = _swap(doSwap, (a, b)); + + final aExp = + a.exponent + mux(a.isNormal(), a.zeroExponent(), a.oneExponent()); + final bExp = + b.exponent + mux(b.isNormal(), b.zeroExponent(), b.oneExponent()); + + // Align and add mantissas +// 
TODO(desmonddak): GRS system for FP rounding: https://i.sstatic.net/n1izR.png + // + final expDiff = aExp - bExp; + // print('${expDiff.value.toInt()} exponent diff'); + final adder = SignMagnitudeAdder( + a.sign, + [a.isNormal(), a.mantissa].swizzle(), + b.sign, + [b.isNormal(), b.mantissa].swizzle() >>> expDiff, + (a, b) => ParallelPrefixAdder(a, b, ppGen: ppGen)); + + final sum = adder.sum.slice(adder.sum.width - 2, 0); + // TODO(desmonddak): what happens if sum is zero -- should return width + // TODO(desmonddak): should trim search to what can fit in exponentWidth! + final leadOneE = + ParallelPrefixPriorityEncoder(sum.reversed, ppGen: ppGen).out; + final leadOne = leadOneE.zeroExtend(exponentWidth); + + // print('leadOneE=${leadOneE.value.toInt()}'); + + // print('adding ${a.mantissa.value.bitString} and ' + // ' ${b.mantissa.value.bitString}' + // ' = ${sum.value.bitString} ${leadOne.value.bitString} ' + // '${a.exponent.value.bitString}'); + + // Assemble the output FloatingPoint + _sum.sign <= adder.sign; + Combinational([ + If.block([ + Iff(adder.sum[-1] & a.sign.eq(b.sign), [ + _sum.mantissa < (sum >> 1).slice(mantissaWidth - 1, 0), + _sum.exponent < a.exponent + 1 + ]), + ElseIf(a.exponent.gt(leadOne) & sum.or(), [ + _sum.mantissa < (sum << leadOne).slice(mantissaWidth - 1, 0), + _sum.exponent < a.exponent - leadOne + ]), + ElseIf(leadOne.eq(0) & sum.or(), [ + _sum.mantissa < (sum << leadOne).slice(mantissaWidth - 1, 0), + _sum.exponent < a.exponent - leadOne + 1 + ]), + Else([ + // subnormal result + _sum.mantissa < sum.slice(mantissaWidth - 1, 0), + _sum.exponent < _sum.zeroExponent() + ]) + ]) + ]); + // print('final sum: ${_sum.value.bitString}'); + } +} diff --git a/lib/src/arithmetic/floating_point/floating_point_logic.dart b/lib/src/arithmetic/floating_point/floating_point_logic.dart new file mode 100644 index 000000000..4e885651b --- /dev/null +++ b/lib/src/arithmetic/floating_point/floating_point_logic.dart @@ -0,0 +1,84 @@ +// Copyright (C) 
2024 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause +// +// floating_point_logic.dart +// Implementation of Floating Point objects +// +// 2024 April 1 +// Authors: +// Max Korbel +// Desmond A Kirkpatrick FloatingPoint( + exponentWidth: exponent.width, + mantissaWidth: mantissa.width, + ); + + /// Return the [FloatingPointValue] + FloatingPointValue get floatingPointValue => FloatingPointValue( + sign: sign.value, exponent: exponent.value, mantissa: mantissa.value); + + /// Return a Logic true if this FloatingPoint contains a normal number, + /// defined as having mantissa in the range [1,2) + Logic isNormal() => exponent.neq(LogicValue.zero.zeroExtend(exponent.width)); + + /// Return the zero exponent representation for this type of FloatingPoint + Logic zeroExponent() => Const(LogicValue.zero).zeroExtend(exponent.width); + + /// Return the one exponent representation for this type of FloatingPoint + Logic oneExponent() => Const(LogicValue.one).zeroExtend(exponent.width); + + @override + void put(dynamic val, {bool fill = false}) { + if (val is FloatingPointValue) { + put(val.value); + } else { + super.put(val, fill: fill); + } + } +} + +/// Single floating point representation +class FloatingPoint32 extends FloatingPoint { + /// Construct a 32-bit (single-precision) floating point number + FloatingPoint32() + : super( + exponentWidth: FloatingPoint32Value.exponentWidth, + mantissaWidth: FloatingPoint32Value.mantissaWidth); +} + +/// Double floating point representation +class FloatingPoint64 extends FloatingPoint { + /// Construct a 64-bit (double-precision) floating point number + FloatingPoint64() + : super( + exponentWidth: FloatingPoint64Value.exponentWidth, + mantissaWidth: FloatingPoint64Value.mantissaWidth); +} diff --git a/lib/src/arithmetic/floating_point/floating_point_multiplier.dart b/lib/src/arithmetic/floating_point/floating_point_multiplier.dart new file mode 100644 index 000000000..a220bda73 --- /dev/null +++ 
b/lib/src/arithmetic/floating_point/floating_point_multiplier.dart @@ -0,0 +1,98 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause +// +// floating_point.dart +// Implementation of Floating Point stuff +// +// 2024 August 30 +// Author: Desmond A Kirkpatrick , Logic Function(Logic, Logic)) ppTree, + {super.name}) + : exponentWidth = a.exponent.width, + mantissaWidth = a.mantissa.width { + if (b.exponent.width != exponentWidth || + b.mantissa.width != mantissaWidth) { + throw RohdHclException('FloatingPoint widths must match'); + } + a = a.clone()..gets(addInput('a', a, width: a.width)); + b = b.clone()..gets(addInput('b', b, width: b.width)); + addOutput('out', width: _out.width) <= _out; + final aExp = + a.exponent + mux(a.isNormal(), a.zeroExponent(), a.oneExponent()); + final bExp = + b.exponent + mux(b.isNormal(), b.zeroExponent(), b.oneExponent()); + + final aMantissa = [a.isNormal(), a.mantissa].swizzle(); + final bMantissa = [b.isNormal(), b.mantissa].swizzle(); + + // print('am = ${bitString(aMantissa.value)}'); + // print('bm = ${bitString(bMantissa.value)}'); + + final pp = PartialProductGeneratorCompactRectSignExtension( + aMantissa, bMantissa, RadixEncoder(radix), + signed: false); + final compressor = ColumnCompressor(pp)..compress(); + final r0 = compressor.extractRow(0); + final r1 = compressor.extractRow(1); + final adder = ParallelPrefixAdder(r0, r1, ppGen: ppTree); + + final rawMantissa = adder.sum.slice((exponentWidth + 1) * 2 - 1, 0); + + // Find the leading '1' in the mantissa + final pos = + ParallelPrefixPriorityEncoder(rawMantissa.reversed, ppGen: ppTree) + .out + .zeroExtend(exponentWidth); + + final expAdd = + aExp - FloatingPointValue.computeBias(aExp.width) + bExp - pos + 1; + + // stdout.write('aExp=${aExp.value}, bExp=${bExp.value}, ' + // 'pos=${pos.value}, bias=${FloatingPointValue.bias(aExp.width)} ' + // 'expAdd=${expAdd.value}\n'); + + final mantissa = rawMantissa << (pos + 1); + final 
normMantissa = mantissa.reversed.slice(mantissaWidth - 1, 0).reversed; + + // stdout + // ..write('aMant: ${bitString(aMantissa.value)}\n') + // ..write('bMant: ${bitString(bMantissa.value)}\n') + // ..write('out: ${bitString(adder.out.value)}\n') + // ..write('lenOut: ${adder.out.width} ') + // ..write('rawMantissa: ${bitString(rawMantissa.value)} ') + // ..write('normMantissa: ${bitString(normMantissa.value)}') + // ..write('\n') + // ..write( + // 'e=${bitString(expAdd.value)} m=${bitString(normMantissa.value)}\n'); + + _out.sign <= a.sign ^ b.sign; + _out.exponent <= expAdd; + // _out.exponent <= Const(8, width: exponentWidth); + _out.mantissa <= normMantissa; + } +} diff --git a/lib/src/arithmetic/floating_point/floating_point_value.dart b/lib/src/arithmetic/floating_point/floating_point_value.dart new file mode 100644 index 000000000..f235f5371 --- /dev/null +++ b/lib/src/arithmetic/floating_point/floating_point_value.dart @@ -0,0 +1,761 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause +// +// floating_point.dart +// Implementation of Floating Point stuff +// +// 2024 April 1 +// Authors: +// Max Korbel +// Desmond A Kirkpatrick { + /// The full floating point value bit storage + final LogicValue value; + + /// The sign of the value: 1 means a negative value + final LogicValue sign; + + /// The exponent of the floating point: this is biased about a midpoint for + /// positive and negative exponents + final LogicValue exponent; + + /// The mantissa of the floating point + final LogicValue mantissa; + + /// Return the exponent value representing the true zero exponent 2^0 = 1 + /// often termed [computeBias] or the offset of the exponent + static int computeBias(int exponentWidth) => + pow(2, exponentWidth - 1).toInt() - 1; + + /// Return the minimum exponent value + static int computeMinExponent(int exponentWidth) => + -pow(2, exponentWidth - 1).toInt() + 2; + + /// Return the maximum exponent value + static int 
computeMaxExponent(int exponentWidth) => + computeBias(exponentWidth); + + /// Factory (static) constructor of a [FloatingPointValue] from + /// sign, mantissa and exponent + factory FloatingPointValue( + {required LogicValue sign, + required LogicValue exponent, + required LogicValue mantissa}) { + if (exponent.width == FloatingPoint32Value.exponentWidth && + mantissa.width == FloatingPoint32Value.mantissaWidth) { + return FloatingPoint32Value( + sign: sign, mantissa: mantissa, exponent: exponent); + } else if (exponent.width == FloatingPoint64Value._exponentWidth && + mantissa.width == FloatingPoint64Value._mantissaWidth) { + return FloatingPoint64Value( + sign: sign, mantissa: mantissa, exponent: exponent); + } else { + return FloatingPointValue._( + sign: sign, mantissa: mantissa, exponent: exponent); + } + } + + /// [FloatingPointValue] constructor from a binary string representation of + /// individual bitfields + factory FloatingPointValue.ofBinaryStrings( + String sign, String exponent, String mantissa) { + if (sign.length != 1) { + throw RohdHclException('Sign string must be of length 1'); + } + + return FloatingPointValue( + sign: LogicValue.of(sign), + exponent: LogicValue.of(exponent), + mantissa: LogicValue.of(mantissa)); + } + + /// [FloatingPointValue] constructor from a single binary string representing + /// space-separated bitfields + factory FloatingPointValue.ofSeparatedBinaryStrings(String fp) { + final s = fp.split(' '); + if (s.length != 3) { + throw RohdHclException('FloatingPointValue requires three strings ' + 'to initialize'); + } + return FloatingPointValue.ofBinaryStrings(s[0], s[1], s[2]); + } + + /// [FloatingPointValue] constructor from a radix-encoded string + /// representation and the size of the exponent and mantissa + factory FloatingPointValue.ofString( + String fp, int exponentWidth, int mantissaWidth, + {int radix = 2}) { + final binaryFp = LogicValue.ofBigInt( + BigInt.parse(fp, radix: radix), exponentWidth + mantissaWidth + 
1) + .bitString; + + final (sign, exponent, mantissa) = ( + binaryFp.substring(0, 1), + binaryFp.substring(1, 1 + exponentWidth), + binaryFp.substring(1 + exponentWidth, 1 + exponentWidth + mantissaWidth) + ); + return FloatingPointValue.ofBinaryStrings(sign, exponent, mantissa); + } + + /// [FloatingPointValue] constructor from a set of [BigInt]s of the binary + /// representation and the size of the exponent and mantissa + factory FloatingPointValue.ofBigInts(BigInt exponent, BigInt mantissa, + {int exponentWidth = 0, int mantissaWidth = 0, bool sign = false}) { + final (signLv, exponentLv, mantissaLv) = ( + LogicValue.ofBigInt(sign ? BigInt.one : BigInt.zero, 1), + LogicValue.ofBigInt(exponent, exponentWidth), + LogicValue.ofBigInt(mantissa, mantissaWidth) + ); + + return FloatingPointValue( + sign: signLv, exponent: exponentLv, mantissa: mantissaLv); + } + + /// [FloatingPointValue] constructor from a set of [int]s of the binary + /// representation and the size of the exponent and mantissa + factory FloatingPointValue.ofInts(int exponent, int mantissa, + {int exponentWidth = 0, int mantissaWidth = 0, bool sign = false}) { + final (signLv, exponentLv, mantissaLv) = ( + LogicValue.ofBigInt(sign ? 
BigInt.one : BigInt.zero, 1), + LogicValue.ofBigInt(BigInt.from(exponent), exponentWidth), + LogicValue.ofBigInt(BigInt.from(mantissa), mantissaWidth) + ); + + return FloatingPointValue( + sign: signLv, exponent: exponentLv, mantissa: mantissaLv); + } + + FloatingPointValue._( + {required this.sign, required this.exponent, required this.mantissa}) + : value = [sign, exponent, mantissa].swizzle() { + if (sign.width != 1) { + throw RohdHclException('FloatingPointValue: sign width must be 1'); + } + } + + /// Construct a [FloatingPointValue] from a Logic word + factory FloatingPointValue.fromLogic( + int exponentWidth, int mantissaWidth, LogicValue val) { + final sign = (val[-1] == LogicValue.one); + final exponent = + val.slice(exponentWidth + mantissaWidth - 1, mantissaWidth).toBigInt(); + final mantissa = val.slice(mantissaWidth - 1, 0).toBigInt(); + final (signLv, exponentLv, mantissaLv) = ( + LogicValue.ofBigInt(sign ? BigInt.one : BigInt.zero, 1), + LogicValue.ofBigInt(exponent, exponentWidth), + LogicValue.ofBigInt(mantissa, mantissaWidth) + ); + return FloatingPointValue( + sign: signLv, exponent: exponentLv, mantissa: mantissaLv); + } + + /// Return the [FloatingPointValue] representing the constant specified + factory FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants constantFloatingPoint, + int exponentWidth, + int mantissaWidth) { + switch (constantFloatingPoint) { + /// Negative infinity: sign set, exponent all 1s, mantissa 0 + case FloatingPointConstants.negativeInfinity: + return FloatingPointValue.ofBinaryStrings( + '1', '1' * exponentWidth, '0' * mantissaWidth); + + /// -0.0 + case FloatingPointConstants.negativeZero: + return FloatingPointValue.ofBinaryStrings( + '1', '0' * exponentWidth, '0' * mantissaWidth); + + /// 0.0 + case FloatingPointConstants.positiveZero: + return FloatingPointValue.ofBinaryStrings( + '0', '0' * exponentWidth, '0' * mantissaWidth); + + /// Smallest positive subnormal, zero exponent, LSB set in mantissa + case 
FloatingPointConstants.smallestPositiveSubnormal: + return FloatingPointValue.ofBinaryStrings( + '0', '0' * exponentWidth, '${'0' * (mantissaWidth - 1)}1'); + + /// Largest possible subnormal, most negative exponent, mantissa all 1s + case FloatingPointConstants.largestPositiveSubnormal: + return FloatingPointValue.ofBinaryStrings( + '0', '0' * exponentWidth, '1' * mantissaWidth); + + /// Smallest positive normal number, most negative exponent, mantissa 0 + case FloatingPointConstants.smallestPositiveNormal: + return FloatingPointValue.ofBinaryStrings( + '0', '${'0' * (exponentWidth - 1)}1', '0' * mantissaWidth); + + /// Largest number smaller than one + case FloatingPointConstants.largestLessThanOne: + return FloatingPointValue.ofBinaryStrings( + '0', '0${'1' * (exponentWidth - 2)}0', '1' * mantissaWidth); + + /// The number '1.0' + case FloatingPointConstants.one: + return FloatingPointValue.ofBinaryStrings( + '0', '0${'1' * (exponentWidth - 1)}', '0' * mantissaWidth); + + /// Smallest number greater than one (exponent of '1.0', mantissa LSB set) + case FloatingPointConstants.smallestLargerThanOne: + return FloatingPointValue.ofBinaryStrings('0', + '0${'1' * (exponentWidth - 1)}', '${'0' * (mantissaWidth - 1)}1'); + + /// Largest positive number, most positive exponent, full mantissa + case FloatingPointConstants.largestNormal: + return FloatingPointValue.ofBinaryStrings( + '0', '${'1' * (exponentWidth - 1)}0', '1' * mantissaWidth); + + /// Positive infinity: exponent all 1s, mantissa 0 + case FloatingPointConstants.infinity: + return FloatingPointValue.ofBinaryStrings( + '0', '1' * exponentWidth, '0' * mantissaWidth); + } + } + + /// Convert from double using its native binary representation + factory FloatingPointValue.fromDouble(double inDouble, + {required int exponentWidth, + required int mantissaWidth, + FloatingPointRoundingMode roundingMode = + FloatingPointRoundingMode.roundNearestEven}) { + if ((exponentWidth == 8) && (mantissaWidth == 23)) { + return 
FloatingPoint32Value.fromDouble(inDouble); + } else if ((exponentWidth 
== 11) && (mantissaWidth == 52)) { + return FloatingPoint64Value.fromDouble(inDouble); + } + + final fp64 = FloatingPoint64Value.fromDouble(inDouble); + final exponent64 = fp64.exponent; + + var expVal = (exponent64.toInt() - fp64.bias()) + + FloatingPointValue.computeBias(exponentWidth); + // Handle subnormal + final mantissa64 = [ + if (expVal < 0) + ([LogicValue.one, fp64.mantissa].swizzle() >>> -expVal).slice(52, 1) + else + fp64.mantissa + ].first; + var mantissa = mantissa64.slice(51, 51 - mantissaWidth + 1); + + if (roundingMode == FloatingPointRoundingMode.roundNearestEven) { + final sticky = mantissa64.slice(51 - (mantissaWidth + 2), 0).or(); + final roundPos = 51 - (mantissaWidth + 2) + 1; + final round = mantissa64[roundPos]; + final guard = mantissa64[roundPos + 1]; + + // RNE Rounding + if (guard == LogicValue.one) { + if ((round == LogicValue.one) | + (sticky == LogicValue.one) | + (mantissa[0] == LogicValue.one)) { + mantissa += 1; + if (mantissa == LogicValue.zero.zeroExtend(mantissa.width)) { + expVal += 1; + } + } + } + } + + final exponent = + LogicValue.ofBigInt(BigInt.from(max(expVal, 0)), exponentWidth); + + return FloatingPointValue( + sign: fp64.sign, exponent: exponent, mantissa: mantissa); + } + + /// Convert a floating point number into a [FloatingPointValue] + /// representation. This form performs NO ROUNDING. 
+ factory FloatingPointValue.fromDoubleIter(double inDouble, + {required int exponentWidth, required int mantissaWidth}) { + if ((exponentWidth == 8) && (mantissaWidth == 23)) { + return FloatingPoint32Value.fromDouble(inDouble); + } else if ((exponentWidth == 11) && (mantissaWidth == 52)) { + return FloatingPoint64Value.fromDouble(inDouble); + } + + var doubleVal = inDouble; + if (inDouble.isNaN) { + return FloatingPointValue( + exponent: + LogicValue.ofInt(pow(2, exponentWidth).toInt() - 1, exponentWidth), + mantissa: LogicValue.zero, + sign: LogicValue.zero, + ); + } + LogicValue sign; + if (inDouble < 0.0) { + doubleVal = -doubleVal; + sign = LogicValue.one; + } else { + sign = LogicValue.zero; + } + + // If we are dealing with a really small number we need to scale it up + var scaleToWhole = (doubleVal != 0) ? (-log(doubleVal) / log(2)).ceil() : 0; + + if (doubleVal < 1.0) { + var myCnt = 0; + var myVal = doubleVal; + while (myVal % 1 != 0.0) { + myVal = myVal * 2.0; + myCnt++; + } + if (myCnt < scaleToWhole) { + scaleToWhole = myCnt; + } + } + + // Scale it up to go beyond the mantissa and include the GRS bits + final scale = mantissaWidth + scaleToWhole; + var s = scale; + + var sVal = doubleVal; + if (s > 0) { + while (s > 0) { + sVal *= 2.0; + s = s - 1; + } + } else { + sVal = doubleVal * pow(2.0, scale); + } + + final scaledValue = BigInt.from(sVal); + final fullLength = scaledValue.bitLength; + + var fullValue = LogicValue.ofBigInt(scaledValue, fullLength); + var e = (fullLength > 0) + ? 
fullLength - mantissaWidth - scaleToWhole + : FloatingPointValue.computeMinExponent(exponentWidth); + + if (e <= -FloatingPointValue.computeBias(exponentWidth)) { + fullValue = fullValue >>> + (scaleToWhole - FloatingPointValue.computeBias(exponentWidth)); + e = -FloatingPointValue.computeBias(exponentWidth); + } else { + // Could be just one away from subnormal + e -= 1; + if (e > -FloatingPointValue.computeBias(exponentWidth)) { + fullValue = fullValue << 1; // Chop the first '1' + } + } + // We reverse so that we fit into a shorter BigInt, we keep the MSB. + // The conversion fills leftward. + // We reverse again after conversion. + final exponent = LogicValue.ofInt( + e + FloatingPointValue.computeBias(exponentWidth), exponentWidth); + final mantissa = + LogicValue.ofBigInt(fullValue.reversed.toBigInt(), mantissaWidth) + .reversed; + + return FloatingPointValue( + exponent: exponent, + mantissa: mantissa, + sign: sign, + ); + } + + @override + int get hashCode => sign.hashCode ^ exponent.hashCode ^ mantissa.hashCode; + + /// Floating point comparison to implement Comparable<> + @override + int compareTo(Object other) { + if (other is! FloatingPointValue) { + throw Exception('Input must be of type FloatingPointValue '); + } + if ((exponent.width != other.exponent.width) | + (mantissa.width != other.mantissa.width)) { + throw Exception('FloatingPointValue widths must match for comparison'); + } + final signCompare = sign.compareTo(other.sign); + if (signCompare != 0) { + return signCompare; + } else { + final expCompare = exponent.compareTo(other.exponent); + if (expCompare != 0) { + return expCompare; + } else { + return mantissa.compareTo(other.mantissa); + } + } + } + + /// Return the bias of this FP format + int bias() => FloatingPointValue.computeBias(exponent.width); + + @override + bool operator ==(Object other) { + if (other is! 
FloatingPointValue) { + return false; + } + + if ((exponent.width != other.exponent.width) | + (mantissa.width != other.mantissa.width)) { + return false; + } + + return (sign == other.sign) & + (exponent == other.exponent) & + (mantissa == other.mantissa); + } + + /// Return true if the represented floating point number is considered + /// NaN or 'Not a Number' due to overflow + // TODO(desmonddak): figure out the difference with Infinity + bool isNaN() => + exponent.toInt() == + computeMaxExponent(exponent.width) + computeBias(exponent.width) + 1; + + /// Return the value of the floating point number in a Dart [double] type. + double toDouble() { + var doubleVal = double.nan; + if (value.isValid) { + if (exponent.toInt() == 0) { + doubleVal = (sign.toBool() ? -1.0 : 1.0) * + pow(2.0, computeMinExponent(exponent.width)) * + mantissa.toBigInt().toDouble() / + pow(2.0, mantissa.width); + } else if (!isNaN()) { + doubleVal = (sign.toBool() ? -1.0 : 1.0) * + (1.0 + mantissa.toBigInt().toDouble() / pow(2.0, mantissa.width)) * + pow( + 2.0, + exponent.toInt().toSigned(exponent.width) - + computeBias(exponent.width)); + doubleVal = (sign.toBool() ? -1.0 : 1.0) * + (1.0 + mantissa.toBigInt().toDouble() / pow(2.0, mantissa.width)) * + pow(2.0, exponent.toInt() - computeBias(exponent.width)); + } + } + return doubleVal; + } + + /// Return true if this FloatingPointValue contains a normal number, + /// defined as having mantissa in the range [1,2) + bool isNormal() => exponent != LogicValue.zero.zeroExtend(exponent.width); + + @override + String toString() => '${sign.toString(includeWidth: false)}' + ' ${exponent.toString(includeWidth: false)}' + ' ${mantissa.toString(includeWidth: false)}'; + + // TODO(desmonddak): what about floating point representations >> 64 bits? 
+ FloatingPointValue _performOp( + FloatingPointValue other, double Function(double a, double b) op) { + // make sure multiplicand has the same sizes as this + if (mantissa.width != other.mantissa.width || + exponent.width != other.exponent.width) { + throw RohdHclException('FloatingPointValue: ' + 'multiplicand must have the same mantissa and exponent widths'); + } + + return FloatingPointValue.fromDouble(op(toDouble(), other.toDouble()), + mantissaWidth: mantissa.width, exponentWidth: exponent.width); + } + + /// Multiply operation for [FloatingPointValue] + FloatingPointValue operator *(FloatingPointValue multiplicand) => + _performOp(multiplicand, (a, b) => a * b); + + /// Addition operation for [FloatingPointValue] + FloatingPointValue operator +(FloatingPointValue addend) => + _performOp(addend, (a, b) => a + b); + + /// Divide operation for [FloatingPointValue] + FloatingPointValue operator /(FloatingPointValue divisor) => + _performOp(divisor, (a, b) => a / b); + + /// Subtract operation for [FloatingPointValue] + FloatingPointValue operator -(FloatingPointValue subend) => + _performOp(subend, (a, b) => a - b); + + /// Negate operation for [FloatingPointValue] + FloatingPointValue negate() => FloatingPointValue( + sign: sign.isZero ? 
LogicValue.one : LogicValue.zero, + exponent: exponent, + mantissa: mantissa); + + /// Absolute value operation for [FloatingPointValue] + FloatingPointValue abs() => FloatingPointValue( + sign: LogicValue.zero, exponent: exponent, mantissa: mantissa); +} + +/// A representation of a single precision floating point value +class FloatingPoint32Value extends FloatingPointValue { + /// The exponent width + static const int exponentWidth = 8; + + /// The mantissa width + static const int mantissaWidth = 23; + + /// Constructor for a single precision floating point value + FloatingPoint32Value( + {required super.sign, required super.exponent, required super.mantissa}) + : super._() { + // throw exceptions if widths don't match expectations + if (exponent.width != exponentWidth) { + throw RohdHclException( + 'FloatingPoint32Value: exponent width must be $exponentWidth'); + } + if (mantissa.width != mantissaWidth) { + throw RohdHclException( + 'FloatingPoint32Value: mantissa width must be $mantissaWidth'); + } + } + + /// Return the [FloatingPoint32Value] representing the constant specified + factory FloatingPoint32Value.getFloatingPointConstant( + FloatingPointConstants constantFloatingPoint) => + FloatingPointValue.getFloatingPointConstant( + constantFloatingPoint, exponentWidth, mantissaWidth) + as FloatingPoint32Value; + + /// [FloatingPoint32Value] constructor from string representation of + /// individual bitfields + factory FloatingPoint32Value.ofStrings( + String sign, String exponent, String mantissa) => + FloatingPoint32Value( + sign: LogicValue.of(sign), + exponent: LogicValue.of(exponent), + mantissa: LogicValue.of(mantissa)); + + /// [FloatingPoint32Value] constructor from a single string representing + /// space-separated bitfields + factory FloatingPoint32Value.ofString(String fp) { + final s = fp.split(' '); + assert(s.length == 3, 'Wrong FloatingPointValue string length ${s.length}'); + return FloatingPoint32Value.ofStrings(s[0], s[1], s[2]); + } + + /// 
[FloatingPoint32Value] constructor from a set of [BigInt]s of the binary + /// representation + factory FloatingPoint32Value.ofBigInts(BigInt exponent, BigInt mantissa, + {bool sign = false}) { + final (signLv, exponentLv, mantissaLv) = ( + LogicValue.ofBigInt(sign ? BigInt.one : BigInt.zero, 1), + LogicValue.ofBigInt(exponent, exponentWidth), + LogicValue.ofBigInt(mantissa, mantissaWidth) + ); + + return FloatingPoint32Value( + sign: signLv, exponent: exponentLv, mantissa: mantissaLv); + } + + /// [FloatingPoint32Value] constructor from a set of [int]s of the binary + /// representation + factory FloatingPoint32Value.ofInts(int exponent, int mantissa, + {bool sign = false}) { + final (signLv, exponentLv, mantissaLv) = ( + LogicValue.ofBigInt(sign ? BigInt.one : BigInt.zero, 1), + LogicValue.ofBigInt(BigInt.from(exponent), exponentWidth), + LogicValue.ofBigInt(BigInt.from(mantissa), mantissaWidth) + ); + + return FloatingPoint32Value( + sign: signLv, exponent: exponentLv, mantissa: mantissaLv); + } + + /// Numeric conversion of a [FloatingPoint32Value] from a host double + factory FloatingPoint32Value.fromDouble(double inDouble) { + final byteData = ByteData(4) + ..setFloat32(0, inDouble) + ..buffer.asUint8List(); + final bytes = byteData.buffer.asUint8List(); + final lv = bytes.map((b) => LogicValue.ofInt(b, 32)); + + final accum = lv.reduce((accum, v) => (accum << 8) | v); + + final sign = accum[-1]; + final exponent = + accum.slice(exponentWidth + mantissaWidth - 1, mantissaWidth); + final mantissa = accum.slice(mantissaWidth - 1, 0); + + return FloatingPoint32Value( + sign: sign, exponent: exponent, mantissa: mantissa); + } + + /// Construct a [FloatingPoint32Value] from a Logic word + factory FloatingPoint32Value.fromLogic(LogicValue val) { + final sign = (val[-1] == LogicValue.one); + final exponent = + val.slice(exponentWidth + mantissaWidth - 1, mantissaWidth); + final mantissa = val.slice(mantissaWidth - 1, 0); + final (signLv, exponentLv, mantissaLv) = ( 
+ LogicValue.ofBigInt(sign ? BigInt.one : BigInt.zero, 1), + exponent, + mantissa + ); + return FloatingPoint32Value( + sign: signLv, exponent: exponentLv, mantissa: mantissaLv); + } +} + +/// A representation of a double precision floating point value +class FloatingPoint64Value extends FloatingPointValue { + static const int _exponentWidth = 11; + static const int _mantissaWidth = 52; + + /// return the exponent width + static int get exponentWidth => _exponentWidth; + + /// return the mantissa width + static int get mantissaWidth => _mantissaWidth; + + /// Constructor for a double precision floating point value + FloatingPoint64Value( + {required super.sign, required super.mantissa, required super.exponent}) + : super._() { + // throw exceptions if widths don't match expectations + if (exponent.width != _exponentWidth) { + throw RohdHclException( + 'FloatingPoint64Value: exponent width must be $_exponentWidth'); + } + if (mantissa.width != _mantissaWidth) { + throw RohdHclException( + 'FloatingPoint64Value: mantissa width must be $_mantissaWidth'); + } + } + + /// Return the [FloatingPoint64Value] representing the constant specified + factory FloatingPoint64Value.getFloatingPointConstant( + FloatingPointConstants constantFloatingPoint) => + FloatingPointValue.getFloatingPointConstant( + constantFloatingPoint, _exponentWidth, _mantissaWidth) + as FloatingPoint64Value; + + /// [FloatingPoint64Value] constructor from string representation of + /// individual bitfields + factory FloatingPoint64Value.ofStrings( + String sign, String exponent, String mantissa) => + FloatingPoint64Value( + sign: LogicValue.of(sign), + exponent: LogicValue.of(exponent), + mantissa: LogicValue.of(mantissa)); + + /// [FloatingPoint64Value] constructor from a single string representing + /// space-separated bitfields + factory FloatingPoint64Value.ofString(String fp) { + final s = fp.split(' '); + assert(s.length == 3, 'Wrong FloatingPointValue string length ${s.length}'); + return 
FloatingPoint64Value.ofStrings(s[0], s[1], s[2]); + } + + /// [FloatingPoint64Value] constructor from a set of [BigInt]s of the binary + /// representation + factory FloatingPoint64Value.ofBigInts(BigInt exponent, BigInt mantissa, + {bool sign = false}) => + FloatingPointValue.ofBigInts(exponent, mantissa, + sign: sign, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth) as FloatingPoint64Value; + + /// [FloatingPoint64Value] constructor from a set of [int]s of the binary + /// representation + factory FloatingPoint64Value.ofInts(int exponent, int mantissa, + {bool sign = false}) => + FloatingPointValue.ofInts(exponent, mantissa, + sign: sign, + exponentWidth: exponentWidth, + mantissaWidth: mantissaWidth) as FloatingPoint64Value; + + /// Numeric conversion of a [FloatingPoint64Value] from a host double + factory FloatingPoint64Value.fromDouble(double inDouble) { + final byteData = ByteData(8) + ..setFloat64(0, inDouble) + ..buffer.asUint8List(); + final bytes = byteData.buffer.asUint8List(); + final lv = bytes.map((b) => LogicValue.ofInt(b, 64)); + + final accum = lv.reduce((accum, v) => (accum << 8) | v); + + final sign = accum[-1]; + final exponent = + accum.slice(_exponentWidth + _mantissaWidth - 1, _mantissaWidth); + final mantissa = accum.slice(_mantissaWidth - 1, 0); + + return FloatingPoint64Value( + sign: sign, mantissa: mantissa, exponent: exponent); + } + + /// Construct a [FloatingPoint32Value] from a Logic word + factory FloatingPoint64Value.fromLogic(LogicValue val) { + final sign = (val[-1] == LogicValue.one); + final exponent = + val.slice(exponentWidth + mantissaWidth - 1, mantissaWidth).toBigInt(); + final mantissa = val.slice(mantissaWidth - 1, 0).toBigInt(); + final (signLv, exponentLv, mantissaLv) = ( + LogicValue.ofBigInt(sign ? 
BigInt.one : BigInt.zero, 1), + LogicValue.ofBigInt(exponent, exponentWidth), + LogicValue.ofBigInt(mantissa, mantissaWidth) + ); + return FloatingPoint64Value( + sign: signLv, exponent: exponentLv, mantissa: mantissaLv); + } +} diff --git a/test/arithmetic/floating_point/floating_point_adder_test.dart b/test/arithmetic/floating_point/floating_point_adder_test.dart new file mode 100644 index 000000000..77fedfd11 --- /dev/null +++ b/test/arithmetic/floating_point/floating_point_adder_test.dart @@ -0,0 +1,280 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause +// +// floating_point_test.dart +// Tests of Floating Point stuff +// +// 2024 April 1 +// Authors: +// Max Korbel +// Desmond A Kirkpatrick +// Desmond A Kirkpatrick Date: Fri, 20 Sep 2024 11:40:18 -0700 Subject: [PATCH 02/19] Create a withConstraints constructor for FloatingPointValue (#1) --- .../floating_point/floating_point_value.dart | 44 ++++++++----------- 1 file changed, 19 insertions(+), 25 deletions(-) diff --git a/lib/src/arithmetic/floating_point/floating_point_value.dart b/lib/src/arithmetic/floating_point/floating_point_value.dart index f235f5371..2fab9d9fd 100644 --- a/lib/src/arithmetic/floating_point/floating_point_value.dart +++ b/lib/src/arithmetic/floating_point/floating_point_value.dart @@ -116,7 +116,7 @@ class FloatingPointValue implements Comparable { return FloatingPoint64Value( sign: sign, mantissa: mantissa, exponent: exponent); } else { - return FloatingPointValue._( + return FloatingPointValue.withConstraints( sign: sign, mantissa: mantissa, exponent: exponent); } } @@ -191,12 +191,24 @@ class FloatingPointValue implements Comparable { sign: signLv, exponent: exponentLv, mantissa: mantissaLv); } - FloatingPointValue._( - {required this.sign, required this.exponent, required this.mantissa}) + FloatingPointValue.withConstraints( + {required this.sign, + required this.exponent, + required this.mantissa, + int? mantissaWidth, + int? 
exponentWidth}) : value = [sign, exponent, mantissa].swizzle() { if (sign.width != 1) { throw RohdHclException('FloatingPointValue: sign width must be 1'); } + if (mantissa.width != mantissaWidth) { + throw RohdHclException( + 'FloatingPointValue: mantissa width must be $mantissaWidth'); + } + if (exponent.width != exponentWidth) { + throw RohdHclException( + 'FloatingPointValue: exponent width must be $exponentWidth'); + } } /// Construct a [FloatingPointValue] from a Logic word @@ -558,17 +570,8 @@ class FloatingPoint32Value extends FloatingPointValue { /// Constructor for a single precision floating point value FloatingPoint32Value( {required super.sign, required super.exponent, required super.mantissa}) - : super._() { - // throw exceptions if widths don't match expectations - if (exponent.width != exponentWidth) { - throw RohdHclException( - 'FloatingPoint32Value: exponent width must be $exponentWidth'); - } - if (mantissa.width != mantissaWidth) { - throw RohdHclException( - 'FloatingPoint32Value: mantissa width must be $mantissaWidth'); - } - } + : super.withConstraints( + mantissaWidth: mantissaWidth, exponentWidth: exponentWidth); /// Return the [FloatingPoint32Value] representing the constant specified factory FloatingPoint32Value.getFloatingPointConstant( @@ -671,17 +674,8 @@ class FloatingPoint64Value extends FloatingPointValue { /// Constructor for a double precision floating point value FloatingPoint64Value( {required super.sign, required super.mantissa, required super.exponent}) - : super._() { - // throw exceptions if widths don't match expectations - if (exponent.width != _exponentWidth) { - throw RohdHclException( - 'FloatingPoint64Value: exponent width must be $_exponentWidth'); - } - if (mantissa.width != _mantissaWidth) { - throw RohdHclException( - 'FloatingPoint64Value: mantissa width must be $_mantissaWidth'); - } - } + : super.withConstraints( + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); /// Return the 
[FloatingPoint64Value] representing the constant specified factory FloatingPoint64Value.getFloatingPointConstant( From 953d98bd41c87b2e4226bcdadcf5b04b6ac9e3fe Mon Sep 17 00:00:00 2001 From: Max Korbel Date: Fri, 20 Sep 2024 16:23:14 -0700 Subject: [PATCH 03/19] fix null check bug in withConstraints (#2) --- lib/src/arithmetic/floating_point/floating_point_value.dart | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/src/arithmetic/floating_point/floating_point_value.dart b/lib/src/arithmetic/floating_point/floating_point_value.dart index 2fab9d9fd..73ba3ca90 100644 --- a/lib/src/arithmetic/floating_point/floating_point_value.dart +++ b/lib/src/arithmetic/floating_point/floating_point_value.dart @@ -201,11 +201,11 @@ class FloatingPointValue implements Comparable { if (sign.width != 1) { throw RohdHclException('FloatingPointValue: sign width must be 1'); } - if (mantissa.width != mantissaWidth) { + if (mantissaWidth != null && mantissa.width != mantissaWidth) { throw RohdHclException( 'FloatingPointValue: mantissa width must be $mantissaWidth'); } - if (exponent.width != exponentWidth) { + if (exponentWidth != null && exponent.width != exponentWidth) { throw RohdHclException( 'FloatingPointValue: exponent width must be $exponentWidth'); } From b4fc56a69049027fdc672c6d6079561996e3aa4a Mon Sep 17 00:00:00 2001 From: soneryaldiz <56893713+soneryaldiz@users.noreply.github.com> Date: Tue, 24 Sep 2024 12:40:05 -0700 Subject: [PATCH 04/19] Bug fix and Floating Point 8 Value and Logic (#3) * bug fix and fp8 class * fixed point 8 logic * reuse isLegal --- .../floating_point/floating_point_logic.dart | 18 +++ .../floating_point/floating_point_value.dart | 115 +++++++++++++++++- .../floating_point_value_test.dart | 50 ++++++++ 3 files changed, 179 insertions(+), 4 deletions(-) diff --git a/lib/src/arithmetic/floating_point/floating_point_logic.dart b/lib/src/arithmetic/floating_point/floating_point_logic.dart index 4e885651b..fcdca3107 100644 --- 
a/lib/src/arithmetic/floating_point/floating_point_logic.dart +++ b/lib/src/arithmetic/floating_point/floating_point_logic.dart @@ -12,6 +12,7 @@ import 'package:rohd/rohd.dart'; import 'package:rohd_hcl/src/arithmetic/floating_point/floating_point_value.dart'; +import 'package:rohd_hcl/src/exceptions.dart'; /// Flexible floating point logic representation class FloatingPoint extends LogicStructure { @@ -82,3 +83,20 @@ class FloatingPoint64 extends FloatingPoint { exponentWidth: FloatingPoint64Value.exponentWidth, mantissaWidth: FloatingPoint64Value.mantissaWidth); } + +/// Eight-bit floating point representation for deep learning +class FloatingPoint8 extends FloatingPoint { + /// Calculate mantissa width and sanitize + static int _calculateMantissaWidth(int exponentWidth) { + final mantissaWidth = 7 - exponentWidth; + if (!FloatingPoint8Value.isLegal(exponentWidth, mantissaWidth)) { + throw RohdHclException('FloatingPoint8 must follow E4M3 or E5M2'); + } else { + return mantissaWidth; + } + } + + /// Construct an 8-bit floating point number + FloatingPoint8({required super.exponentWidth}) + : super(mantissaWidth: _calculateMantissaWidth(exponentWidth)); +} diff --git a/lib/src/arithmetic/floating_point/floating_point_value.dart b/lib/src/arithmetic/floating_point/floating_point_value.dart index 73ba3ca90..7b914d5a3 100644 --- a/lib/src/arithmetic/floating_point/floating_point_value.dart +++ b/lib/src/arithmetic/floating_point/floating_point_value.dart @@ -309,7 +309,7 @@ class FloatingPointValue implements Comparable { FloatingPointValue.computeBias(exponentWidth); // Handle subnormal final mantissa64 = [ - if (expVal < 0) + if (expVal <= 0) ([LogicValue.one, fp64.mantissa].swizzle() >>> -expVal).slice(52, 1) else fp64.mantissa @@ -481,9 +481,17 @@ class FloatingPointValue implements Comparable { /// Return true if the represented floating point number is considered /// NaN or 'Not a Number' due to overflow // TODO(desmonddak): figure out the difference with 
Infinity - bool isNaN() => - exponent.toInt() == - computeMaxExponent(exponent.width) + computeBias(exponent.width) + 1; + bool isNaN() { + if ((exponent.width == 4) & (mantissa.width == 3)) { + // FP8 E4M3 does not support infinities + final cond1 = (1 + exponent.toInt()) == pow(2, exponent.width).toInt(); + final cond2 = (1 + mantissa.toInt()) == pow(2, mantissa.width).toInt(); + return cond1 & cond2; + } else { + return exponent.toInt() == + computeMaxExponent(exponent.width) + computeBias(exponent.width) + 1; + } + } /// Return the value of the floating point number in a Dart [double] type. double toDouble() { @@ -753,3 +761,102 @@ class FloatingPoint64Value extends FloatingPointValue { sign: signLv, exponent: exponentLv, mantissa: mantissaLv); } } + +/// A representation of a 8-bit floating point value as defined in +/// [FP8 Formats for Deep Learning](https://arxiv.org/abs/2209.05433). +class FloatingPoint8Value extends FloatingPointValue { + /// The exponent width + late final int exponentWidth; + + /// The mantissa width + late final int mantissaWidth; + + static double get _e4m3max => 448.toDouble(); + static double get _e5m2max => 57344.toDouble(); + static double get _e4m3min => pow(2, -9).toDouble(); + static double get _e5m2min => pow(2, -16).toDouble(); + + /// Return if the exponent and mantissa widths match E4M3 or E5M2 + static bool isLegal(int exponentWidth, int mantissaWidth) { + if (((exponentWidth == 4) & (mantissaWidth == 3)) | + ((exponentWidth == 5) & (mantissaWidth == 2))) { + return true; + } else { + return false; + } + } + + /// Constructor for a double precision floating point value + FloatingPoint8Value( + {required super.sign, required super.mantissa, required super.exponent}) + : super.withConstraints() { + exponentWidth = exponent.width; + mantissaWidth = mantissa.width; + if (!isLegal(exponentWidth, mantissaWidth)) { + throw RohdHclException('FloatingPoint8 must follow E4M3 or E5M2'); + } + } + + /// [FloatingPoint8Value] 
constructor from string representation of + /// individual bitfields + factory FloatingPoint8Value.ofStrings( + String sign, String exponent, String mantissa) => + FloatingPoint8Value( + sign: LogicValue.of(sign), + exponent: LogicValue.of(exponent), + mantissa: LogicValue.of(mantissa)); + + /// [FloatingPoint8Value] constructor from a single string representing + /// space-separated bitfields + factory FloatingPoint8Value.ofString(String fp) { + final s = fp.split(' '); + assert(s.length == 3, 'Wrong FloatingPointValue string length ${s.length}'); + return FloatingPoint8Value.ofStrings(s[0], s[1], s[2]); + } + + /// Construct a [FloatingPoint8Value] from a Logic word + factory FloatingPoint8Value.fromLogic(LogicValue val, int exponentWidth) { + if (val.width != 8) { + throw RohdHclException('Width must be 8'); + } + + final mantissaWidth = 7 - exponentWidth; + if (!isLegal(exponentWidth, mantissaWidth)) { + throw RohdHclException('FloatingPoint8 must follow E4M3 or E5M2'); + } + + final sign = (val[-1] == LogicValue.one); + final exponent = + val.slice(exponentWidth + mantissaWidth - 1, mantissaWidth).toBigInt(); + final mantissa = val.slice(mantissaWidth - 1, 0).toBigInt(); + final (signLv, exponentLv, mantissaLv) = ( + LogicValue.ofBigInt(sign ? 
BigInt.one : BigInt.zero, 1), + LogicValue.ofBigInt(exponent, exponentWidth), + LogicValue.ofBigInt(mantissa, mantissaWidth) + ); + return FloatingPoint8Value( + sign: signLv, exponent: exponentLv, mantissa: mantissaLv); + } + + /// Numeric conversion of a [FloatingPoint8Value] from a host double + factory FloatingPoint8Value.fromDouble(double inDouble, + {required int exponentWidth}) { + final mantissaWidth = 7 - exponentWidth; + if (!isLegal(exponentWidth, mantissaWidth)) { + throw RohdHclException('FloatingPoint8 must follow E4M3 or E5M2'); + } + if (exponentWidth == 4) { + if ((inDouble > _e4m3max) | (inDouble < _e4m3min)) { + throw RohdHclException('Number exceeds E4M3 range'); + } + } else if (exponentWidth == 5) { + if ((inDouble > _e5m2max) | (inDouble < _e5m2min)) { + throw RohdHclException('Number exceeds E5M2 range'); + } + } + final fpv = FloatingPointValue.fromDouble(inDouble, + exponentWidth: exponentWidth, mantissaWidth: mantissaWidth); + return FloatingPoint8Value( + sign: fpv.sign, exponent: fpv.exponent, mantissa: fpv.mantissa); + } +} diff --git a/test/arithmetic/floating_point/floating_point_value_test.dart b/test/arithmetic/floating_point/floating_point_value_test.dart index 9f3640326..252d33262 100644 --- a/test/arithmetic/floating_point/floating_point_value_test.dart +++ b/test/arithmetic/floating_point/floating_point_value_test.dart @@ -148,6 +148,50 @@ void main() { } }); + test('FPV: E4M3', () { + final corners = [ + ['0 0000 000', 0.toDouble()], + ['0 1111 110', 448.toDouble()], + ['0 0001 000', pow(2, -6).toDouble()], + ['0 0000 111', 0.875 * pow(2, -6).toDouble()], + ['0 0000 001', pow(2, -9).toDouble()], + ]; + for (var c = 0; c < corners.length; c++) { + final val = corners[c][1] as double; + final str = corners[c][0] as String; + final fp = FloatingPointValue.fromDouble(val, + exponentWidth: 4, mantissaWidth: 3); + expect(val, fp.toDouble()); + expect(str, fp.toString()); + final fp8 = FloatingPointValue.fromDouble(val, + 
exponentWidth: 4, mantissaWidth: 3); + expect(val, fp8.toDouble()); + expect(str, fp8.toString()); + } + }); + + test('FP8: E5M2', () { + final corners = [ + ['0 00000 00', 0.toDouble()], + ['0 11110 11', 57344.toDouble()], + ['0 00001 00', pow(2, -14).toDouble()], + ['0 00000 11', 0.75 * pow(2, -14).toDouble()], + ['0 00000 01', pow(2, -16).toDouble()], + ]; + for (var c = 0; c < corners.length; c++) { + final val = corners[c][1] as double; + final str = corners[c][0] as String; + final fp = FloatingPointValue.fromDouble(val, + exponentWidth: 5, mantissaWidth: 2); + expect(val, fp.toDouble()); + expect(str, fp.toString()); + final fp8 = FloatingPointValue.fromDouble(val, + exponentWidth: 5, mantissaWidth: 2); + expect(val, fp8.toDouble()); + expect(str, fp8.toString()); + } + }); + test('FPV: setting and getting from a signal', () { final fp = FloatingPoint32() ..put(FloatingPoint32Value.fromDouble(1.5).value); @@ -155,6 +199,12 @@ void main() { final fp2 = FloatingPoint64() ..put(FloatingPoint64Value.fromDouble(1.5).value); expect(fp2.floatingPointValue.toDouble(), 1.5); + final fp8e4m3 = FloatingPoint8(exponentWidth: 4) + ..put(FloatingPoint8Value.fromDouble(1.5, exponentWidth: 4).value); + expect(fp8e4m3.floatingPointValue.toDouble(), 1.5); + final fp8e5m2 = FloatingPoint8(exponentWidth: 5) + ..put(FloatingPoint8Value.fromDouble(1.5, exponentWidth: 5).value); + expect(fp8e5m2.floatingPointValue.toDouble(), 1.5); }); test('FPV: round nearest even Guard and Sticky', () { From 83ad503aa6fbd813f45eb1a8b4d02cdbee5b023c Mon Sep 17 00:00:00 2001 From: "Desmond A. 
Kirkpatrick" Date: Thu, 26 Sep 2024 21:16:48 -0700 Subject: [PATCH 05/19] improved arithmetic evaluation utilities --- lib/src/arithmetic/arithmetic_utils.dart | 99 +++++++++++++++++++ .../arithmetic/evaluate_partial_product.dart | 75 ++++++++++++++ 2 files changed, 174 insertions(+) create mode 100644 lib/src/arithmetic/arithmetic_utils.dart diff --git a/lib/src/arithmetic/arithmetic_utils.dart b/lib/src/arithmetic/arithmetic_utils.dart new file mode 100644 index 000000000..2e6660a7d --- /dev/null +++ b/lib/src/arithmetic/arithmetic_utils.dart @@ -0,0 +1,99 @@ +// Copyright (C) 2024 Intel Corporation +// SPDX-License-Identifier: BSD-3-Clause +// +// floating_point_test.dart +// Tests of Floating Point stuff +// +// 2024 August 30 +// Author: Desmond A Kirkpatrick = 0; col--) { + final bits = col > 9 ? 2 : 1; + if (sep != null && sep == col) { + str.write(' ' * (2 - bits)); + if (col > 10 || col == 0) { + str.write(' $col$sepChar'); + } else { + str.write(' $col $sepChar'); + } + } else if (sep != null && sep == col + 1) { + str.write('$col'); + } else { + str + ..write(' ' * (2 - bits)) + ..write(' $col'); + } + } + str.write('\n'); + } + final String strPrefix; + strPrefix = (name.length <= prefix) + ? name.padRight(prefix) + : name.substring(0, prefix); + str + ..write(strPrefix) + ..write(' ' * ((align ?? width) - width)); + for (var col = 0; col < width; col++) { + final pos = width - 1 - col; + final v = this[pos].bitString; + if (sep != null && sep == pos) { + str.write( + ((pos > 10) | (pos == 0)) ? 
' $v$sepChar ' : ' $v $sepChar'); + } else if (sep != null && sep == pos + 1) { + str.write(v); + } else { + str.write(' $v'); + } + } + return str.toString(); + } +} + +// void main() { +// final lv0 = LogicValue.ofInt(42, 15); +// final lv1 = LogicValue.ofInt(117, 15); +// // ignore: cascade_invocations +// // No separator +// print(lv0.vecString('lv0', header: true)); +// print(lv1.vecString('lv1_with_ridiculously_long_name')); +// // Separator +// print(lv0.vecString('lv0', sep: 8)); +// print(lv1.vecString('lv1_with_ridiculously_long_name', sep: 8)); +// // separator at double-digits +// print(lv0.vecString('lv0', sep: 12, align: 24, header: true)); +// print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 12)); +// // transition to single-digit separator +// print(lv0.vecString('lv0', sep: 10, align: 24, header: true)); +// print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 10)); +// print(lv0.vecString('lv0', sep: 9, align: 24, header: true)); +// print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 9)); +// // Single digit separator +// print(lv0.vecString('lv0', sep: 8, align: 24, header: true)); +// print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 8)); +// // Separator at zero +// print(lv0.vecString('lv0', sep: 0, align: 24, header: true)); +// print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 0)); +// } diff --git a/lib/src/arithmetic/evaluate_partial_product.dart b/lib/src/arithmetic/evaluate_partial_product.dart index e421d13d3..6bd3e6c2a 100644 --- a/lib/src/arithmetic/evaluate_partial_product.dart +++ b/lib/src/arithmetic/evaluate_partial_product.dart @@ -104,4 +104,79 @@ extension EvaluateLivePartialProduct on PartialProductGenerator { } return str.toString(); } + + /// Print out the partial product matrix + String markdown() { + final str = StringBuffer(); + + final maxW = maxWidth(); + final nonSignExtendedPad = isSignExtended + ? 0 + : shift > 2 + ? 
shift - 1 + : 1; + + // print bit position header + str.write('| R | M | S'); + for (var i = maxW - 1; i >= 0; i--) { + str.write('| $i '); + } + str + ..write('| bitvector | value|\n') + ..write('|:--:' * 3); + for (var i = maxW - 1; i >= 0; i--) { + str.write('|:--:'); + } + str.write('|:--: |:--:|\n'); + // Partial product matrix: rows of multiplicand multiples shift by + // rowshift[row] + for (var row = 0; row < rows; row++) { + final rowStr = (row < 10) ? '0$row' : '$row'; + if (row < encoder.rows) { + final encoding = encoder.getEncoding(row); + if (encoding.multiples.value.isValid) { + final first = encoding.multiples.value.firstOne() ?? -1; + final multiple = first + 1; + str.write('|$rowStr| ' + '$multiple| ' + '${encoding.sign.value.toInt()}'); + } else { + str.write('| | |'); + } + } else { + str.write('|$rowStr | |'); + } + final entry = partialProducts[row].reversed.toList(); + str.write('| ' * (maxW - (entry.length + rowShift[row]))); + for (var col = 0; col < entry.length; col++) { + str.write('|${entry[col].value.bitString}'); + } + final suffixCnt = rowShift[row]; + final value = entry.swizzle().value.zeroExtend(maxW) << suffixCnt; + final intValue = value.isValid ? value.toBigInt() : BigInt.from(-1); + str + ..write('| ' * suffixCnt) + ..write('| ${value.bitString}') + ..write('| ${value.isValid ? intValue : ""}' + ' (${value.isValid ? 
intValue.toSigned(maxW) : ""})|\n'); + } + // Compute and print binary representation from accumulated value + // Later: we will compare with a compression tree result + str.write('||\n'); + + final sum = LogicValue.ofBigInt(evaluate(), maxW); + // print out the sum as a MSB-first bitvector + str.write('|||'); + for (final elem in [for (var i = 0; i < maxW; i++) sum[i]].reversed) { + str.write('|${elem.toInt()} '); + } + final val = evaluate(); + str.write('| ${sum.bitString}| ' + '${val.toUnsigned(maxW)}'); + if (isSignExtended) { + str.write(' ($val)'); + } + str.write('|\n'); + return str.toString(); + } } From ed4411845c0604ed1db750a663e4ce1ce9c8e96c Mon Sep 17 00:00:00 2001 From: "Desmond A. Kirkpatrick" Date: Fri, 27 Sep 2024 07:50:11 -0700 Subject: [PATCH 06/19] vecString utility adds upper-bit printing feature, sep allowed at beginning --- lib/src/arithmetic/arithmetic_utils.dart | 36 ++++++++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/lib/src/arithmetic/arithmetic_utils.dart b/lib/src/arithmetic/arithmetic_utils.dart index 2e6660a7d..59840a895 100644 --- a/lib/src/arithmetic/arithmetic_utils.dart +++ b/lib/src/arithmetic/arithmetic_utils.dart @@ -25,21 +25,30 @@ extension NumericVector on LogicValue { int? align, int? sep, bool header = false, - String sepChar = '|'}) { + String sepChar = '|', + int lowLimit = 0}) { final str = StringBuffer(); // ignore: cascade_invocations if (header) { str.write(' ' * prefix); - for (var col = ((align ?? width) - width) + width - 1; col >= 0; col--) { + + for (var col = ((align ?? width) - width) + width - 1; + col >= lowLimit; + col--) { final bits = col > 9 ? 
2 : 1; if (sep != null && sep == col) { str.write(' ' * (2 - bits)); - if (col > 10 || col == 0) { + if (col > 10 || col == lowLimit) { str.write(' $col$sepChar'); } else { str.write(' $col $sepChar'); } } else if (sep != null && sep == col + 1) { + if (sep == width) { + str + ..write(sepChar) + ..write(' ' * (2 - bits)); + } str.write('$col'); } else { str @@ -56,13 +65,16 @@ extension NumericVector on LogicValue { str ..write(strPrefix) ..write(' ' * ((align ?? width) - width)); - for (var col = 0; col < width; col++) { - final pos = width - 1 - col; + for (var col = lowLimit; col < width; col++) { + final pos = width - 1 - col + lowLimit; final v = this[pos].bitString; if (sep != null && sep == pos) { str.write( ((pos > 10) | (pos == 0)) ? ' $v$sepChar ' : ' $v $sepChar'); } else if (sep != null && sep == pos + 1) { + if (sep == width) { + str.write('$sepChar '); + } str.write(v); } else { str.write(' $v'); @@ -75,13 +87,13 @@ extension NumericVector on LogicValue { // void main() { // final lv0 = LogicValue.ofInt(42, 15); // final lv1 = LogicValue.ofInt(117, 15); -// // ignore: cascade_invocations // // No separator // print(lv0.vecString('lv0', header: true)); // print(lv1.vecString('lv1_with_ridiculously_long_name')); // // Separator // print(lv0.vecString('lv0', sep: 8)); // print(lv1.vecString('lv1_with_ridiculously_long_name', sep: 8)); +// print(lv1.vecString('lv1_with_ridiculously_long_name', sep: 8)); // // separator at double-digits // print(lv0.vecString('lv0', sep: 12, align: 24, header: true)); // print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 12)); @@ -96,4 +108,16 @@ extension NumericVector on LogicValue { // // Separator at zero // print(lv0.vecString('lv0', sep: 0, align: 24, header: true)); // print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 0)); +// final ref = FloatingPoint64Value.fromDouble(3.14159); +// print(ref); +// print( +// ref.mantissa.vecString('reference', lowLimit: 31, header: true, 
sep: 52)); +// print( +// ref.mantissa.vecString('reference', lowLimit: 31, header: true, sep: 48)); + +// final lv2 = LogicValue.ofInt(42, 8); +// print(lv2.vecString('lv2', header: true)); +// for (var i = lv2.width; i >= 0; i--) { +// print(lv2.vecString('lv2', sep: i)); +// } // } From f6d0b5195b9969417ff7c20442bce085050e54e6 Mon Sep 17 00:00:00 2001 From: "Desmond A. Kirkpatrick" Date: Fri, 27 Sep 2024 10:37:09 -0700 Subject: [PATCH 07/19] vecString markdown enabled --- lib/src/arithmetic/arithmetic_utils.dart | 61 +++++++++++++++++------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/lib/src/arithmetic/arithmetic_utils.dart b/lib/src/arithmetic/arithmetic_utils.dart index 59840a895..429adefbc 100644 --- a/lib/src/arithmetic/arithmetic_utils.dart +++ b/lib/src/arithmetic/arithmetic_utils.dart @@ -18,45 +18,59 @@ extension NumericVector on LogicValue { /// [name] is printed at the LHS of the line, trimmed by [prefix]. /// [prefix] is the distance from the margin bebore the vector is printed. /// You can align with longer bitvectors by stating the length [align]. + /// [lowLimit] will trim the vector below this bit position. /// You can insert a separator [sepChar] at position [sep]. /// A header can be printed by setting [header] to true. + /// Markdown format can be produced by setting [markDown] to true. String vecString(String name, {int prefix = 10, int? align, int? sep, bool header = false, - String sepChar = '|', - int lowLimit = 0}) { + String sepChar = '*', + int lowLimit = 0, + bool markDown = false}) { final str = StringBuffer(); // ignore: cascade_invocations if (header) { - str.write(' ' * prefix); + str.write(markDown ? '|Name' : ' ' * prefix); for (var col = ((align ?? width) - width) + width - 1; col >= lowLimit; col--) { final bits = col > 9 ? 2 : 1; if (sep != null && sep == col) { - str.write(' ' * (2 - bits)); + str.write(markDown ? 
'' : ' ' * (2 - bits)); if (col > 10 || col == lowLimit) { - str.write(' $col$sepChar'); + str.write('${markDown ? '|' : ' '}$col$sepChar'); } else { - str.write(' $col $sepChar'); + str.write('${markDown ? '|' : ' '}$col $sepChar'); } + str.write(markDown ? '|' : ''); } else if (sep != null && sep == col + 1) { if (sep == width) { str ..write(sepChar) - ..write(' ' * (2 - bits)); + ..write(markDown ? '|' : ' ' * (2 - bits)); } str.write('$col'); } else { str - ..write(' ' * (2 - bits)) + ..write(markDown ? '|' : ' ' * (2 - bits)) ..write(' $col'); } } - str.write('\n'); + str.write(markDown ? '|\n' : '\n'); + if (markDown) { + str.write(markDown ? '|:--:' : ' ' * prefix); + + for (var col = ((align ?? width) - width) + width - 1; + col >= lowLimit; + col--) { + str.write('|:--'); + } + str.write('-|\n'); + } } final String strPrefix; strPrefix = (name.length <= prefix) @@ -69,17 +83,30 @@ extension NumericVector on LogicValue { final pos = width - 1 - col + lowLimit; final v = this[pos].bitString; if (sep != null && sep == pos) { - str.write( - ((pos > 10) | (pos == 0)) ? ' $v$sepChar ' : ' $v $sepChar'); + if (markDown) { + str.write('|$v $sepChar'); + } else { + str.write( + ((pos > 9) | (pos == 0)) ? 
' $v$sepChar ' : ' $v $sepChar'); + } } else if (sep != null && sep == pos + 1) { + if (markDown) { + str.write('|'); + } if (sep == width) { str.write('$sepChar '); } str.write(v); } else { + if (markDown) { + str.write('|'); + } str.write(' $v'); } } + if (markDown) { + str.write('|'); + } return str.toString(); } } @@ -112,12 +139,14 @@ extension NumericVector on LogicValue { // print(ref); // print( // ref.mantissa.vecString('reference', lowLimit: 31, header: true, sep: 52)); -// print( -// ref.mantissa.vecString('reference', lowLimit: 31, header: true, sep: 48)); +// print(''); -// final lv2 = LogicValue.ofInt(42, 8); -// print(lv2.vecString('lv2', header: true)); +// print(ref.mantissa.vecString('reference', +// lowLimit: 31, header: true, sep: 48, markDown: true)); +// print(''); +// final lv2 = LogicValue.ofInt(42, 12); +// print(lv2.vecString('lv2', header: true, markDown: true)); // for (var i = lv2.width; i >= 0; i--) { -// print(lv2.vecString('lv2', sep: i)); +// print(lv2.vecString('lv2', sep: i, markDown: true)); // } // } From c9d5f0ac04534b994c08d4323e2558ebe380811a Mon Sep 17 00:00:00 2001 From: "Desmond A. Kirkpatrick" Date: Mon, 30 Sep 2024 21:58:25 -0700 Subject: [PATCH 08/19] added syntactic sugar and speed for bias/minExp/maxExp --- .../floating_point/floating_point_value.dart | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/lib/src/arithmetic/floating_point/floating_point_value.dart b/lib/src/arithmetic/floating_point/floating_point_value.dart index 7b914d5a3..5eada921d 100644 --- a/lib/src/arithmetic/floating_point/floating_point_value.dart +++ b/lib/src/arithmetic/floating_point/floating_point_value.dart @@ -101,6 +101,19 @@ class FloatingPointValue implements Comparable { static int computeMaxExponent(int exponentWidth) => computeBias(exponentWidth); + /// Return the bias of this [FloatingPointValue]. + int get bias => _bias; + + /// Return the maximum exponent of this [FloatingPointValue]. 
+ int get maxExponent => _maxExp; + + /// Return the minimum exponent of this [FloatingPointValue]. + int get minExponent => _minExp; + + final int _bias; + final int _maxExp; + final int _minExp; + /// Factory (static) constructor of a [FloatingPointValue] from /// sign, mantissa and exponent factory FloatingPointValue( @@ -191,13 +204,17 @@ class FloatingPointValue implements Comparable { sign: signLv, exponent: exponentLv, mantissa: mantissaLv); } + /// Constructor enabling subclasses. FloatingPointValue.withConstraints( {required this.sign, required this.exponent, required this.mantissa, int? mantissaWidth, int? exponentWidth}) - : value = [sign, exponent, mantissa].swizzle() { + : value = [sign, exponent, mantissa].swizzle(), + _bias = computeBias(exponent.width), + _minExp = computeMinExponent(exponent.width), + _maxExp = computeMaxExponent(exponent.width) { if (sign.width != 1) { throw RohdHclException('FloatingPointValue: sign width must be 1'); } @@ -305,7 +322,7 @@ class FloatingPointValue implements Comparable { final fp64 = FloatingPoint64Value.fromDouble(inDouble); final exponent64 = fp64.exponent; - var expVal = (exponent64.toInt() - fp64.bias()) + + var expVal = (exponent64.toInt() - fp64.bias) + FloatingPointValue.computeBias(exponentWidth); // Handle subnormal final mantissa64 = [ @@ -460,7 +477,7 @@ class FloatingPointValue implements Comparable { } /// Return the bias of this FP format - int bias() => FloatingPointValue.computeBias(exponent.width); + // int bias() => FloatingPointValue.computeBias(exponent.width); @override bool operator ==(Object other) { @@ -526,6 +543,26 @@ class FloatingPointValue implements Comparable { ' ${exponent.toString(includeWidth: false)}' ' ${mantissa.toString(includeWidth: false)}'; + /// Generate a random [FloatingPointValue] of the same widths. 
+ FloatingPointValue random(Random rv, {bool normal = false}) { + final exponentWidth = exponent.width; + final mantissaWidth = mantissa.width; + final largestExponent = FloatingPointValue.computeBias(exponentWidth) + + FloatingPointValue.computeMaxExponent(exponentWidth); + final s = rv.nextLogicValue(width: 1).toInt(); + var e = BigInt.one; + do { + e = rv + .nextLogicValue(width: exponentWidth, max: largestExponent) + .toBigInt(); + } while ((e == BigInt.zero) & normal); + final m = rv.nextLogicValue(width: exponentWidth).toBigInt(); + return FloatingPointValue( + sign: LogicValue.ofInt(s, 1), + exponent: LogicValue.ofBigInt(e, exponentWidth), + mantissa: LogicValue.ofBigInt(m, mantissaWidth)); + } + // TODO(desmonddak): what about floating point representations >> 64 bits? FloatingPointValue _performOp( FloatingPointValue other, double Function(double a, double b) op) { From 1314113faf43a7befdac0c8dcb25ed74751899a4 Mon Sep 17 00:00:00 2001 From: "Desmond A. Kirkpatrick" Date: Mon, 30 Sep 2024 23:06:08 -0700 Subject: [PATCH 09/19] added random fp adder testing --- .../floating_point/floating_point_adder.dart | 16 +- .../floating_point_multiplier.dart | 98 ------------ .../floating_point/floating_point_value.dart | 4 +- .../floating_point_adder_test.dart | 83 +++++++--- .../floating_point_multiplier_test.dart | 142 ------------------ 5 files changed, 65 insertions(+), 278 deletions(-) delete mode 100644 lib/src/arithmetic/floating_point/floating_point_multiplier.dart delete mode 100644 test/arithmetic/floating_point/floating_point_multiplier_test.dart diff --git a/lib/src/arithmetic/floating_point/floating_point_adder.dart b/lib/src/arithmetic/floating_point/floating_point_adder.dart index f7a760c2b..a171f48ed 100644 --- a/lib/src/arithmetic/floating_point/floating_point_adder.dart +++ b/lib/src/arithmetic/floating_point/floating_point_adder.dart @@ -39,8 +39,9 @@ class FloatingPointAdder extends Module { /// Add two floating point numbers [a] and [b], returning 
result in [sum] FloatingPointAdder(FloatingPoint a, FloatingPoint b, - ParallelPrefix Function(List, Logic Function(Logic, Logic)) ppGen, - {super.name}) + {ParallelPrefix Function(List, Logic Function(Logic, Logic)) + ppGen = KoggeStone.new, + super.name}) : exponentWidth = a.exponent.width, mantissaWidth = a.mantissa.width { if (b.exponent.width != exponentWidth || @@ -64,8 +65,6 @@ class FloatingPointAdder extends Module { b.exponent + mux(b.isNormal(), b.zeroExponent(), b.oneExponent()); // Align and add mantissas -// TODO(desmonddak): GRS system for FP rounding: https://i.sstatic.net/n1izR.png - // final expDiff = aExp - bExp; // print('${expDiff.value.toInt()} exponent diff'); final adder = SignMagnitudeAdder( @@ -76,19 +75,10 @@ class FloatingPointAdder extends Module { (a, b) => ParallelPrefixAdder(a, b, ppGen: ppGen)); final sum = adder.sum.slice(adder.sum.width - 2, 0); - // TODO(desmonddak): what happens if sum is zero -- should return width - // TODO(desmonddak): should trim search to what can fit in exponentWidth! 
final leadOneE = ParallelPrefixPriorityEncoder(sum.reversed, ppGen: ppGen).out; final leadOne = leadOneE.zeroExtend(exponentWidth); - // print('leadOneE=${leadOneE.value.toInt()}'); - - // print('adding ${a.mantissa.value.bitString} and ' - // ' ${b.mantissa.value.bitString}' - // ' = ${sum.value.bitString} ${leadOne.value.bitString} ' - // '${a.exponent.value.bitString}'); - // Assemble the output FloatingPoint _sum.sign <= adder.sign; Combinational([ diff --git a/lib/src/arithmetic/floating_point/floating_point_multiplier.dart b/lib/src/arithmetic/floating_point/floating_point_multiplier.dart deleted file mode 100644 index a220bda73..000000000 --- a/lib/src/arithmetic/floating_point/floating_point_multiplier.dart +++ /dev/null @@ -1,98 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: BSD-3-Clause -// -// floating_point.dart -// Implementation of Floating Point stuff -// -// 2024 August 30 -// Author: Desmond A Kirkpatrick , Logic Function(Logic, Logic)) ppTree, - {super.name}) - : exponentWidth = a.exponent.width, - mantissaWidth = a.mantissa.width { - if (b.exponent.width != exponentWidth || - b.mantissa.width != mantissaWidth) { - throw RohdHclException('FloatingPoint widths must match'); - } - a = a.clone()..gets(addInput('a', a, width: a.width)); - b = b.clone()..gets(addInput('b', b, width: b.width)); - addOutput('out', width: _out.width) <= _out; - final aExp = - a.exponent + mux(a.isNormal(), a.zeroExponent(), a.oneExponent()); - final bExp = - b.exponent + mux(b.isNormal(), b.zeroExponent(), b.oneExponent()); - - final aMantissa = [a.isNormal(), a.mantissa].swizzle(); - final bMantissa = [b.isNormal(), b.mantissa].swizzle(); - - // print('am = ${bitString(aMantissa.value)}'); - // print('bm = ${bitString(bMantissa.value)}'); - - final pp = PartialProductGeneratorCompactRectSignExtension( - aMantissa, bMantissa, RadixEncoder(radix), - signed: false); - final compressor = ColumnCompressor(pp)..compress(); - final r0 = 
compressor.extractRow(0); - final r1 = compressor.extractRow(1); - final adder = ParallelPrefixAdder(r0, r1, ppGen: ppTree); - - final rawMantissa = adder.sum.slice((exponentWidth + 1) * 2 - 1, 0); - - // Find the leading '1' in the mantissa - final pos = - ParallelPrefixPriorityEncoder(rawMantissa.reversed, ppGen: ppTree) - .out - .zeroExtend(exponentWidth); - - final expAdd = - aExp - FloatingPointValue.computeBias(aExp.width) + bExp - pos + 1; - - // stdout.write('aExp=${aExp.value}, bExp=${bExp.value}, ' - // 'pos=${pos.value}, bias=${FloatingPointValue.bias(aExp.width)} ' - // 'expAdd=${expAdd.value}\n'); - - final mantissa = rawMantissa << (pos + 1); - final normMantissa = mantissa.reversed.slice(mantissaWidth - 1, 0).reversed; - - // stdout - // ..write('aMant: ${bitString(aMantissa.value)}\n') - // ..write('bMant: ${bitString(bMantissa.value)}\n') - // ..write('out: ${bitString(adder.out.value)}\n') - // ..write('lenOut: ${adder.out.width} ') - // ..write('rawMantissa: ${bitString(rawMantissa.value)} ') - // ..write('normMantissa: ${bitString(normMantissa.value)}') - // ..write('\n') - // ..write( - // 'e=${bitString(expAdd.value)} m=${bitString(normMantissa.value)}\n'); - - _out.sign <= a.sign ^ b.sign; - _out.exponent <= expAdd; - // _out.exponent <= Const(8, width: exponentWidth); - _out.mantissa <= normMantissa; - } -} diff --git a/lib/src/arithmetic/floating_point/floating_point_value.dart b/lib/src/arithmetic/floating_point/floating_point_value.dart index 5eada921d..bd0640b8b 100644 --- a/lib/src/arithmetic/floating_point/floating_point_value.dart +++ b/lib/src/arithmetic/floating_point/floating_point_value.dart @@ -536,7 +536,7 @@ class FloatingPointValue implements Comparable { /// Return a Logic true if this FloatingPointVa;ie contains a normal number, /// defined as having mantissa in the range [1,2) - bool isNormal() => exponent != LogicValue.zero; + bool isNormal() => exponent != LogicValue.ofInt(0, exponent.width); @override String toString() 
=> '${sign.toString(includeWidth: false)}' @@ -556,7 +556,7 @@ class FloatingPointValue implements Comparable { .nextLogicValue(width: exponentWidth, max: largestExponent) .toBigInt(); } while ((e == BigInt.zero) & normal); - final m = rv.nextLogicValue(width: exponentWidth).toBigInt(); + final m = rv.nextLogicValue(width: mantissaWidth).toBigInt(); return FloatingPointValue( sign: LogicValue.ofInt(s, 1), exponent: LogicValue.ofBigInt(e, exponentWidth), diff --git a/test/arithmetic/floating_point/floating_point_adder_test.dart b/test/arithmetic/floating_point/floating_point_adder_test.dart index 77fedfd11..d9a603fa9 100644 --- a/test/arithmetic/floating_point/floating_point_adder_test.dart +++ b/test/arithmetic/floating_point/floating_point_adder_test.dart @@ -21,7 +21,7 @@ void main() { final fp2 = FloatingPoint32() ..put(FloatingPoint32Value.fromDouble(1.5).value); final out = FloatingPoint32Value.fromDouble(3.25 + 1.5); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); final fpSuper = adder.sum.floatingPointValue; final fpStr = fpSuper.toDouble().toStringAsPrecision(7); @@ -37,7 +37,7 @@ void main() { ..put(FloatingPoint32Value.fromDouble(pow(2.0, -23).toDouble()).value); final out = FloatingPoint32Value.fromDouble(val + val); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); final fpSuper = adder.sum.floatingPointValue; final fpStr = fpSuper.toDouble().toStringAsPrecision(7); @@ -55,7 +55,7 @@ void main() { ..put(FloatingPoint32Value.fromDouble(pair.$2).value); final out = FloatingPoint32Value.fromDouble(pair.$1 + pair.$2); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); final fpSuper = adder.sum.floatingPointValue; final fpStr = fpSuper.toDouble().toStringAsPrecision(7); @@ -71,7 +71,7 @@ void main() { ..put(FloatingPoint32Value.fromDouble(1.5).value); final out = 
FloatingPoint32Value.fromDouble(3.25 + 1.5); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); final fpSuper = adder.sum.floatingPointValue; final fpStr = fpSuper.toDouble().toStringAsPrecision(7); @@ -79,7 +79,7 @@ void main() { expect(fpStr, valStr); }); - test('FP: small numbers adder test', () { + test('FP: addersmall numbers test', () { final val = FloatingPoint32Value.getFloatingPointConstant( FloatingPointConstants.smallestPositiveSubnormal) .toDouble(); @@ -94,7 +94,7 @@ void main() { .value); final out = FloatingPoint32Value.fromDouble(val - val); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); final fpSuper = adder.sum.floatingPointValue; final fpStr = fpSuper.toDouble().abs().toStringAsPrecision(7); @@ -102,7 +102,7 @@ void main() { expect(fpStr, valStr); }); - test('FP: carry numbers adder test', () { + test('FP: adder carry numbers test', () { final val = pow(2.5, -12).toDouble(); final fp1 = FloatingPoint32() ..put(FloatingPoint32Value.fromDouble(pow(2.5, -12).toDouble()).value); @@ -110,7 +110,7 @@ void main() { ..put(FloatingPoint32Value.fromDouble(pow(2.5, -12).toDouble()).value); final out = FloatingPoint32Value.fromDouble(val + val); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); final fpSuper = adder.sum.floatingPointValue; final fpStr = fpSuper.toDouble().toStringAsPrecision(7); @@ -118,7 +118,7 @@ void main() { expect(fpStr, valStr); }); - test('FP: basic loop adder test', () { + test('FP: adder basic loop test', () { final input = [(3.25, 1.5), (4.5, 3.75)]; for (final pair in input) { @@ -128,7 +128,7 @@ void main() { ..put(FloatingPoint32Value.fromDouble(pair.$2).value); final out = FloatingPoint32Value.fromDouble(pair.$1 + pair.$2); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); final fpSuper = 
adder.sum.floatingPointValue; final fpStr = fpSuper.toDouble().toStringAsPrecision(7); @@ -139,7 +139,7 @@ void main() { // if you name two tests the same they get run together // RippleCarryAdder: cannot access inputs from outside -- super.a issue - test('FP: basic loop adder test - negative numbers', () { + test('FP: adder basic loop test - negative numbers', () { final input = [(4.5, 3.75), (9.0, -3.75), (-9.0, 3.9375), (-3.9375, 9.0)]; for (final pair in input) { @@ -149,7 +149,7 @@ void main() { ..put(FloatingPoint32Value.fromDouble(pair.$2).value); final out = FloatingPoint32Value.fromDouble(pair.$1 + pair.$2); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); final fpSuper = adder.sum.floatingPointValue; final fpStr = fpSuper.toDouble().toStringAsPrecision(7); @@ -158,7 +158,7 @@ void main() { } }); - test('FP: basic subnormal test', () { + test('FP: adder basic subnormal test', () { final fp1 = FloatingPoint32() ..put(FloatingPoint32Value.getFloatingPointConstant( FloatingPointConstants.smallestPositiveNormal) @@ -171,7 +171,7 @@ void main() { final out = FloatingPoint32Value.fromDouble( fp1.floatingPointValue.toDouble() + fp2.floatingPointValue.toDouble()); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); final fpSuper = adder.sum.floatingPointValue; final fpStr = fpSuper.toDouble().toStringAsPrecision(7); @@ -196,12 +196,12 @@ void main() { fp1.floatingPointValue.toDouble() + fp2.floatingPointValue.toDouble(); final out = FloatingPointValue.fromDoubleIter(outDouble, exponentWidth: ew, mantissaWidth: mw); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); expect(adder.sum.floatingPointValue.compareTo(out), 0); }); - test('FP: negative number requiring a carryOut', () { + test('FP: addernegative number requiring a carryOut', () { const pair = (9.0, -3.75); const ew = 3; const mw = 5; @@ 
-217,12 +217,12 @@ void main() { final out = FloatingPointValue.fromDouble(pair.$1 + pair.$2, exponentWidth: ew, mantissaWidth: mw); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); expect(adder.sum.floatingPointValue.compareTo(out), 0); }); - test('FP: subnormal cancellation', () { + test('FP: adder subnormal cancellation', () { const ew = 4; const mw = 4; final fp1 = FloatingPoint(exponentWidth: ew, mantissaWidth: mw) @@ -237,12 +237,12 @@ void main() { final out = fp2.floatingPointValue + fp1.floatingPointValue; - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); // TODO(desmonddak): figure out how to handle -0.0, as this would fail expect(adder.sum.floatingPointValue.abs().compareTo(out), 0); }); - test('FP: basic loop adder test2', () { + test('FP: adder adder basic loop adder test2', () { final input = [(4.5, 3.75), (9.0, -3.75), (-9.0, 3.9375), (-3.9375, 9.0)]; for (final pair in input) { @@ -252,7 +252,7 @@ void main() { ..put(FloatingPoint32Value.fromDouble(pair.$2).value); final out = FloatingPoint32Value.fromDouble(pair.$1 + pair.$2); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); final fpSuper = adder.sum.floatingPointValue; final fpStr = fpSuper.toDouble().toStringAsPrecision(7); @@ -260,7 +260,7 @@ void main() { expect(fpStr, valStr); } }); - test('FP: singleton', () { + test('FP: adder singleton', () { const pair = (9.0, -3.75); { final fp1 = FloatingPoint32() @@ -269,7 +269,7 @@ void main() { ..put(FloatingPoint32Value.fromDouble(pair.$2).value); final out = FloatingPoint32Value.fromDouble(pair.$1 + pair.$2); - final adder = FloatingPointAdder(fp1, fp2, KoggeStone.new); + final adder = FloatingPointAdder(fp1, fp2); final fpSuper = adder.sum.floatingPointValue; final fpStr = fpSuper.toDouble().toStringAsPrecision(7); @@ -277,4 +277,41 @@ void main() { expect(fpStr, valStr); } 
}); + test('FP: adder random', () { + const eWidth = 5; + const mWidth = 20; + + final fa = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); + final fb = FloatingPoint(exponentWidth: eWidth, mantissaWidth: mWidth); + final fpv = FloatingPointValue.ofInts(0, 0, + exponentWidth: eWidth, mantissaWidth: mWidth); + final smallest = FloatingPointValue.getFloatingPointConstant( + FloatingPointConstants.smallestPositiveNormal, eWidth, mWidth); + fa.put(0); + fb.put(0); + final adder = FloatingPointAdder(fa, fb); + final value = Random(513); + for (var i = 0; i < 50; i++) { + final fva = fpv.random(value, normal: true); + final fvb = fpv.random(value, normal: true); + fa.put(fva); + fb.put(fvb); + // fromDoubleIter does not round like '+' would + final expected = FloatingPointValue.fromDoubleIter( + fva.toDouble() + fvb.toDouble(), + exponentWidth: fpv.exponent.width, + mantissaWidth: fpv.mantissa.width); + final computed = adder.sum.floatingPointValue; + final ulp = FloatingPointValue.ofInts( + max(expected.exponent.toInt(), 1), 1, + exponentWidth: eWidth, mantissaWidth: mWidth); + final diff = (expected.toDouble() - computed.toDouble()).abs(); + if (expected.isNormal()) { + expect(expected.isNaN(), equals(computed.isNaN())); + if (!expected.isNaN()) { + expect(diff, lessThan(ulp.toDouble() * smallest.toDouble())); + } + } + } + }); } diff --git a/test/arithmetic/floating_point/floating_point_multiplier_test.dart b/test/arithmetic/floating_point/floating_point_multiplier_test.dart deleted file mode 100644 index d20cc54c1..000000000 --- a/test/arithmetic/floating_point/floating_point_multiplier_test.dart +++ /dev/null @@ -1,142 +0,0 @@ -// Copyright (C) 2024 Intel Corporation -// SPDX-License-Identifier: BSD-3-Clause -// -// floating_point_test.dart -// Tests of Floating Point stuff -// -// 2024 September 20 -// Authors: -// Desmond A Kirkpatrick Date: Mon, 30 Sep 2024 23:13:22 -0700 Subject: [PATCH 10/19] move FPV.random to a factory method --- 
.../floating_point/floating_point_value.dart | 41 ++++++++++--------- .../floating_point_adder_test.dart | 6 ++- 2 files changed, 25 insertions(+), 22 deletions(-) diff --git a/lib/src/arithmetic/floating_point/floating_point_value.dart b/lib/src/arithmetic/floating_point/floating_point_value.dart index bd0640b8b..4525c7339 100644 --- a/lib/src/arithmetic/floating_point/floating_point_value.dart +++ b/lib/src/arithmetic/floating_point/floating_point_value.dart @@ -359,6 +359,27 @@ class FloatingPointValue implements Comparable { sign: fp64.sign, exponent: exponent, mantissa: mantissa); } + /// Generate a random [FloatingPointValue], supplying random seed [rv]. + factory FloatingPointValue.random(Random rv, + {required int exponentWidth, + required int mantissaWidth, + bool normal = false}) { + final largestExponent = FloatingPointValue.computeBias(exponentWidth) + + FloatingPointValue.computeMaxExponent(exponentWidth); + final s = rv.nextLogicValue(width: 1).toInt(); + var e = BigInt.one; + do { + e = rv + .nextLogicValue(width: exponentWidth, max: largestExponent) + .toBigInt(); + } while ((e == BigInt.zero) & normal); + final m = rv.nextLogicValue(width: mantissaWidth).toBigInt(); + return FloatingPointValue( + sign: LogicValue.ofInt(s, 1), + exponent: LogicValue.ofBigInt(e, exponentWidth), + mantissa: LogicValue.ofBigInt(m, mantissaWidth)); + } + /// Convert a floating point number into a [FloatingPointValue] /// representation. This form performs NO ROUNDING. factory FloatingPointValue.fromDoubleIter(double inDouble, @@ -543,26 +564,6 @@ class FloatingPointValue implements Comparable { ' ${exponent.toString(includeWidth: false)}' ' ${mantissa.toString(includeWidth: false)}'; - /// Generate a random [FloatingPointValue] of the same widths. 
- FloatingPointValue random(Random rv, {bool normal = false}) { - final exponentWidth = exponent.width; - final mantissaWidth = mantissa.width; - final largestExponent = FloatingPointValue.computeBias(exponentWidth) + - FloatingPointValue.computeMaxExponent(exponentWidth); - final s = rv.nextLogicValue(width: 1).toInt(); - var e = BigInt.one; - do { - e = rv - .nextLogicValue(width: exponentWidth, max: largestExponent) - .toBigInt(); - } while ((e == BigInt.zero) & normal); - final m = rv.nextLogicValue(width: mantissaWidth).toBigInt(); - return FloatingPointValue( - sign: LogicValue.ofInt(s, 1), - exponent: LogicValue.ofBigInt(e, exponentWidth), - mantissa: LogicValue.ofBigInt(m, mantissaWidth)); - } - // TODO(desmonddak): what about floating point representations >> 64 bits? FloatingPointValue _performOp( FloatingPointValue other, double Function(double a, double b) op) { diff --git a/test/arithmetic/floating_point/floating_point_adder_test.dart b/test/arithmetic/floating_point/floating_point_adder_test.dart index d9a603fa9..3a9f69076 100644 --- a/test/arithmetic/floating_point/floating_point_adder_test.dart +++ b/test/arithmetic/floating_point/floating_point_adder_test.dart @@ -292,8 +292,10 @@ void main() { final adder = FloatingPointAdder(fa, fb); final value = Random(513); for (var i = 0; i < 50; i++) { - final fva = fpv.random(value, normal: true); - final fvb = fpv.random(value, normal: true); + final fva = FloatingPointValue.random(value, + exponentWidth: eWidth, mantissaWidth: mWidth, normal: true); + final fvb = FloatingPointValue.random(value, + exponentWidth: eWidth, mantissaWidth: mWidth, normal: true); fa.put(fva); fb.put(fvb); // fromDoubleIter does not round like '+' would From cfa4e17cdcf7ce0ae7d83ac6afa7c82994860fd5 Mon Sep 17 00:00:00 2001 From: "Desmond A. 
Kirkpatrick" Date: Mon, 30 Sep 2024 23:17:31 -0700 Subject: [PATCH 11/19] cleanup comments in arithmetic_utils --- lib/src/arithmetic/arithmetic_utils.dart | 40 ------------------- .../floating_point/floating_point.dart | 1 - 2 files changed, 41 deletions(-) diff --git a/lib/src/arithmetic/arithmetic_utils.dart b/lib/src/arithmetic/arithmetic_utils.dart index 429adefbc..93b17bf0e 100644 --- a/lib/src/arithmetic/arithmetic_utils.dart +++ b/lib/src/arithmetic/arithmetic_utils.dart @@ -110,43 +110,3 @@ extension NumericVector on LogicValue { return str.toString(); } } - -// void main() { -// final lv0 = LogicValue.ofInt(42, 15); -// final lv1 = LogicValue.ofInt(117, 15); -// // No separator -// print(lv0.vecString('lv0', header: true)); -// print(lv1.vecString('lv1_with_ridiculously_long_name')); -// // Separator -// print(lv0.vecString('lv0', sep: 8)); -// print(lv1.vecString('lv1_with_ridiculously_long_name', sep: 8)); -// print(lv1.vecString('lv1_with_ridiculously_long_name', sep: 8)); -// // separator at double-digits -// print(lv0.vecString('lv0', sep: 12, align: 24, header: true)); -// print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 12)); -// // transition to single-digit separator -// print(lv0.vecString('lv0', sep: 10, align: 24, header: true)); -// print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 10)); -// print(lv0.vecString('lv0', sep: 9, align: 24, header: true)); -// print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 9)); -// // Single digit separator -// print(lv0.vecString('lv0', sep: 8, align: 24, header: true)); -// print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 8)); -// // Separator at zero -// print(lv0.vecString('lv0', sep: 0, align: 24, header: true)); -// print(lv1.vecString('lv1_with_ridiculously_long_name', align: 24, sep: 0)); -// final ref = FloatingPoint64Value.fromDouble(3.14159); -// print(ref); -// print( -// ref.mantissa.vecString('reference', 
lowLimit: 31, header: true, sep: 52)); -// print(''); - -// print(ref.mantissa.vecString('reference', -// lowLimit: 31, header: true, sep: 48, markDown: true)); -// print(''); -// final lv2 = LogicValue.ofInt(42, 12); -// print(lv2.vecString('lv2', header: true, markDown: true)); -// for (var i = lv2.width; i >= 0; i--) { -// print(lv2.vecString('lv2', sep: i, markDown: true)); -// } -// } diff --git a/lib/src/arithmetic/floating_point/floating_point.dart b/lib/src/arithmetic/floating_point/floating_point.dart index ec65e4045..231569572 100644 --- a/lib/src/arithmetic/floating_point/floating_point.dart +++ b/lib/src/arithmetic/floating_point/floating_point.dart @@ -3,5 +3,4 @@ export 'floating_point_adder.dart'; export 'floating_point_logic.dart'; -export 'floating_point_multiplier.dart'; export 'floating_point_value.dart'; From f1d1a3eb1a4e41847207d173b5960e3f36dd4ec2 Mon Sep 17 00:00:00 2001 From: "Desmond A. Kirkpatrick" Date: Mon, 30 Sep 2024 23:57:32 -0700 Subject: [PATCH 12/19] added fp documentation --- doc/components/floating_point.md | 23 +++++++++++++++++++ .../arithmetic/evaluate_partial_product.dart | 6 ----- 2 files changed, 23 insertions(+), 6 deletions(-) create mode 100644 doc/components/floating_point.md diff --git a/doc/components/floating_point.md b/doc/components/floating_point.md new file mode 100644 index 000000000..203e72d6d --- /dev/null +++ b/doc/components/floating_point.md @@ -0,0 +1,23 @@ +# Floating-Point Components + +Floating-point operations require meticulous precision, and have standards like [IEEE-754]() which govern them. To support floating-point components, we have created a parallel to `Logic`/`LogicValue` which are part of [ROHD](). Here, `FloatingPoint` is the `Logic` wire in a component that carries `FloatingPointValue` literal values. An important distinction is that these classes are parameterized to create arbitrary size floating-point values. 
+ +## FloatingPointValue + +The `FloatingPointValue` class comprises the sign, exponent, and mantissa `LogicValue`s that represent a floating-point number. `FloatingPointValue`s can be converted to and from Dart native `Double`s, as well as constructed from integer and string representations of their fields. They can be operated on (+, -, *, /) and compared. + +The various IEEE constants representing corner cases of the field of floating-point values for a given size of `FloatingPointValue`: infinities, zeros, limits for normal (e.g. mantissa in the range of [1,2]) and sub-normal numbers (zero exponent, and mantissa <1). + +Appropriate string representations, comparison operations, and operators are available. The usefulness of `FloatingPointValue` is in the testing of `FloatingPoint` components, where we can leverage the abstraction of a floating-point value type to drive and compare floating-point values operated upon by floating-point components. + +As 32-bit single precision and 64-bit double-precision floating-point types are most common, we have `FloatingPoint32Value` and `FloatingPoint64Value` subclasses with direct converters from Dart native Double. + +Finally, we have a `FloatingPointValue` random generator for testing purposes, generating valid floating-point types, optionally constrained to normal range (mantissa in [1, 2)). + +## FloatingPoint + +The `FloatingPoint` type is a `LogicStructure` which comprises the `Logic` bits for the sign, exponent, and mantissa used in hardware floating-point. These types are provided to simplify and abstract the declaration and manipulation of floating-point types in hardware. This type is parameterized like `FloatingPointValue`, for exponent and mantissa width. + +Again, like `FloatingPointValue`, `FloatingPoint64` and `FloatingPoint32` subclasses are provided as these are the most common floating-point number types. 
+ +## FloatingPointAdder diff --git a/lib/src/arithmetic/evaluate_partial_product.dart b/lib/src/arithmetic/evaluate_partial_product.dart index 6bd3e6c2a..f684b0bfa 100644 --- a/lib/src/arithmetic/evaluate_partial_product.dart +++ b/lib/src/arithmetic/evaluate_partial_product.dart @@ -110,12 +110,6 @@ extension EvaluateLivePartialProduct on PartialProductGenerator { final str = StringBuffer(); final maxW = maxWidth(); - final nonSignExtendedPad = isSignExtended - ? 0 - : shift > 2 - ? shift - 1 - : 1; - // print bit position header str.write('| R | M | S'); for (var i = maxW - 1; i >= 0; i--) { From 8cdc1cf7f97e38f5c79eeb39f33f0b52bde259f4 Mon Sep 17 00:00:00 2001 From: "Desmond A. Kirkpatrick" Date: Tue, 1 Oct 2024 08:18:58 -0700 Subject: [PATCH 13/19] documentation update, redundand code for a MultiplierSelector --- doc/README.md | 9 +++- doc/components/floating_point.md | 4 ++ lib/src/arithmetic/multiplier_encoder.dart | 61 ---------------------- lib/src/arithmetic/multiplier_lib.dart | 1 + 4 files changed, 13 insertions(+), 62 deletions(-) diff --git a/doc/README.md b/doc/README.md index f76b7f5c9..a4ed6fc99 100644 --- a/doc/README.md +++ b/doc/README.md @@ -32,9 +32,14 @@ Some in-development items will have opened issues, as well. Feel free to create - Sort - [Bitonic sort](./components/sort.md#bitonic-sort) - Arithmetic - - [Prefix Trees](./components/parallel_prefix_operations.md) + - [Prefix Trees](./components/parallel_prefix_operations.md) Several efficient components that leverage a variety of parallel prefix trees such as Ripple, Kogge-Stone, Sklansky, and Brent-Kung tree types. 
+ - [Priority Encoder](./components/parallel_prefix_operations.md) + - [Or-scan](./components/parallel_prefix_operations.md) + - [Incrementer](./components/parallel_prefix_operations.md) + - [Decrementer](./components/parallel_prefix_operations.md) - [Adders](./components/adder.md) - [Sign Magnitude Adder](./components/adder.md#ripple-carry-adder) + - [Parallel Prefix Adder](./components/parallel_prefix_operations.md) - Subtractors - [One's Complement Adder Subtractor](./components/adder.md#ones-complement-adder-subtractor) - Multipliers @@ -48,11 +53,13 @@ Some in-development items will have opened issues, as well. Feel free to create - Square root - Inverse square root - Floating point + - [Floating-Point Value Types](./components/floating_point.md) - Double (64-bit) - Float (32-bit) - BFloat16 (16-bit) - BFloat8 (8-bit) - BFloat4 (4-bit) + - [Simple Floating-Point Adder](./components/floating_point.md#floatingpointadder) - Fixed point - Binary-Coded Decimal (BCD) - [Rotate](./components/rotate.md) diff --git a/doc/components/floating_point.md b/doc/components/floating_point.md index 203e72d6d..83505eba5 100644 --- a/doc/components/floating_point.md +++ b/doc/components/floating_point.md @@ -21,3 +21,7 @@ The `FloatingPoint` type is a `LogicStructure` which comprises the `Logic` bits Again, like `FloatingPointValue`, `FloatingPoint64` and `FloatingPoint32` subclasses are provided as these are the most common floating-point number types. ## FloatingPointAdder + +A very basic `FloatingPointAdder` component is available which does not perform any rounding. It takes two `FloatingPoint` `LogicStructure`s and adds them, returning a normalized `FloatingPoint` on the output. An option on input is the type of `ParallelPrefixTree` used in the internal addition of the mantissas.
+ +Currently, the `FloatingPointAdder` is close in accuracy (as it has no rounding) and is not optimized for circuit performance, but only provides the key functionalities of alignment, addition, and normalization. Still, this component is a starting point for more realistic floating-point components that leverage the logical `FloatingPoint` and literal `FloatingPointValue` type abstractions. diff --git a/lib/src/arithmetic/multiplier_encoder.dart b/lib/src/arithmetic/multiplier_encoder.dart index cfe7727ef..2d5b0753a 100644 --- a/lib/src/arithmetic/multiplier_encoder.dart +++ b/lib/src/arithmetic/multiplier_encoder.dart @@ -131,64 +131,3 @@ class MultiplierEncoder { return _encoder.encode(multiplierSlice.first); } } - -/// A class accessing the multiples of the multiplicand at a position -class MultiplicandSelector { - /// radix of the selector - int radix; - - /// The bit shift of the selector (typically overlaps 1) - int shift; - - /// New width of partial products generated from the multiplicand - int get width => multiplicand.width + shift - 1; - - /// Access the multiplicand - Logic multiplicand = Logic(); - - /// Place to store multiples of the multiplicand - late LogicArray multiples; - - /// Generate required multiples of multiplicand - MultiplicandSelector(this.radix, this.multiplicand, {required bool signed}) - : shift = log2Ceil(radix) { - if (radix > 16) { - throw RohdHclException('Radices beyond 16 are not yet supported'); - } - final width = multiplicand.width + shift; - final numMultiples = radix ~/ 2; - multiples = LogicArray([numMultiples], width); - final extendedMultiplicand = signed - ? 
multiplicand.signExtend(width) - : multiplicand.zeroExtend(width); - - for (var pos = 0; pos < numMultiples; pos++) { - final ratio = pos + 1; - multiples.elements[pos] <= - switch (ratio) { - 1 => extendedMultiplicand, - 2 => extendedMultiplicand << 1, - 3 => (extendedMultiplicand << 2) - extendedMultiplicand, - 4 => extendedMultiplicand << 2, - 5 => (extendedMultiplicand << 2) + extendedMultiplicand, - 6 => (extendedMultiplicand << 3) - (extendedMultiplicand << 1), - 7 => (extendedMultiplicand << 3) - extendedMultiplicand, - 8 => extendedMultiplicand << 3, - _ => throw RohdHclException('Radix is beyond 16') - }; - } - } - - /// Retrieve the multiples of the multiplicand at current bit position - Logic getMultiples(int col) => [ - for (var i = 0; i < multiples.elements.length; i++) - multiples.elements[i][col] - ].swizzle().reversed; - - Logic _select(Logic multiples, RadixEncode encode) => - (encode.multiples & multiples).or() ^ encode.sign; - - /// Select the partial product term from the multiples using a RadixEncode - Logic select(int col, RadixEncode encode) => - _select(getMultiples(col), encode); -} diff --git a/lib/src/arithmetic/multiplier_lib.dart b/lib/src/arithmetic/multiplier_lib.dart index d37063194..eb6c8bcd2 100644 --- a/lib/src/arithmetic/multiplier_lib.dart +++ b/lib/src/arithmetic/multiplier_lib.dart @@ -11,5 +11,6 @@ // export './addend_compressor.dart'; +export './multiplicand_selector.dart'; export './multiplier_encoder.dart'; export './partial_product_generator.dart'; From edc71c6569724e99fa8aa9c40968ef75bc95df77 Mon Sep 17 00:00:00 2001 From: "Desmond A. 
Kirkpatrick" Date: Tue, 1 Oct 2024 11:33:34 -0700 Subject: [PATCH 14/19] floating_point_value documentation, bug fix in evaluate_partial_product --- doc/components/floating_point.md | 7 +++++-- lib/src/arithmetic/evaluate_partial_product.dart | 2 +- .../floating_point/floating_point_value.dart | 12 +++++++++++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/components/floating_point.md b/doc/components/floating_point.md index 83505eba5..a746d26b2 100644 --- a/doc/components/floating_point.md +++ b/doc/components/floating_point.md @@ -6,13 +6,16 @@ Floating-point operations require meticulous precision, and have standards like The `FloatingPointValue` class comprises the sign, exponent, and mantissa `LogicValue`s that represent a floating-point number. `FloatingPointValue`s can be converted to and from Dart native `Double`s, as well as constructed from integer and string representations of their fields. They can be operated on (+, -, *, /) and compared. -The various IEEE constants representing corner cases of the field of floating-point values for a given size of `FloatingPointValue`: infinities, zeros, limits for normal (e.g. mantissa in the range of [1,2]) and sub-normal numbers (zero exponent, and mantissa <1). +A `FloatingPointValue` has a mantissa in $[0,2)$ with $$0 <= exponent <= `maxExponent`$$ A normal `isNormal` `FloatingPointValue` +has $$`minExponent` <= exponent <= `maxExponent`$$ and a mantissa in the range of $[1,2)$. Subnormal numbers are represented with a zero exponent and leading zeros in the mantissa capture the negative exponent value. + +The various IEEE constants representing corner cases of the field of floating-point values for a given size of `FloatingPointValue`: infinities, zeros, limits for normal (e.g. mantissa in the range of $[1,2])$ and sub-normal numbers (zero exponent, and mantissa <1). Appropriate string representations, comparison operations, and operators are available. 
The usefulness of `FloatingPointValue` is in the testing of `FloatingPoint` components, where we can leverage the abstraction of a floating-point value type to drive and compare floating-point values operated upon by floating-point components. As 32-bit single precision and 64-bit double-precision floating-point types are most common, we have `FloatingPoint32Value` and `FloatingPoint64Value` subclasses with direct converters from Dart native Double. -Finally, we have a `FloatingPointValue` random generator for testing purposes, generating valid floating-point types, optionally constrained to normal range (mantissa in [1, 2)). +Finally, we have a `FloatingPointValue` random generator for testing purposes, generating valid floating-point types, optionally constrained to normal range (mantissa in $[1, 2)$). ## FloatingPoint diff --git a/lib/src/arithmetic/evaluate_partial_product.dart b/lib/src/arithmetic/evaluate_partial_product.dart index f684b0bfa..0fd425385 100644 --- a/lib/src/arithmetic/evaluate_partial_product.dart +++ b/lib/src/arithmetic/evaluate_partial_product.dart @@ -66,7 +66,7 @@ extension EvaluateLivePartialProduct on PartialProductGenerator { str.write(' ' * shortPrefix); } } else { - str.write('$rowStr ${'M='} S= : '); + str.write('$rowStr ${'M='} S= : '); } final entry = partialProducts[row].reversed.toList(); final prefixCnt = diff --git a/lib/src/arithmetic/floating_point/floating_point_value.dart b/lib/src/arithmetic/floating_point/floating_point_value.dart index 4525c7339..605e3a8c2 100644 --- a/lib/src/arithmetic/floating_point/floating_point_value.dart +++ b/lib/src/arithmetic/floating_point/floating_point_value.dart @@ -72,7 +72,12 @@ enum FloatingPointRoundingMode { roundTowardsNegativeInfinity } -/// A flexible representation of floating point values +/// A flexible representation of floating point values. 
+/// A[FloatingPointValue] hasa mantissa in [0,2) with +/// 0 <= exponent <= maxExponent(); A normal [isNormal] [FloatingPointValue] +/// has minExponent() <= exponent <= maxExponent() and a mantissa in the +/// range of [1,2). Subnormal numbers are represented with a zero exponent +/// and leading zeros in the mantissa capture the negative exponent value. @immutable class FloatingPointValue implements Comparable { /// The full floating point value bit storage @@ -360,6 +365,11 @@ class FloatingPointValue implements Comparable { } /// Generate a random [FloatingPointValue], supplying random seed [rv]. + /// This generates a valid [FloatingPointValue] anywhere in the range + /// it can represent:a general [FloatingPointValue] has + /// a mantissa in [0,2) with 0 <= exponent <= maxExponent(); + /// If [normal] is true, This routine will only generate mantissas in the + /// range of [1,2) and minExponent() <= exponent <= maxExponent(). factory FloatingPointValue.random(Random rv, {required int exponentWidth, required int mantissaWidth, From e0918bc36546e83f371a9d9ad76059897b8fabea Mon Sep 17 00:00:00 2001 From: "Desmond A. Kirkpatrick" Date: Tue, 1 Oct 2024 18:08:57 -0700 Subject: [PATCH 15/19] documentation fixes, small bug in evaluate routines --- doc/README.md | 2 +- doc/components/floating_point.md | 13 ++- doc/components/multiplier_components.md | 37 +++++++ lib/src/arithmetic/arithmetic_utils.dart | 100 ++++++++++-------- lib/src/arithmetic/evaluate_compressor.dart | 4 +- .../floating_point/floating_point_adder.dart | 4 +- .../floating_point/floating_point_value.dart | 8 +- .../arithmetic/partial_product_generator.dart | 2 +- 8 files changed, 113 insertions(+), 57 deletions(-) diff --git a/doc/README.md b/doc/README.md index a4ed6fc99..be5fecc7c 100644 --- a/doc/README.md +++ b/doc/README.md @@ -59,7 +59,7 @@ Some in-development items will have opened issues, as well. 
Feel free to create - BFloat16 (16-bit) - BFloat8 (8-bit) - BFloat4 (4-bit) - - [Simple Floating-Point Adder](./componeents/floating_point.md#floatingpointadder) + - Fixed point - Binary-Coded Decimal (BCD) - [Rotate](./components/rotate.md) diff --git a/doc/components/floating_point.md b/doc/components/floating_point.md index a746d26b2..920c1efac 100644 --- a/doc/components/floating_point.md +++ b/doc/components/floating_point.md @@ -6,8 +6,15 @@ Floating-point operations require meticulous precision, and have standards like The `FloatingPointValue` class comprises the sign, exponent, and mantissa `LogicValue`s that represent a floating-point number. `FloatingPointValue`s can be converted to and from Dart native `Double`s, as well as constructed from integer and string representations of their fields. They can be operated on (+, -, *, /) and compared. -A `FloatingPointValue` has a mantissa in $[0,2)$ with $$0 <= exponent <= `maxExponent`$$ A normal `isNormal` `FloatingPointValue` -has $$`minExponent` <= exponent <= `maxExponent`$$ and a mantissa in the range of $[1,2)$. Subnormal numbers are represented with a zero exponent and leading zeros in the mantissa capture the negative exponent value. +A `FloatingPointValue` has a mantissa in $[0,2)$ with + +$$0 <= exponent <= maxExponent$$ + +A normal `isNormal` `FloatingPointValue` has: + +$$minExponent <= exponent <= maxExponent$$ + + and a mantissa in the range of $[1,2)$. Subnormal numbers are represented with a zero exponent and leading zeros in the mantissa capture the negative exponent value. The various IEEE constants representing corner cases of the field of floating-point values for a given size of `FloatingPointValue`: infinities, zeros, limits for normal (e.g. mantissa in the range of $[1,2])$ and sub-normal numbers (zero exponent, and mantissa <1). 
@@ -25,6 +32,6 @@ Again, like `FloatingPointValue`, `FloatingPoint64` and `FloatingPoint32` subcla ## FloatingPointAdder -A very basic `FloatingPointAdder` component is available which does not perform any rounding. It takes two `FloatingPoint` `LogicStructure`s and adds them, returning a normalized `FloatingPointValue` on the output. An option on input is the type of `ParallelPrefixTree` used in the internal addition of the mantissas. +A very basic `FloatingPointAdder` component is available which does not perform any rounding. It takes two `FloatingPoint` `LogicStructure`s and adds them, returning a normalized `FloatingPoint` on the output. An option on input is the type of `ParallelPrefixTree` used in the internal addition of the mantissas. Currently, the `FloatingPointAdder` is close in accuracy (as it has no rounding) and is not optimized for circuit performance, but only provides the key functionalities of alignment, addition, and normalization. Still, this component is a starting point for more realistic floating-point components that leverage the logical `FloatingPoint` and literal `FloatingPointValue` type abstractions. diff --git a/doc/components/multiplier_components.md b/doc/components/multiplier_components.md index 2d084b6f8..59c1342c1 100644 --- a/doc/components/multiplier_components.md +++ b/doc/components/multiplier_components.md @@ -199,3 +199,40 @@ Finally, we produce the product. compressor.exractRow(0), compressor.extractRow(1), BrentKung.new); product <= adder.sum.slice(a.width + b.width - 1, 0); ``` + +## Utility: Aligned Vector Formatting + +We provide an extension on `LogicValue` which permits formatting of binary vectors in an aligned way to help with debugging arithmetic components. + +The `vecString` extension provides a basic string printer with an optional `header` flag for bit numbering. A `prefix` value can be used to specify the name lengths to be used to keep vectors aligned. 
+ +`alignHigh` controls the highest (toward MSB) alignment column of the output whereas `alignLow` controls the lower limit (toward the LSB). + +`sepPos' is optional and allows you to set a marker for a separator in the number. +`sepChar` is the separation character you wish to use (do not use '|' with markdown formatting.) + +```dart + final ref = FloatingPoint64Value.fromDouble(3.14159); + print(ref.mantissa + .vecString('pi', align: 55, lowLimit: 40, header: true, sep: 52)); +``` + +Produces + +```text + 54 53 52* 51 50 49 48 47 46 45 44 43 42 41 40 +pi * 1 0 0 1 0 0 1 0 0 0 0 1 +``` + +The routine also allows for output in markdown format: + +```dart + print(ref.mantissa.vecString('pi', + align: 58, lowLimit: 40, header: true, sep: 52, markDown: true)); +``` + +producing: + +| Name | 54 | 53 | 52* | 51 | 50 | 49 | 48 | 47 | 46 | 45 | 44 | 43 | 42 | 41 | 40 | +|:--:|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:---| +pi|||* | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | \ No newline at end of file diff --git a/lib/src/arithmetic/arithmetic_utils.dart b/lib/src/arithmetic/arithmetic_utils.dart index 93b17bf0e..23d6c3f13 100644 --- a/lib/src/arithmetic/arithmetic_utils.dart +++ b/lib/src/arithmetic/arithmetic_utils.dart @@ -9,6 +9,8 @@ // ignore_for_file: avoid_print +import 'dart:math'; + import 'package:rohd/rohd.dart'; import 'package:rohd_hcl/rohd_hcl.dart'; @@ -17,55 +19,55 @@ extension NumericVector on LogicValue { /// Print aligned bitvector with an optional header. /// [name] is printed at the LHS of the line, trimmed by [prefix]. /// [prefix] is the distance from the margin bebore the vector is printed. - /// You can align with longer bitvectors by stating the length [align]. - /// [lowLimit] will trim the vector below this bit position. - /// You can insert a separator [sepChar] at position [sep]. + /// You can align with longer bitvectors by stating the length [alignHigh]. + /// [alignLow] will trim the vector below this bit position. 
+ /// You can insert a separator [sepChar] at position [sepPos]. /// A header can be printed by setting [header] to true. /// Markdown format can be produced by setting [markDown] to true. String vecString(String name, {int prefix = 10, - int? align, - int? sep, + int? alignHigh, + int? sepPos, bool header = false, String sepChar = '*', - int lowLimit = 0, + int alignLow = 0, bool markDown = false}) { final str = StringBuffer(); + final length = + BigInt.from(min(alignHigh ?? width, width)).toString().length + 1; // ignore: cascade_invocations if (header) { - str.write(markDown ? '|Name' : ' ' * prefix); + str.write(markDown ? '| Name' : ' ' * prefix); - for (var col = ((align ?? width) - width) + width - 1; - col >= lowLimit; + for (var col = ((alignHigh ?? width) - width) + width - 1; + col >= alignLow; col--) { - final bits = col > 9 ? 2 : 1; - if (sep != null && sep == col) { - str.write(markDown ? '' : ' ' * (2 - bits)); - if (col > 10 || col == lowLimit) { - str.write('${markDown ? '|' : ' '}$col$sepChar'); - } else { - str.write('${markDown ? '|' : ' '}$col $sepChar'); - } - str.write(markDown ? '|' : ''); - } else if (sep != null && sep == col + 1) { - if (sep == width) { + final chars = BigInt.from(col).toString().length + 1; + if (sepPos != null && sepPos == col) { + str + ..write(markDown ? ' | ' : ' ' * (length - chars + 2)) + ..write('$col$sepChar') + ..write(markDown ? ' | ' : ''); + } else if (sepPos != null && sepPos == col + 1) { + if (sepPos == max(alignHigh ?? width, width)) { str ..write(sepChar) - ..write(markDown ? '|' : ' ' * (2 - bits)); + ..write(markDown ? ' | ' : ' ' * (length - chars - 1)); } - str.write('$col'); + str.write('${' ' * (length - chars + 1)}$col'); } else { + // untested str - ..write(markDown ? '|' : ' ' * (2 - bits)) - ..write(' $col'); + ..write(markDown ? ' | ' : ' ' * (length - chars + 2)) + ..write('$col'); } } - str.write(markDown ? '|\n' : '\n'); + str.write(markDown ? 
' |\n' : '\n'); if (markDown) { str.write(markDown ? '|:--:' : ' ' * prefix); - for (var col = ((align ?? width) - width) + width - 1; - col >= lowLimit; + for (var col = ((alignHigh ?? width) - width) + width - 1; + col >= alignLow; col--) { str.write('|:--'); } @@ -73,39 +75,47 @@ extension NumericVector on LogicValue { } } final String strPrefix; - strPrefix = (name.length <= prefix) - ? name.padRight(prefix) - : name.substring(0, prefix); + strPrefix = markDown + ? name + : (name.length <= prefix) + ? name.padRight(prefix) + : name.substring(0, prefix); + str ..write(strPrefix) - ..write(' ' * ((align ?? width) - width)); - for (var col = lowLimit; col < width; col++) { - final pos = width - 1 - col + lowLimit; + ..write((markDown ? '|' : ' ' * (length + 1)) * + ((alignHigh ?? width) - width)); + for (var col = alignLow; col < min(alignHigh ?? width, width); col++) { + final pos = min(alignHigh ?? width, width) - 1 - col + alignLow; + final chars = BigInt.from(pos).toString().length + 1; final v = this[pos].bitString; - if (sep != null && sep == pos) { + if (sepPos != null && sepPos == pos) { if (markDown) { - str.write('|$v $sepChar'); + str.write(' | $v $sepChar'); } else { - str.write( - ((pos > 9) | (pos == 0)) ? ' $v$sepChar ' : ' $v $sepChar'); + str.write('${' ' * length}$v$sepChar'); } - } else if (sep != null && sep == pos + 1) { - if (markDown) { - str.write('|'); + } else if (sepPos != null && sepPos == pos + 1) { + if (sepPos == min(alignHigh ?? 
width, width)) { + str.write(sepChar); } - if (sep == width) { - str.write('$sepChar '); + if (markDown) { + str.write(' | '); + } else { + str.write(' ' * (length - 1)); } str.write(v); } else { if (markDown) { - str.write('|'); + str.write(' | '); + } else { + str.write(' ' * length); } - str.write(' $v'); + str.write(v); } } if (markDown) { - str.write('|'); + str.write(' |'); } return str.toString(); } diff --git a/lib/src/arithmetic/evaluate_compressor.dart b/lib/src/arithmetic/evaluate_compressor.dart index 465743472..f214ffa3d 100644 --- a/lib/src/arithmetic/evaluate_compressor.dart +++ b/lib/src/arithmetic/evaluate_compressor.dart @@ -40,7 +40,9 @@ extension EvaluateLiveColumnCompressor on ColumnCompressor { } } rowBits.addAll(List.filled(pp.rowShift[row], LogicValue.zero)); - final val = rowBits.swizzle().zeroExtend(width).toBigInt(); + // final val = rowBits.swizzle().zeroExtend(width).toBigInt(); + final val = rowBits.swizzle().toBigInt(); + accum += val; if (printOut) { ts.write('\t${rowBits.swizzle().zeroExtend(width).bitString} ($val)'); diff --git a/lib/src/arithmetic/floating_point/floating_point_adder.dart b/lib/src/arithmetic/floating_point/floating_point_adder.dart index a171f48ed..e6e3f1d20 100644 --- a/lib/src/arithmetic/floating_point/floating_point_adder.dart +++ b/lib/src/arithmetic/floating_point/floating_point_adder.dart @@ -1,8 +1,8 @@ // Copyright (C) 2024 Intel Corporation // SPDX-License-Identifier: BSD-3-Clause // -// floating_point_test.dart -// Tests of Floating Point stuff +// floating_point_adder.dart +// A very basic Floating-point adder component. 
// // 2024 August 30 // Author: Desmond A Kirkpatrick Date: Tue, 1 Oct 2024 18:47:34 -0700 Subject: [PATCH 16/19] markdown issues --- doc/components/multiplier_components.md | 2 +- lib/src/arithmetic/arithmetic_utils.dart | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/components/multiplier_components.md b/doc/components/multiplier_components.md index 59c1342c1..0203de0e2 100644 --- a/doc/components/multiplier_components.md +++ b/doc/components/multiplier_components.md @@ -235,4 +235,4 @@ producing: | Name | 54 | 53 | 52* | 51 | 50 | 49 | 48 | 47 | 46 | 45 | 44 | 43 | 42 | 41 | 40 | |:--:|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:---| -pi|||* | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | \ No newline at end of file +|pi|||* | 1 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | diff --git a/lib/src/arithmetic/arithmetic_utils.dart b/lib/src/arithmetic/arithmetic_utils.dart index 23d6c3f13..0c85691fc 100644 --- a/lib/src/arithmetic/arithmetic_utils.dart +++ b/lib/src/arithmetic/arithmetic_utils.dart @@ -76,7 +76,7 @@ extension NumericVector on LogicValue { } final String strPrefix; strPrefix = markDown - ? name + ? '| $name' : (name.length <= prefix) ? name.padRight(prefix) : name.substring(0, prefix); From 743298fb426663f53b330bc01623b50396eba603 Mon Sep 17 00:00:00 2001 From: "Desmond A. 
Kirkpatrick" Date: Tue, 1 Oct 2024 23:02:19 -0700 Subject: [PATCH 17/19] documentation fixes --- doc/components/floating_point.md | 2 +- doc/components/multiplier.md | 2 +- doc/components/multiplier_components.md | 10 +- lib/src/arithmetic/arithmetic_utils.dart | 119 +++++++++++++++-------- 4 files changed, 85 insertions(+), 48 deletions(-) diff --git a/doc/components/floating_point.md b/doc/components/floating_point.md index 920c1efac..8f525226c 100644 --- a/doc/components/floating_point.md +++ b/doc/components/floating_point.md @@ -14,7 +14,7 @@ A normal `isNormal` `FloatingPointValue` has: $$minExponent <= exponent <= maxExponent$$ - and a mantissa in the range of $[1,2)$. Subnormal numbers are represented with a zero exponent and leading zeros in the mantissa capture the negative exponent value. + And a mantissa in the range of $[1,2)$. Subnormal numbers are represented with a zero exponent and leading zeros in the mantissa capture the negative exponent value. The various IEEE constants representing corner cases of the field of floating-point values for a given size of `FloatingPointValue`: infinities, zeros, limits for normal (e.g. mantissa in the range of $[1,2])$ and sub-normal numbers (zero exponent, and mantissa <1). diff --git a/doc/components/multiplier.md b/doc/components/multiplier.md index e98ab5dac..14d2b8fea 100644 --- a/doc/components/multiplier.md +++ b/doc/components/multiplier.md @@ -109,7 +109,7 @@ Here is an example of use of the `CompressionTreeMultiplier`: ## Compression Tree Multiply Accumulate -A compression tree multiply accumulate is similar to a compress tree +A compression tree multiply-accumulate is similar to a compress tree multiplier, but it inserts an additional addend into the compression tree to allow for accumulation into this third input. 
diff --git a/doc/components/multiplier_components.md b/doc/components/multiplier_components.md index 0203de0e2..af1b3e08e 100644 --- a/doc/components/multiplier_components.md +++ b/doc/components/multiplier_components.md @@ -103,7 +103,7 @@ The partial product generator produces a set of addends in shifted position to b An argument to the `PartialProductGenerator` is the `RadixEncoder` to be used. The [`RadixEncoder`] takes a single argument which is the radix (power of 2) to be used. -Instead of using the 1's in the multiplier to select shifted versions of the multiplicand to add in a partial product matrix, radix-encoding will encode multiples of the multiplicand by examining adjacent bits of the multiplier. For radix-4, for example, for a multiplier of size M, instead of M rows of partial products, M/2 rows are formed by selecting from multiples [-2, -1, 0, 1, 2] of the multiplicand. These multiples are computed from an 3 bit slices, overlapped by 1 bit, of the multiplier. Higher radices use wider slices of the multiplier to encode fewer multiples and therefore fewer rows. +Instead of using the 1's in the multiplier to select shifted versions of the multiplicand to add in a partial product matrix, radix-encoding will encode multiples of the multiplicand by examining adjacent bits of the multiplier. For radix-4, for example, for a multiplier of size M, instead of M rows of partial products, M/2 rows are formed by selecting from multiples [-2, -1, 0, 1, 2] of the multiplicand. These multiples are computed from 3-bit slices, overlapped by 1 bit, of the multiplier. Higher radixes use wider slices of the multiplier to encode fewer multiples and therefore fewer rows. 
| bit_i | bit_i-1 | bit_i-2 | multiple| |:-----:|:-------:|:-------:|:-------:| @@ -209,12 +209,12 @@ The `vecString` extension provides a basic string printer with an optional `head `alignHigh` controls the highest (toward MSB) alignment column of the output whereas `alignLow` controls the lower limit (toward the LSB). `sepPos' is optional and allows you to set a marker for a separator in the number. -`sepChar` is the separation character you wish to use (do not use '|' with markdown formatting.) +`sepChar` is the separation character you wish to use (do not use '|' with Markdown formatting.) ```dart final ref = FloatingPoint64Value.fromDouble(3.14159); print(ref.mantissa - .vecString('pi', align: 55, lowLimit: 40, header: true, sep: 52)); + .vecString('pi', alignHigh: 55, alignLow: 40, header: true, sepPos: 52)); ``` Produces @@ -224,11 +224,11 @@ Produces pi * 1 0 0 1 0 0 1 0 0 0 0 1 ``` -The routine also allows for output in markdown format: +The routine also allows for output in Markdown format: ```dart print(ref.mantissa.vecString('pi', - align: 58, lowLimit: 40, header: true, sep: 52, markDown: true)); + alignHigh: 58, alignLow: 40, header: true, sepPos: 52, markDown: true)); ``` producing: diff --git a/lib/src/arithmetic/arithmetic_utils.dart b/lib/src/arithmetic/arithmetic_utils.dart index 0c85691fc..22a9d5199 100644 --- a/lib/src/arithmetic/arithmetic_utils.dart +++ b/lib/src/arithmetic/arithmetic_utils.dart @@ -33,90 +33,127 @@ extension NumericVector on LogicValue { int alignLow = 0, bool markDown = false}) { final str = StringBuffer(); - final length = - BigInt.from(min(alignHigh ?? width, width)).toString().length + 1; + final minHigh = min(alignHigh ?? width, width); + final length = BigInt.from(minHigh).toString().length + 1; // ignore: cascade_invocations + const hdrSep = '| '; + const hdrSepStart = '| '; + const hdrSepEnd = '|'; + + final highLimit = ((alignHigh ?? width) - width) + width - 1; + if (header) { - str.write(markDown ? 
'| Name' : ' ' * prefix); + str.write(markDown ? '$hdrSepStart Name' : ' ' * prefix); - for (var col = ((alignHigh ?? width) - width) + width - 1; - col >= alignLow; - col--) { + for (var col = highLimit; col >= alignLow; col--) { final chars = BigInt.from(col).toString().length + 1; if (sepPos != null && sepPos == col) { str - ..write(markDown ? ' | ' : ' ' * (length - chars + 2)) + ..write(markDown ? ' $hdrSep' : ' ' * (length - chars + 2)) ..write('$col$sepChar') - ..write(markDown ? ' | ' : ''); + ..write(markDown ? ' $hdrSep' : ''); } else if (sepPos != null && sepPos == col + 1) { if (sepPos == max(alignHigh ?? width, width)) { str ..write(sepChar) - ..write(markDown ? ' | ' : ' ' * (length - chars - 1)); + ..write(markDown ? ' $hdrSep' : ' ' * (length - chars - 1)); } str.write('${' ' * (length - chars + 1)}$col'); } else { - // untested str - ..write(markDown ? ' | ' : ' ' * (length - chars + 2)) + ..write(markDown ? ' $hdrSep' : ' ' * (length - chars + 2)) ..write('$col'); } } - str.write(markDown ? ' |\n' : '\n'); + str.write(markDown ? ' $hdrSepEnd\n' : '\n'); if (markDown) { str.write(markDown ? '|:--:' : ' ' * prefix); - - for (var col = ((alignHigh ?? width) - width) + width - 1; - col >= alignLow; - col--) { + for (var col = highLimit; col >= alignLow; col--) { str.write('|:--'); } str.write('-|\n'); } } + const dataSepStart = '|'; + const dataSep = '| '; + const dataSepEnd = '|'; final String strPrefix; strPrefix = markDown - ? '| $name' + ? '$dataSepStart $name' : (name.length <= prefix) ? name.padRight(prefix) : name.substring(0, prefix); - str ..write(strPrefix) - ..write((markDown ? '|' : ' ' * (length + 1)) * + ..write((markDown ? dataSep : ' ' * (length + 1)) * ((alignHigh ?? width) - width)); - for (var col = alignLow; col < min(alignHigh ?? width, width); col++) { - final pos = min(alignHigh ?? 
width, width) - 1 - col + alignLow; - final chars = BigInt.from(pos).toString().length + 1; + for (var col = alignLow; col < minHigh; col++) { + final pos = minHigh - 1 - col + alignLow; final v = this[pos].bitString; if (sepPos != null && sepPos == pos) { - if (markDown) { - str.write(' | $v $sepChar'); - } else { - str.write('${' ' * length}$v$sepChar'); - } + str.write( + markDown ? ' $dataSep$v $sepChar' : '${' ' * length}$v$sepChar'); } else if (sepPos != null && sepPos == pos + 1) { - if (sepPos == min(alignHigh ?? width, width)) { + if (sepPos == minHigh) { str.write(sepChar); } - if (markDown) { - str.write(' | '); - } else { - str.write(' ' * (length - 1)); - } - str.write(v); + str + ..write(markDown ? ' $dataSep' : ' ' * (length - 1)) + ..write(v); } else { - if (markDown) { - str.write(' | '); - } else { - str.write(' ' * length); - } - str.write(v); + str + ..write(markDown ? ' $dataSep' : ' ' * length) + ..write(v); } } if (markDown) { - str.write(' |'); + str.write(' $dataSepEnd'); } return str.toString(); } } + +void main() { + final lv0 = LogicValue.ofInt(42, 15); + final lv1 = LogicValue.ofInt(117, 15); + // No separator + print(lv0.vecString('lv0', header: true)); + print(lv1.vecString('lv1_with_ridiculously_long_name')); + // Separator + print(lv0.vecString('lv0', sepPos: 8)); + print(lv1.vecString('lv1_with_ridiculously_long_name', sepPos: 8)); + print(lv1.vecString('lv1_with_ridiculously_long_name', sepPos: 8)); + // separator at double-digits + print(lv0.vecString('lv0', sepPos: 12, alignHigh: 24, header: true)); + print(lv1.vecString('lv1_with_ridiculously_long_name', + alignHigh: 24, sepPos: 12)); + // transition to single-digit separator + print(lv0.vecString('lv0', sepPos: 10, alignHigh: 24, header: true)); + print(lv1.vecString('lv1_with_ridiculously_long_name', + alignHigh: 24, sepPos: 10)); + print(lv0.vecString('lv0', sepPos: 9, alignHigh: 24, header: true)); + print(lv1.vecString('lv1_with_ridiculously_long_name', + alignHigh: 24, 
sepPos: 9)); + // Single digit separator + print(lv0.vecString('lv0', sepPos: 8, alignHigh: 24, header: true)); + print(lv1.vecString('lv1_with_ridiculously_long_name', + alignHigh: 24, sepPos: 8)); + // Separator at zero + print(lv0.vecString('lv0', sepPos: 0, alignHigh: 24, header: true)); + print(lv1.vecString('lv1_with_ridiculously_long_name', + alignHigh: 24, sepPos: 0)); + final ref = FloatingPoint64Value.fromDouble(3.14159); + print(ref); + print(ref.mantissa + .vecString('reference', alignLow: 31, header: true, sepPos: 52)); + print(''); + + print(ref.mantissa.vecString('reference', + alignLow: 31, header: true, sepPos: 48, markDown: true)); + print(''); + final lv2 = LogicValue.ofInt(42, 12); + print(lv2.vecString('lv2', header: true, markDown: true)); + for (var i = lv2.width; i >= 0; i--) { + print(lv2.vecString('lv2', sepPos: i, markDown: true)); + } +} From f2971ee3a8e46664677fa2868fd872c03af5ca79 Mon Sep 17 00:00:00 2001 From: "Desmond A. Kirkpatrick" Date: Wed, 2 Oct 2024 11:52:46 -0700 Subject: [PATCH 18/19] evaluate fixes and better vector print --- lib/src/arithmetic/arithmetic_utils.dart | 59 ++++----------------- lib/src/arithmetic/evaluate_compressor.dart | 7 +-- 2 files changed, 10 insertions(+), 56 deletions(-) diff --git a/lib/src/arithmetic/arithmetic_utils.dart b/lib/src/arithmetic/arithmetic_utils.dart index 22a9d5199..a49cec811 100644 --- a/lib/src/arithmetic/arithmetic_utils.dart +++ b/lib/src/arithmetic/arithmetic_utils.dart @@ -24,6 +24,7 @@ extension NumericVector on LogicValue { /// You can insert a separator [sepChar] at position [sepPos]. /// A header can be printed by setting [header] to true. /// Markdown format can be produced by setting [markDown] to true. + /// Columns can be widened by setting [extraSpace] (extra spaces per column). String vecString(String name, {int prefix = 10, int? 
alignHigh, @@ -31,10 +32,11 @@ extension NumericVector on LogicValue { bool header = false, String sepChar = '*', int alignLow = 0, + int extraSpace = 0, bool markDown = false}) { final str = StringBuffer(); final minHigh = min(alignHigh ?? width, width); - final length = BigInt.from(minHigh).toString().length + 1; + final length = BigInt.from(minHigh).toString().length + extraSpace; // ignore: cascade_invocations const hdrSep = '| '; const hdrSepStart = '| '; @@ -46,10 +48,11 @@ extension NumericVector on LogicValue { str.write(markDown ? '$hdrSepStart Name' : ' ' * prefix); for (var col = highLimit; col >= alignLow; col--) { - final chars = BigInt.from(col).toString().length + 1; + final chars = BigInt.from(col).toString().length + extraSpace; if (sepPos != null && sepPos == col) { str - ..write(markDown ? ' $hdrSep' : ' ' * (length - chars + 2)) + ..write( + markDown ? ' $hdrSep' : ' ' * (length - chars + 1 + extraSpace)) ..write('$col$sepChar') ..write(markDown ? ' $hdrSep' : ''); } else if (sepPos != null && sepPos == col + 1) { @@ -58,10 +61,11 @@ extension NumericVector on LogicValue { ..write(sepChar) ..write(markDown ? ' $hdrSep' : ' ' * (length - chars - 1)); } - str.write('${' ' * (length - chars + 1)}$col'); + str.write('${' ' * (length - chars + extraSpace + 0)}$col'); } else { str - ..write(markDown ? ' $hdrSep' : ' ' * (length - chars + 2)) + ..write( + markDown ? 
' $hdrSep' : ' ' * (length - chars + 1 + extraSpace)) ..write('$col'); } } @@ -112,48 +116,3 @@ extension NumericVector on LogicValue { return str.toString(); } } - -void main() { - final lv0 = LogicValue.ofInt(42, 15); - final lv1 = LogicValue.ofInt(117, 15); - // No separator - print(lv0.vecString('lv0', header: true)); - print(lv1.vecString('lv1_with_ridiculously_long_name')); - // Separator - print(lv0.vecString('lv0', sepPos: 8)); - print(lv1.vecString('lv1_with_ridiculously_long_name', sepPos: 8)); - print(lv1.vecString('lv1_with_ridiculously_long_name', sepPos: 8)); - // separator at double-digits - print(lv0.vecString('lv0', sepPos: 12, alignHigh: 24, header: true)); - print(lv1.vecString('lv1_with_ridiculously_long_name', - alignHigh: 24, sepPos: 12)); - // transition to single-digit separator - print(lv0.vecString('lv0', sepPos: 10, alignHigh: 24, header: true)); - print(lv1.vecString('lv1_with_ridiculously_long_name', - alignHigh: 24, sepPos: 10)); - print(lv0.vecString('lv0', sepPos: 9, alignHigh: 24, header: true)); - print(lv1.vecString('lv1_with_ridiculously_long_name', - alignHigh: 24, sepPos: 9)); - // Single digit separator - print(lv0.vecString('lv0', sepPos: 8, alignHigh: 24, header: true)); - print(lv1.vecString('lv1_with_ridiculously_long_name', - alignHigh: 24, sepPos: 8)); - // Separator at zero - print(lv0.vecString('lv0', sepPos: 0, alignHigh: 24, header: true)); - print(lv1.vecString('lv1_with_ridiculously_long_name', - alignHigh: 24, sepPos: 0)); - final ref = FloatingPoint64Value.fromDouble(3.14159); - print(ref); - print(ref.mantissa - .vecString('reference', alignLow: 31, header: true, sepPos: 52)); - print(''); - - print(ref.mantissa.vecString('reference', - alignLow: 31, header: true, sepPos: 48, markDown: true)); - print(''); - final lv2 = LogicValue.ofInt(42, 12); - print(lv2.vecString('lv2', header: true, markDown: true)); - for (var i = lv2.width; i >= 0; i--) { - print(lv2.vecString('lv2', sepPos: i, markDown: true)); - } -} 
diff --git a/lib/src/arithmetic/evaluate_compressor.dart b/lib/src/arithmetic/evaluate_compressor.dart index f214ffa3d..b1740d382 100644 --- a/lib/src/arithmetic/evaluate_compressor.dart +++ b/lib/src/arithmetic/evaluate_compressor.dart @@ -40,8 +40,7 @@ extension EvaluateLiveColumnCompressor on ColumnCompressor { } } rowBits.addAll(List.filled(pp.rowShift[row], LogicValue.zero)); - // final val = rowBits.swizzle().zeroExtend(width).toBigInt(); - final val = rowBits.swizzle().toBigInt(); + final val = rowBits.swizzle().zeroExtend(width).toBigInt(); accum += val; if (printOut) { @@ -54,10 +53,6 @@ extension EvaluateLiveColumnCompressor on ColumnCompressor { } } } - if (printOut) { - // We need this to be able to debug, but git lint flunks print - // print(ts); - } return (accum.toSigned(width), ts); } From 055ee0b8996c96c3f8bb49a51218602534319440 Mon Sep 17 00:00:00 2001 From: "Desmond A. Kirkpatrick" Date: Wed, 2 Oct 2024 18:22:02 -0700 Subject: [PATCH 19/19] doc typo --- doc/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/README.md b/doc/README.md index be5fecc7c..efbd52501 100644 --- a/doc/README.md +++ b/doc/README.md @@ -59,7 +59,7 @@ Some in-development items will have opened issues, as well. Feel free to create - BFloat16 (16-bit) - BFloat8 (8-bit) - BFloat4 (4-bit) - + - [Simple Floating-Point Adder](./components/floating_point.md#floatingpointadder) - Fixed point - Binary-Coded Decimal (BCD) - [Rotate](./components/rotate.md)