Skip to content

Commit

Permalink
subnormal support
Browse files Browse the repository at this point in the history
  • Loading branch information
soneryaldiz committed Oct 28, 2024
1 parent e425b74 commit 31605d7
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 27 deletions.
104 changes: 78 additions & 26 deletions lib/src/arithmetic/fixed_to_float.dart
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ import 'dart:math';
import 'package:rohd/rohd.dart';
import 'package:rohd_hcl/rohd_hcl.dart';

/// [FixedToFloatConverter] converts a fixed point input to
/// a floating point by rounding to nearest even.
/// [FixedToFloatConverter] converts a fixed point input to floating point.
/// Normals are rounded to nearest even. Subnormals are truncated.
class FixedToFloatConverter extends Module {
/// Width of exponent, must be greater than 0.
final int exponentWidth;
Expand Down Expand Up @@ -51,8 +51,10 @@ class FixedToFloatConverter extends Module {
addOutput('float', width: _float.width) <= _float;

final bias = FloatingPointValue.computeBias(exponentWidth);
final maxE = FloatingPointValue.computeMaxExponent(exponentWidth);

final indexWidth = max(fixed.n, max(log2Ceil(fixed.width), exponentWidth));
final iWidth =
(2 + max(fixed.n, max(log2Ceil(fixed.width), exponentWidth))).toInt();

// Extract sign bit
if (fixed.signed) {
Expand All @@ -64,13 +66,13 @@ class FixedToFloatConverter extends Module {
final absValue = Logic(name: 'absValue', width: fixed.width)
..gets(mux(_float.sign, ~(fixed - 1), fixed));

// Find jBit position
final jBit = Logic(name: 'jBit', width: indexWidth);
// Find jBit position. TODO: Re-use ParallelPrefixPriorityEncoder()?
final jBit = Logic(name: 'jBit', width: iWidth);
Combinational([
CaseZ(absValue, conditionalType: ConditionalType.priority, [
for (var i = 0; i < absValue.width; i++)
CaseItem(_generateCaseItem(i, absValue.width), [
jBit < Const(absValue.width - 1 - i, width: indexWidth),
jBit < Const(absValue.width - 1 - i, width: iWidth),
])
], defaultItem: [
jBit < 0,
Expand All @@ -83,23 +85,28 @@ class FixedToFloatConverter extends Module {
final sticky = Logic(name: 'stickBit');
Combinational([
Case(jBit, conditionalType: ConditionalType.unique, [
CaseItem(Const(0, width: indexWidth), [
CaseItem(Const(0, width: iWidth), [
mantissa < 0,
guard < 0,
sticky < 0,
]),
for (var i = 1; i < mantissaWidth + 2; i++)
CaseItem(Const(i, width: indexWidth), [
for (var i = 1; i <= mantissaWidth; i++)
CaseItem(Const(i, width: iWidth), [
mantissa <
[
absValue.slice(i - 1, max(0, i - mantissaWidth)),
absValue.slice(i - 1, 0),
Const(0, width: max(0, mantissaWidth - i))
].swizzle(),
guard < 0,
sticky < 0,
]),
CaseItem(Const(mantissaWidth + 1, width: iWidth), [
mantissa < absValue.slice(mantissaWidth, 1),
guard < absValue[0],
sticky < 0,
]),
for (var i = mantissaWidth + 2; i < absValue.width; i++)
CaseItem(Const(i, width: indexWidth), [
CaseItem(Const(i, width: iWidth), [
mantissa <
[
absValue.slice(i - 1, max(0, i - mantissaWidth)),
Expand All @@ -115,22 +122,67 @@ class FixedToFloatConverter extends Module {
]),
]);

/// Round to nearest even: mantissa | guard sticky)
final mantissaRounded =
mux(guard & (sticky | mantissa[0]), mantissa + 1, mantissa);
// Round to nearest even; bit layout is: mantissa | guard | sticky.
final roundUp = guard & (sticky | mantissa[0]);
final mantissaRounded = mux(roundUp, mantissa + 1, mantissa);

// Extract exponent
final exponent = Logic(name: 'exponent', width: exponentWidth)
..gets((jBit + Const(bias - fixed.n, width: indexWidth))
.slice(exponentWidth - 1, 0));
final exponentRounded = mux(mantissaRounded.or(), exponent, exponent + 1);

_float.exponent <= exponentRounded;
_float.mantissa <= mantissaRounded;

// TODO: what if RNE causes overflow in exponent?
// TODO: handle subnormals
// TODO: handle all zeros
// TODO: handle infinities
// Raw exponent candidate: jBit offset by the format bias and by fixed.n,
// all computed in iWidth bits.
final expoRaw =
jBit + Const(bias, width: iWidth) - Const(fixed.n, width: iWidth);
// When rounding up left mantissaRounded all zeros (the increment carried out
// of the mantissa), bump the exponent by one instead.
final expoRawRne =
mux(roundUp & ~mantissaRounded.or(), expoRaw + 1, expoRaw);

// Subnormal mantissa: the leading one is written explicitly into the
// mantissa field, preceded by zeros, followed by the upper bits of
// mantissaRounded (pattern: 0...0 1 mantissa-bits).
// padAmount = 1 - expoRawRne selects which case below applies.
final padAmount = Const(1, width: iWidth) - expoRawRne;
final mantissaSub = Logic(name: 'mantissaSub', width: mantissaWidth);
Combinational([
Case(padAmount, conditionalType: ConditionalType.unique, [
// padAmount == i, for 1 <= i < mantissaWidth: (i - 1) zero bits, a single
// 1 bit, then the slice mantissaRounded[mantissaWidth - 1 : i]; the lower
// i bits of mantissaRounded are dropped.
// NOTE(review): for i == 1 this builds Const(0, width: 0); confirm that
// zero-width constants are accepted by the ROHD version in use.
for (var i = 1; i < mantissaWidth; i++)
CaseItem(
Const(i, width: iWidth),
[
mantissaSub <
[
Const(0, width: i - 1),
Const(1),
mantissaRounded.slice(mantissaWidth - 1, i)
].swizzle()
],
),
// padAmount == mantissaWidth: only the explicit 1 remains (value 1 after
// zero extension to mantissaWidth bits).
CaseItem(Const(mantissaWidth, width: iWidth),
[mantissaSub < Const(1).zeroExtend(mantissaWidth)]),
], defaultItem: [
// Any other padAmount: the subnormal mantissa is driven to zero.
mantissaSub < Const(0, width: mantissaWidth)
])
]);

// Select the output encoding. Corner cases are checked in priority order:
// zero input, exponent overflow (infinity), subnormal, then normal.
//
// expoLessThanOne is set when the top bit of expoRawRne is set or when
// expoRawRne is all zeros, i.e. the exponent candidate is below 1 when
// expoRawRne is read as a signed quantity.
final expoLessThanOne = expoRawRne[-1] | ~expoRawRne.or();
// expoMoreThanMax is set when the top bit is clear and expoRawRne > maxE.
// NOTE(review): expoRawRne already includes the bias (see expoRaw); confirm
// that FloatingPointValue.computeMaxExponent returns a value in the same
// (biased) domain, otherwise this comparison fires too early.
final expoMoreThanMax = ~expoRawRne[-1] & (expoRawRne.gt(maxE));
Combinational([
  If.block([
    Iff(~absValue.or(), [
      // Zero: exponent and mantissa are both all zeros.
      _float.exponent < Const(0, width: exponentWidth),
      _float.mantissa < Const(0, width: mantissaWidth),
    ]),
    ElseIf(expoMoreThanMax, [
      // Infinity: the exponent field must be ALL ONES with a zero mantissa.
      // `fill: true` replicates the single bit across the full width; a
      // plain Const(1, width: ...) would encode the exponent value 1.
      _float.exponent < Const(1, width: exponentWidth, fill: true),
      _float.mantissa < Const(0, width: mantissaWidth),
    ]),
    ElseIf(expoLessThanOne, [
      // Subnormal: zero exponent field with the pre-shifted mantissa.
      _float.exponent < Const(0, width: exponentWidth),
      _float.mantissa < mantissaSub
    ]),
    Else([
      // Normal: low exponentWidth bits of the rounded exponent and the
      // rounded mantissa.
      _float.exponent < expoRawRne.slice(exponentWidth - 1, 0),
      _float.mantissa < mantissaRounded
    ])
  ])
]);

}
}
27 changes: 26 additions & 1 deletion test/arithmetic/fixed_to_float_test.dart
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
// Author: Soner Yaldiz <[email protected]>

import 'dart:io';
import 'dart:math';
import 'package:rohd/rohd.dart';
import 'package:rohd_hcl/rohd_hcl.dart';
import 'package:test/test.dart';

void main() async {
test('FixedToFloat: simple', () async {
test('Smoke', () async {
final fixed = FixedPoint(signed: true, m: 34, n: 33);
final dut =
FixedToFloatConverter(fixed, exponentWidth: 4, mantissaWidth: 3);
Expand All @@ -21,4 +23,27 @@ void main() async {
fixed.put(FixedPointValue.ofDouble(1.25, signed: true, m: 34, n: 33));
expect(dut.float.floatingPointValue.toDouble(), 1.25);
});

test('Q16.16 to E5M2', () async {
  // Sweeps the raw encodings 0 .. 2^14 - 1 of a signed m=16, n=16 fixed-point
  // input and checks each converted field against the reference
  // FloatingPointValue built from the same value as a double.
  final input = FixedPoint(signed: true, m: 16, n: 16);
  final converter =
      FixedToFloatConverter(input, exponentWidth: 5, mantissaWidth: 2);
  await converter.build();

  final limit = pow(2, 14);
  var raw = 0;
  while (raw < limit) {
    final stimulus = FixedPointValue(
        value: LogicValue.ofInt(raw, input.width),
        signed: true,
        m: input.m,
        n: input.n);
    input.put(stimulus);

    final actual = converter.float.floatingPointValue;
    final reference = FloatingPointValue.ofDouble(stimulus.toDouble(),
        exponentWidth: converter.exponentWidth,
        mantissaWidth: converter.mantissaWidth);

    // Compare field by field so a failure names the offending field.
    expect(actual.sign, reference.sign);
    expect(actual.exponent.bitString, reference.exponent.bitString,
        reason: 'exponent');
    expect(actual.mantissa.bitString, reference.mantissa.bitString,
        reason: 'mantissa');
    raw++;
  }
});
}

0 comments on commit 31605d7

Please sign in to comment.