explain float16conv

aws · Jan 12, 2024 · 698102b · 698102b
1 parent 693c486
commit 698102b
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 35 deletions.
diff --git a/encoding/cbor/cbor.go b/encoding/cbor/cbor.go
@@ -297,6 +297,6 @@ func getLen(ln int) int {
 	return 9 // type + 8-byte len
 }
 
-func compose(major, minor byte) byte {
-	return major << 5 & minor
+func compose(major MajorType, minor byte) byte {
+	return byte(major)<<5 | minor // major in the high 3 bits OR'd with minor; '&' here would mask the two together and yield 0 for any minor < 32
 }
diff --git a/encoding/cbor/decode.go b/encoding/cbor/decode.go
@@ -254,11 +254,13 @@ func decodeMajor7(p []byte) (Value, int, error) {
 	case major7Undefined:
 		return &Major7Undefined{}, 1, nil
 	case major7Float16:
+		return nil, 0, fmt.Errorf("TODO")
 	case major7Float32:
-
-		return Major7Float32(), 5, nil
+		return nil, 0, fmt.Errorf("TODO")
+	case major7Float64:
+		return nil, 0, fmt.Errorf("TODO")
 	default:
-		return nil, 0, fmt.Errorf("unexpected minor value %d", minor)
+		return nil, 0, fmt.Errorf("unexpected minor value %d", m)
 	}
 }
 

diff --git a/encoding/cbor/float16.go b/encoding/cbor/float16.go
@@ -1,46 +1,45 @@
 package cbor
 
-// float16:
-// sign|exp(5)|mant(10)
-//
-// float32:
-// sign|exp(8)|mant(23)
 func float16to32(f uint16) uint32 {
 	sign, exp, mant := splitf16(f)
 	if exp == 0x1f {
-		return sign | 0xff<<23 | exp // infinity/NaN
+		return sign | 0xff<<23 | mant // infinity (mant == 0) or NaN (mant != 0): all-ones float32 exponent, mantissa bits carried over
 	}
 
-	if exp == 0 {
-		if mant == 0 { // subnormal 0, but keep the exponent
-			return sign | (exp+127-15)<<23
-		}
-
-		// this is a float16 subnormal (true exponent -14)
-		// starting from there, we shift the mantissa over until we've
-		// chopped off the most-significant 1, i.e. that becomes the hidden
-		// mantissa bit and we're back in normal float32 space
-		exp = -14 + 127
-		for mant&0x800000 == 0 { // repeat until bit 24 is 1
-			mant <<= 1
-			exp--
+	if exp == 0 { // float16 exponent field is 0: zero or subnormal
+		if mant == 0 { // signed zero: only the sign bit is set
+			return sign
 		}
-		mant &= 0x7FFFFF // remask to 23bit
-	} else {
-		exp += 127 - 15
+		return normalize(sign, mant) // a float16 subnormal is re-expressed as a normal float32
 	}
 
-	return sign | exp<<23 | mant
+	return sign | (exp+127-15)<<23 | mant // normal number: rebias exp from the float16 bias (15) to the float32 bias (127)
 }
 
-// breaks a float16 down into its components:
-//   - sign, in float32 position
-//   - exponent, as a number (for bias shifting and subnormal conversion)
-//   - mantissa, in float32 position
 func splitf16(f uint16) (sign, exp, mantissa uint32) {
-	const smask = 0b_1 << 15
-	const emask = 0b_11111 << 10
-	const mmask = 0b_1111111111
+	const smask = 0x1 << 15  // sign bit (bit 15); returned shifted up 16 into the float32 sign position (bit 31)
+	const emask = 0x1f << 10 // 5 exponent bits (bits 10-14); returned shifted down to a plain number (for bias shift)
+	const mmask = 0x3ff      // 10 mantissa bits (bits 0-9); returned shifted up 13 into the top of the 23-bit float32 mantissa
 
 	return uint32(f&smask) << 16, uint32(f&emask) >> 10, uint32(f&mmask) << 13
 }
+
+// moves a float16 subnormal into normal float32 space (mant must be nonzero)
+// to do this we must re-express the float16 mantissa (already shifted into
+// float32 position) as a normal float32 where the hidden bit is 1, e.g.
+//
+// f16: 0    00000              0001010000 = 0.000101 * 2^(-14), which is equal to
+// f32: 0 01101101 01000000000000000000000 =     1.01 * 2^(-18)
+//
+// this is achieved by shifting the mantissa to the left until the leading bit
+// that == 1 reaches position 24, then the number of positions shifted over is
+// subtracted from the subnormal exponent (-14, held with the float32 bias)
+func normalize(sign, mant uint32) uint32 {
+	exp := (uint32(-14 + 127)) // f16 subnormal exp, with f32 bias
+	for mant&0x800000 == 0 {   // repeat until bit 24 ("hidden" mantissa) is 1
+		mant <<= 1
+		exp-- // each left shift lowers the exponent by one
+	}
+	mant &= 0x7fffff // drop the hidden bit, leaving the 23 stored mantissa bits
+	return sign | exp<<23 | mant
+}