Skip to content

Commit

Permalink
explain float16conv
Browse files Browse the repository at this point in the history
  • Loading branch information
lucix-aws committed Jan 12, 2024
1 parent 693c486 commit 698102b
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 35 deletions.
4 changes: 2 additions & 2 deletions encoding/cbor/cbor.go
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,6 @@ func getLen(ln int) int {
return 9 // type + 8-byte len
}

func compose(major, minor byte) byte {
return major << 5 & minor
func compose(major MajorType, minor byte) byte {
return byte(major) << 5 & minor
}
8 changes: 5 additions & 3 deletions encoding/cbor/decode.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,11 +254,13 @@ func decodeMajor7(p []byte) (Value, int, error) {
case major7Undefined:
return &Major7Undefined{}, 1, nil
case major7Float16:
return nil, 0, fmt.Errorf("TODO")
case major7Float32:

return Major7Float32(), 5, nil
return nil, 0, fmt.Errorf("TODO")
case major7Float64:
return nil, 0, fmt.Errorf("TODO")
default:
return nil, 0, fmt.Errorf("unexpected minor value %d", minor)
return nil, 0, fmt.Errorf("unexpected minor value %d", m)
}
}

Expand Down
59 changes: 29 additions & 30 deletions encoding/cbor/float16.go
Original file line number Diff line number Diff line change
@@ -1,46 +1,45 @@
package cbor

// float16to32 widens the raw bits of an IEEE 754 half-precision float into
// the raw bits of the single-precision float with the same value.
//
// float16:
//
//	sign|exp(5)|mant(10)
//
// float32:
//
//	sign|exp(8)|mant(23)
func float16to32(f uint16) uint32 {
	sign, exp, mant := splitf16(f)
	if exp == 0x1f {
		// infinity/NaN: all-ones exponent, mantissa (NaN payload) carried over
		return sign | 0xff<<23 | mant
	}

	if exp == 0 { // zero or subnormal
		if mant == 0 {
			return sign // signed zero
		}
		return normalize(sign, mant)
	}

	// normal number: rebias exp by the difference between the two formats
	return sign | (exp+127-15)<<23 | mant
}

// splitf16 breaks a float16 down into its components:
//   - sign, in float32 position (bit 31)
//   - exponent, as a plain number (for bias shifting and subnormal conversion)
//   - mantissa, in float32 position (top 10 bits of the 23-bit field)
func splitf16(f uint16) (sign, exp, mantissa uint32) {
	const smask = 0x1 << 15  // sign bit
	const emask = 0x1f << 10 // 5 exponent bits
	const mmask = 0x3ff      // 10 mantissa bits

	return uint32(f&smask) << 16, uint32(f&emask) >> 10, uint32(f&mmask) << 13
}

// normalize moves a float16 subnormal into normal float32 space.
//
// sign and mant are expected in float32 position, as produced by splitf16.
// To do this we must re-express the float16 mantissa in terms of a normal
// float32 where the hidden bit is 1, e.g.
//
//	f16: 0 00000 0001010000                    = 0.000101 * 2^(-14), which is equal to
//	f32: 0 01101101 01000000000000000000000    = 1.01 * 2^(-18)
//
// This is achieved by shifting the mantissa to the left until its leading 1
// reaches bit 23 (the 24th bit, i.e. the "hidden" mantissa bit of a float32).
// Each position shifted lowers the exponent by one, starting from the float16
// subnormal exponent of -14.
//
// A zero mantissa has no leading 1 to find, so it is returned as a signed zero
// rather than looping forever.
func normalize(sign, mant uint32) uint32 {
	if mant == 0 {
		return sign
	}

	exp := uint32(-14 + 127) // f16 subnormal exp, with f32 bias
	for mant&0x800000 == 0 { // repeat until bit 23 ("hidden" mantissa) is 1
		mant <<= 1
		exp-- // tracking the offset
	}
	mant &= 0x7fffff // drop the hidden bit, leaving the 23 stored bits
	return sign | exp<<23 | mant
}

0 comments on commit 698102b

Please sign in to comment.