-
Notifications
You must be signed in to change notification settings - Fork 1
/
Float8m3e4s1.h
36 lines (33 loc) · 3.02 KB
/
Float8m3e4s1.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
//-----------------------------------------------------------------------------
//
// See:
// https://arxiv.org/abs/2209.05433 FP8 Formats for Deep Learning
// https://arxiv.org/abs/2206.02915 8-bit Numerical Formats for Deep Neural Networks 2022-10-24
//
//-----------------------------------------------------------------------------
#pragma once
// 8-bit floating-point alias built on the project's FloatNumber template.
// Going by the alias name: 3 mantissa bits, 4 exponent bits, 1 sign bit
// (NOTE(review): the FloatNumber declaration is not visible here - confirm the
// meaning and order of the template arguments against it).
// No infinity and one NaN representation (S1111.111).
using float8m3e4s1_t = FloatNumber<
    uint8_t,                 // presumably the underlying storage type - TODO confirm
    3,                       // presumably the mantissa (fraction) bit count - TODO confirm
    4,                       // presumably the exponent bit count - TODO confirm
    true, true, false, true  // flags defined by FloatNumber; meaning not visible here
>;
// Adds two float8m3e4s1_t values.
// Both operands are converted to float and added in float; the float sum is
// converted back to float8m3e4s1_t by the return statement.
inline float8m3e4s1_t operator +(float8m3e4s1_t a, float8m3e4s1_t b) noexcept
{
    return static_cast<float>(a) + static_cast<float>(b);
}
// Subtracts b from a.
// Both operands are converted to float and subtracted in float; the float
// difference is converted back to float8m3e4s1_t by the return statement.
inline float8m3e4s1_t operator -(float8m3e4s1_t a, float8m3e4s1_t b) noexcept
{
    return static_cast<float>(a) - static_cast<float>(b);
}
// Multiplies two float8m3e4s1_t values.
// Both operands are converted to float and multiplied in float; the float
// product is converted back to float8m3e4s1_t by the return statement.
inline float8m3e4s1_t operator *(float8m3e4s1_t a, float8m3e4s1_t b) noexcept
{
    return static_cast<float>(a) * static_cast<float>(b);
}
// Divides a by b.
// Both operands are converted to float and divided in float; the float
// quotient is converted back to float8m3e4s1_t by the return statement.
inline float8m3e4s1_t operator /(float8m3e4s1_t a, float8m3e4s1_t b) noexcept
{
    return static_cast<float>(a) / static_cast<float>(b);
}
// Adds a double to a float8m3e4s1_t value.
// The double operand is narrowed to float first, so the addition is carried
// out in float; the float sum is converted back to float8m3e4s1_t on return.
inline float8m3e4s1_t operator +(float8m3e4s1_t a, double b) noexcept
{
    return static_cast<float>(a) + static_cast<float>(b);
}
// Subtracts a double from a float8m3e4s1_t value.
// The double operand is narrowed to float first, so the subtraction is carried
// out in float; the float difference is converted back to float8m3e4s1_t on return.
inline float8m3e4s1_t operator -(float8m3e4s1_t a, double b) noexcept
{
    return static_cast<float>(a) - static_cast<float>(b);
}
// Multiplies a float8m3e4s1_t value by a double.
// The double operand is narrowed to float first, so the multiplication is carried
// out in float; the float product is converted back to float8m3e4s1_t on return.
inline float8m3e4s1_t operator *(float8m3e4s1_t a, double b) noexcept
{
    return static_cast<float>(a) * static_cast<float>(b);
}
// Divides a float8m3e4s1_t value by a double.
// The double operand is narrowed to float first, so the division is carried
// out in float; the float quotient is converted back to float8m3e4s1_t on return.
inline float8m3e4s1_t operator /(float8m3e4s1_t a, double b) noexcept
{
    return static_cast<float>(a) / static_cast<float>(b);
}
// Adds a float8m3e4s1_t value to a double (double on the left-hand side).
// The double operand is narrowed to float first, so the addition is carried
// out in float; the float sum is converted back to float8m3e4s1_t on return.
inline float8m3e4s1_t operator +(double a, float8m3e4s1_t b) noexcept
{
    return static_cast<float>(a) + static_cast<float>(b);
}
// Subtracts a float8m3e4s1_t value from a double (double on the left-hand side).
// The double operand is narrowed to float first, so the subtraction is carried
// out in float; the float difference is converted back to float8m3e4s1_t on return.
inline float8m3e4s1_t operator -(double a, float8m3e4s1_t b) noexcept
{
    return static_cast<float>(a) - static_cast<float>(b);
}
// Multiplies a double by a float8m3e4s1_t value (double on the left-hand side).
// The double operand is narrowed to float first, so the multiplication is carried
// out in float; the float product is converted back to float8m3e4s1_t on return.
inline float8m3e4s1_t operator *(double a, float8m3e4s1_t b) noexcept
{
    return static_cast<float>(a) * static_cast<float>(b);
}
// Divides a double by a float8m3e4s1_t value (double on the left-hand side).
// The double operand is narrowed to float first, so the division is carried
// out in float; the float quotient is converted back to float8m3e4s1_t on return.
inline float8m3e4s1_t operator /(double a, float8m3e4s1_t b) noexcept
{
    return static_cast<float>(a) / static_cast<float>(b);
}
// Compound addition: stores a + b into a and returns a reference to a.
// The sum is computed in float and assigned back to a, which converts it to
// float8m3e4s1_t.
inline float8m3e4s1_t& operator +=(float8m3e4s1_t& a, float8m3e4s1_t b) noexcept
{
    a = static_cast<float>(a) + static_cast<float>(b);
    return a;
}
// Compound subtraction: stores a - b into a and returns a reference to a.
// The difference is computed in float and assigned back to a, which converts
// it to float8m3e4s1_t.
inline float8m3e4s1_t& operator -=(float8m3e4s1_t& a, float8m3e4s1_t b) noexcept
{
    a = static_cast<float>(a) - static_cast<float>(b);
    return a;
}
// Compound multiplication: stores a * b into a and returns a reference to a.
// The product is computed in float and assigned back to a, which converts it
// to float8m3e4s1_t.
inline float8m3e4s1_t& operator *=(float8m3e4s1_t& a, float8m3e4s1_t b) noexcept
{
    a = static_cast<float>(a) * static_cast<float>(b);
    return a;
}
// Compound division: stores a / b into a and returns a reference to a.
// The quotient is computed in float and assigned back to a, which converts it
// to float8m3e4s1_t.
inline float8m3e4s1_t& operator /=(float8m3e4s1_t& a, float8m3e4s1_t b) noexcept
{
    a = static_cast<float>(a) / static_cast<float>(b);
    return a;
}
// Prefix increment: adds 1 to a (computed in float), assigns the result back
// to a, and returns a reference to a.
inline float8m3e4s1_t& operator ++(float8m3e4s1_t& a) noexcept
{
    a = static_cast<float>(a) + 1;
    return a;
}
// Prefix decrement: subtracts 1 from a (computed in float), assigns the result
// back to a, and returns a reference to a.
// Fix: the previous body computed float(a) + 1 (copied from operator ++), so
// --x actually incremented x.
inline float8m3e4s1_t& operator --(float8m3e4s1_t& a) noexcept
{
    a = static_cast<float>(a) - 1;
    return a;
}
// Equality comparison. Both operands are converted to float and compared with
// float ==, so the result follows float comparison rules rather than a
// comparison of the stored bit patterns.
inline bool operator==(float8m3e4s1_t lhs, float8m3e4s1_t rhs) noexcept
{
    return static_cast<float>(lhs) == static_cast<float>(rhs);
}
// Inequality comparison. Both operands are converted to float and compared
// with float !=, so the result follows float comparison rules rather than a
// comparison of the stored bit patterns.
inline bool operator!=(float8m3e4s1_t lhs, float8m3e4s1_t rhs) noexcept
{
    return static_cast<float>(lhs) != static_cast<float>(rhs);
}
// Less-than comparison, evaluated on the float conversions of both operands.
inline bool operator< (float8m3e4s1_t lhs, float8m3e4s1_t rhs) noexcept
{
    return static_cast<float>(lhs) < static_cast<float>(rhs);
}
// Greater-than comparison, evaluated on the float conversions of both operands.
inline bool operator> (float8m3e4s1_t lhs, float8m3e4s1_t rhs) noexcept
{
    return static_cast<float>(lhs) > static_cast<float>(rhs);
}
// Less-than-or-equal comparison, evaluated on the float conversions of both operands.
inline bool operator<=(float8m3e4s1_t lhs, float8m3e4s1_t rhs) noexcept
{
    return static_cast<float>(lhs) <= static_cast<float>(rhs);
}
// Greater-than-or-equal comparison, evaluated on the float conversions of both operands.
inline bool operator>=(float8m3e4s1_t lhs, float8m3e4s1_t rhs) noexcept
{
    return static_cast<float>(lhs) >= static_cast<float>(rhs);
}