/*
 * Bit-layout constants and well-known bit patterns for IEEE 754 binary16:
 * 1 sign bit (bit 15), 5 exponent bits (bits 14..10), 10 fraction bits
 * (bits 9..0).
 *
 * The bit-pattern constants are cast to vl_half_t, the raw half-precision
 * typedef that the functions in this header take and return.
 */

/** Sign bit mask. */
#define VL_HALF_SIGN_MASK 0x8000
/** Exponent field mask. */
#define VL_HALF_EXP_MASK 0x7C00
/** Fraction (mantissa) mask. */
#define VL_HALF_FRAC_MASK 0x03FF

/** Exponent field shift. */
#define VL_HALF_EXP_SHIFT 10
/** Exponent bias for binary16. */
#define VL_HALF_EXP_BIAS 15

/** Positive and negative zero (all exponent and fraction bits clear). */
#define VL_HALF_POS_ZERO ((vl_half_t)0x0000)
#define VL_HALF_NEG_ZERO ((vl_half_t)0x8000)

/** Positive and negative infinity (exponent all ones, fraction zero). */
#define VL_HALF_POS_INF ((vl_half_t)0x7C00)
#define VL_HALF_NEG_INF ((vl_half_t)0xFC00)

/** Quiet NaN: exponent all ones, top fraction bit (0x0200) set. */
#define VL_HALF_QNAN ((vl_half_t)0x7E00)
/** Signaling NaN: exponent all ones, top fraction bit clear, payload 0x0100. */
#define VL_HALF_SNAN ((vl_half_t)0x7D00)

/** +1.0 and -1.0 (exponent field equals the bias, fraction zero). */
#define VL_HALF_ONE ((vl_half_t)0x3C00)
#define VL_HALF_NEG_ONE ((vl_half_t)0xBC00)

/** Largest finite value (+65504) and most negative finite value (-65504). */
#define VL_HALF_MAX ((vl_half_t)0x7BFF)
#define VL_HALF_MIN ((vl_half_t)0xFBFF)

/** Smallest-magnitude normal values (+/- 2^-14): exponent field 1, fraction 0. */
#define VL_HALF_MIN_POS ((vl_half_t)0x0400)
#define VL_HALF_MIN_NEG ((vl_half_t)0x8400)

/** Smallest-magnitude subnormal values (+/- 2^-24): exponent field 0, fraction 1. */
#define VL_HALF_TRUE_MIN_POS ((vl_half_t)0x0001)
#define VL_HALF_TRUE_MIN_NEG ((vl_half_t)0x8001)

/** Machine epsilon 2^-10: the gap between 1.0 and the next representable value. */
#define VL_HALF_EPSILON ((vl_half_t)0x1400)
131static inline int vlHalfIsSubnormal(
vl_half_t h) {
return (vlHalfExp(h) == 0) && (vlHalfFrac(h) != 0); }
141static inline int vlHalfIsNaN(
vl_half_t h) {
return (vlHalfExp(h) == 31) && (vlHalfFrac(h) != 0); }
/*
 * Takes separate sign, exponent and fraction field values and returns a
 * vl_half_t.
 * NOTE(review): the body is not visible in this listing; presumably it
 * shifts/masks the three fields into one 16-bit pattern -- confirm.
 */
157static inline vl_half_t vlHalfPack(vl_uint16_t sign, vl_uint16_t exp, vl_uint16_t frac)
/*
 * Converts a float to a half-precision bit pattern.
 * NOTE(review): several lines (braces, branch conditions, the declaration of
 * f) are not visible in this listing, so the comments below describe only
 * the statements that are shown.
 */
180static inline vl_half_t vlHalfFromFloat(
float x)
/* Copy the float's object representation into f so its bits can be examined. */
183 memcpy(&f, &x,
sizeof f);
/* Split the word: bit 31 is the sign, bits 30..23 the biased exponent,
 * bits 22..0 the fraction. */
185 vl_uint32_t sign = (f >> 31) & 1;
186 vl_int32_t exp = (f >> 23) & 0xFF;
187 vl_uint32_t frac = f & 0x7FFFFF;
/* Exponent field 31 with fraction 0, then with fraction 1. Presumably the
 * infinity and NaN results for an all-ones float exponent; the selecting
 * condition is not visible -- confirm. */
192 return vlHalfPack(sign, 31, 0);
193 return vlHalfPack(sign, 31, 1);
/* Remove the float exponent bias (127). */
196 vl_int32_t e = exp - 127;
/* Presumably overflow: result packed with exponent 31, fraction 0 -- confirm
 * the guarding condition. */
199 return vlHalfPack(sign, 31, 0);
/* Presumably underflow: result packed as a signed zero -- confirm the
 * guarding condition. */
204 return vlHalfPack(sign, 0, 0);
/* Restore the implicit leading 1 (bit 23) to form the 24-bit significand. */
206 vl_uint32_t mant = frac | 0x800000;
/* NOTE(review): if e is still (exp - 127) here, the right shift that maps a
 * 24-bit significand onto the half subnormal scale (units of 2^-24) is
 * (-e - 1) in total. Adding 13 on top of that gives, e.g. for e = -15,
 * rshift = 27 and frac16 = 0 where 0x200 would be expected, and for
 * e <= -20 a shift count of 32 or more, which is undefined for a 32-bit
 * operand. Verify against the guard conditions that are not visible;
 * shift may have been intended as (-14 - e). */
207 vl_uint32_t shift = (vl_uint32_t)(-e - 1);
208 vl_uint32_t rshift = shift + 13;
/* frac16 keeps the bits above the cut; rem holds the discarded bits and
 * half is the midpoint of the discarded range. */
210 vl_uint32_t frac16 = mant >> rshift;
211 vl_uint32_t rem = mant & ((1u << rshift) - 1);
212 vl_uint32_t half = 1u << (rshift - 1);
/* True when the discarded part is above the midpoint, or exactly at the
 * midpoint with frac16 odd (the round-to-nearest, ties-to-even test). The
 * guarded statement is not visible. */
214 if (rem > half || (rem == half && (frac16 & 1)))
/* Result packed with exponent field 0 (subnormal range). */
217 return vlHalfPack(sign, 0, frac16);
/* Keep the top 10 of the 23 fraction bits; rem is the 13 discarded bits. */
220 vl_uint32_t frac16 = frac >> 13;
221 vl_uint32_t rem = frac & 0x1FFF;
/* Same nearest/ties-to-even test, with 0x1000 as the midpoint of 13 bits. */
223 if (rem > 0x1000 || (rem == 0x1000 && (frac16 & 1)))
/* Presumably the case where rounding overflowed past the largest finite
 * half -- confirm the guarding condition. */
231 return vlHalfPack(sign, 31, 0);
/*
 * Converts a half-precision bit pattern to a float.
 * NOTE(review): several lines (braces, branch conditions, the declarations
 * of e, value and result) are not visible in this listing, so the comments
 * below describe only the statements that are shown.
 */
244static inline float vlHalfToFloat(
vl_half_t h)
/* Extract the three fields through the file's accessor helpers. */
246 const vl_uint32_t sign = vlHalfSign(h);
247 const vl_uint32_t exp = vlHalfExp(h);
248 vl_uint32_t frac = vlHalfFrac(h);
/* Loops until bit 10 (0x400) of frac is set. Presumably this normalizes a
 * subnormal input; the loop body is not visible -- confirm. */
261 while ((frac & 0x400) == 0)
/* Assemble a float word: sign in bit 31, e re-biased by 127 in the exponent
 * field, fraction moved from 10-bit to 23-bit alignment. */
267 value = (sign << 31) | ((e + 127) << 23) | (frac << 13);
/* All-ones float exponent (255) with the fraction carried over. */
272 value = (sign << 31) | (255 << 23) | (frac << 13);
/* Re-bias the exponent from VL_HALF_EXP_BIAS to the float bias 127 and
 * widen the fraction by 13 bits. */
276 value = (sign << 31) | ((exp -
VL_HALF_EXP_BIAS + 127) << 23) | (frac << 13);
/* Copy the assembled bit pattern into the float result. */
280 memcpy(&result, &value,
sizeof result);
/* Each return below widens both operands with vlHalfToFloat, applies the
 * operator in float, and narrows the result back with vlHalfFromFloat.
 * NOTE(review): the enclosing function signatures are not visible here. */
/* Sum of a and b. */
291 return vlHalfFromFloat(vlHalfToFloat(a) + vlHalfToFloat(b));
/* Difference a - b. */
297 return vlHalfFromFloat(vlHalfToFloat(a) - vlHalfToFloat(b));
/* Product of a and b. */
303 return vlHalfFromFloat(vlHalfToFloat(a) * vlHalfToFloat(b));
/* Quotient a / b. */
309 return vlHalfFromFloat(vlHalfToFloat(a) / vlHalfToFloat(b));
#define VL_HALF_EXP_SHIFT
Exponent field shift.
Definition vl_half.h:62
#define VL_HALF_EXP_MASK
Exponent field mask.
Definition vl_half.h:57
#define VL_HALF_SIGN_MASK
Sign bit mask.
Definition vl_half.h:55
#define VL_HALF_FRAC_MASK
Fraction (mantissa) mask.
Definition vl_half.h:59
vl_uint16_t vl_half_t
Raw half-precision floating-point bit pattern.
Definition vl_half.h:48
#define VL_HALF_EXP_BIAS
Exponent bias for binary16.
Definition vl_half.h:64
VL_F32_T vl_float32_t
32-bit floating point number type.
Definition vl_numtypes.h:173