Baremetal-NN
Baremetal-NN API documentation
Loading...
Searching...
No Matches
float16.h
Go to the documentation of this file.
1
12#ifndef __FLOAT16_H
13#define __FLOAT16_H
14
15#include <stdint.h>
16#include <stdlib.h>
17#include <float.h>
18
19#ifdef X86
20 #include <immintrin.h>
21#endif
22
/*
 * Native half-precision detection.
 *
 * NATIVE_FLOAT16_SUPPORT is defined (to 1) only when the toolchain advertises
 * a 16-bit float type. It is deliberately left UNDEFINED otherwise: the rest
 * of this header picks its code path with `#ifdef NATIVE_FLOAT16_SUPPORT`, and
 * a macro defined to 0 would still satisfy `#ifdef`, making the software
 * fallback unreachable.
 *
 * <float.h> only exposes FLT16_MAX when the extended-types interface has been
 * requested, so the compiler's builtin __FLT16_MAX__ is accepted as well.
 * NOTE(review): confirm that every target toolchain defining __FLT16_MAX__
 * also accepts the _Float16 type.
 */
#if defined(FLT16_MAX) || defined(__FLT16_MAX__)
  #define NATIVE_FLOAT16_SUPPORT 1
#endif

/*
 * When <float.h> does not provide them, FLT16_MAX / FLT16_MIN are raw binary16
 * bit patterns (0x7bff: largest finite value, 0xfbff: the same magnitude with
 * the sign bit set), not floating-point constants.
 */
#ifndef FLT16_MAX
  #define FLT16_MAX 0x7bff
  #define FLT16_MIN 0xfbff
#endif
30
31
/*
 * float16_t: storage type for half-precision values.
 *
 * With native support it is the compiler's _Float16. Otherwise it is a plain
 * 16-bit integer holding the raw bit pattern, and float_uint32_union_t is
 * provided so as_f32() / as_f16() can reinterpret a float's bits as a
 * uint32_t (and back) through a union.
 */
#if !defined(NATIVE_FLOAT16_SUPPORT)
  #warning "float16_t type is not supported, using manual implementations"
  typedef uint16_t float16_t;

  typedef union {
    uint32_t i;
    float f;
  } float_uint32_union_t;
#else
  typedef _Float16 float16_t;
#endif
43
50static inline float as_f32(float16_t h) {
51 #ifdef NATIVE_FLOAT16_SUPPORT
52 return (float)h;
53 #else
54 // from https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/half.h
55 // Note: This only supports the "round to even" rounding mode, which
56 // was the only mode supported by the original OpenEXR library
57
58 float_uint32_union_t v;
59 // this code would be clearer, although it does appear to be faster
60 // (1.06 vs 1.08 ns/call) to avoid the constants and just do 4
61 // shifts.
62 //
63 uint32_t hexpmant = ((uint32_t) (h) << 17) >> 4;
64 v.i = ((uint32_t) (h >> 15)) << 31;
65
66 // the likely really does help if most of your numbers are "normal" half numbers
67 if ((hexpmant >= 0x00800000)) {
68 v.i |= hexpmant;
69 // either we are a normal number, in which case add in the bias difference
70 // otherwise make sure all exponent bits are set
71 if ((hexpmant < 0x0f800000)) {
72 v.i += 0x38000000;
73 }
74 else {
75 v.i |= 0x7f800000;
76 }
77 }
78 else if (hexpmant != 0) {
79 // exponent is 0 because we're denormal, don't have to extract
80 // the mantissa, can just use as is
81 //
82 // other compilers may provide count-leading-zeros primitives,
83 // but we need the community to inform us of the variants
84 uint32_t lc;
85 lc = 0;
86 while (0 == ((hexpmant << lc) & 0x80000000)) {
87 lc += 1;
88 }
89 lc -= 8;
90 // so nominally we want to remove that extra bit we shifted
91 // up, but we are going to add that bit back in, then subtract
92 // from it with the 0x38800000 - (lc << 23)....
93 //
94 // by combining, this allows us to skip the & operation (and
95 // remove a constant)
96 //
97 // hexpmant &= ~0x00800000;
98 v.i |= 0x38800000;
99 // lc is now x, where the desired exponent is then
100 // -14 - lc
101 // + 127 -> new exponent
102 v.i |= (hexpmant << lc);
103 v.i -= (lc << 23);
104 }
105 return v.f;
106 #endif
107}
108
109
116static inline float16_t as_f16(float f) {
117 #ifdef NATIVE_FLOAT16_SUPPORT
118 return (_Float16)f;
119 #else
120 // from https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/half.h
121 // Note: This only supports the "round to even" rounding mode, which
122 // was the only mode supported by the original OpenEXR library
123
124 float_uint32_union_t v;
125 float16_t ret;
126 uint32_t e, m, ui, r, shift;
127
128 v.f = f;
129
130 ui = (v.i & ~0x80000000);
131 ret = ((v.i >> 16) & 0x8000);
132
133 // exponent large enough to result in a normal number, round and return
134 if (ui >= 0x38800000) {
135 // inf or nan
136 if (ui >= 0x7f800000) {
137 ret |= 0x7c00;
138 if (ui == 0x7f800000) {
139 return ret;
140 }
141 m = (ui & 0x7fffff) >> 13;
142 // make sure we have at least one bit after shift to preserve nan-ness
143 return ret | (uint16_t) m | (uint16_t) (m == 0);
144 }
145
146 // too large, round to infinity
147 if (ui > 0x477fefff) {
148 return ret | 0x7c00;
149 }
150
151 ui -= 0x38000000;
152 ui = ((ui + 0x00000fff + ((ui >> 13) & 1)) >> 13);
153 return ret | (uint16_t) ui;
154 }
155
156 // zero or flush to 0
157 if (ui < 0x33000001) {
158 return ret;
159 }
160
161 // produce a denormalized half
162 e = (ui >> 23);
163 shift = 0x7e - e;
164 m = 0x800000 | (ui & 0x7fffff);
165 r = m << (32 - shift);
166 ret |= (m >> shift);
167 if (r > 0x80000000 || (r == 0x80000000 && (ret & 0x1) != 0)) {
168 ret += 1;
169 }
170 return ret;
171 #endif
172}
173
174#endif // __FLOAT16_H
static float16_t as_f16(float f)
Definition: float16.h:116
static float as_f32(float16_t h)
Definition: float16.h:50
_Float16 float16_t
Definition: float16.h:33