Baremetal-NN
Baremetal-NN API documentation
Loading...
Searching...
No Matches
float16.h
Go to the documentation of this file.
1
#ifndef __FLOAT16_H
#define __FLOAT16_H

#include <stdint.h>
#include <stdlib.h>
#include <float.h>

#ifdef X86
  #include <immintrin.h>
#endif


#ifdef FLT16_MAX
  /* <float.h> advertises a native half-precision type; use it directly. */
  typedef _Float16 float16_t;
#else
  #warning "float16_t type is not supported, using manual implementations"

  /*
   * Overlay used to read or write the raw bit pattern of a 32-bit float.
   * NOTE(review): the typedef name was not visible in the listing this
   * header was recovered from -- confirm it matches the rest of the project.
   */
  typedef union {
    uint32_t i;
    float f;
  } float_uint32_union_t;

  /* Without native support a half is carried around as its raw 16-bit encoding. */
  typedef uint16_t float16_t;
#endif

/**
 * Converts a half-precision value to single precision.
 *
 * With a native _Float16 this is a plain cast. Otherwise the 16-bit encoding
 * is expanded by hand; zero, subnormal, normal, infinity and NaN encodings
 * each take their own path below.
 *
 * @param h The half-precision value to convert.
 * @return The same value as a 32-bit float.
 */
static inline float as_f32(float16_t h) {
  #ifdef FLT16_MAX
    return (float)h;
  #else
    // from https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/half.h
    // Note: This only supports the "round to even" rounding mode, which
    // was the only mode supported by the original OpenEXR library

    float_uint32_union_t v;

    // Drop the sign (<< 17) and land the half's exponent+mantissa where the
    // float's mantissa and low exponent bits live (>> 4). Masking with
    // constants would read more clearly, but upstream measured the four
    // shifts as marginally faster (1.06 vs 1.08 ns/call).
    uint32_t hexpmant = ((uint32_t) (h) << 17) >> 4;
    // Move the sign bit from bit 15 to bit 31.
    v.i = ((uint32_t) (h >> 15)) << 31;

    // Non-zero half exponent: normal number, infinity or NaN.
    if (hexpmant >= 0x00800000) {
      v.i |= hexpmant;
      // either we are a normal number, in which case add in the bias
      // difference ((127 - 15) << 23), otherwise make sure all exponent
      // bits are set (the mantissa bits, if any, are kept as they are)
      if (hexpmant < 0x0f800000) {
        v.i += 0x38000000;
      }
      else {
        v.i |= 0x7f800000;
      }
    }
    else if (hexpmant != 0) {
      // exponent is 0 because we're denormal, don't have to extract
      // the mantissa, can just use as is
      //
      // Count leading zeros by hand; the loop terminates because
      // hexpmant is known to be non-zero here.
      uint32_t lc = 0;
      while (0 == ((hexpmant << lc) & 0x80000000)) {
        lc += 1;
      }
      // Only the distance to bit 23 (the float's implicit-one position)
      // matters, so discard the 8 bits above it.
      lc -= 8;
      // so nominally we want to remove that extra bit we shifted
      // up, but we are going to add that bit back in, then subtract
      // from it with the 0x38800000 - (lc << 23)....
      //
      // by combining, this allows us to skip the & operation (and
      // remove a constant)
      //
      // hexpmant &= ~0x00800000;
      v.i |= 0x38800000;
      // lc is now x, where the desired exponent is then
      // -14 - lc
      // + 127 -> new exponent
      v.i |= (hexpmant << lc);
      v.i -= (lc << 23);
    }
    // hexpmant == 0: signed zero, v.i already holds just the sign bit.
    return v.f;
  #endif
}


/**
 * Converts a single-precision value to half precision.
 *
 * With a native _Float16 this is a plain cast. Otherwise the value is
 * rounded to nearest, ties to even: magnitudes too large for a finite half
 * become infinity, magnitudes too small for a subnormal half become signed
 * zero, and NaN inputs stay NaN.
 *
 * @param f The single-precision value to convert.
 * @return The nearest half-precision value.
 */
static inline float16_t as_f16(float f) {
  #ifdef FLT16_MAX
    return (_Float16)f;
  #else
    // from https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/half.h
    // Note: This only supports the "round to even" rounding mode, which
    // was the only mode supported by the original OpenEXR library

    float_uint32_union_t v;
    float16_t ret;
    uint32_t e, m, ui, r, shift;

    v.f = f;

    // ui: magnitude bits only; ret: sign moved from bit 31 to bit 15.
    ui = (v.i & ~0x80000000);
    ret = ((v.i >> 16) & 0x8000);

    // exponent large enough to result in a normal number, round and return
    // (0x38800000 is 2^-14, the smallest normal half)
    if (ui >= 0x38800000) {
      // inf or nan
      if (ui >= 0x7f800000) {
        ret |= 0x7c00;
        if (ui == 0x7f800000) {
          return ret;
        }
        m = (ui & 0x7fffff) >> 13;
        // make sure we have at least one bit after shift to preserve nan-ness
        return ret | (uint16_t) m | (uint16_t) (m == 0);
      }

      // too large, round to infinity (anything above this bit pattern
      // would round up past the largest finite half, 65504)
      if (ui > 0x477fefff) {
        return ret | 0x7c00;
      }

      // Rebias the exponent, then round to nearest-even while dropping the
      // 13 low mantissa bits: adding 0xfff plus the lowest kept bit carries
      // into the result exactly when rounding up is required.
      ui -= 0x38000000;
      ui = ((ui + 0x00000fff + ((ui >> 13) & 1)) >> 13);
      return ret | (uint16_t) ui;
    }

    // zero or flush to 0 (magnitudes up to and including 2^-25)
    if (ui < 0x33000001) {
      return ret;
    }

    // produce a denormalized half
    e = (ui >> 23);
    shift = 0x7e - e;
    m = 0x800000 | (ui & 0x7fffff);   // mantissa with the implicit one made explicit
    r = m << (32 - shift);            // the bits that fall off, left-aligned
    ret |= (m >> shift);
    // Round up when the discarded part exceeds one half, or equals one half
    // and the kept value is odd (ties to even).
    if (r > 0x80000000 || (r == 0x80000000 && (ret & 0x1) != 0)) {
      ret += 1;
    }
    return ret;
  #endif
}

#endif // __FLOAT16_H
uint16_t float16_t
Definition: float16.h:33
static float16_t as_f16(float f)
Definition: float16.h:108
static float as_f32(float16_t h)
Definition: float16.h:42
float f
Definition: float16.h:30
uint32_t i
Definition: float16.h:29
Definition: float16.h:28