Baremetal-NN
Baremetal-NN API documentation
Loading...
Searching...
No Matches
float16.h
Go to the documentation of this file.
#ifndef __NN_FLOAT16
#define __NN_FLOAT16

#include <stdint.h>
#include <stdlib.h>
#include <float.h>

#ifdef X86
  #include <immintrin.h>
#endif


#ifdef FLT16_MAX
  /* <float.h> advertises a native half type: use it directly. */
  typedef _Float16 float16_t;
#else
  /*
   * Overlay of a float and a 32-bit integer, used by the software
   * conversion routines below to read and write the raw bits of a float.
   *
   * NOTE(review): the type name was lost from the extracted listing;
   * float_uint32_t is a reconstruction - confirm against the repository.
   */
  typedef union {
    uint32_t i;
    float f;
  } float_uint32_t;

  /* Without native support a half is carried around as its raw 16 bits. */
  typedef uint16_t float16_t;
#endif

/**
 * Converts a half-precision value to single precision.
 *
 * With FLT16_MAX defined this is a plain cast. Otherwise `h` is interpreted
 * as a raw 16-bit pattern (1 sign, 5 exponent, 10 mantissa bits) and the
 * float bit pattern is assembled by hand; zeros, denormals, normals,
 * infinities and NaNs are all handled.
 *
 * @param h half-precision value to convert
 * @return the same value as a float
 */
static inline float NN_half_to_float(float16_t h) {
  #ifdef FLT16_MAX
    return (float)h;
  #else
    // from https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/half.h
    // Note: This only supports the "round to even" rounding mode, which
    // was the only mode supported by the original OpenEXR library

    float_uint32_t v;

    // Equivalent to (h & 0x7fff) << 13: the left shift drops the sign bit
    // and the right shift lands exponent+mantissa at bit 13. The upstream
    // note reports that doing it with shifts instead of mask constants
    // measured marginally faster (1.06 vs 1.08 ns/call).
    uint32_t hexpmant = ((uint32_t) (h) << 17) >> 4;
    // move the half sign (bit 15) into the float sign (bit 31)
    v.i = ((uint32_t) (h >> 15)) << 31;

    // non-zero half exponent: normal number, infinity or NaN
    if (hexpmant >= 0x00800000u) {
      v.i |= hexpmant;
      // either we are a normal number, in which case add in the bias difference
      // otherwise make sure all exponent bits are set
      if (hexpmant < 0x0f800000u) {
        v.i += 0x38000000u;
      }
      else {
        v.i |= 0x7f800000u;
      }
    }
    else if (hexpmant != 0) {
      // exponent is 0 because we're denormal, don't have to extract
      // the mantissa, can just use as is
      //
      // portable count-leading-zeros; hexpmant is non-zero here, so the
      // loop terminates before lc reaches 32
      uint32_t lc = 0;
      while (0 == ((hexpmant << lc) & 0x80000000u)) {
        lc += 1;
      }
      // distance needed to bring the leading mantissa bit up to bit 23
      lc -= 8;
      // so nominally we want to remove that extra bit we shifted
      // up, but we are going to add that bit back in, then subtract
      // from it with the 0x38800000 - (lc << 23)....
      //
      // by combining, this allows us to skip the & operation (and
      // remove a constant)
      //
      // hexpmant &= ~0x00800000;
      v.i |= 0x38800000u;
      // lc is now x, where the desired exponent is then
      // -14 - lc
      // + 127 -> new exponent
      v.i |= (hexpmant << lc);
      v.i -= (lc << 23);
    }
    return v.f;
  #endif
}


/**
 * Converts a single-precision value to half precision.
 *
 * With FLT16_MAX defined this is a plain cast. Otherwise the half bit
 * pattern is built by hand, rounding to nearest with ties to even:
 * magnitudes above the largest finite half become infinity, NaN stays NaN,
 * small magnitudes become denormal halves, and anything that would round
 * below the smallest denormal becomes a (signed) zero.
 *
 * @param f single-precision value to convert
 * @return the nearest half-precision value
 */
static inline float16_t NN_float_to_half(float f) {
  #ifdef FLT16_MAX
    return (_Float16)f;
  #else
    // from https://github.com/AcademySoftwareFoundation/Imath/blob/main/src/Imath/half.h
    // Note: This only supports the "round to even" rounding mode, which
    // was the only mode supported by the original OpenEXR library

    float_uint32_t v;
    float16_t ret;
    uint32_t e, m, ui, r, shift;

    v.f = f;

    // magnitude bits of the float, and the sign already moved to bit 15
    ui = (v.i & ~0x80000000u);
    ret = (float16_t) ((v.i >> 16) & 0x8000u);

    // exponent large enough to result in a normal number, round and return
    if (ui >= 0x38800000u) {
      // inf or nan
      if (ui >= 0x7f800000u) {
        ret |= 0x7c00;
        if (ui == 0x7f800000u) {
          return ret;
        }
        m = (ui & 0x7fffffu) >> 13;
        // make sure we have at least one bit after shift to preserve nan-ness
        return ret | (uint16_t) m | (uint16_t) (m == 0);
      }

      // too large, round to infinity
      if (ui > 0x477fefffu) {
        return ret | 0x7c00;
      }

      // rebias the exponent, then round to nearest even while dropping
      // the 13 low mantissa bits
      ui -= 0x38000000u;
      ui = ((ui + 0x00000fffu + ((ui >> 13) & 1)) >> 13);
      return ret | (uint16_t) ui;
    }

    // zero or flush to 0
    if (ui < 0x33000001u) {
      return ret;
    }

    // produce a denormalized half
    e = (ui >> 23);
    shift = 0x7e - e;
    // mantissa with the implicit leading one made explicit
    m = 0x800000u | (ui & 0x7fffffu);
    // r holds the bits shifted out, left-aligned, to decide the rounding
    r = m << (32 - shift);
    ret |= (m >> shift);
    // round up when above half, or exactly half and the result is odd
    if (r > 0x80000000u || (r == 0x80000000u && (ret & 0x1) != 0)) {
      ret += 1;
    }
    return ret;
  #endif
}

#endif // __NN_FLOAT16
uint16_t float16_t
Definition: float16.h:21
static float16_t NN_float_to_half(float f)
Definition: float16.h:96
static float NN_half_to_float(float16_t h)
Definition: float16.h:30
float f
Definition: float16.h:18
uint32_t i
Definition: float16.h:17
Definition: float16.h:16