Baremetal-NN
Baremetal-NN API documentation
nn_i32.h
#ifndef __NN_I32_H
#define __NN_I32_H

/* libc headers used below: size_t, int32_t, printf, malloc, rand, memcpy */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* nn_assert() is provided by nn.h, alongside which this header is used. */
#include "float16.h"


#ifdef CONFIG_BACKEND_RISCV_V
  #include "riscv_vector.h"
#endif
/* A 0-dimensional (scalar) tensor holding a single int32_t value. */
typedef struct {
  int32_t data;
} Tensor0D_I32;

/* A 1-dimensional tensor of shape[0] contiguous elements. */
typedef struct {
  size_t shape[1];
  int32_t *data;
} Tensor1D_I32;

/* A 2-dimensional tensor of shape[0] rows by shape[1] columns. */
typedef struct {
  size_t shape[2];
  int32_t *data;
} Tensor2D_I32;
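
/*
 * Layout note (a sketch added for clarity, not in the original header): 1D and
 * 2D tensors store their elements in one contiguous, row-major buffer, so
 * element (i, j) of a Tensor2D_I32 t is addressed as:
 *
 *   int32_t v = t->data[i * t->shape[1] + j];
 *
 * This is the same indexing the MatMul and Linear kernels below rely on.
 */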

/* ======================================================================================================== */
/* Tensor Creation */
/* ======================================================================================================== */
Tensor0D_I32 *nn_tensor0d_i32(int32_t data) {
  Tensor0D_I32 *tensor = (Tensor0D_I32 *)malloc(sizeof(Tensor0D_I32));
  tensor->data = data;
  return tensor;
}

Tensor1D_I32 *nn_tensor1d_i32(size_t shape[1], const int32_t *data) {
  Tensor1D_I32 *tensor = (Tensor1D_I32 *)malloc(sizeof(Tensor1D_I32));
  tensor->shape[0] = shape[0];

  size_t n_bytes = shape[0] * sizeof(int32_t);
  tensor->data = (int32_t *)malloc(n_bytes);
  if (data != NULL) {
    memcpy(tensor->data, data, n_bytes);
  }
  return tensor;
}

Tensor2D_I32 *nn_tensor2d_i32(size_t shape[2], const int32_t *data) {
  Tensor2D_I32 *tensor = (Tensor2D_I32 *)malloc(sizeof(Tensor2D_I32));
  tensor->shape[0] = shape[0];
  tensor->shape[1] = shape[1];

  size_t n_bytes = shape[0] * shape[1] * sizeof(int32_t);
  tensor->data = (int32_t *)malloc(n_bytes);
  if (data != NULL) {
    memcpy(tensor->data, data, n_bytes);
  }
  return tensor;
}
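
/*
 * Usage sketch (illustrative, not part of the original header): the
 * constructors heap-allocate the tensor and, when a data pointer is given,
 * deep-copy the bytes, so the caller's buffer can be reused afterwards. No
 * dedicated free helper appears in this file, so one option is to free the
 * data buffer and then the struct.
 *
 *   int32_t values[6] = {1, 2, 3, 4, 5, 6};
 *   size_t shape[2] = {2, 3};
 *   Tensor2D_I32 *t = nn_tensor2d_i32(shape, values);  // copies values
 *   // ... use t ...
 *   free(t->data);
 *   free(t);
 */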


Tensor0D_I32 *nn_zeros0d_i32() {
  Tensor0D_I32 *tensor = nn_tensor0d_i32(0);
  return tensor;
}

Tensor1D_I32 *nn_zeros1d_i32(size_t shape[1]) {
  Tensor1D_I32 *tensor = nn_tensor1d_i32(shape, NULL);
  size_t n = shape[0];
  for (size_t i = 0; i < n; i += 1) {
    tensor->data[i] = 0;
  }
  return tensor;
}

Tensor2D_I32 *nn_zeros2d_i32(size_t shape[2]) {
  Tensor2D_I32 *tensor = nn_tensor2d_i32(shape, NULL);
  size_t n = shape[0] * shape[1];
  for (size_t i = 0; i < n; i += 1) {
    tensor->data[i] = 0;
  }
  return tensor;
}

Tensor0D_I32 *nn_ones0d_i32() {
  Tensor0D_I32 *tensor = nn_tensor0d_i32(1);
  return tensor;
}

Tensor1D_I32 *nn_ones1d_i32(size_t shape[1]) {
  Tensor1D_I32 *tensor = nn_tensor1d_i32(shape, NULL);
  size_t n = shape[0];
  for (size_t i = 0; i < n; i += 1) {
    tensor->data[i] = 1;
  }
  return tensor;
}

Tensor2D_I32 *nn_ones2d_i32(size_t shape[2]) {
  Tensor2D_I32 *tensor = nn_tensor2d_i32(shape, NULL);
  size_t n = shape[0] * shape[1];
  for (size_t i = 0; i < n; i += 1) {
    tensor->data[i] = 1;
  }
  return tensor;
}

Tensor0D_I32 *nn_full0d_i32(int32_t data) {
  Tensor0D_I32 *tensor = nn_tensor0d_i32(data);
  return tensor;
}

Tensor1D_I32 *nn_full1d_i32(size_t shape[1], int32_t data) {
  Tensor1D_I32 *tensor = nn_tensor1d_i32(shape, NULL);
  size_t n = shape[0];
  for (size_t i = 0; i < n; i += 1) {
    tensor->data[i] = data;
  }
  return tensor;
}

Tensor2D_I32 *nn_full2d_i32(size_t shape[2], int32_t data) {
  Tensor2D_I32 *tensor = nn_tensor2d_i32(shape, NULL);
  size_t n = shape[0] * shape[1];
  for (size_t i = 0; i < n; i += 1) {
    tensor->data[i] = data;
  }
  return tensor;
}

Tensor0D_I32 *nn_rand0d_i32() {
  Tensor0D_I32 *tensor = nn_tensor0d_i32(rand());
  return tensor;
}

Tensor1D_I32 *nn_rand1d_i32(size_t shape[1]) {
  Tensor1D_I32 *tensor = nn_tensor1d_i32(shape, NULL);
  size_t n = shape[0];
  for (size_t i = 0; i < n; i += 1) {
    tensor->data[i] = rand();
  }
  return tensor;
}

Tensor2D_I32 *nn_rand2d_i32(size_t shape[2]) {
  Tensor2D_I32 *tensor = nn_tensor2d_i32(shape, NULL);
  size_t n = shape[0] * shape[1];
  for (size_t i = 0; i < n; i += 1) {
    tensor->data[i] = rand();
  }
  return tensor;
}
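
/*
 * Usage sketch (illustrative): the zeros/ones/full/rand helpers allocate and
 * fill in a single call. nn_rand*_i32 fills with raw rand() values, so seed
 * with srand() first if varied values are wanted.
 *
 *   size_t shape[2] = {4, 4};
 *   Tensor2D_I32 *z = nn_zeros2d_i32(shape);    // all elements 0
 *   Tensor2D_I32 *f = nn_full2d_i32(shape, 7);  // all elements 7
 */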


/* ======================================================================================================== */
/* Tensor Prints */
/* ======================================================================================================== */
void nn_print_i32(int32_t v, int16_t num_digits) {
  /* num_digits is currently unused for integer printing. */
  printf("%d", v);
}


void nn_print_tensor1d_i32(const Tensor1D_I32 *tensor) {
  printf("[");
  for (size_t i = 0; i < tensor->shape[0]; i += 1) {
    nn_print_i32(*((int32_t *)tensor->data + i), 3);
    if (i < tensor->shape[0] - 1) {
      printf(" ");
    }
  }
  printf("]\n");
}

void nn_print_tensor2d_i32(const Tensor2D_I32 *tensor) {
  printf("[");
  for (size_t i = 0; i < tensor->shape[0]; i += 1) {
    if (i != 0) {
      printf(" ");
    }
    printf("[");
    for (size_t j = 0; j < tensor->shape[1]; j += 1) {
      nn_print_i32(*((int32_t *)tensor->data + i * tensor->shape[1] + j), 3);
      if (j < tensor->shape[1] - 1) {
        printf(" ");
      }
    }
    printf("]");
    if (i < tensor->shape[0] - 1) {
      printf("\n");
    }
  }
  printf("]\n");
}
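
/*
 * Usage sketch (illustrative): printing a 2x2 tensor of ones produces a
 * bracketed, space-separated layout with one row per line:
 *
 *   size_t shape[2] = {2, 2};
 *   Tensor2D_I32 *t = nn_ones2d_i32(shape);
 *   nn_print_tensor2d_i32(t);
 *   // prints:
 *   // [[1 1]
 *   //  [1 1]]
 */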


/* ======================================================================================================== */
/* Comparison */
/* ======================================================================================================== */
uint8_t nn_equals0d_i32(const Tensor0D_I32 *a, const Tensor0D_I32 *b) {
  return a->data == b->data;
}

uint8_t nn_equals1d_i32(const Tensor1D_I32 *a, const Tensor1D_I32 *b) {
  nn_assert(a->shape[0] == b->shape[0], "Cannot compare tensors of different shapes");

  size_t n = a->shape[0];
  for (size_t i = 0; i < n; i += 1) {
    if (a->data[i] != b->data[i]) {
      return 0;
    }
  }
  return 1;
}

uint8_t nn_equals2d_i32(const Tensor2D_I32 *a, const Tensor2D_I32 *b) {
  nn_assert(a->shape[0] == b->shape[0] && a->shape[1] == b->shape[1], "Cannot compare tensors of different shapes");

  size_t n = a->shape[0] * a->shape[1];
  for (size_t i = 0; i < n; i += 1) {
    if (a->data[i] != b->data[i]) {
      return 0;
    }
  }
  return 1;
}
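
/*
 * Usage sketch (illustrative): the equality helpers return 1 on an exact
 * element-wise match and 0 otherwise. Note that mismatched shapes trip
 * nn_assert rather than returning 0, so compare like-shaped tensors only.
 *
 *   if (nn_equals2d_i32(a, b)) {
 *     printf("tensors match\n");
 *   }
 */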


/* ======================================================================================================== */
/* Unary */
/* ======================================================================================================== */




/* ======================================================================================================== */
/* Addition */
/* ======================================================================================================== */
void nn_add1d_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x1, const Tensor1D_I32 *x2) {
  nn_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes");
  nn_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes");

  size_t n = y->shape[0];
  int32_t *x1_data = x1->data;
  int32_t *x2_data = x2->data;
  int32_t *y_data = y->data;

  #ifdef CONFIG_BACKEND_RISCV_V
    while (n > 0) {
      // strip-mine with a 32-bit element width to match the vle32/vse32 accesses
      size_t vl = __riscv_vsetvl_e32m1(n);
      vint32m1_t vec_x1 = __riscv_vle32_v_i32m1(x1_data, vl);
      vint32m1_t vec_x2 = __riscv_vle32_v_i32m1(x2_data, vl);
      // integer vector add (vadd); vfadd is the floating-point variant
      vint32m1_t vec_y = __riscv_vadd_vv_i32m1(vec_x1, vec_x2, vl);
      __riscv_vse32_v_i32m1(y_data, vec_y, vl);
      x1_data += vl;
      x2_data += vl;
      y_data += vl;
      n -= vl;
    }
  #else  // scalar implementation
    for (size_t i = 0; i < n; i += 1) {
      y->data[i] = x1->data[i] + x2->data[i];
    }
  #endif
}



void nn_add2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2) {
  nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes");
  nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes");

  size_t n = y->shape[0] * y->shape[1];
  int32_t *x1_data = x1->data;
  int32_t *x2_data = x2->data;
  int32_t *y_data = y->data;

  #ifdef CONFIG_BACKEND_RISCV_V
    while (n > 0) {
      // strip-mine with a 32-bit element width to match the vle32/vse32 accesses
      size_t vl = __riscv_vsetvl_e32m1(n);
      vint32m1_t vec_x1 = __riscv_vle32_v_i32m1(x1_data, vl);
      vint32m1_t vec_x2 = __riscv_vle32_v_i32m1(x2_data, vl);
      // integer vector add (vadd); vfadd is the floating-point variant
      vint32m1_t vec_y = __riscv_vadd_vv_i32m1(vec_x1, vec_x2, vl);
      __riscv_vse32_v_i32m1(y_data, vec_y, vl);
      x1_data += vl;
      x2_data += vl;
      y_data += vl;
      n -= vl;
    }
  #else  // scalar implementation
    for (size_t i = 0; i < n; i += 1) {
      y->data[i] = x1->data[i] + x2->data[i];
    }
  #endif
}

void nn_addscalar1d_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x, int32_t scalar) {
  nn_assert(y->shape[0] == x->shape[0], "Cannot add tensors of different shapes");

  size_t n = y->shape[0];
  for (size_t i = 0; i < n; i += 1) {
    y->data[i] = x->data[i] + scalar;
  }
}

void nn_addscalar2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x, int32_t scalar) {
  nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot add tensors of different shapes");

  size_t n = y->shape[0] * y->shape[1];

  for (size_t i = 0; i < n; i += 1) {
    y->data[i] = x->data[i] + scalar;
  }
}
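
/*
 * Usage sketch (illustrative, assuming x1 and x2 are 8-element tensors created
 * earlier): the addition kernels write into a caller-allocated output of
 * matching shape; passing the same tensor as output and input gives an
 * in-place update, since each element is read and written at the same index.
 *
 *   size_t shape[1] = {8};
 *   Tensor1D_I32 *y = nn_zeros1d_i32(shape);
 *   nn_add1d_i32(y, x1, x2);        // y[i] = x1[i] + x2[i]
 *   nn_addscalar1d_i32(y, y, 1);    // in place: y[i] += 1
 */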




/* ======================================================================================================== */
/* Multiplication */
/* ======================================================================================================== */




/* ======================================================================================================== */
/* MatMul */
/* ======================================================================================================== */
void nn_dot_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x1, const Tensor1D_I32 *x2) {
  nn_assert(x1->shape[0] == x2->shape[0], "Cannot dot tensors of different shapes");
  nn_assert(y->shape[0] == x1->shape[0], "Cannot dot tensors of different shapes");

  size_t n = y->shape[0];
  int32_t sum_i32 = 0;
  for (size_t i = 0; i < n; i += 1) {
    sum_i32 += x1->data[i] * x2->data[i];
  }
  // the scalar result is written to the first element of y
  y->data[0] = sum_i32;
}


void nn_mm_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2) {
  nn_assert(x1->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes");
  nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes");

  const size_t n = x1->shape[0];
  const size_t m = x1->shape[1];
  const size_t p = x2->shape[1];

  for (size_t i = 0; i < n; i += 1) {
    for (size_t j = 0; j < p; j += 1) {
      int32_t sum = 0;
      for (size_t k = 0; k < m; k += 1) {
        sum += x1->data[i * m + k] * x2->data[k * p + j];
      }
      y->data[i * p + j] = sum;
    }
  }
}
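
/*
 * Usage sketch (illustrative): nn_mm_i32 computes y = x1 @ x2 for shapes
 * (n, m) @ (m, p) -> (n, p); the output must be pre-allocated with the
 * result shape.
 *
 *   size_t a_shape[2] = {2, 3}, b_shape[2] = {3, 4}, y_shape[2] = {2, 4};
 *   Tensor2D_I32 *a = nn_ones2d_i32(a_shape);
 *   Tensor2D_I32 *b = nn_ones2d_i32(b_shape);
 *   Tensor2D_I32 *y = nn_zeros2d_i32(y_shape);
 *   nn_mm_i32(y, a, b);   // every element of y is 3
 */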

void nn_addmm_i32(Tensor2D_I32 *y, const Tensor2D_I32 *c, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2) {
  nn_assert(x1->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes");
  nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes");

  const size_t n = x1->shape[0];
  const size_t m = x1->shape[1];
  const size_t p = x2->shape[1];

  for (size_t i = 0; i < n; i += 1) {
    for (size_t j = 0; j < p; j += 1) {
      int32_t sum = 0;
      for (size_t k = 0; k < m; k += 1) {
        sum += x1->data[i * m + k] * x2->data[k * p + j];
      }
      y->data[i * p + j] = sum + c->data[i * p + j];
    }
  }
}


void nn_linear_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x, const Tensor2D_I32 *weight, const Tensor1D_I32 *bias) {
  nn_assert(x->shape[1] == weight->shape[1], "Cannot perform Linear on tensors of different shapes");
  nn_assert(!bias || bias->shape[0] == weight->shape[0], "Cannot perform Linear on tensors of different shapes");
  nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == weight->shape[0], "Cannot perform Linear on tensors of different shapes");

  const size_t batch_size = x->shape[0];
  const size_t in_features = x->shape[1];
  const size_t out_features = weight->shape[0];

  for (size_t i = 0; i < batch_size; i += 1) {
    for (size_t j = 0; j < out_features; j += 1) {
      int32_t sum = 0;
      for (size_t k = 0; k < in_features; k += 1) {
        sum += x->data[i * in_features + k] * weight->data[j * in_features + k];
      }
      if (bias) {
        sum += bias->data[j];
      }
      y->data[i * out_features + j] = sum;
    }
  }
}
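
/*
 * Usage sketch (illustrative): nn_linear_i32 computes y = x @ weight^T + bias,
 * with x of shape (batch, in_features), weight of shape (out_features,
 * in_features), and bias of shape (out_features). The bias may be NULL, in
 * which case it is skipped:
 *
 *   nn_linear_i32(y, x, weight, NULL);   // affine layer without bias
 */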



/* ======================================================================================================== */
/* Non-linear */
/* ======================================================================================================== */

void nn_relu2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x) {
  nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ReLU on tensors of different shapes");

  size_t n = y->shape[0] * y->shape[1];
  int32_t *x_data = x->data;
  int32_t *y_data = y->data;

  for (size_t i = 0; i < n; i += 1) {
    y_data[i] = x_data[i] > 0 ? x_data[i] : 0;
  }
}
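
/*
 * Usage sketch (illustrative): ReLU clamps negative elements to zero; since
 * input and output are read and written at the same index, passing the same
 * tensor twice applies the activation in place.
 *
 *   nn_relu2d_i32(t, t);   // t->data[i] = max(t->data[i], 0)
 */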


#endif // __NN_I32_H