Baremetal-NN
Baremetal-NN API documentation
Loading...
Searching...
No Matches
nn_i32.h
Go to the documentation of this file.
1
8#ifndef __NN_I32_H
9#define __NN_I32_H
10
11#include "float16.h"
12
13
14#ifdef CONFIG_BACKEND_RISCV_V
15 #include "riscv_vector.h"
16#endif
17
/**
 * @brief A 0D tensor (scalar) with a int32_t data type.
 */
typedef struct {
  int32_t data;
} Tensor0D_I32;
/**
 * @brief A 1D tensor with a int32_t data type.
 */
typedef struct {
  size_t shape[1];  // number of elements
  int32_t *data;    // contiguous element buffer (owned by the tensor constructors)
} Tensor1D_I32;
/**
 * @brief A 2D tensor with a int32_t data type.
 */
typedef struct {
  size_t shape[2];  // (rows, cols)
  int32_t *data;    // row-major contiguous buffer (owned by the tensor constructors)
} Tensor2D_I32;
47
48/* ======================================================================================================== */
49/* Tensor Creation */
50/* ======================================================================================================== */
59 Tensor0D_I32 *tensor = (Tensor0D_I32 *)malloc(sizeof(Tensor0D_I32));
60 tensor->data = data;
61 return tensor;
62}
63
72Tensor1D_I32 *nn_tensor1d_i32(size_t shape[1], const int32_t *data) {
73 Tensor1D_I32 *tensor = (Tensor1D_I32 *)malloc(sizeof(Tensor1D_I32));
74 tensor->shape[0] = shape[0];
75
76 size_t n_bytes = shape[0] * sizeof(int32_t);
77 tensor->data = (int32_t *)malloc(n_bytes);
78 if (data != NULL) {
79 memcpy(tensor->data, data, n_bytes);
80 }
81 return tensor;
82}
83
92Tensor2D_I32 *nn_tensor2d_i32(size_t shape[2], const int32_t *data) {
93 Tensor2D_I32 *tensor = (Tensor2D_I32 *)malloc(sizeof(Tensor2D_I32));
94 tensor->shape[0] = shape[0];
95 tensor->shape[1] = shape[1];
96
97 size_t n_bytes = shape[0] * shape[1] * sizeof(int32_t);
98 tensor->data = (int32_t *)malloc(n_bytes);
99 if (data != NULL) {
100 memcpy(tensor->data, data, n_bytes);
101 }
102 return tensor;
103}
104
113 Tensor0D_I32 *tensor = nn_tensor0d_i32(0);
114 return tensor;
115}
116
125Tensor1D_I32 *nn_zeros1d_i32(size_t shape[1]) {
126 Tensor1D_I32 *tensor = nn_tensor1d_i32(shape, NULL);
127 size_t n = shape[0];
128 for (size_t i = 0; i < n; i += 1) {
129 tensor->data[i] = 0;
130 }
131 return tensor;
132}
133
142Tensor2D_I32 *nn_zeros2d_i32(size_t shape[2]) {
143 Tensor2D_I32 *tensor = nn_tensor2d_i32(shape, NULL);
144 size_t n = shape[0] * shape[1];
145 for (size_t i = 0; i < n; i += 1) {
146 tensor->data[i] = 0;
147 }
148 return tensor;
149}
150
159 Tensor0D_I32 *tensor = nn_tensor0d_i32(1);
160 return tensor;
161}
162
171Tensor1D_I32 *nn_ones1d_i32(size_t shape[1]) {
172 Tensor1D_I32 *tensor = nn_tensor1d_i32(shape, NULL);
173 size_t n = shape[0];
174 for (size_t i = 0; i < n; i += 1) {
175 tensor->data[i] = 1;
176 }
177 return tensor;
178}
179
188Tensor2D_I32 *nn_ones2d_i32(size_t shape[2]) {
189 Tensor2D_I32 *tensor = nn_tensor2d_i32(shape, NULL);
190 size_t n = shape[0] * shape[1];
191 for (size_t i = 0; i < n; i += 1) {
192 tensor->data[i] = 1;
193 }
194 return tensor;
195}
196
206 Tensor0D_I32 *tensor = nn_tensor0d_i32(data);
207 return tensor;
208}
209
219Tensor1D_I32 *nn_full1d_i32(size_t shape[1], int32_t data) {
220 Tensor1D_I32 *tensor = nn_tensor1d_i32(shape, NULL);
221 size_t n = shape[0];
222 for (size_t i = 0; i < n; i += 1) {
223 tensor->data[i] = data;
224 }
225 return tensor;
226}
227
237Tensor2D_I32 *nn_full2d_i32(size_t shape[2], int32_t data) {
238 Tensor2D_I32 *tensor = nn_tensor2d_i32(shape, NULL);
239 size_t n = shape[0] * shape[1];
240 for (size_t i = 0; i < n; i += 1) {
241 tensor->data[i] = data;
242 }
243 return tensor;
244}
245
254 Tensor0D_I32 *tensor = nn_tensor0d_i32(rand());
255 return tensor;
256}
257
266Tensor1D_I32 *nn_rand1d_i32(size_t shape[1]) {
267 Tensor1D_I32 *tensor = nn_tensor1d_i32(shape, NULL);
268 size_t n = shape[0];
269 for (size_t i = 0; i < n; i += 1) {
270 tensor->data[i] = rand();
271 }
272 return tensor;
273}
274
283Tensor2D_I32 *nn_rand2d_i32(size_t shape[2]) {
284 Tensor2D_I32 *tensor = nn_tensor2d_i32(shape, NULL);
285 size_t n = shape[0] * shape[1];
286 for (size_t i = 0; i < n; i += 1) {
287 tensor->data[i] = rand();
288 }
289 return tensor;
290}
291
292
293/* ======================================================================================================== */
294/* Tensor Prints */
295/* ======================================================================================================== */
/**
 * @brief Prints a int32_t number.
 *
 * NOTE(review): "%d" assumes int32_t == int on the supported targets;
 * strictly portable code would use PRId32 from <inttypes.h> — confirm.
 *
 * @param v The value to print.
 */
void nn_print_i32(int32_t v) {
  printf("%d", v);
}
306
307
316 printf("[");
317 for (size_t i=0; i<tensor->shape[0]; i+=1) {
318 nn_print_i32(*((int32_t *)tensor->data + i));
319 if (i < tensor->shape[0]-1) {
320 printf(" ");
321 }
322 }
323 printf("]\n");
324}
325
334 printf("[");
335 for (size_t i=0; i<tensor->shape[0]; i+=1) {
336 if (i != 0) {
337 printf(" ");
338 }
339 printf("[");
340 for (size_t j=0; j<tensor->shape[1]; j+=1) {
341 nn_print_i32(*((int32_t *)tensor->data + i*tensor->shape[1] + j));
342 if (j < tensor->shape[1]-1) {
343 printf(" ");
344 }
345 }
346 printf("]");
347 if (i < tensor->shape[0]-1) {
348 printf("\n");
349 }
350 }
351 printf("]\n");
352}
353
354
/* ======================================================================================================== */
/*                                               Comparison                                                 */
/* ======================================================================================================== */
367uint8_t nn_equals0d_i32(const Tensor0D_I32 *a, const Tensor0D_I32 *b) {
368 return a->data == b->data;
369}
370
380uint8_t nn_equals1d_i32(const Tensor1D_I32 *a, const Tensor1D_I32 *b) {
381 nn_assert(a->shape[0] == b->shape[0], "Cannot compare tensors of different shapes");
382
383 size_t n = a->shape[0];
384 for (size_t i = 0; i < n; i += 1) {
385 if (a->data[i] != b->data[i]) {
386 return 0;
387 }
388 }
389 return 1;
390}
391
401uint8_t nn_equals2d_i32(const Tensor2D_I32 *a, const Tensor2D_I32 *b) {
402 nn_assert(a->shape[0] == b->shape[0] && a->shape[1] == b->shape[1], "Cannot compare tensors of different shapes");
403
404 size_t n = a->shape[0] * a->shape[1];
405 for (size_t i = 0; i < n; i += 1) {
406 if (a->data[i] != b->data[i]) {
407 return 0;
408 }
409 }
410 return 1;
411}
412
413
414/* ======================================================================================================== */
415/* Unary */
416/* ======================================================================================================== */
417
418
419
420
421/* ======================================================================================================== */
422/* Addition */
423/* ======================================================================================================== */
435void nn_add1d_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x1, const Tensor1D_I32 *x2) {
436 nn_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes");
437 nn_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes");
438
439 size_t n = y->shape[0];
440 int32_t *x1_data = x1->data;
441 int32_t *x2_data = x2->data;
442 int32_t *y_data = y->data;
443
444 #ifdef CONFIG_BACKEND_RISCV_VECTOR
445 while (n > 0) {
446 size_t vl = __riscv_vsetvl_e8m1(n);
447 vint32m1_t vec_x1 = __riscv_vle32_v_i32m1(x1_data, vl);
448 vint32m1_t vec_x2 = __riscv_vle32_v_i32m1(x2_data, vl);
449 vint32m1_t vec_y = __riscv_vfadd_vv_i32m1(vec_x1, vec_x2, vl);
450 __riscv_vse32_v_i32m1(y_data, vec_y, vl);
451 x1_data += vl;
452 x2_data += vl;
453 y_data += vl;
454 n -= vl;
455 }
456 #else // scalar implementation
457 for (size_t i = 0; i < n; i += 1) {
458 y_data[i] = x1_data[i] + x2_data[i];
459 }
460 #endif
461}
462
463
464
476void nn_add2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2) {
477 nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes");
478 nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes");
479
480 size_t n = y->shape[0] * y->shape[1];
481 int32_t *x1_data = x1->data;
482 int32_t *x2_data = x2->data;
483 int32_t *y_data = y->data;
484
485 #ifdef CONFIG_BACKEND_RISCV_VECTOR
486 while (n > 0) {
487 size_t vl = __riscv_vsetvl_e8m1(n);
488 vint32m1_t vec_x1 = __riscv_vle32_v_i32m1(x1_data, vl);
489 vint32m1_t vec_x2 = __riscv_vle32_v_i32m1(x2_data, vl);
490 vint32m1_t vec_y = __riscv_vfadd_vv_i32m1(vec_x1, vec_x2, vl);
491 __riscv_vse32_v_i32m1(y_data, vec_y, vl);
492 x1_data += vl;
493 x2_data += vl;
494 y_data += vl;
495 n -= vl;
496 }
497 #else // scalar implementation
498 for (size_t i = 0; i < n; i += 1) {
499 y_data[i] = x1_data[i] + x2_data[i];
500 }
501 #endif
502}
503
515void nn_addscalar1d_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x, int32_t scalar) {
516 nn_assert(y->shape[0] == x->shape[0], "Cannot add tensors of different shapes");
517
518 size_t n = y->shape[0];
519 int32_t *x_data = x->data;
520 int32_t *y_data = y->data;
521
522 for (size_t i = 0; i < n; i += 1) {
523 y_data[i] = x_data[i] + scalar;
524 }
525}
526
538void nn_addscalar2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x, int32_t scalar) {
539 nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot add tensors of different shapes");
540
541 size_t n = y->shape[0] * y->shape[1];
542 int32_t *x_data = x->data;
543 int32_t *y_data = y->data;
544
545 for (size_t i = 0; i < n; i += 1) {
546 y_data[i] = x_data[i] + scalar;
547 }
548}
549
550
551
552
553/* ======================================================================================================== */
554/* Multiplication */
555/* ======================================================================================================== */
556
557
558
559
560/* ======================================================================================================== */
561/* MatMul */
562/* ======================================================================================================== */
563
575void nn_dot_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x1, const Tensor1D_I32 *x2) {
576 nn_assert(x1->shape[0] == x2->shape[0], "Cannot dot tensors of different shapes");
577 nn_assert(y->shape[0] == x1->shape[0], "Cannot dot tensors of different shapes");
578
579 size_t n = y->shape[0];
580 int32_t *x1_data = x1->data;
581 int32_t *x2_data = x2->data;
582 int32_t *y_data = y->data;
583
584 int32_t sum_i32 = 0;
585 for (size_t i = 0; i < n; i += 1) {
586 sum_i32 += x1_data[i] * x2_data[i];
587 }
588 y_data[0] = sum_i32;
589}
590
602void nn_mm_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2) {
603 nn_assert(x1->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes");
604 nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes");
605
606 const size_t n = x1->shape[0];
607 const size_t m = x1->shape[1];
608 const size_t p = x2->shape[1];
609 int32_t *x1_data = x1->data;
610 int32_t *x2_data = x2->data;
611 int32_t *y_data = y->data;
612
613 for (size_t i = 0; i < n; i += 1) {
614 for (size_t j = 0; j < p; j += 1) {
615 int32_t sum = 0;
616 for (size_t k = 0; k < m; k += 1) {
617 sum += x1_data[i * m + k] * x2_data[k * p + j];
618 }
619 y_data[i * p + j] = sum;
620 }
621 }
622}
623
636void nn_addmm_i32(Tensor2D_I32 *y, const Tensor2D_I32 *c, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2) {
637 nn_assert(x1->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes");
638 nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes");
639
640 const size_t n = x1->shape[0];
641 const size_t m = x1->shape[1];
642 const size_t p = x2->shape[1];
643 int32_t *x1_data = x1->data;
644 int32_t *x2_data = x2->data;
645 int32_t *c_data = c->data;
646 int32_t *y_data = y->data;
647
648 for (size_t i = 0; i < n; i += 1) {
649 for (size_t j = 0; j < p; j += 1) {
650 int32_t sum = 0;
651 for (size_t k = 0; k < m; k += 1) {
652 sum += x1_data[i * m + k] * x2_data[k * p + j];
653 }
654 y_data[i * p + j] = sum + c_data[i * p + j];
655 }
656 }
657}
658
659
672void nn_linear_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x, const Tensor2D_I32 *weight, const Tensor1D_I32 *bias) {
673 nn_assert(x->shape[1] == weight->shape[1], "Cannot perform Linear on tensors of different shapes");
674 nn_assert(!bias || bias->shape[0] == weight->shape[0], "Cannot perform Linear on tensors of different shapes");
675 nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == weight->shape[0], "Cannot perform Linear on tensors of different shapes");
676
677 const size_t batch_size = x->shape[0];
678 const size_t in_features = x->shape[1];
679 const size_t out_features = weight->shape[0];
680
681 int32_t *x_batch_data = x->data;
682 int32_t *y_batch_data = y->data;
683
684 for (size_t i = 0; i < batch_size; i += 1) {
685 int32_t *x_data = x_batch_data;
686 int32_t *y_data = y_batch_data;
687
688 for (size_t j = 0; j < out_features; j += 1) {
689 int32_t *weight_row = weight->data + j * in_features;
690
691 int32_t sum = 0;
692 for (size_t k = 0; k < in_features; k += 1) {
693 sum += x_data[k] * weight_row[k];
694 }
695 if (bias) {
696 sum += bias->data[j];
697 }
698 y_data[ + j] = sum;
699 }
700
701 x_batch_data += in_features;
702 y_batch_data += out_features;
703 }
704}
705
706
707
708/* ======================================================================================================== */
709/* Non-linear */
710/* ======================================================================================================== */
711
723 nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ReLU on tensors of different shapes");
724
725 size_t n = y->shape[0] * y->shape[1];
726 int32_t *x_data = x->data;
727 int32_t *y_data = y->data;
728
729 for (size_t i = 0; i < n; i += 1) {
730 y_data[i] = x_data[i] > 0 ? x_data[i] : 0;
731 }
732}
733
734
#endif // __NN_I32_H
Half-Precision Floating-Point (fp16) Definitions.
static void nn_assert(int condition, char *message)
Definition: nn.h:59
void nn_addmm_i32(Tensor2D_I32 *y, const Tensor2D_I32 *c, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2)
Performs matrix multiplication of two 2D tensors and adds the result to a third tensor.
Definition: nn_i32.h:636
uint8_t nn_equals2d_i32(const Tensor2D_I32 *a, const Tensor2D_I32 *b)
Checks if two 2D tensors with type I32 are equal.
Definition: nn_i32.h:401
Tensor1D_I32 * nn_rand1d_i32(size_t shape[1])
Creates a 1D tensor with type I32 and initializes it to a random value.
Definition: nn_i32.h:266
uint8_t nn_equals1d_i32(const Tensor1D_I32 *a, const Tensor1D_I32 *b)
Checks if two 1D tensors with type I32 are equal.
Definition: nn_i32.h:380
void nn_add2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2)
Adds x1 and x2 element-wise and stores the result in y.
Definition: nn_i32.h:476
void nn_mm_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2)
Performs matrix multiplication of two 2D tensors and stores the result in y.
Definition: nn_i32.h:602
void nn_print_tensor2d_i32(const Tensor2D_I32 *tensor)
Prints the content of a 2D tensor with type I32.
Definition: nn_i32.h:333
void nn_addscalar2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x, int32_t scalar)
Adds a scalar to a 2D tensor and stores the result in y.
Definition: nn_i32.h:538
Tensor1D_I32 * nn_ones1d_i32(size_t shape[1])
Creates a 1D tensor with type I32 and initializes it to 1.
Definition: nn_i32.h:171
void nn_addscalar1d_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x, int32_t scalar)
Adds a scalar to a 1D tensor and stores the result in y.
Definition: nn_i32.h:515
void nn_print_i32(int32_t v)
Prints a int32_t number.
Definition: nn_i32.h:303
void nn_dot_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x1, const Tensor1D_I32 *x2)
Computes the dot product of two 1D tensors and stores the result in y.
Definition: nn_i32.h:575
Tensor0D_I32 * nn_full0d_i32(int32_t data)
Creates a 0D tensor with type I32 and initializes it to a given value.
Definition: nn_i32.h:205
Tensor0D_I32 * nn_rand0d_i32()
Creates a 0D tensor with type I32 and initializes it to a random value.
Definition: nn_i32.h:253
void nn_linear_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x, const Tensor2D_I32 *weight, const Tensor1D_I32 *bias)
Linear neural network layer.
Definition: nn_i32.h:672
void nn_add1d_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x1, const Tensor1D_I32 *x2)
Adds x1 and x2 element-wise and stores the result in y.
Definition: nn_i32.h:435
uint8_t nn_equals0d_i32(const Tensor0D_I32 *a, const Tensor0D_I32 *b)
Checks if two 0D tensors with type I32 are equal.
Definition: nn_i32.h:367
Tensor2D_I32 * nn_rand2d_i32(size_t shape[2])
Creates a 2D tensor with type I32 and initializes it to a random value.
Definition: nn_i32.h:283
Tensor1D_I32 * nn_tensor1d_i32(size_t shape[1], const int32_t *data)
Creates a 1D tensor with type I32.
Definition: nn_i32.h:72
Tensor0D_I32 * nn_zeros0d_i32()
Creates a 0D tensor with type I32 and initializes it to 0.
Definition: nn_i32.h:112
Tensor0D_I32 * nn_ones0d_i32()
Creates a 0D tensor with type I32 and initializes it to 1.
Definition: nn_i32.h:158
Tensor1D_I32 * nn_zeros1d_i32(size_t shape[1])
Creates a 1D tensor with type I32 and initializes it to 0.
Definition: nn_i32.h:125
Tensor0D_I32 * nn_tensor0d_i32(int32_t data)
Creates a 0D tensor with type I32.
Definition: nn_i32.h:58
Tensor2D_I32 * nn_tensor2d_i32(size_t shape[2], const int32_t *data)
Creates a 2D tensor with type I32.
Definition: nn_i32.h:92
Tensor2D_I32 * nn_zeros2d_i32(size_t shape[2])
Creates a 2D tensor with type I32 and initializes it to 0.
Definition: nn_i32.h:142
void nn_relu2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x)
Applies the ReLU activation function to a 2D tensor.
Definition: nn_i32.h:722
void nn_print_tensor1d_i32(const Tensor1D_I32 *tensor)
Prints the content of a 1D tensor with type I32.
Definition: nn_i32.h:315
Tensor1D_I32 * nn_full1d_i32(size_t shape[1], int32_t data)
Creates a 1D tensor with type I32 and initializes it to a given value.
Definition: nn_i32.h:219
Tensor2D_I32 * nn_full2d_i32(size_t shape[2], int32_t data)
Creates a 2D tensor with type I32 and initializes it to a given value.
Definition: nn_i32.h:237
Tensor2D_I32 * nn_ones2d_i32(size_t shape[2])
Creates a 2D tensor with type I32 and initializes it to 1.
Definition: nn_i32.h:188
int32_t data
Definition: nn_i32.h:24
A 0D tensor (scalar) with a int32_t data type.
Definition: nn_i32.h:23
size_t shape[1]
Definition: nn_i32.h:33
int32_t * data
Definition: nn_i32.h:34
A 1D tensor with a int32_t data type.
Definition: nn_i32.h:32
size_t shape[2]
Definition: nn_i32.h:43
int32_t * data
Definition: nn_i32.h:44
A 2D tensor with a int32_t data type.
Definition: nn_i32.h:42