Baremetal-NN API documentation
nn_f32.h
1
8#ifndef __NN_F32_H
9#define __NN_F32_H
10
11#include <float.h>
12
13#ifdef CONFIG_BACKEND_RISCV_V
14 #include "riscv_vector.h"
15#endif
16
20typedef struct {
21 float data;
22} Tensor0D_F32;
23
24
28typedef struct {
29 size_t shape[1];
30 float *data;
31} Tensor1D_F32;
32
33
37typedef struct {
38 size_t shape[2];
39 float *data;
40} Tensor2D_F32;
41
45typedef struct {
46 size_t shape[3];
47 float *data;
48} Tensor3D_F32;
49
53typedef struct {
54 size_t shape[4];
55 float *data;
56} Tensor4D_F32;
57
58
67static inline uint8_t nn_equal_f32(float golden, float actual, float rel_err) {
68 return (fabs(actual - golden) < rel_err) || (fabs((actual - golden) / actual) < rel_err);
69}
70
71
72/* ======================================================================================================== */
73/* Tensor Creation */
74/* ======================================================================================================== */
75
83Tensor0D_F32 *nn_tensor0d_f32(float data) {
84 Tensor0D_F32 *tensor = (Tensor0D_F32 *)malloc(sizeof(Tensor0D_F32));
85 tensor->data = data;
86 return tensor;
87}
88
97Tensor1D_F32 *nn_tensor1d_f32(size_t shape[1], const float *data) {
98 Tensor1D_F32 *tensor = (Tensor1D_F32 *)malloc(sizeof(Tensor1D_F32));
99 tensor->shape[0] = shape[0];
100
101 size_t n_bytes = shape[0] * sizeof(float);
102 tensor->data = (float *)malloc(n_bytes);
103 if (data != NULL) {
104 memcpy(tensor->data, data, n_bytes);
105 }
106 return tensor;
107}
108
117Tensor2D_F32 *nn_tensor2d_f32(size_t shape[2], const float *data) {
118 Tensor2D_F32 *tensor = (Tensor2D_F32 *)malloc(sizeof(Tensor2D_F32));
119 tensor->shape[0] = shape[0];
120 tensor->shape[1] = shape[1];
121
122 size_t n_bytes = shape[0] * shape[1] * sizeof(float);
123 tensor->data = (float *)malloc(n_bytes);
124 if (data != NULL) {
125 memcpy(tensor->data, data, n_bytes);
126 }
127 return tensor;
128}
129
138Tensor3D_F32 *nn_tensor3d_f32(size_t shape[3], const float *data) {
139 Tensor3D_F32 *tensor = (Tensor3D_F32 *)malloc(sizeof(Tensor3D_F32));
140 tensor->shape[0] = shape[0];
141 tensor->shape[1] = shape[1];
142 tensor->shape[2] = shape[2];
143
144 size_t n_bytes = shape[0] * shape[1] * shape[2] * sizeof(float);
145 tensor->data = (float *)malloc(n_bytes);
146 if (data != NULL) {
147 memcpy(tensor->data, data, n_bytes);
148 }
149 return tensor;
150}
151
160Tensor4D_F32 *nn_tensor4d_f32(size_t shape[4], const float *data) {
161 Tensor4D_F32 *tensor = (Tensor4D_F32 *)malloc(sizeof(Tensor4D_F32));
162 tensor->shape[0] = shape[0];
163 tensor->shape[1] = shape[1];
164 tensor->shape[2] = shape[2];
165 tensor->shape[3] = shape[3];
166
167 size_t n_bytes = shape[0] * shape[1] * shape[2] * shape[3] * sizeof(float);
168 tensor->data = (float *)malloc(n_bytes);
169 if (data != NULL) {
170 memcpy(tensor->data, data, n_bytes);
171 }
172 return tensor;
173}
174
181Tensor1D_F32 *nn_as_tensor1d_f32(size_t shape[1], float *data) {
182 Tensor1D_F32 *tensor = (Tensor1D_F32 *)malloc(sizeof(Tensor1D_F32));
183 tensor->shape[0] = shape[0];
184 tensor->data = data;
185 return tensor;
186}
187
194Tensor2D_F32 *nn_as_tensor2d_f32(size_t shape[2], float *data) {
195 Tensor2D_F32 *tensor = (Tensor2D_F32 *)malloc(sizeof(Tensor2D_F32));
196 tensor->shape[0] = shape[0];
197 tensor->shape[1] = shape[1];
198 tensor->data = data;
199 return tensor;
200}
201
208Tensor3D_F32 *nn_as_tensor3d_f32(size_t shape[3], float *data) {
209 Tensor3D_F32 *tensor = (Tensor3D_F32 *)malloc(sizeof(Tensor3D_F32));
210 tensor->shape[0] = shape[0];
211 tensor->shape[1] = shape[1];
212 tensor->shape[2] = shape[2];
213 tensor->data = data;
214 return tensor;
215}
216
223Tensor4D_F32 *nn_as_tensor4d_f32(size_t shape[4], float *data) {
224 Tensor4D_F32 *tensor = (Tensor4D_F32 *)malloc(sizeof(Tensor4D_F32));
225 tensor->shape[0] = shape[0];
226 tensor->shape[1] = shape[1];
227 tensor->shape[2] = shape[2];
228 tensor->shape[3] = shape[3];
229 tensor->data = data;
230 return tensor;
231}
232
233
237Tensor0D_F32 *nn_zeros0d_f32() {
238 Tensor0D_F32 *tensor = nn_tensor0d_f32(0);
239 return tensor;
240}
241
245Tensor1D_F32 *nn_zeros1d_f32(size_t shape[1]) {
246 Tensor1D_F32 *tensor = nn_tensor1d_f32(shape, NULL);
247 size_t n = shape[0];
248 for (size_t i = 0; i < n; i += 1) {
249 tensor->data[i] = 0;
250 }
251 return tensor;
252}
253
257Tensor2D_F32 *nn_zeros2d_f32(size_t shape[2]) {
258 Tensor2D_F32 *tensor = nn_tensor2d_f32(shape, NULL);
259 size_t n = shape[0] * shape[1];
260 for (size_t i = 0; i < n; i += 1) {
261 tensor->data[i] = 0;
262 }
263 return tensor;
264}
265
269Tensor3D_F32 *nn_zeros3d_f32(size_t shape[3]) {
270 Tensor3D_F32 *tensor = nn_tensor3d_f32(shape, NULL);
271 size_t n = shape[0] * shape[1] * shape[2];
272 for (size_t i = 0; i < n; i += 1) {
273 tensor->data[i] = 0;
274 }
275 return tensor;
276}
277
281Tensor4D_F32 *nn_zeros4d_f32(size_t shape[4]) {
282 Tensor4D_F32 *tensor = nn_tensor4d_f32(shape, NULL);
283 size_t n = shape[0] * shape[1] * shape[2] * shape[3];
284 for (size_t i = 0; i < n; i += 1) {
285 tensor->data[i] = 0;
286 }
287 return tensor;
288}
289
293Tensor0D_F32 *nn_ones0d_f32() {
294 Tensor0D_F32 *tensor = nn_tensor0d_f32(1);
295 return tensor;
296}
297
301Tensor1D_F32 *nn_ones1d_f32(size_t shape[1]) {
302 Tensor1D_F32 *tensor = nn_tensor1d_f32(shape, NULL);
303 size_t n = shape[0];
304 for (size_t i = 0; i < n; i += 1) {
305 tensor->data[i] = 1;
306 }
307 return tensor;
308}
309
313Tensor2D_F32 *nn_ones2d_f32(size_t shape[2]) {
314 Tensor2D_F32 *tensor = nn_tensor2d_f32(shape, NULL);
315 size_t n = shape[0] * shape[1];
316 for (size_t i = 0; i < n; i += 1) {
317 tensor->data[i] = 1;
318 }
319 return tensor;
320}
321
325Tensor0D_F32 *nn_full0d_f32(float data) {
326 Tensor0D_F32 *tensor = nn_tensor0d_f32(data);
327 return tensor;
328}
329
333Tensor1D_F32 *nn_full1d_f32(size_t shape[1], float data) {
334 Tensor1D_F32 *tensor = nn_tensor1d_f32(shape, NULL);
335 size_t n = shape[0];
336 for (size_t i = 0; i < n; i += 1) {
337 tensor->data[i] = data;
338 }
339 return tensor;
340}
341
345Tensor2D_F32 *nn_full2d_f32(size_t shape[2], float data) {
346 Tensor2D_F32 *tensor = nn_tensor2d_f32(shape, NULL);
347 size_t n = shape[0] * shape[1];
348 for (size_t i = 0; i < n; i += 1) {
349 tensor->data[i] = data;
350 }
351 return tensor;
352}
353
357Tensor0D_F32 *nn_rand0d_f32() {
358 Tensor0D_F32 *tensor = nn_tensor0d_f32(rand());
359 return tensor;
360}
361
365Tensor1D_F32 *nn_rand1d_f32(size_t shape[1]) {
366 Tensor1D_F32 *tensor = nn_tensor1d_f32(shape, NULL);
367 size_t n = shape[0];
368 for (size_t i = 0; i < n; i += 1) {
369 tensor->data[i] = rand();
370 }
371 return tensor;
372}
373
377Tensor2D_F32 *nn_rand2d_f32(size_t shape[2]) {
378 Tensor2D_F32 *tensor = nn_tensor2d_f32(shape, NULL);
379 size_t n = shape[0] * shape[1];
380 for (size_t i = 0; i < n; i += 1) {
381 tensor->data[i] = rand();
382 }
383 return tensor;
384}
385
386
387/* ======================================================================================================== */
388/* Tensor Prints */
389/* ======================================================================================================== */
390
396void nn_print_tensor1d_f32(const Tensor1D_F32 *tensor) {
397 printf("[");
398 for (size_t i=0; i<tensor->shape[0]; i+=1) {
399 nn_print_f32(*((float *)tensor->data + i), 3);
400 if (i < tensor->shape[0]-1) {
401 printf(" ");
402 }
403 }
404 printf("]\n");
405}
406
412void nn_print_tensor2d_f32(const Tensor2D_F32 *tensor) {
413 printf("[");
414 for (size_t i=0; i<tensor->shape[0]; i+=1) {
415 if (i == 0) {
416 printf("[");
417 }
418 else {
419 printf(" [");
420 }
421 for (size_t j=0; j<tensor->shape[1]; j+=1) {
422 nn_print_f32(*((float *)tensor->data + i*tensor->shape[1] + j), 3);
423 if (j < tensor->shape[1]-1) {
424 printf(" ");
425 }
426 }
427 printf(" ]");
428 if (i < tensor->shape[0]-1) {
429 printf("\n");
430 }
431 }
432 printf("]\n");
433}
434
440void nn_print_tensor3d_f32(const Tensor3D_F32 *tensor) {
441 printf("[");
442 for (size_t i=0; i<tensor->shape[0]; i+=1) {
443 if (i == 0) {
444 printf("[");
445 }
446 else {
447 printf("\n [");
448 }
449 for (size_t j=0; j<tensor->shape[1]; j+=1) {
450 if (j == 0) {
451 printf("[");
452 }
453 else {
454 printf(" [");
455 }
456 for (size_t k=0; k<tensor->shape[2]; k+=1) {
457 nn_print_f32(*((float *)tensor->data + i*tensor->shape[1]*tensor->shape[2] + j*tensor->shape[2] + k), 3);
458 if (k < tensor->shape[2]-1) {
459 printf(" ");
460 }
461 }
462 printf(" ]");
463 }
464 printf("]");
465 if (i < tensor->shape[0]-1) {
466 printf("\n");
467 }
468 }
469 printf("]\n");
470}
471
477void nn_print_tensor4d_f32(const Tensor4D_F32 *tensor) {
478 printf("[");
479 for (size_t i=0; i<tensor->shape[0]; i+=1) {
480 if (i == 0) {
481 printf("[");
482 }
483 else {
484 printf("\n [");
485 }
486 for (size_t j=0; j<tensor->shape[1]; j+=1) {
487 if (j == 0) {
488 printf("[");
489 }
490 else {
491 printf("\n [");
492 }
493 for (size_t k=0; k<tensor->shape[2]; k+=1) {
494 if (k == 0) {
495 printf("[");
496 }
497 else {
498 printf(" [");
499 }
500 for (size_t l=0; l<tensor->shape[3]; l+=1) {
501 nn_print_f32(*((float *)tensor->data + i*tensor->shape[1]*tensor->shape[2]*tensor->shape[3] + j*tensor->shape[2]*tensor->shape[3] + k*tensor->shape[3] + l), 3);
502 if (l < tensor->shape[3]-1) {
503 printf(" ");
504 }
505 }
506 printf(" ]");
507 if (k < tensor->shape[2]-1) {
508 printf("\n");
509 }
510 }
511 printf("]");
512 if (j < tensor->shape[1]-1) {
513 printf("\n");
514 }
515 }
516 printf("]");
517 if (i < tensor->shape[0]-1) {
518 printf("\n");
519 }
520 }
521 printf("]\n");
522}
523
524
525/* ======================================================================================================== */
526/* Comparison */
527/* ======================================================================================================== */
536uint8_t nn_equals0d_f32(const Tensor0D_F32 *a, const Tensor0D_F32 *b, float rel_err) {
537 return nn_equal_f32(a->data, b->data, rel_err);
538}
539
548uint8_t nn_equals1d_f32(const Tensor1D_F32 *a, const Tensor1D_F32 *b, float rel_err) {
549 nn_assert(a->shape[0] == b->shape[0], "Cannot compare tensors of different shapes");
550
551 size_t n = a->shape[0];
552 for (size_t i = 0; i < n; i += 1) {
553 if (!nn_equal_f32(a->data[i], b->data[i], rel_err)) {
554 return 0;
555 }
556 }
557 return 1;
558}
559
568uint8_t nn_equals2d_f32(const Tensor2D_F32 *a, const Tensor2D_F32 *b, float rel_err) {
569 nn_assert(a->shape[0] == b->shape[0] && a->shape[1] == b->shape[1], "Cannot compare tensors of different shapes");
570
571 size_t n = a->shape[0] * a->shape[1];
572 for (size_t i = 0; i < n; i += 1) {
573 if (!nn_equal_f32(a->data[i], b->data[i], rel_err)) {
574 return 0;
575 }
576 }
577 return 1;
578}
579
588uint8_t nn_equals3d_f32(const Tensor3D_F32 *a, const Tensor3D_F32 *b, float rel_err) {
589 nn_assert(a->shape[0] == b->shape[0] && a->shape[1] == b->shape[1] && a->shape[2] == b->shape[2], "Cannot compare tensors of different shapes");
590
591 size_t n = a->shape[0] * a->shape[1] * a->shape[2];
592 for (size_t i = 0; i < n; i += 1) {
593 if (!nn_equal_f32(a->data[i], b->data[i], rel_err)) {
594 return 0;
595 }
596 }
597 return 1;
598}
599
608uint8_t nn_equals4d_f32(const Tensor4D_F32 *a, const Tensor4D_F32 *b, float rel_err) {
609 nn_assert(a->shape[0] == b->shape[0] && a->shape[1] == b->shape[1] && a->shape[2] == b->shape[2] && a->shape[3] == b->shape[3], "Cannot compare tensors of different shapes");
610
611 size_t n = a->shape[0] * a->shape[1] * a->shape[2] * a->shape[3];
612 for (size_t i = 0; i < n; i += 1) {
613 if (!nn_equal_f32(a->data[i], b->data[i], rel_err)) {
614 return 0;
615 }
616 }
617 return 1;
618}
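The nn_equalsNd_f32 helpers check a computed tensor against a golden reference element by element, accepting a value when either its absolute or its relative error is below rel_err (see nn_equal_f32 above). A short sketch with illustrative names:

    size_t shape[2] = {2, 2};
    float golden_data[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    Tensor2D_F32 *golden = nn_tensor2d_f32(shape, golden_data);
    Tensor2D_F32 *actual = nn_tensor2d_f32(shape, golden_data);

    if (nn_equals2d_f32(golden, actual, 1e-3f)) {
      printf("PASS\n");
    }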
619
620
621/* ======================================================================================================== */
622/* Unary */
623/* ======================================================================================================== */
624void nn_max1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) {
625 size_t n = x->shape[0];
626 float *x_data = x->data;
627
628 #ifdef CONFIG_BACKEND_RISCV_V
629 vfloat32m1_t vec_max = __riscv_vfmv_s_f_f32m1(-FLT_MAX, 1);
630
631 while (n > 0) {
632 size_t vl = __riscv_vsetvl_e32m1(n);
633 vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x_data, vl);
634 vec_max = __riscv_vfredmax_vs_f32m1_f32m1(vec_x, vec_max, vl);
635 x_data += vl;
636 n -= vl;
637 }
638 y->data = __riscv_vfmv_f_s_f32m1_f32(vec_max);
639 #else /* scalar implementation */
640 y->data = -FLT_MAX;
641 for (size_t i = 0; i < n; i += 1) {
642 float val = x->data[i];
643 y->data = val > y->data ? val : y->data;
644 }
645 #endif
646}
647
648void nn_max2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x) {
649 size_t n = x->shape[0] * x->shape[1];
650 float *x_data = x->data;
651
652 #ifdef CONFIG_BACKEND_RISCV_V
653 vfloat32m1_t vec_max = __riscv_vfmv_s_f_f32m1(-FLT_MAX, 1);
654
655 while (n > 0) {
656 size_t vl = __riscv_vsetvl_e32m1(n);
657 vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x_data, vl);
658 vec_max = __riscv_vfredmax_vs_f32m1_f32m1(vec_x, vec_max, vl);
659 x_data += vl;
660 n -= vl;
661 }
662 y->data = __riscv_vfmv_f_s_f32m1_f32(vec_max);
663 #else /* scalar implementation */
664 y->data = -FLT_MAX;
665 for (size_t i = 0; i < n; i += 1) {
666 float val = x->data[i];
667 y->data = val > y->data ? val : y->data;
668 }
669 #endif
670}
671
672void nn_min1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x) {
673 size_t n = x->shape[0];
674 float *x_data = x->data;
675
676 #ifdef CONFIG_BACKEND_RISCV_V
677 vfloat32m1_t vec_min = __riscv_vfmv_s_f_f32m1(FLT_MAX, 1);
678
679 while (n > 0) {
680 size_t vl = __riscv_vsetvl_e32m1(n);
681 vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x_data, vl);
682 vec_min = __riscv_vfredmin_vs_f32m1_f32m1(vec_x, vec_min, vl);
683 x_data += vl;
684 n -= vl;
685 }
686 y->data = __riscv_vfmv_f_s_f32m1_f32(vec_min);
687 #else /* scalar implementation */
688 y->data = FLT_MAX;
689 for (size_t i = 0; i < n; i += 1) {
690 float val = x->data[i];
691 y->data = val < y->data ? val : y->data;
692 }
693 #endif
694}
695
696void nn_min2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x) {
697 size_t n = x->shape[0] * x->shape[1];
698 float *x_data = x->data;
699
700 #ifdef CONFIG_BACKEND_RISCV_V
701 vfloat32m1_t vec_min = __riscv_vfmv_s_f_f32m1(FLT_MAX, 1);
702
703 while (n > 0) {
704 size_t vl = __riscv_vsetvl_e32m1(n);
705 vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x_data, vl);
706 vec_min = __riscv_vfredmin_vs_f32m1_f32m1(vec_x, vec_min, vl);
707 x_data += vl;
708 n -= vl;
709 }
710 y->data = __riscv_vfmv_f_s_f32m1_f32(vec_min);
711 #else /* scalar implementation */
712 y->data = FLT_MAX;
713 for (size_t i = 0; i < n; i += 1) {
714 float val = x->data[i];
715 y->data = val < y->data ? val : y->data;
716 }
717 #endif
718}
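The max/min routines reduce a whole tensor to a single value written into a 0-D tensor; the RISC-V V builds use vector reductions, the scalar builds a plain loop. A brief sketch with illustrative values:

    float vals[4] = {-1.0f, 3.5f, 0.25f, 2.0f};
    size_t shape[1] = {4};
    Tensor1D_F32 *x = nn_tensor1d_f32(shape, vals);

    Tensor0D_F32 max_val, min_val;
    nn_max1d_f32(&max_val, x);   // max_val.data == 3.5f
    nn_min1d_f32(&min_val, x);   // min_val.data == -1.0f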
719
720/* ======================================================================================================== */
721/* Addition */
722/* ======================================================================================================== */
734void nn_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2) {
735 nn_assert(x1->shape[0] == x2->shape[0], "Cannot add tensors of different shapes");
736 nn_assert(y->shape[0] == x1->shape[0], "Cannot add tensors of different shapes");
737
738 size_t n = y->shape[0];
739 float *x1_data = x1->data;
740 float *x2_data = x2->data;
741 float *y_data = y->data;
742
743 #ifdef CONFIG_BACKEND_RISCV_V
744 while (n > 0) {
745 size_t vl = __riscv_vsetvl_e32m1(n);
746 vfloat32m1_t vec_x1 = __riscv_vle32_v_f32m1(x1_data, vl);
747 vfloat32m1_t vec_x2 = __riscv_vle32_v_f32m1(x2_data, vl);
748 vfloat32m1_t vec_y = __riscv_vfadd_vv_f32m1(vec_x1, vec_x2, vl);
749 __riscv_vse32_v_f32m1(y_data, vec_y, vl);
750 x1_data += vl;
751 x2_data += vl;
752 y_data += vl;
753 n -= vl;
754 }
755 #else /* scalar implementation */
756 for (size_t i = 0; i < n; i += 1) {
757 y->data[i] = x1->data[i] + x2->data[i];
758 }
759 #endif
760}
761
773void nn_add2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) {
774 nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot add tensors of different shapes");
775 nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot add tensors of different shapes");
776
777 size_t n = y->shape[0] * y->shape[1];
778 float *x1_data = x1->data;
779 float *x2_data = x2->data;
780 float *y_data = y->data;
781
782 #ifdef CONFIG_BACKEND_RISCV_V
783 while (n > 0) {
784 size_t vl = __riscv_vsetvl_e32m1(n);
785 vfloat32m1_t vec_x1 = __riscv_vle32_v_f32m1(x1_data, vl);
786 vfloat32m1_t vec_x2 = __riscv_vle32_v_f32m1(x2_data, vl);
787 vfloat32m1_t vec_y = __riscv_vfadd_vv_f32m1(vec_x1, vec_x2, vl);
788 __riscv_vse32_v_f32m1(y_data, vec_y, vl);
789 x1_data += vl;
790 x2_data += vl;
791 y_data += vl;
792 n -= vl;
793 }
794 #else /* scalar implementation */
795 for (size_t i = 0; i < n; i += 1) {
796 y->data[i] = x1->data[i] + x2->data[i];
797 }
798 #endif
799}
800
801void nn_addscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) {
802 nn_assert(y->shape[0] == x->shape[0], "Cannot add tensors of different shapes");
803
804 size_t n = y->shape[0];
805 float *x_data = x->data;
806 float *y_data = y->data;
807
808 #ifdef CONFIG_BACKEND_RISCV_V
809 while (n > 0) {
810 size_t vl = __riscv_vsetvl_e32m1(n);
811 vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x_data, vl);
812 vfloat32m1_t vec_y = __riscv_vfadd_vf_f32m1(vec_x, scalar, vl);
813 __riscv_vse32_v_f32m1(y_data, vec_y, vl);
814 x_data += vl;
815 y_data += vl;
816 n -= vl;
817 }
818 #else /* scalar implementation */
819 for (size_t i = 0; i < n; i += 1) {
820 y->data[i] = x->data[i] + scalar;
821 }
822 #endif
823}
824
825void nn_addscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar) {
826 nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot add tensors of different shapes");
827
828 size_t n = y->shape[0] * y->shape[1];
829 float *x_data = x->data;
830 float *y_data = y->data;
831
832 #ifdef CONFIG_BACKEND_RISCV_V
833 while (n > 0) {
834 size_t vl = __riscv_vsetvl_e32m1(n);
835 vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x_data, vl);
836 vfloat32m1_t vec_y = __riscv_vfadd_vf_f32m1(vec_x, scalar, vl);
837 __riscv_vse32_v_f32m1(y_data, vec_y, vl);
838 x_data += vl;
839 y_data += vl;
840 n -= vl;
841 }
842 #else /* scalar implementation */
843 for (size_t i = 0; i < n; i += 1) {
844 y->data[i] = x->data[i] + scalar;
845 }
846 #endif
847}
848
849/* ======================================================================================================== */
850/* Multiplication */
851/* ======================================================================================================== */
852
853
854void nn_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2) {
855 nn_assert(x1->shape[0] == x2->shape[0], "Cannot multiply tensors of different shapes");
856 nn_assert(y->shape[0] == x1->shape[0], "Cannot multiply tensors of different shapes");
857
858 size_t n = y->shape[0];
859 float *x1_data = x1->data;
860 float *x2_data = x2->data;
861 float *y_data = y->data;
862
863 #ifdef CONFIG_BACKEND_RISCV_V
864 while (n > 0) {
865 size_t vl = __riscv_vsetvl_e32m1(n);
866 vfloat32m1_t vec_x1 = __riscv_vle32_v_f32m1(x1_data, vl);
867 vfloat32m1_t vec_x2 = __riscv_vle32_v_f32m1(x2_data, vl);
868 vfloat32m1_t vec_y = __riscv_vfmul_vv_f32m1(vec_x1, vec_x2, vl);
869 __riscv_vse32_v_f32m1(y_data, vec_y, vl);
870 x1_data += vl;
871 x2_data += vl;
872 y_data += vl;
873 n -= vl;
874 }
875 #else /* scalar implementation */
876 for (size_t i = 0; i < n; i += 1) {
877 y->data[i] = x1->data[i] * x2->data[i];
878 }
879 #endif
880}
881
882void nn_mul2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) {
883 nn_assert(x1->shape[0] == x2->shape[0] && x1->shape[1] == x2->shape[1], "Cannot multiply tensors of different shapes");
884 nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x1->shape[1], "Cannot multiply tensors of different shapes");
885
886 size_t n = y->shape[0] * y->shape[1];
887 float *x1_data = x1->data;
888 float *x2_data = x2->data;
889 float *y_data = y->data;
890
891 #ifdef CONFIG_BACKEND_RISCV_V
892 while (n > 0) {
893 size_t vl = __riscv_vsetvl_e32m1(n);
894 vfloat32m1_t vec_x1 = __riscv_vle32_v_f32m1(x1_data, vl);
895 vfloat32m1_t vec_x2 = __riscv_vle32_v_f32m1(x2_data, vl);
896 vfloat32m1_t vec_y = __riscv_vfmul_vv_f32m1(vec_x1, vec_x2, vl);
897 __riscv_vse32_v_f32m1(y_data, vec_y, vl);
898 x1_data += vl;
899 x2_data += vl;
900 y_data += vl;
901 n -= vl;
902 }
903 #else /* scalar implementation */
904 for (size_t i = 0; i < n; i += 1) {
905 y->data[i] = x1->data[i] * x2->data[i];
906 }
907 #endif
908}
909
910void nn_mulscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar) {
911 nn_assert(y->shape[0] == x->shape[0], "Cannot multiply tensors of different shapes");
912
913 size_t n = y->shape[0];
914 float *x_data = x->data;
915 float *y_data = y->data;
916
917 #ifdef CONFIG_BACKEND_RISCV_V
918 while (n > 0) {
919 size_t vl = __riscv_vsetvl_e32m1(n);
920 vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x_data, vl);
921 vfloat32m1_t vec_y = __riscv_vfmul_vf_f32m1(vec_x, scalar, vl);
922 __riscv_vse32_v_f32m1(y_data, vec_y, vl);
923 x_data += vl;
924 y_data += vl;
925 n -= vl;
926 }
927 #else /* scalar implementation */
928 for (size_t i = 0; i < n; i += 1) {
929 y->data[i] = x->data[i] * scalar;
930 }
931 #endif
932}
933
934void nn_mulscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar) {
935 nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot multiply tensors of different shapes");
936
937 size_t n = y->shape[0] * y->shape[1];
938 float *x_data = x->data;
939 float *y_data = y->data;
940
941 #ifdef CONFIG_BACKEND_RISCV_V
942 while (n > 0) {
943 size_t vl = __riscv_vsetvl_e32m1(n);
944 vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x_data, vl);
945 vfloat32m1_t vec_y = __riscv_vfmul_vf_f32m1(vec_x, scalar, vl);
946 __riscv_vse32_v_f32m1(y_data, vec_y, vl);
947 x_data += vl;
948 y_data += vl;
949 n -= vl;
950 }
951 #else /* scalar implementation */
952 for (size_t i = 0; i < n; i += 1) {
953 y->data[i] = x->data[i] * scalar;
954 }
955 #endif
956}
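All of the elementwise routines take the output tensor as the first argument and require exactly matching shapes (there is no broadcasting); because each element is read and written at the same index, passing the same tensor as both output and input also works. A brief sketch with illustrative buffers:

    size_t shape[1] = {3};
    float a_data[3] = {1, 2, 3};
    float b_data[3] = {10, 20, 30};
    Tensor1D_F32 *a = nn_tensor1d_f32(shape, a_data);
    Tensor1D_F32 *b = nn_tensor1d_f32(shape, b_data);
    Tensor1D_F32 *out = nn_zeros1d_f32(shape);

    nn_add1d_f32(out, a, b);             // {11, 22, 33}
    nn_mulscalar1d_f32(out, out, 2.0f);  // {22, 44, 66}, in place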
957
958
959/* ======================================================================================================== */
960/* MatMul */
961/* ======================================================================================================== */
962void nn_dot_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2) {
963 nn_assert(x1->shape[0] == x2->shape[0], "Cannot dot tensors of different shapes");
964 nn_assert(y->shape[0] == x1->shape[0], "Cannot dot tensors of different shapes");
965
966 size_t n = y->shape[0];
967 float *x1_data = x1->data;
968 float *x2_data = x2->data;
969 float *y_data = y->data;
970
971 #ifdef CONFIG_BACKEND_RISCV_V
972 while (n > 0) {
973 size_t vl = __riscv_vsetvl_e32m1(n);
974 vfloat32m1_t vec_x1 = __riscv_vle32_v_f32m1(x1_data, vl);
975 vfloat32m1_t vec_x2 = __riscv_vle32_v_f32m1(x2_data, vl);
976 vfloat32m1_t vec_y = __riscv_vfmul_vv_f32m1(vec_x1, vec_x2, vl);
977 __riscv_vse32_v_f32m1(y_data, vec_y, vl);
978 x1_data += vl;
979 x2_data += vl;
980 y_data += vl;
981 n -= vl;
982 }
983 #else /* scalar implementation */
984 float sum = 0.0f;
985 for (size_t i = 0; i < n; i += 1) {
986 sum += x1->data[i] * x2->data[i];
987 }
988 y->data[0] = sum;
989 #endif
990}
991
999void nn_mm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) {
1000 nn_assert(x1->shape[1] == x2->shape[0], "Cannot perform MatMul on tensors of different shapes");
1001 nn_assert(y->shape[0] == x1->shape[0] && y->shape[1] == x2->shape[1], "Cannot perform MatMul on tensors of different shapes");
1002
1003 const size_t n = x1->shape[0];
1004 const size_t m = x1->shape[1];
1005 const size_t p = x2->shape[1];
1006
1007 for (size_t i = 0; i < n; i += 1) {
1008 #ifdef CONFIG_BACKEND_RISCV_V
1009 float *x1_row = x1->data + i * m;
1010 float *y_row = y->data + i * p;
1011
1012 size_t vlmax = __riscv_vsetvlmax_e32m1();
1013 for (size_t j = 0; j < p; j += 1) {
1014 vfloat32m1_t vec_zero = __riscv_vfmv_v_f_f32m1(0, vlmax);
1015 vfloat32m1_t vec_sum = __riscv_vfmv_v_f_f32m1(0, vlmax);
1016
1017 float *x1_ptr = x1_row;
1018 float *x2_ptr = x2->data + j;
1019 size_t k = m;
1020
1021 while (k > 0) {
1022 size_t vl = __riscv_vsetvl_e32m1(k);
1023 vfloat32m1_t vec_x1 = __riscv_vle32_v_f32m1(x1_ptr, vl);
1024 vfloat32m1_t vec_x2 = __riscv_vlse32_v_f32m1(x2_ptr, p * sizeof(float), vl);
1025 vec_sum = __riscv_vfmacc_vv_f32m1(vec_sum, vec_x1, vec_x2, vl);
1026
1027 x1_ptr += vl;
1028 x2_ptr += vl * p;
1029 k -= vl;
1030 }
1031
1032 #ifdef CONFIG_DEBUG_RISCV_V_USE_REDOSUM
1033 vec_sum = __riscv_vfredosum_vs_f32m1_f32m1(vec_sum, vec_zero, vlmax);
1034 #else
1035 vec_sum = __riscv_vfredusum_vs_f32m1_f32m1(vec_sum, vec_zero, vlmax);
1036 #endif
1037 y_row[j] = __riscv_vfmv_f_s_f32m1_f32(vec_sum);
1038 }
1039 #else
1040 for (size_t j = 0; j < p; j += 1) {
1041 float sum = 0.f;
1042 for (size_t k = 0; k < m; k += 1) {
1043 sum += x1->data[i * m + k] * x2->data[k * p + j];
1044 }
1045 y->data[i * p + j] = sum;
1046 }
1047 #endif
1048 }
1049}
1050
1051
1052void nn_addmm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *c, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2) {
1053 nn_assert(x1->shape[1] == x2->shape[0], "Cannot perform Linear on tensors of different shapes");
1054 nn_assert(y->shape[0] == c->shape[0] && y->shape[1] == x2->shape[1], "Cannot perform Linear on tensors of different shapes");
1055
1056 const size_t n = x1->shape[0];
1057 const size_t m = x1->shape[1];
1058 const size_t p = x2->shape[1];
1059
1060 for (size_t i = 0; i < n; i += 1) {
1061 #ifdef CONFIG_BACKEND_RISCV_V
1062 float *x1_row = x1->data + i * m;
1063 float *y_row = y->data + i * p;
1064
1065 size_t vlmax = __riscv_vsetvlmax_e32m1();
1066 for (size_t j = 0; j < p; j += 1) {
1067 vfloat32m1_t vec_zero = __riscv_vfmv_v_f_f32m1(0, vlmax);
1068 vfloat32m1_t vec_sum = __riscv_vfmv_v_f_f32m1(0, vlmax);
1069
1070 float *x1_ptr = x1_row;
1071 float *x2_ptr = x2->data + j;
1072 size_t k = m;
1073
1074 while (k > 0) {
1075 size_t vl = __riscv_vsetvl_e32m1(k);
1076 vfloat32m1_t vec_x1 = __riscv_vle32_v_f32m1(x1_ptr, vl);
1077 vfloat32m1_t vec_x2 = __riscv_vlse32_v_f32m1(x2_ptr, p * sizeof(float), vl);
1078 vec_sum = __riscv_vfmacc_vv_f32m1(vec_sum, vec_x1, vec_x2, vl);
1079
1080 x1_ptr += vl;
1081 x2_ptr += vl * p;
1082 k -= vl;
1083 }
1084
1085 #ifdef CONFIG_DEBUG_RISCV_V_USE_REDOSUM
1086 vec_sum = __riscv_vfredosum_vs_f32m1_f32m1(vec_sum, vec_zero, vlmax);
1087 #else
1088 vec_sum = __riscv_vfredusum_vs_f32m1_f32m1(vec_sum, vec_zero, vlmax);
1089 #endif
1090 y_row[j] = __riscv_vfmv_f_s_f32m1_f32(vec_sum) + c->data[i * p + j];
1091 }
1092
1093 x1_row += m;
1094 y_row += p;
1095 #else
1096 for (size_t j = 0; j < p; j += 1) {
1097 float sum = 0.f;
1098 for (size_t k = 0; k < m; k += 1) {
1099 sum += x1->data[i * m + k] * x2->data[k * p + j];
1100 }
1101 y->data[i * p + j] = sum + c->data[i * p + j];
1102 }
1103 #endif
1104 }
1105}
1106
1107void nn_linear_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, const Tensor2D_F32 *weight, const Tensor1D_F32 *bias) {
1108 nn_assert(x->shape[1] == weight->shape[1], "Cannot perform Linear on tensors of different shapes");
1109 nn_assert(!bias || bias->shape[0] == weight->shape[0], "Cannot perform Linear on tensors of different shapes");
1110 nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == weight->shape[0], "Cannot perform Linear on tensors of different shapes");
1111
1112 const size_t batch_size = x->shape[0];
1113 const size_t in_features = x->shape[1];
1114 const size_t out_features = weight->shape[0];
1115
1116 float *x_batch_data = x->data;
1117 float *y_batch_data = y->data;
1118
1119 for (size_t i = 0; i < batch_size; i += 1) {
1120 #ifdef CONFIG_BACKEND_RISCV_V
1121 float *x_data = x_batch_data;
1122 float *y_data = y_batch_data;
1123
1124 size_t vlmax = __riscv_vsetvlmax_e32m1();
1125
1126 for (size_t j = 0; j < out_features; j += 1) {
1127 vfloat32m1_t vec_zero = __riscv_vfmv_v_f_f32m1(0, vlmax);
1128 vfloat32m1_t vec_sum = __riscv_vfmv_v_f_f32m1(0, vlmax);
1129
1130 float *weight_row = weight->data + j * in_features;
1131 size_t n = in_features;
1132
1133 while (n > 0) {
1134 size_t vl = __riscv_vsetvl_e32m1(n);
1135 vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x_data, vl);
1136 vfloat32m1_t vec_w = __riscv_vle32_v_f32m1(weight_row, vl);
1137 vec_sum = __riscv_vfmacc_vv_f32m1(vec_sum, vec_x, vec_w, vl);
1138
1139 x_data += vl;
1140 weight_row += vl;
1141 n -= vl;
1142 }
1143
1144 #ifdef CONFIG_DEBUG_RISCV_V_USE_REDOSUM
1145 vec_sum = __riscv_vfredosum_vs_f32m1_f32m1(vec_sum, vec_zero, vlmax);
1146 #else
1147 vec_sum = __riscv_vfredusum_vs_f32m1_f32m1(vec_sum, vec_zero, vlmax);
1148 #endif
1149
1150 float sum = __riscv_vfmv_f_s_f32m1_f32(vec_sum);
1151 if (bias) {
1152 sum += bias->data[j];
1153 }
1154 y_data[j] = sum;
1155 x_data = x_batch_data; // reset x_data pointer for next output feature
1156 }
1157
1158 x_batch_data += in_features;
1159 y_batch_data += out_features;
1160 #else /* scalar implementation */
1161 for (size_t j = 0; j < out_features; j += 1) {
1162 float sum = 0.f;
1163 for (size_t k = 0; k < in_features; k += 1) {
1164 sum += x->data[i * in_features + k] * weight->data[j * in_features + k];
1165 }
1166 if (bias) {
1167 sum += bias->data[j];
1168 }
1169 y->data[i * out_features + j] = sum;
1170 }
1171 #endif
1172 }
1173}
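nn_linear_f32 expects x as [batch, in_features], weight as [out_features, in_features] (the weight is applied transposed, as the asserts above imply), an optional [out_features] bias that may be NULL, and y as [batch, out_features]. A minimal sketch of one dense layer followed by a ReLU, with illustrative sizes:

    size_t x_shape[2] = {1, 4};    // batch = 1, in_features = 4
    size_t w_shape[2] = {2, 4};    // out_features = 2, in_features = 4
    size_t b_shape[1] = {2};
    size_t y_shape[2] = {1, 2};

    Tensor2D_F32 *x = nn_rand2d_f32(x_shape);
    Tensor2D_F32 *w = nn_rand2d_f32(w_shape);
    Tensor1D_F32 *b = nn_zeros1d_f32(b_shape);
    Tensor2D_F32 *y = nn_zeros2d_f32(y_shape);

    nn_linear_f32(y, x, w, b);     // y = x @ w^T + b
    nn_relu2d_f32(y, y);           // activation applied in place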
1174
1175
1176
1177/* ======================================================================================================== */
1178/* Non-linear */
1179/* ======================================================================================================== */
1180void nn_elu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float alpha) {
1181 nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ELU on tensors of different shapes");
1182
1183 const size_t n = y->shape[0] * y->shape[1];
1184 for (size_t i = 0; i < n; i += 1) {
1185 if (x->data[i] > 0) {
1186 y->data[i] = x->data[i];
1187 }
1188 else {
1189 y->data[i] = alpha * (expf(x->data[i]) - 1.f);
1190 }
1191 }
1192}
1193
1194void nn_relu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x) {
1195 nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform ReLU on tensors of different shapes");
1196
1197 size_t n = y->shape[0] * y->shape[1];
1198 float *x_data = x->data;
1199 float *y_data = y->data;
1200
1201 #ifdef CONFIG_BACKEND_RISCV_V
1202 float zero = 0.0f;
1203
1204 while (n > 0) {
1205 size_t vl = __riscv_vsetvl_e32m1(n);
1206 vfloat32m1_t vec_x = __riscv_vle32_v_f32m1(x_data, vl);
1207 vfloat32m1_t vec_y = __riscv_vfmax_vf_f32m1(vec_x, zero, vl);
1208 __riscv_vse32_v_f32m1(y_data, vec_y, vl);
1209 x_data += vl;
1210 y_data += vl;
1211 n -= vl;
1212 }
1213 #else /* scalar implementation */
1214 for (size_t i = 0; i < n; i += 1) {
1215 float x_val = x->data[i];
1216 y->data[i] = x_val > 0 ? x_val : 0;
1217 }
1218 #endif
1219}
1220
1221
1222void nn_softmax1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x) {
1223 nn_assert(y->shape[0] == x->shape[0], "Cannot apply softmax to tensors of different shapes");
1224
1225 size_t n = y->shape[0];
1226 float sum = 0.0f;
1227 for (size_t i = 0; i < n; i += 1) {
1228 sum += expf(x->data[i]);
1229 }
1230
1231 for (size_t i = 0; i < n; i += 1) {
1232 y->data[i] = expf(x->data[i]) / sum;
1233 }
1234}
1235
1236void nn_softmax2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, size_t dim) {
1237 nn_assert(y->shape[0] == x->shape[0] && y->shape[1] == x->shape[1], "Cannot apply softmax to tensors of different shapes");
1238
1239 float *y_data = y->data;
1240 float *x_data = x->data;
1241
1242 if (dim == 0) {
1243 for (size_t i = 0; i < y->shape[1]; i += 1) {
1244 size_t n = y->shape[0];
1245 size_t m = y->shape[1];
1246 float sum = 0.0f;
1247 for (size_t j = 0; j < n; j += 1) {
1248 sum += expf(x_data[j * m]);
1249 }
1250
1251 for (size_t j = 0; j < n; j += 1) {
1252 y_data[j * m] = expf(x_data[j * m]) / sum;
1253 }
1254
1255 x_data += 1;
1256 y_data += 1;
1257 }
1258 }
1259 else if (dim == 1) {
1260 // HACK: fix batch size
1261 for (size_t i = 0; i < y->shape[0]; i += 1) {
1262 size_t n = y->shape[1];
1263 float sum = 0.0f;
1264 for (size_t j = 0; j < n; j += 1) {
1265 sum += expf(x_data[j]);
1266 }
1267
1268 for (size_t j = 0; j < n; j += 1) {
1269 y_data[j] = expf(x_data[j]) / sum;
1270 }
1271
1272 x_data += n;
1273 y_data += n;
1274 }
1275 }
1276 else {
1277 nn_assert(0, "Invalid dimension for softmax");
1278 }
1279}
1280
1281
1282
1283void nn_tanh2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x) {
1284 nn_assert(x->shape[0] == y->shape[0] && x->shape[1] == y->shape[1], "Cannot perform Tanh on tensors of different shapes");
1285
1286 const size_t n = y->shape[0] * y->shape[1];
1287 for (size_t i = 0; i < n; i += 1) {
1288 float x_val = x->data[i];
1289 y->data[i] = tanh(x_val);
1290 }
1291}
1292
1309void nn_scaled_dot_product_attention_f32(Tensor4D_F32 *y, const Tensor4D_F32 *query, const Tensor4D_F32 *key, const Tensor4D_F32 *value) {
1310 nn_assert(query->shape[0] == key->shape[0] && query->shape[0] == value->shape[0], "Query, key, and value must have the same batch size");
1311 nn_assert(query->shape[1] == key->shape[1] && query->shape[1] == value->shape[1], "Query, key, and value must have the same head count");
1312 nn_assert(query->shape[2] == key->shape[2] && query->shape[2] == value->shape[2], "Query, key, and value must have the same sequence length");
1313 nn_assert(query->shape[3] == key->shape[3] && query->shape[3] == value->shape[3], "Query, key, and value must have the same embedding dimension");
1314
1315 // L, S = query.size(-2), key.size(-2)
1316 size_t n = query->shape[0]; // batch size
1317 size_t h = query->shape[1]; // head count
1318 size_t s = key->shape[2]; // source sequence length
1319 size_t l = query->shape[2]; // target sequence length
1320 size_t e = query->shape[3]; // embedding dimension
1321 size_t ev = value->shape[3]; // value embedding dimension
1322
1323 // scale_factor = 1 / math.sqrt(query.size(-1)) if scale is None else scale
1324 float scale_factor = 1 / sqrt(query->shape[3]);
1325 // attn_bias = torch.zeros(L, S, dtype=query.dtype)
1326
1327 // if is_causal:
1328 // assert attn_mask is None
1329 // temp_mask = torch.ones(L, S, dtype=torch.bool).tril(diagonal=0)
1330 // attn_bias.masked_fill_(temp_mask.logical_not(), float("-inf"))
1331 // attn_bias.to(query.dtype)
1332 // if attn_mask is not None:
1333 // if attn_mask.dtype == torch.bool:
1334 // attn_bias.masked_fill_(attn_mask.logical_not(), float("-inf"))
1335 // else:
1336 // attn_bias += attn_mask
1337
1338 // (n, hq, l, s) = (n, hq, l, e) @ (n, h, s, e).T
1339 size_t attn_weight_dims[4] = {n, h, l, s};
1340 Tensor4D_F32 *attn_weight = nn_tensor4d_f32(attn_weight_dims, NULL);
1341
1342 size_t query_head_dims[2] = {l, e};
1343 size_t key_head_dims[2] = {l, e};
1344 size_t attn_weight_head_dims[2] = {l, s};
1345 size_t value_head_dims[2] = {s, ev};
1346 size_t y_head_dims[2] = {l, ev};
1347
1348 for (size_t head = 0; head < h; head += 1) {
1349 Tensor2D_F32 *query_head = nn_as_tensor2d_f32(query_head_dims, (float *)query->data + head * l * e);
1350 Tensor2D_F32 *key_head = nn_as_tensor2d_f32(key_head_dims, (float *)key->data + head * s * e);
1351 Tensor2D_F32 *attn_weight_head = nn_as_tensor2d_f32(attn_weight_head_dims, (float *)attn_weight->data + head * l * s);
1352 Tensor2D_F32 *value_head = nn_as_tensor2d_f32(value_head_dims, (float *)value->data + head * s * ev);
1353 Tensor2D_F32 *y_head = nn_as_tensor2d_f32(y_head_dims, (float *)y->data + head * l * ev);
1354
1355 // attn_weight = query @ key.transpose(-2, -1) * scale_factor
1356 nn_linear_f32(attn_weight_head, query_head, key_head, NULL);
1357
1358 // attn_weight += attn_bias
1359
1360 // attn_weight = torch.softmax(attn_weight, dim=-1)
1361 nn_softmax2d_f32(attn_weight_head, attn_weight_head, 1);
1362
1363 // attn_weight = torch.dropout(attn_weight, dropout_p, train=True)
1364
1365 // (n, hq, l, ev) = (n, hq, l, s) @ (n, h, s, ev)
1366 // attn_weight @ value
1367 nn_mm_f32(y_head, attn_weight_head, value_head);
1368 }
1369}
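The asserts above require query, key, and value to share one [batch, heads, seq_len, head_dim] shape, and the per-head offsets in the loop assume the batch dimension is 1, so the sketch below sticks to a single batch. Shapes and names are illustrative only; the tensors would normally be filled with real activations before the call:

    size_t qkv_shape[4] = {1, 2, 8, 16};   // batch 1, 2 heads, seq 8, head dim 16
    Tensor4D_F32 *q = nn_zeros4d_f32(qkv_shape);
    Tensor4D_F32 *k = nn_zeros4d_f32(qkv_shape);
    Tensor4D_F32 *v = nn_zeros4d_f32(qkv_shape);
    Tensor4D_F32 *y = nn_zeros4d_f32(qkv_shape);

    nn_scaled_dot_product_attention_f32(y, q, k, v);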
1370
1371
1372#endif // __NN_F32_H
void nn_print_f32(float v, int16_t num_digits)
Definition: nn.h:91
static void nn_assert(int condition, char *message)
Definition: nn.h:54
void nn_linear_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, const Tensor2D_F32 *weight, const Tensor1D_F32 *bias)
Definition: nn_f32.h:1107
Tensor3D_F32 * nn_zeros3d_f32(size_t shape[3])
Definition: nn_f32.h:269
void nn_softmax1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x)
Definition: nn_f32.h:1222
Tensor1D_F32 * nn_zeros1d_f32(size_t shape[1])
Definition: nn_f32.h:245
void nn_print_tensor4d_f32(const Tensor4D_F32 *tensor)
Definition: nn_f32.h:477
uint8_t nn_equals0d_f32(const Tensor0D_F32 *a, const Tensor0D_F32 *b, float rel_err)
Definition: nn_f32.h:536
Tensor0D_F32 * nn_full0d_f32(float data)
Definition: nn_f32.h:325
void nn_mm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2)
Definition: nn_f32.h:999
Tensor2D_F32 * nn_rand2d_f32(size_t shape[2])
Definition: nn_f32.h:377
void nn_mulscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar)
Definition: nn_f32.h:934
void nn_addscalar2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float scalar)
Definition: nn_f32.h:825
Tensor2D_F32 * nn_full2d_f32(size_t shape[2], float data)
Definition: nn_f32.h:345
uint8_t nn_equals3d_f32(const Tensor3D_F32 *a, const Tensor3D_F32 *b, float rel_err)
Definition: nn_f32.h:588
void nn_max2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x)
Definition: nn_f32.h:648
Tensor3D_F32 * nn_tensor3d_f32(size_t shape[3], const float *data)
Definition: nn_f32.h:138
void nn_relu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x)
Definition: nn_f32.h:1194
Tensor2D_F32 * nn_tensor2d_f32(size_t shape[2], const float *data)
Definition: nn_f32.h:117
void nn_add2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2)
Definition: nn_f32.h:773
void nn_scaled_dot_product_attention_f32(Tensor4D_F32 *y, const Tensor4D_F32 *query, const Tensor4D_F32 *key, const Tensor4D_F32 *value)
Definition: nn_f32.h:1309
void nn_dot_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2)
Definition: nn_f32.h:962
void nn_addmm_f32(Tensor2D_F32 *y, const Tensor2D_F32 *c, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2)
Definition: nn_f32.h:1052
Tensor1D_F32 * nn_ones1d_f32(size_t shape[1])
Definition: nn_f32.h:301
void nn_min2d_f32(Tensor0D_F32 *y, const Tensor2D_F32 *x)
Definition: nn_f32.h:696
Tensor0D_F32 * nn_ones0d_f32()
Definition: nn_f32.h:293
static uint8_t nn_equal_f32(float golden, float actual, float rel_err)
Definition: nn_f32.h:67
Tensor4D_F32 * nn_tensor4d_f32(size_t shape[4], const float *data)
Definition: nn_f32.h:160
void nn_elu2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, float alpha)
Definition: nn_f32.h:1180
void nn_print_tensor3d_f32(const Tensor3D_F32 *tensor)
Definition: nn_f32.h:440
void nn_addscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar)
Definition: nn_f32.h:801
void nn_tanh2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x)
Definition: nn_f32.h:1283
Tensor2D_F32 * nn_as_tensor2d_f32(size_t shape[2], float *data)
Definition: nn_f32.h:194
Tensor1D_F32 * nn_rand1d_f32(size_t shape[1])
Definition: nn_f32.h:365
void nn_mul1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2)
Definition: nn_f32.h:854
void nn_softmax2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x, size_t dim)
Definition: nn_f32.h:1236
uint8_t nn_equals4d_f32(const Tensor4D_F32 *a, const Tensor4D_F32 *b, float rel_err)
Definition: nn_f32.h:608
void nn_mulscalar1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x, float scalar)
Definition: nn_f32.h:910
void nn_add1d_f32(Tensor1D_F32 *y, const Tensor1D_F32 *x1, const Tensor1D_F32 *x2)
Definition: nn_f32.h:734
Tensor2D_F32 * nn_zeros2d_f32(size_t shape[2])
Definition: nn_f32.h:257
uint8_t nn_equals1d_f32(const Tensor1D_F32 *a, const Tensor1D_F32 *b, float rel_err)
Definition: nn_f32.h:548
void nn_print_tensor2d_f32(const Tensor2D_F32 *tensor)
Definition: nn_f32.h:412
void nn_min1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x)
Definition: nn_f32.h:672
Tensor0D_F32 * nn_rand0d_f32()
Definition: nn_f32.h:357
Tensor1D_F32 * nn_as_tensor1d_f32(size_t shape[1], float *data)
Definition: nn_f32.h:181
void nn_mul2d_f32(Tensor2D_F32 *y, const Tensor2D_F32 *x1, const Tensor2D_F32 *x2)
Definition: nn_f32.h:882
Tensor3D_F32 * nn_as_tensor3d_f32(size_t shape[3], float *data)
Definition: nn_f32.h:208
Tensor1D_F32 * nn_full1d_f32(size_t shape[1], float data)
Definition: nn_f32.h:333
Tensor0D_F32 * nn_zeros0d_f32()
Definition: nn_f32.h:237
uint8_t nn_equals2d_f32(const Tensor2D_F32 *a, const Tensor2D_F32 *b, float rel_err)
Definition: nn_f32.h:568
Tensor0D_F32 * nn_tensor0d_f32(float data)
Definition: nn_f32.h:83
Tensor4D_F32 * nn_as_tensor4d_f32(size_t shape[4], float *data)
Definition: nn_f32.h:223
void nn_print_tensor1d_f32(const Tensor1D_F32 *tensor)
Definition: nn_f32.h:396
Tensor2D_F32 * nn_ones2d_f32(size_t shape[2])
Definition: nn_f32.h:313
void nn_max1d_f32(Tensor0D_F32 *y, const Tensor1D_F32 *x)
Definition: nn_f32.h:624
Tensor1D_F32 * nn_tensor1d_f32(size_t shape[1], const float *data)
Definition: nn_f32.h:97
Tensor4D_F32 * nn_zeros4d_f32(size_t shape[4])
Definition: nn_f32.h:281
float Tensor0D_F32::data
Definition: nn_f32.h:21
Tensor0D_F32
Definition: nn_f32.h:20
size_t Tensor1D_F32::shape[1]
Definition: nn_f32.h:29
float * Tensor1D_F32::data
Definition: nn_f32.h:30
Tensor1D_F32
Definition: nn_f32.h:28
float * Tensor2D_F32::data
Definition: nn_f32.h:39
size_t Tensor2D_F32::shape[2]
Definition: nn_f32.h:38
Tensor2D_F32
Definition: nn_f32.h:37
size_t Tensor3D_F32::shape[3]
Definition: nn_f32.h:46
float * Tensor3D_F32::data
Definition: nn_f32.h:47
Tensor3D_F32
Definition: nn_f32.h:45
size_t Tensor4D_F32::shape[4]
Definition: nn_f32.h:54
float * Tensor4D_F32::data
Definition: nn_f32.h:55
Tensor4D_F32
Definition: nn_f32.h:53