14#ifdef CONFIG_BACKEND_RISCV_V
15 #include "riscv_vector.h"
74 tensor->
shape[0] = shape[0];
76 size_t n_bytes = shape[0] *
sizeof(int32_t);
77 tensor->
data = (int32_t *)malloc(n_bytes);
79 memcpy(tensor->
data, data, n_bytes);
94 tensor->
shape[0] = shape[0];
95 tensor->
shape[1] = shape[1];
97 size_t n_bytes = shape[0] * shape[1] *
sizeof(int32_t);
98 tensor->
data = (int32_t *)malloc(n_bytes);
100 memcpy(tensor->
data, data, n_bytes);
128 for (
size_t i = 0; i < n; i += 1) {
144 size_t n = shape[0] * shape[1];
145 for (
size_t i = 0; i < n; i += 1) {
174 for (
size_t i = 0; i < n; i += 1) {
190 size_t n = shape[0] * shape[1];
191 for (
size_t i = 0; i < n; i += 1) {
222 for (
size_t i = 0; i < n; i += 1) {
223 tensor->
data[i] = data;
239 size_t n = shape[0] * shape[1];
240 for (
size_t i = 0; i < n; i += 1) {
241 tensor->
data[i] = data;
269 for (
size_t i = 0; i < n; i += 1) {
270 tensor->
data[i] = rand();
285 size_t n = shape[0] * shape[1];
286 for (
size_t i = 0; i < n; i += 1) {
287 tensor->
data[i] = rand();
317 for (
size_t i=0; i<tensor->
shape[0]; i+=1) {
319 if (i < tensor->shape[0]-1) {
335 for (
size_t i=0; i<tensor->
shape[0]; i+=1) {
340 for (
size_t j=0; j<tensor->
shape[1]; j+=1) {
342 if (j < tensor->shape[1]-1) {
347 if (i < tensor->shape[0]-1) {
383 size_t n = a->
shape[0];
384 for (
size_t i = 0; i < n; i += 1) {
405 for (
size_t i = 0; i < n; i += 1) {
439 size_t n = y->
shape[0];
440 int32_t *x1_data = x1->
data;
441 int32_t *x2_data = x2->
data;
442 int32_t *y_data = y->
data;
444 #ifdef CONFIG_BACKEND_RISCV_VECTOR
446 size_t vl = __riscv_vsetvl_e8m1(n);
447 vint32m1_t vec_x1 = __riscv_vle32_v_i32m1(x1_data, vl);
448 vint32m1_t vec_x2 = __riscv_vle32_v_i32m1(x2_data, vl);
449 vint32m1_t vec_y = __riscv_vfadd_vv_i32m1(vec_x1, vec_x2, vl);
450 __riscv_vse32_v_i32m1(y_data, vec_y, vl);
457 for (
size_t i = 0; i < n; i += 1) {
458 y_data[i] = x1_data[i] + x2_data[i];
481 int32_t *x1_data = x1->
data;
482 int32_t *x2_data = x2->
data;
483 int32_t *y_data = y->
data;
485 #ifdef CONFIG_BACKEND_RISCV_VECTOR
487 size_t vl = __riscv_vsetvl_e8m1(n);
488 vint32m1_t vec_x1 = __riscv_vle32_v_i32m1(x1_data, vl);
489 vint32m1_t vec_x2 = __riscv_vle32_v_i32m1(x2_data, vl);
490 vint32m1_t vec_y = __riscv_vfadd_vv_i32m1(vec_x1, vec_x2, vl);
491 __riscv_vse32_v_i32m1(y_data, vec_y, vl);
498 for (
size_t i = 0; i < n; i += 1) {
499 y_data[i] = x1_data[i] + x2_data[i];
518 size_t n = y->
shape[0];
519 int32_t *x_data = x->
data;
520 int32_t *y_data = y->
data;
522 for (
size_t i = 0; i < n; i += 1) {
523 y_data[i] = x_data[i] + scalar;
542 int32_t *x_data = x->
data;
543 int32_t *y_data = y->
data;
545 for (
size_t i = 0; i < n; i += 1) {
546 y_data[i] = x_data[i] + scalar;
579 size_t n = y->
shape[0];
580 int32_t *x1_data = x1->
data;
581 int32_t *x2_data = x2->
data;
582 int32_t *y_data = y->
data;
585 for (
size_t i = 0; i < n; i += 1) {
586 sum_i32 += x1_data[i] * x2_data[i];
603 nn_assert(x1->
shape[1] == x2->
shape[0],
"Cannot perform MatMul on tensors of different shapes");
606 const size_t n = x1->
shape[0];
607 const size_t m = x1->
shape[1];
608 const size_t p = x2->
shape[1];
609 int32_t *x1_data = x1->
data;
610 int32_t *x2_data = x2->
data;
611 int32_t *y_data = y->
data;
613 for (
size_t i = 0; i < n; i += 1) {
614 for (
size_t j = 0; j < p; j += 1) {
616 for (
size_t k = 0; k < m; k += 1) {
617 sum += x1_data[i * m + k] * x2_data[k * p + j];
619 y_data[i * p + j] = sum;
637 nn_assert(x1->
shape[1] == x2->
shape[0],
"Cannot perform MatMul on tensors of different shapes");
640 const size_t n = x1->
shape[0];
641 const size_t m = x1->
shape[1];
642 const size_t p = x2->
shape[1];
643 int32_t *x1_data = x1->
data;
644 int32_t *x2_data = x2->
data;
645 int32_t *c_data = c->
data;
646 int32_t *y_data = y->
data;
648 for (
size_t i = 0; i < n; i += 1) {
649 for (
size_t j = 0; j < p; j += 1) {
651 for (
size_t k = 0; k < m; k += 1) {
652 sum += x1_data[i * m + k] * x2_data[k * p + j];
654 y_data[i * p + j] = sum + c_data[i * p + j];
673 nn_assert(x->
shape[1] == weight->
shape[1],
"Cannot perform Linear on tensors of different shapes");
674 nn_assert(!bias || bias->
shape[0] == weight->
shape[0],
"Cannot perform Linear on tensors of different shapes");
677 const size_t batch_size = x->
shape[0];
678 const size_t in_features = x->
shape[1];
679 const size_t out_features = weight->
shape[0];
681 int32_t *x_batch_data = x->
data;
682 int32_t *y_batch_data = y->
data;
684 for (
size_t i = 0; i < batch_size; i += 1) {
685 int32_t *x_data = x_batch_data;
686 int32_t *y_data = y_batch_data;
688 for (
size_t j = 0; j < out_features; j += 1) {
689 int32_t *weight_row = weight->
data + j * in_features;
692 for (
size_t k = 0; k < in_features; k += 1) {
693 sum += x_data[k] * weight_row[k];
696 sum += bias->
data[j];
701 x_batch_data += in_features;
702 y_batch_data += out_features;
726 int32_t *x_data = x->
data;
727 int32_t *y_data = y->
data;
729 for (
size_t i = 0; i < n; i += 1) {
730 y_data[i] = x_data[i] > 0 ? x_data[i] : 0;
Half-Precision Floating-Point (fp16) Definitions.
static void nn_assert(int condition, char *message)
Definition: nn.h:59
void nn_addmm_i32(Tensor2D_I32 *y, const Tensor2D_I32 *c, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2)
Performs matrix multiplication of x1 and x2, adds c element-wise, and stores the result in y.
Definition: nn_i32.h:636
uint8_t nn_equals2d_i32(const Tensor2D_I32 *a, const Tensor2D_I32 *b)
Checks if two 2D tensors with type I32 are equal.
Definition: nn_i32.h:401
Tensor1D_I32 * nn_rand1d_i32(size_t shape[1])
Creates a 1D tensor with type I32 and initializes it to a random value.
Definition: nn_i32.h:266
uint8_t nn_equals1d_i32(const Tensor1D_I32 *a, const Tensor1D_I32 *b)
Checks if two 1D tensors with type I32 are equal.
Definition: nn_i32.h:380
void nn_add2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2)
Adds x1 and x2 element-wise and stores the result in y.
Definition: nn_i32.h:476
void nn_mm_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2)
Performs matrix multiplication of two 2D tensors and stores the result in y.
Definition: nn_i32.h:602
void nn_print_tensor2d_i32(const Tensor2D_I32 *tensor)
Prints the content of a 2D tensor with type I32.
Definition: nn_i32.h:333
void nn_addscalar2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x, int32_t scalar)
Adds a scalar to a 2D tensor and stores the result in y.
Definition: nn_i32.h:538
Tensor1D_I32 * nn_ones1d_i32(size_t shape[1])
Creates a 1D tensor with type I32 and initializes it to 1.
Definition: nn_i32.h:171
void nn_addscalar1d_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x, int32_t scalar)
Adds a scalar to a 1D tensor and stores the result in y.
Definition: nn_i32.h:515
void nn_print_i32(int32_t v)
Prints an int32_t number.
Definition: nn_i32.h:303
void nn_dot_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x1, const Tensor1D_I32 *x2)
Computes the dot product of two 1D tensors and stores the result in y.
Definition: nn_i32.h:575
Tensor0D_I32 * nn_full0d_i32(int32_t data)
Creates a 0D tensor with type I32 and initializes it to a given value.
Definition: nn_i32.h:205
Tensor0D_I32 * nn_rand0d_i32()
Creates a 0D tensor with type I32 and initializes it to a random value.
Definition: nn_i32.h:253
void nn_linear_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x, const Tensor2D_I32 *weight, const Tensor1D_I32 *bias)
Linear neural network layer: multiplies each row of x by the transpose of weight and adds bias when one is provided.
Definition: nn_i32.h:672
void nn_add1d_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x1, const Tensor1D_I32 *x2)
Adds x1 and x2 element-wise and stores the result in y.
Definition: nn_i32.h:435
uint8_t nn_equals0d_i32(const Tensor0D_I32 *a, const Tensor0D_I32 *b)
Checks if two 0D tensors with type I32 are equal.
Definition: nn_i32.h:367
Tensor2D_I32 * nn_rand2d_i32(size_t shape[2])
Creates a 2D tensor with type I32 and initializes it to a random value.
Definition: nn_i32.h:283
Tensor1D_I32 * nn_tensor1d_i32(size_t shape[1], const int32_t *data)
Creates a 1D tensor with type I32.
Definition: nn_i32.h:72
Tensor0D_I32 * nn_zeros0d_i32()
Creates a 0D tensor with type I32 and initializes it to 0.
Definition: nn_i32.h:112
Tensor0D_I32 * nn_ones0d_i32()
Creates a 0D tensor with type I32 and initializes it to 1.
Definition: nn_i32.h:158
Tensor1D_I32 * nn_zeros1d_i32(size_t shape[1])
Creates a 1D tensor with type I32 and initializes it to 0.
Definition: nn_i32.h:125
Tensor0D_I32 * nn_tensor0d_i32(int32_t data)
Creates a 0D tensor with type I32.
Definition: nn_i32.h:58
Tensor2D_I32 * nn_tensor2d_i32(size_t shape[2], const int32_t *data)
Creates a 2D tensor with type I32.
Definition: nn_i32.h:92
Tensor2D_I32 * nn_zeros2d_i32(size_t shape[2])
Creates a 2D tensor with type I32 and initializes it to 0.
Definition: nn_i32.h:142
void nn_relu2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x)
Applies the ReLU activation function to a 2D tensor.
Definition: nn_i32.h:722
void nn_print_tensor1d_i32(const Tensor1D_I32 *tensor)
Prints the content of a 1D tensor with type I32.
Definition: nn_i32.h:315
Tensor1D_I32 * nn_full1d_i32(size_t shape[1], int32_t data)
Creates a 1D tensor with type I32 and initializes it to a given value.
Definition: nn_i32.h:219
Tensor2D_I32 * nn_full2d_i32(size_t shape[2], int32_t data)
Creates a 2D tensor with type I32 and initializes it to a given value.
Definition: nn_i32.h:237
Tensor2D_I32 * nn_ones2d_i32(size_t shape[2])
Creates a 2D tensor with type I32 and initializes it to 1.
Definition: nn_i32.h:188
int32_t data
Definition: nn_i32.h:24
A 0D tensor (scalar) with an int32_t data type.
Definition: nn_i32.h:23
size_t shape[1]
Definition: nn_i32.h:33
int32_t * data
Definition: nn_i32.h:34
A 1D tensor with an int32_t data type.
Definition: nn_i32.h:32
size_t shape[2]
Definition: nn_i32.h:43
int32_t * data
Definition: nn_i32.h:44
A 2D tensor with an int32_t data type.
Definition: nn_i32.h:42