14#ifdef CONFIG_BACKEND_RISCV_V
15 #include "riscv_vector.h"
74 tensor->
shape[0] = shape[0];
76 size_t n_bytes = shape[0] *
sizeof(int32_t);
77 tensor->
data = (int32_t *)malloc(n_bytes);
79 memcpy(tensor->
data, data, n_bytes);
94 tensor->
shape[0] = shape[0];
95 tensor->
shape[1] = shape[1];
97 size_t n_bytes = shape[0] * shape[1] *
sizeof(int32_t);
98 tensor->
data = (int32_t *)malloc(n_bytes);
100 memcpy(tensor->
data, data, n_bytes);
114 for (
size_t i = 0; i < n; i += 1) {
122 size_t n = shape[0] * shape[1];
123 for (
size_t i = 0; i < n; i += 1) {
137 for (
size_t i = 0; i < n; i += 1) {
145 size_t n = shape[0] * shape[1];
146 for (
size_t i = 0; i < n; i += 1) {
160 for (
size_t i = 0; i < n; i += 1) {
161 tensor->
data[i] = data;
168 size_t n = shape[0] * shape[1];
169 for (
size_t i = 0; i < n; i += 1) {
170 tensor->
data[i] = data;
183 for (
size_t i = 0; i < n; i += 1) {
184 tensor->
data[i] = rand();
191 size_t n = shape[0] * shape[1];
192 for (
size_t i = 0; i < n; i += 1) {
193 tensor->
data[i] = rand();
225 for (
size_t i=0; i<tensor->
shape[0]; i+=1) {
227 if (i < tensor->shape[0]-1) {
243 for (
size_t i=0; i<tensor->
shape[0]; i+=1) {
248 for (
size_t j=0; j<tensor->
shape[1]; j+=1) {
250 if (j < tensor->shape[1]-1) {
255 if (i < tensor->shape[0]-1) {
291 size_t n = a->
shape[0];
292 for (
size_t i = 0; i < n; i += 1) {
313 for (
size_t i = 0; i < n; i += 1) {
347 size_t n = y->
shape[0];
348 int32_t *x1_data = x1->
data;
349 int32_t *x2_data = x2->
data;
350 int32_t *y_data = y->
data;
352 #ifdef CONFIG_BACKEND_RISCV_VECTOR
/* NOTE(review): the riscv_vector.h include at the top of this file is guarded
   by CONFIG_BACKEND_RISCV_V, not CONFIG_BACKEND_RISCV_VECTOR — confirm which
   macro the build actually defines; as written, one of the two guards can
   never be active at the same time as the other. */
/* BUG(review): vsetvl requests an 8-bit element width (e8), but every load and
   store below operates on 32-bit elements (vle32/vse32). This should be
   __riscv_vsetvl_e32m1(n); with e8 the returned vl can exceed the e32 VLMAX. */
354 size_t vl = __riscv_vsetvl_e8m1(n);
355 vint32m1_t vec_x1 = __riscv_vle32_v_i32m1(x1_data, vl);
356 vint32m1_t vec_x2 = __riscv_vle32_v_i32m1(x2_data, vl);
/* BUG(review): __riscv_vfadd_vv_* is the floating-point add intrinsic and is
   not defined for vint32m1_t operands — integer vectors need
   __riscv_vadd_vv_i32m1(vec_x1, vec_x2, vl). Likely copy-pasted from the
   f32 variant of this kernel. */
357 vint32m1_t vec_y = __riscv_vfadd_vv_i32m1(vec_x1, vec_x2, vl);
358 __riscv_vse32_v_i32m1(y_data, vec_y, vl);
/* NOTE(review): only the first vl elements are processed here — no
   strip-mining loop over n is visible in this listing, so tensors longer than
   one vector register would be silently truncated. The surrounding source
   lines are elided in this view; confirm against the full file. */
365 for (
size_t i = 0; i < n; i += 1) {
389 int32_t *x1_data = x1->
data;
390 int32_t *x2_data = x2->
data;
391 int32_t *y_data = y->
data;
393 #ifdef CONFIG_BACKEND_RISCV_VECTOR
/* NOTE(review): guard macro CONFIG_BACKEND_RISCV_VECTOR does not match the
   CONFIG_BACKEND_RISCV_V guard on the riscv_vector.h include at the top of
   this file — verify which spelling the build system defines. */
/* BUG(review): element width mismatch — vsetvl uses e8 (8-bit) while the
   loads/stores below are 32-bit (vle32/vse32); should be
   __riscv_vsetvl_e32m1(n). */
395 size_t vl = __riscv_vsetvl_e8m1(n);
396 vint32m1_t vec_x1 = __riscv_vle32_v_i32m1(x1_data, vl);
397 vint32m1_t vec_x2 = __riscv_vle32_v_i32m1(x2_data, vl);
/* BUG(review): __riscv_vfadd_vv_* is a floating-point intrinsic; it does not
   exist for vint32m1_t. Integer add must use
   __riscv_vadd_vv_i32m1(vec_x1, vec_x2, vl). */
398 vint32m1_t vec_y = __riscv_vfadd_vv_i32m1(vec_x1, vec_x2, vl);
399 __riscv_vse32_v_i32m1(y_data, vec_y, vl);
/* NOTE(review): processes only the first vl elements — no strip-mining loop
   over the full shape[0]*shape[1] element count is visible in this listing;
   surrounding lines are elided, confirm against the full source. */
406 for (
size_t i = 0; i < n; i += 1) {
415 size_t n = y->
shape[0];
416 for (
size_t i = 0; i < n; i += 1) {
426 for (
size_t i = 0; i < n; i += 1) {
448 size_t n = y->
shape[0];
450 for (
size_t i = 0; i < n; i += 1) {
451 sum_i32 += x1->
data[i] * x2->
data[i];
453 y->
data[0] = sum_i32;
458 nn_assert(x1->
shape[1] == x2->
shape[0],
"Cannot perform MatMul on tensors of different shapes");
461 const size_t n = x1->
shape[0];
462 const size_t m = x1->
shape[1];
463 const size_t p = x2->
shape[1];
465 for (
size_t i = 0; i < n; i += 1) {
466 for (
size_t j = 0; j < p; j += 1) {
468 for (
size_t k = 0; k < m; k += 1) {
469 sum += x1->
data[i * m + k] * x2->
data[k * p + j];
471 y->
data[i * p + j] = sum;
477 nn_assert(x1->
shape[1] == x2->
shape[0],
"Cannot perform MatMul on tensors of different shapes");
480 const size_t n = x1->
shape[0];
481 const size_t m = x1->
shape[1];
482 const size_t p = x2->
shape[1];
484 for (
size_t i = 0; i < n; i += 1) {
485 for (
size_t j = 0; j < p; j += 1) {
487 for (
size_t k = 0; k < m; k += 1) {
488 sum += x1->
data[i * m + k] * x2->
data[k * p + j];
490 y->
data[i * p + j] = sum + c->
data[i * p + j];
498 nn_assert(x->
shape[1] == weight->
shape[1],
"Cannot perform Linear on tensors of different shapes");
499 nn_assert(!bias || bias->
shape[0] == weight->
shape[0],
"Cannot perform Linear on tensors of different shapes");
502 const size_t batch_size = x->
shape[0];
503 const size_t in_features = x->
shape[1];
504 const size_t out_features = weight->
shape[0];
506 for (
size_t i = 0; i < batch_size; i += 1) {
507 for (
size_t j = 0; j < out_features; j += 1) {
509 for (
size_t k = 0; k < in_features; k += 1) {
510 sum += x->
data[i * in_features + k] * weight->
data[j * in_features + k];
513 sum += bias->
data[j];
515 y->
data[i * out_features + j] = sum;
530 int32_t *x_data = x->
data;
531 int32_t *y_data = y->
data;
533 for (
size_t i = 0; i < n; i += 1) {
Half-Precision Floating-Point (fp16) Definitions.
static void nn_assert(int condition, char *message)
Definition: nn.h:54
void nn_addmm_i32(Tensor2D_I32 *y, const Tensor2D_I32 *c, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2)
Definition: nn_i32.h:476
uint8_t nn_equals2d_i32(const Tensor2D_I32 *a, const Tensor2D_I32 *b)
Definition: nn_i32.h:309
Tensor1D_I32 * nn_rand1d_i32(size_t shape[1])
Definition: nn_i32.h:180
uint8_t nn_equals1d_i32(const Tensor1D_I32 *a, const Tensor1D_I32 *b)
Definition: nn_i32.h:288
void nn_add2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2)
Definition: nn_i32.h:384
void nn_mm_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x1, const Tensor2D_I32 *x2)
Definition: nn_i32.h:457
void nn_print_tensor2d_i32(const Tensor2D_I32 *tensor)
Definition: nn_i32.h:241
void nn_addscalar2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x, int32_t scalar)
Definition: nn_i32.h:421
Tensor1D_I32 * nn_ones1d_i32(size_t shape[1])
Definition: nn_i32.h:134
void nn_addscalar1d_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x, int32_t scalar)
Definition: nn_i32.h:412
void nn_dot_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x1, const Tensor1D_I32 *x2)
Definition: nn_i32.h:444
Tensor0D_I32 * nn_full0d_i32(int32_t data)
Definition: nn_i32.h:152
Tensor0D_I32 * nn_rand0d_i32()
Definition: nn_i32.h:175
void nn_print_i32(int32_t v, int16_t num_digits)
Definition: nn_i32.h:210
void nn_linear_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x, const Tensor2D_I32 *weight, const Tensor1D_I32 *bias)
Definition: nn_i32.h:497
void nn_add1d_i32(Tensor1D_I32 *y, const Tensor1D_I32 *x1, const Tensor1D_I32 *x2)
Definition: nn_i32.h:343
uint8_t nn_equals0d_i32(const Tensor0D_I32 *a, const Tensor0D_I32 *b)
Definition: nn_i32.h:275
Tensor2D_I32 * nn_rand2d_i32(size_t shape[2])
Definition: nn_i32.h:189
Tensor1D_I32 * nn_tensor1d_i32(size_t shape[1], const int32_t *data)
Definition: nn_i32.h:72
Tensor0D_I32 * nn_zeros0d_i32()
Definition: nn_i32.h:106
Tensor0D_I32 * nn_ones0d_i32()
Definition: nn_i32.h:129
Tensor1D_I32 * nn_zeros1d_i32(size_t shape[1])
Definition: nn_i32.h:111
Tensor0D_I32 * nn_tensor0d_i32(int32_t data)
Definition: nn_i32.h:58
Tensor2D_I32 * nn_tensor2d_i32(size_t shape[2], const int32_t *data)
Definition: nn_i32.h:92
Tensor2D_I32 * nn_zeros2d_i32(size_t shape[2])
Definition: nn_i32.h:120
void nn_relu2d_i32(Tensor2D_I32 *y, const Tensor2D_I32 *x)
Definition: nn_i32.h:526
void nn_print_tensor1d_i32(const Tensor1D_I32 *tensor)
Definition: nn_i32.h:223
Tensor1D_I32 * nn_full1d_i32(size_t shape[1], int32_t data)
Definition: nn_i32.h:157
Tensor2D_I32 * nn_full2d_i32(size_t shape[2], int32_t data)
Definition: nn_i32.h:166
Tensor2D_I32 * nn_ones2d_i32(size_t shape[2])
Definition: nn_i32.h:143
int32_t data
Definition: nn_i32.h:24
size_t shape[1]
Definition: nn_i32.h:33
int32_t * data
Definition: nn_i32.h:34
size_t shape[2]
Definition: nn_i32.h:43
int32_t * data
Definition: nn_i32.h:44