From aecb7f9537600dc9990b4efb2e1dea40cc3f97be Mon Sep 17 00:00:00 2001 From: Chip Kerchner Date: Tue, 7 Oct 2025 13:14:20 +0000 Subject: [PATCH] Change signature of SBGEMV. --- kernel/riscv64/sbgemv_n_vector.c | 37 +++++++++++++++++++++++++++++++- kernel/riscv64/sbgemv_t_vector.c | 6 +++--- 2 files changed, 39 insertions(+), 4 deletions(-) diff --git a/kernel/riscv64/sbgemv_n_vector.c b/kernel/riscv64/sbgemv_n_vector.c index 0c6064b67..94b9488cf 100644 --- a/kernel/riscv64/sbgemv_n_vector.c +++ b/kernel/riscv64/sbgemv_n_vector.c @@ -32,6 +32,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VLSEV_FLOAT RISCV_RVV(vlse32_v_f32m8) #define VSEV_FLOAT RISCV_RVV(vse32_v_f32m8) #define VSSEV_FLOAT RISCV_RVV(vsse32_v_f32m8) +#define VFMULVF_FLOAT RISCV_RVV(vfmul_vf_f32m8) +#define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8) #define VSETVL(n) RISCV_RVV(vsetvl_e16m4)(n) @@ -45,7 +47,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMACCVF_FLOAT RISCV_RVV(vfwmaccbf16_vf_f32m8) #endif -int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y) { if (n < 0) return(0); @@ -55,7 +57,24 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL IFLOAT_V_T va; FLOAT_V_T vy; + y_ptr = y; if (inc_y == 1) { + if (beta == 0.0) { + for (i = m; i > 0; i -= vl) { + vl = VSETVL(i); + vy = VFMVVF_FLOAT(0.0, vl); + VSEV_FLOAT(y_ptr, vy, vl); + y_ptr += vl; + } + } else if (beta != 1.0) { + for (i = m; i > 0; i -= vl) { + vl = VSETVL(i); + vy = VLEV_FLOAT(y_ptr, vl); + vy = VFMULVF_FLOAT(vy, beta, vl); + VSEV_FLOAT(y_ptr, vy, vl); + y_ptr += vl; + } + } for (j = 0; j < n; j++) { temp = (IFLOAT)(alpha * (FLOAT)(x[0])); y_ptr = y; @@ -74,6 +93,22 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL } } else { BLASLONG stride_y = inc_y * sizeof(FLOAT); + if (beta == 0.0) { + for (i = m; i > 0; i -= vl) { + vl = VSETVL(i); + vy = VFMVVF_FLOAT(0.0, vl); + VSSEV_FLOAT(y_ptr, stride_y, vy, vl); + y_ptr += vl * inc_y; + } + } else if (beta != 1.0) { + for (i = m; i > 0; i -= vl) { + vl = VSETVL(i); + vy = VLSEV_FLOAT(y_ptr, stride_y, vl); + vy = VFMULVF_FLOAT(vy, beta, vl); + VSSEV_FLOAT(y_ptr, stride_y, vy, vl); + y_ptr += vl * inc_y; + } + } for (j = 0; j < n; j++) { temp = (IFLOAT)(alpha * (FLOAT)(x[0])); y_ptr = y; diff --git a/kernel/riscv64/sbgemv_t_vector.c b/kernel/riscv64/sbgemv_t_vector.c index 09069caaa..7fdccee1b 100644 --- a/kernel/riscv64/sbgemv_t_vector.c +++ b/kernel/riscv64/sbgemv_t_vector.c @@ -54,7 +54,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define VFMVVF_FLOAT RISCV_RVV(vfmv_v_f_f32m8) #define VFMVVF_FLOAT_M1 RISCV_RVV(vfmv_v_f_f32m1) -int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *buffer) +int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y) { BLASLONG i = 0, j = 0, k = 0; BLASLONG ix = 0, iy = 0; @@ -92,7 +92,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL v_res = VFREDSUM_FLOAT(vr, v_res, gvl); } temp = (FLOAT)EXTRACT_FLOAT(v_res); - y[iy] += alpha * temp; + y[iy] = y[iy] * beta + alpha * temp; iy += inc_y; a_ptr += lda; @@ -123,7 +123,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, IFLOAT *a, BLASL v_res = VFREDSUM_FLOAT(vr, v_res, gvl); } temp = (FLOAT)EXTRACT_FLOAT(v_res); - y[iy] += alpha * temp; + y[iy] = y[iy] * beta + alpha * temp; iy += inc_y; a_ptr += lda;