mirror of
https://github.com/OpenMathLib/OpenBLAS
synced 2026-05-31 00:45:48 +08:00
Missing one gemv conversion.
This commit is contained in:
@@ -60,8 +60,10 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
|
||||
BLASLONG ix = 0, iy = 0;
|
||||
#if defined(HFLOAT16)
|
||||
_Float16 *a_ptr = (_Float16 *)(a);
|
||||
+ _Float16 *x_ptr = (_Float16 *)(x);
|
||||
#else
|
||||
__bf16 *a_ptr = (__bf16 *)(a);
|
||||
+ __bf16 *x_ptr = (__bf16 *)(x);
|
||||
#endif
|
||||
FLOAT temp;
|
||||
|
||||
@@ -83,7 +85,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
|
||||
#endif
|
||||
for (k = 0; k < m/gvl; k++) {
|
||||
va = VLEV_IFLOAT(&a_ptr[j], gvl);
|
||||
- vx = VLEV_IFLOAT(&x[j], gvl);
|
||||
+ vx = VLEV_IFLOAT(&x_ptr[j], gvl);
|
||||
vr = VFMACCVV_FLOAT(vz, va, vx, gvl); // could vfmacc here and reduce outside loop
|
||||
v_res = VFREDSUM_FLOAT(vr, v_res, gvl); // but that reordering diverges far enough from scalar path to make tests fail
|
||||
j += gvl;
|
||||
@@ -91,7 +93,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
|
||||
if (j < m) {
|
||||
gvl = VSETVL(m-j);
|
||||
va = VLEV_IFLOAT(&a_ptr[j], gvl);
|
||||
- vx = VLEV_IFLOAT(&x[j], gvl);
|
||||
+ vx = VLEV_IFLOAT(&x_ptr[j], gvl);
|
||||
vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
|
||||
v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
|
||||
}
|
||||
@@ -113,7 +115,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
|
||||
#endif
|
||||
for (k = 0; k < m/gvl; k++) {
|
||||
va = VLEV_IFLOAT(&a_ptr[j], gvl);
|
||||
- vx = VLSEV_IFLOAT(&x[ix], stride_x, gvl);
|
||||
+ vx = VLSEV_IFLOAT(&x_ptr[ix], stride_x, gvl);
|
||||
vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
|
||||
v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
|
||||
j += gvl;
|
||||
@@ -122,7 +124,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *
|
||||
if (j < m) {
|
||||
gvl = VSETVL(m-j);
|
||||
va = VLEV_IFLOAT(&a_ptr[j], gvl);
|
||||
- vx = VLSEV_IFLOAT(&x[ix], stride_x, gvl);
|
||||
+ vx = VLSEV_IFLOAT(&x_ptr[ix], stride_x, gvl);
|
||||
vr = VFMACCVV_FLOAT(vz, va, vx, gvl);
|
||||
v_res = VFREDSUM_FLOAT(vr, v_res, gvl);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user