From 708d586599774ca642389d7570ca177ddbcde697 Mon Sep 17 00:00:00 2001 From: Dayuxiaoshui <792179245@qq.com> Date: Thu, 11 Sep 2025 19:20:26 +0800 Subject: [PATCH] Add OMATCOPY_CT performance test with RVV optimization Co-authored-by: gong-flying --- build_and_test.sh | 72 ++++++++++++---- kernel/riscv64/KERNEL.RISCV64_ZVL128B | 3 + kernel/riscv64/KERNEL.RISCV64_ZVL256B | 3 + kernel/riscv64/omatcopy_ct_rvv.c | 118 ++++++++++++++++++++++++++ test_omatcopy_ct | Bin 0 -> 16544 bytes test_omatcopy_ct_rvv | Bin 0 -> 740336 bytes test_omatcopy_ct_scalar | Bin 0 -> 740072 bytes 7 files changed, 180 insertions(+), 16 deletions(-) create mode 100644 kernel/riscv64/omatcopy_ct_rvv.c create mode 100755 test_omatcopy_ct create mode 100755 test_omatcopy_ct_rvv create mode 100755 test_omatcopy_ct_scalar diff --git a/build_and_test.sh b/build_and_test.sh index 52b99e2b3..318dad5b5 100755 --- a/build_and_test.sh +++ b/build_and_test.sh @@ -8,19 +8,29 @@ echo "适用于 SG2044 RISC-V 服务器" echo # 检查编译器 -if ! command -v gcc &> /dev/null; then - echo "错误: 未找到 GCC 编译器" +if command -v riscv64-unknown-linux-gnu-gcc &> /dev/null; then + CC="riscv64-unknown-linux-gnu-gcc" + echo "使用 RISC-V 交叉编译器" +elif command -v gcc &> /dev/null; then + CC="gcc" + echo "使用系统 GCC 编译器" +else + echo "错误: 未找到合适的编译器" exit 1 fi -# 显示 GCC 版本 -echo "GCC 版本:" -gcc --version | head -1 +# 显示编译器版本 +echo "编译器版本:" +$CC --version | head -1 echo # 编译标准版本(无RVV) echo "[1/3] 编译标准版本(标量优化)..." -gcc -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar +if [[ "$CC" == *"riscv64"* ]]; then + $CC -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar -static +else + $CC -O3 test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar +fi if [ $? -eq 0 ]; then echo "✓ 标准版本编译成功: test_omatcopy_ct_scalar" else @@ -30,7 +40,11 @@ fi # 编译RVV版本 echo "[2/3] 编译RVV优化版本..." -gcc -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv +if [[ "$CC" == *"riscv64"* ]]; then + $CC -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv -static +else + $CC -O3 -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv +fi if [ $? -eq 0 ]; then echo "✓ RVV版本编译成功: test_omatcopy_ct_rvv" else @@ -62,17 +76,33 @@ echo echo "=== 开始性能测试 ===" echo -if [ -f "test_omatcopy_ct_rvv" ]; then - echo "运行 RVV 优化版本测试:" - echo "----------------------------------------" - ./test_omatcopy_ct_rvv +# 如果是交叉编译,提示用户需要在目标平台运行 +if [[ "$CC" == *"riscv64"* ]]; then + echo "⚠ 检测到交叉编译环境,生成的可执行文件需要在 RISC-V 平台上运行" + echo "请将以下文件传输到目标 RISC-V 系统:" + echo " - test_omatcopy_ct_scalar (标量版本)" + if [ -f "test_omatcopy_ct_rvv" ]; then + echo " - test_omatcopy_ct_rvv (RVV优化版本)" + fi echo + echo "在目标系统上运行:" + echo " ./test_omatcopy_ct_scalar # 运行标量版本" + if [ -f "test_omatcopy_ct_rvv" ]; then + echo " ./test_omatcopy_ct_rvv # 运行RVV版本" + fi +else + if [ -f "test_omatcopy_ct_rvv" ]; then + echo "运行 RVV 优化版本测试:" + echo "----------------------------------------" + ./test_omatcopy_ct_rvv + echo + fi + + echo "运行标量版本测试:" + echo "----------------------------------------" + ./test_omatcopy_ct_scalar fi -echo "运行标量版本测试:" -echo "----------------------------------------" -./test_omatcopy_ct_scalar - echo echo "=== 测试完成 ===" echo "文件说明:" @@ -81,4 +111,14 @@ if [ -f "test_omatcopy_ct_rvv" ]; then echo " test_omatcopy_ct_rvv - RVV向量化版本" fi echo " test_omatcopy_ct.c - 源代码文件" -echo " build_and_test.sh - 本编译脚本" \ No newline at end of file +echo " build_and_test.sh - 本编译脚本" +echo +echo "编译器信息:" +echo " 使用编译器: $CC" +if [[ "$CC" == *"riscv64"* ]]; then + echo " 目标架构: RISC-V 64位" + echo " 编译模式: 交叉编译 (静态链接)" +else + echo " 目标架构: 本机架构" + echo " 编译模式: 本地编译" +fi \ No newline at end of file diff --git a/kernel/riscv64/KERNEL.RISCV64_ZVL128B b/kernel/riscv64/KERNEL.RISCV64_ZVL128B index ad5880b72..03c8a4c95 100644 --- a/kernel/riscv64/KERNEL.RISCV64_ZVL128B +++ b/kernel/riscv64/KERNEL.RISCV64_ZVL128B @@ -266,3 +266,6 @@ ifndef SHGEMM_BETA SHGEMM_BETA = gemm_beta_rvv.c endif endif + +DOMATCOPY_CT = omatcopy_ct_rvv.c +SOMATCOPY_CT = omatcopy_ct_rvv.c diff --git a/kernel/riscv64/KERNEL.RISCV64_ZVL256B b/kernel/riscv64/KERNEL.RISCV64_ZVL256B index d8d74254c..d42379e16 100644 --- a/kernel/riscv64/KERNEL.RISCV64_ZVL256B +++ b/kernel/riscv64/KERNEL.RISCV64_ZVL256B @@ -219,6 +219,9 @@ COMATCOPY_CN = zomatcopy_cn_vector.c DOMATCOPY_CN = omatcopy_cn_vector.c SOMATCOPY_CN = omatcopy_cn_vector.c +DOMATCOPY_CT = omatcopy_ct_rvv.c +SOMATCOPY_CT = omatcopy_ct_rvv.c + ifeq ($(BUILD_BFLOAT16), 1) SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl256b.c diff --git a/kernel/riscv64/omatcopy_ct_rvv.c b/kernel/riscv64/omatcopy_ct_rvv.c new file mode 100644 index 000000000..032cce53b --- /dev/null +++ b/kernel/riscv64/omatcopy_ct_rvv.c @@ -0,0 +1,118 @@ +/*************************************************************************** +Copyright (c) 2013, The OpenBLAS Project +All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in +the documentation and/or other materials provided with the +distribution. +3. Neither the name of the OpenBLAS project nor the names of +its contributors may be used to endorse or promote products +derived from this software without specific prior written permission. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*****************************************************************************/ + +#include "common.h" +#include + +#if !defined(DOUBLE) +#define VSETVL_MAX __riscv_vsetvlmax_e32m8() +#define VSETVL(n) __riscv_vsetvl_e32m8(n) +#define FLOAT_V_T vfloat32m8_t +#define VLEV_FLOAT __riscv_vle32_v_f32m8 +#define VSEV_FLOAT __riscv_vse32_v_f32m8 +#define VLSEV_FLOAT __riscv_vlse32_v_f32m8 +#define VSSEV_FLOAT __riscv_vsse32_v_f32m8 +#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 +#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8 +#else +#define VSETVL_MAX __riscv_vsetvlmax_e64m8() +#define VSETVL(n) __riscv_vsetvl_e64m8(n) +#define FLOAT_V_T vfloat64m8_t +#define VLEV_FLOAT __riscv_vle64_v_f64m8 +#define VSEV_FLOAT __riscv_vse64_v_f64m8 +#define VLSEV_FLOAT __riscv_vlse64_v_f64m8 +#define VSSEV_FLOAT __riscv_vsse64_v_f64m8 +#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 +#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8 +#endif + +/***************************************************** + * Order ColMajor + * Trans with RVV optimization + * +******************************************************/ + +int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb) +{ + BLASLONG i, j; + FLOAT *aptr, *bptr; + size_t vl; + FLOAT_V_T va, vb; + + if (rows <= 0) return(0); + if (cols <= 0) return(0); + + aptr = a; + + if (alpha == 0.0) + { + vl = VSETVL_MAX; + va = VFMVVF_FLOAT(0, vl); + for (i = 0; i < cols; i++) + { + bptr = &b[i]; + for (j = 0; j < rows; j += vl) + { + vl = VSETVL(rows - j); + VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl); + } + } + return(0); + } + + if (alpha == 1.0) + { + for (i = 0; i < cols; i++) + { + bptr = &b[i]; + for (j = 0; j < rows; j += vl) + { + vl = VSETVL(rows - j); + va = VLEV_FLOAT(aptr + j, vl); + VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl); + } + aptr += lda; + } + return(0); + } + + // General case with alpha scaling + for (i = 0; i < cols; i++) + { + bptr = &b[i]; + for (j = 0; j < rows; j += vl) + { + vl = VSETVL(rows - j); + va = VLEV_FLOAT(aptr + j, vl); + va = VFMULVF_FLOAT(va, alpha, vl); + VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl); + } + aptr += lda; + } + + return(0); +} \ No newline at end of file diff --git a/test_omatcopy_ct b/test_omatcopy_ct new file mode 100755 index 0000000000000000000000000000000000000000..2420ee55597ba50e953a1eff0460e6060bbc2d6d GIT binary patch literal 16544 zcmeHOeRLGpb$=_6kuk8s1_wiI))ZW@&GKo24T=R~@u(4oU}1x6d%S3O(H2Ro?2fSH zgYnw3uoOwLoW{12k|4MAK%2zK38W$gTf!~KIZi6*X^wo{o~js$-9l^`;zpqj+uwcj z?r3(^3LgVXdebN~0{y%R3#RNLQPRtlG*tmpk!wnl zUN&VF@1%v3OgW}pZq?^8%Y}pPQ~oox>rref=bL(!dFixG#pQsatVb;A#g$%M=`kH7 zhme?ZJSir0Em!_H=|WOMV(Qc@ReDak##9qbsSZjH^&5ddq`nPGZ|HW@j*~uS!h$K+ z_hsl&Tz=QY-MmrNm#H6GRQ#E$@QMXx;jPP-E+`AkFAG;xRnMm6QA(-5-B=K-=4TXvV!uXGY%q<7D*PE|MX? zNrz-8kv$$F#8cjli)74rJcpc#5j_?BSvk>6>2oN!afCveXCvW@GT^PiJ-D2S{2Iv) z`8pCF%7D+#fa@9Xf(-cP4EQ179$e1!4gil61iuE?Nd6ZBpEZKKsH`$#Lf2!4zht{! zQo3Ca`NL%*6b%NcsHD^%72AS_5iSo#LIHmbL^0w@NIq)l<^FJmh(-Ms0pKOoemxYf z@Rx<34vG?gSy`k+ln2XWK|@qV!xctIkR}byW#WON!qo-(V((Hbw`4fG*n5xA3yat3 zfnYSaEgUm~(c*OlWs!5LIm}k5niZbW?OhL9{mue`7ETv#{z=-u!b(gvO`~ zr8E$E9!?2|C(;V_l)?_Z{3?dT81bCqtEPvqC#y~1De-#A>(rAf_|TD|{RPniHH_Y`A@1x7u)d zpHd2KHk{hP>3|KVIykl4aE=9)b=YtO+De@^e7uDU(QU(Tu;D#6T-`ElrK8!=2#iKx zGy&Sa1U?H9xa7X=vQ~Q`M{CS}e}NEMU8j+i>d|WdA%}LtLZt552skyT82{Zf z^99M5QbF=eDwPVgNS-!0$Zq=4qpoe14dx z4NfvT%+tmuxpkPQ4NY>R#i!=n25&>|nb@?Gf2ld&@uu^yI{2?S_}vbEmxJHo;42(_ z$iYA1;2%!o7yY8J;nWja!x^pi?BIrC-=fY%?`q9?e~)ITrY?c#FGF7U%oor|bWn5N z79d)4_C_N24jEI?bvIKUU3Yz|&pk6va_=Y#`FT?Qo>@fZUC|l_wXTou*SdzrXs#35 zsVl}580epG8puiYnQ>!%ieEgBibd63o3z@z6&sA1g!t@fE-@$C2x zYC-Y>h82Jw3;2M5MjteMgCY0Foo=u1y{DhyTvbdv>FXoS^P3>mr!`NhBM$j?&D8^? zAqXU-z#M=H&3({!z$P^WDKZAnP_HlHl0qwdy*p2V?IX5NTjA^3`MT*}5!|`t&dm-_ z!qk8(?yv5HaY7zpr0(nF0J~*K3)gUKxvX(mU{G@%myqe{1a1#J?M6LWS9|&v#5=UE zzAVk<8%+25dDH7x?H<+qHjnbI_6R+fOF@nLb9AFfuwu5|~xJJAdCXQhUJ@&>F|inM(s2lY3F8*6izh3mn*!iH9zyQZ%|T zqu;%#x$o@0h>3XukGL1zTe@lZ(p0B{&WrBNT>ypx+FhnKBRlcwuT!aahsR^>g@stR z5?HtTuw-HVTGxYFJxFyWXeNFG54jt|3~FhGgU# zG0_K!9R%CwZmPhHYH3Mxe*-VOaK9VwcVfluu$r)v7FN}SA_(dJ1n zShWaTL#oG=%DLoztP4K-E>N@G3w#%LOog)V0{%^W5O;3(Bg4gr2+1wT_EEuZdKn~} zmC+Sd30B=lJFhLNoX&w4ry+eRq`7yU(i%4nky^+-w;Mo3m)39sx7J@_1A;NsLHn4* zHJ-YvlaGEchk>tPf5*R5zzz~p-s?NbYRq8j+gIt_hJ+bnZzP4*o^c(eZrI|vGW2QilwbM$0g zkrICUPG}?wQQ^tD<-ov+wiZ?*4Cz=1Q`Olq`Tsqhz&K^un4tbj;+s4JbxWeSm@ZZJEbl?4<#SH7t9jS+5*M ziJhpNV~V!Q_6%Vxgi*(22#f@)q8CFe zFS?)Tx`^@FYD(VDlBP#%V1+!jdsP3sdvp-Py0(stz#}4-M@O)@%a*5m^dA^z>`|N6 zc#ML8>deNzVs>*C9Pq^p8-}ROd*E+FT`|PaAYUtVeQng71GUEp6vp-F&1W&4&W}Up zX=EU`?f?nH{-#zeHduI0@#1qFLXSfTraM4HB7TWlKtsu84xtrt=DM4{`%y^eKYpI__zlmW&M6DrjqCCIJaa;x z9Wnf;(Iit|UY=+Dx>dym>o;uC3yM7(g3(YUTJEnX33`fyF~b7^5*#?(m^k=K|JU~R z?{6GvdolUqOXnKu2ljlA%nclPeV}n~f6L22`zoQ4i|+Lny(Mg1mNeYa=e`+s%r^*7)e zkl1)Z?N@DkqiLYN#cJ;l4-C8j4-;>HbD;I$*^cqVipFP@YHU8|;Zw>5e82F1r1Fdf*UVhDr#lTA~{mn0a7=IpZh^t>M=-iL6Uj*$1rRkmkr7cVS z$Ej2`=vM~ufDif#W_=DeH%ES%O3ekGL-!oev!JD*Te0HSgHod3x+%Gy-Y8tvxvm)# z#^to2jBxs`IS01yAb~aGa*52wwG>y&FH)&?0w(3Ioiy!F+&4Ux6BqYSzxU21vu6=Z z`sDXC_$U<#L?Yb4l?%KLloQoO+sNlo-)tJ{zDc>evkE3&H)ci_T#~+g4{+Tlnakfp zc>`9^GEhrDtAy00{sqAQLThbW`H!sf323*0nC+t2SoK%Axc<9=pGFMN&_2g1e}Akr zKsHCyXaq(hFdBi;2#iKxGy&SZHUj(|8-KS(TS7|ZCJ_@9f z^Y?Kx6pyW_Ow$$R@8oFzONq-ry^@L$-%f8Bknk2prp+pY*KsoSs&and`!|L2w_wv0 zKUGmCN=;O%rxe<9QyMgh^;|$d=~ANaQ7LU!0OPI75%#-NN(sT=u5tY;ELQvQ_D!}A zah0lgA1nFI${yZ*aJjUIM;pm6Q{{LyBzbO!s`mek@ORu!gA3KeOunKU6n#?BN=4&} zwkX=BXuG1FiaPE8Ha;iclz5<^;6BgXOlm(El0 zwJp-P9xFbPFYxS=)PF9NG^qf4RN+maBgs?$W{F(UuFfMUap6&Tp95c_aGqZ*Ux