diff --git a/build_and_test.sh b/build_and_test.sh
index 52b99e2b3..318dad5b5 100755
--- a/build_and_test.sh
+++ b/build_and_test.sh
@@ -8,19 +8,29 @@ echo "适用于 SG2044 RISC-V 服务器"
 echo
 
 # 检查编译器
-if ! command -v gcc &> /dev/null; then
-    echo "错误: 未找到 GCC 编译器"
+if command -v riscv64-unknown-linux-gnu-gcc &> /dev/null; then
+    CC="riscv64-unknown-linux-gnu-gcc"
+    echo "使用 RISC-V 交叉编译器"
+elif command -v gcc &> /dev/null; then
+    CC="gcc"
+    echo "使用系统 GCC 编译器"
+else
+    echo "错误: 未找到合适的编译器"
     exit 1
 fi
 
-# 显示 GCC 版本
-echo "GCC 版本:"
-gcc --version | head -1
+# Show the compiler version
+echo "编译器版本:"
+$CC --version | head -1
 echo
 
 # 编译标准版本(无RVV)
 echo "[1/3] 编译标准版本(标量优化)..."
-gcc -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar
+if [[ "$CC" == *"riscv64"* ]]; then
+    $CC -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar -static
+else
+    $CC -O3 test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar
+fi
 if [ $? -eq 0 ]; then
     echo "✓ 标准版本编译成功: test_omatcopy_ct_scalar"
 else
@@ -30,7 +40,15 @@ fi
 
 # 编译RVV版本
 echo "[2/3] 编译RVV优化版本..."
-gcc -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv
+if [[ "$CC" == *"riscv64"* ]]; then
+    $CC -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv -static
+elif [[ "$(uname -m)" == "riscv64" ]]; then
+    # Native RISC-V host: the vector extension still has to be enabled
+    # explicitly, exactly as the previous plain-gcc invocation did.
+    $CC -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv
+else
+    $CC -O3 -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv
+fi
 if [ $? -eq 0 ]; then
     echo "✓ RVV版本编译成功: test_omatcopy_ct_rvv"
 else
@@ -62,17 +80,33 @@ echo
 echo "=== 开始性能测试 ==="
 echo
 
-if [ -f "test_omatcopy_ct_rvv" ]; then
-    echo "运行 RVV 优化版本测试:"
-    echo "----------------------------------------"
-    ./test_omatcopy_ct_rvv
+# When cross-compiling, tell the user the binaries must be run on the target platform
+if [[ "$CC" == *"riscv64"* ]]; then
+    echo "⚠ 检测到交叉编译环境,生成的可执行文件需要在 RISC-V 平台上运行"
+    echo "请将以下文件传输到目标 RISC-V 系统:"
+    echo " - test_omatcopy_ct_scalar (标量版本)"
+    if [ -f "test_omatcopy_ct_rvv" ]; then
+        echo " - test_omatcopy_ct_rvv (RVV优化版本)"
+    fi
     echo
+    echo "在目标系统上运行:"
+    echo " ./test_omatcopy_ct_scalar # 运行标量版本"
+    if [ -f "test_omatcopy_ct_rvv" ]; then
+        echo " ./test_omatcopy_ct_rvv # 运行RVV版本"
+    fi
+else
+    if [ -f "test_omatcopy_ct_rvv" ]; then
+        echo "运行 RVV 优化版本测试:"
+        echo "----------------------------------------"
+        ./test_omatcopy_ct_rvv
+        echo
+    fi
+
+    echo "运行标量版本测试:"
+    echo "----------------------------------------"
+    ./test_omatcopy_ct_scalar
 fi
 
-echo "运行标量版本测试:"
-echo "----------------------------------------"
-./test_omatcopy_ct_scalar
-
 echo
 echo "=== 测试完成 ==="
 echo "文件说明:"
@@ -81,4 +115,14 @@ if [ -f "test_omatcopy_ct_rvv" ]; then
     echo " test_omatcopy_ct_rvv - RVV向量化版本"
 fi
 echo " test_omatcopy_ct.c - 源代码文件"
-echo " build_and_test.sh - 本编译脚本"
\ No newline at end of file
+echo " build_and_test.sh - 本编译脚本"
+echo
+echo "编译器信息:"
+echo " 使用编译器: $CC"
+if [[ "$CC" == *"riscv64"* ]]; then
+    echo " 目标架构: RISC-V 64位"
+    echo " 编译模式: 交叉编译 (静态链接)"
+else
+    echo " 目标架构: 本机架构"
+    echo " 编译模式: 本地编译"
+fi
diff --git a/kernel/riscv64/KERNEL.RISCV64_ZVL128B b/kernel/riscv64/KERNEL.RISCV64_ZVL128B
index ad5880b72..03c8a4c95 100644
--- a/kernel/riscv64/KERNEL.RISCV64_ZVL128B
+++ b/kernel/riscv64/KERNEL.RISCV64_ZVL128B
@@ -266,3 +266,6 @@ ifndef SHGEMM_BETA
 SHGEMM_BETA = gemm_beta_rvv.c
 endif
 endif
+
+DOMATCOPY_CT = omatcopy_ct_rvv.c
+SOMATCOPY_CT = omatcopy_ct_rvv.c
diff --git a/kernel/riscv64/KERNEL.RISCV64_ZVL256B b/kernel/riscv64/KERNEL.RISCV64_ZVL256B
index d8d74254c..d42379e16 100644
--- a/kernel/riscv64/KERNEL.RISCV64_ZVL256B
+++ b/kernel/riscv64/KERNEL.RISCV64_ZVL256B
@@ -219,6 +219,9 @@ COMATCOPY_CN = zomatcopy_cn_vector.c
 DOMATCOPY_CN = omatcopy_cn_vector.c
 SOMATCOPY_CN = omatcopy_cn_vector.c
 
+DOMATCOPY_CT = omatcopy_ct_rvv.c
+SOMATCOPY_CT = omatcopy_ct_rvv.c
+
 ifeq ($(BUILD_BFLOAT16), 1)
 SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl256b.c
 
diff --git a/kernel/riscv64/omatcopy_ct_rvv.c b/kernel/riscv64/omatcopy_ct_rvv.c
new file mode 100644
index 000000000..032cce53b
--- /dev/null
+++ b/kernel/riscv64/omatcopy_ct_rvv.c
@@ -0,0 +1,127 @@
+/***************************************************************************
+Copyright (c) 2013, The OpenBLAS Project
+All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in
+the documentation and/or other materials provided with the
+distribution.
+3. Neither the name of the OpenBLAS project nor the names of
+its contributors may be used to endorse or promote products
+derived from this software without specific prior written permission.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
+USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*****************************************************************************/
+
+#include "common.h"
+#include <riscv_vector.h>
+
+#if !defined(DOUBLE)
+#define VSETVL_MAX __riscv_vsetvlmax_e32m8()
+#define VSETVL(n) __riscv_vsetvl_e32m8(n)
+#define FLOAT_V_T vfloat32m8_t
+#define VLEV_FLOAT __riscv_vle32_v_f32m8
+#define VSEV_FLOAT __riscv_vse32_v_f32m8
+#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
+#define VSSEV_FLOAT __riscv_vsse32_v_f32m8
+#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
+#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
+#else
+#define VSETVL_MAX __riscv_vsetvlmax_e64m8()
+#define VSETVL(n) __riscv_vsetvl_e64m8(n)
+#define FLOAT_V_T vfloat64m8_t
+#define VLEV_FLOAT __riscv_vle64_v_f64m8
+#define VSEV_FLOAT __riscv_vse64_v_f64m8
+#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
+#define VSSEV_FLOAT __riscv_vsse64_v_f64m8
+#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
+#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
+#endif
+
+/*****************************************************
+ * Order ColMajor
+ * Trans with RVV optimization
+ *
+ * Computes b[j * ldb + i] = alpha * a[i * lda + j] for
+ * 0 <= i < cols and 0 <= j < rows.  Each column of a is
+ * read with unit-stride vector loads and written to b
+ * with a stride of ldb elements.
+ *
+ * alpha == 0 only stores zeros (a is never read) and
+ * alpha == 1 skips the multiply.  Non-positive rows or
+ * cols is a no-op.  Always returns 0.
+******************************************************/
+
+int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
+{
+    BLASLONG i, j;
+    FLOAT *aptr, *bptr;
+    size_t vl;
+    FLOAT_V_T va;
+
+    if (rows <= 0) return(0);
+    if (cols <= 0) return(0);
+
+    aptr = a;
+
+    if (alpha == 0.0)
+    {
+        /* Splat zero once at the maximum vector length; shorter stores reuse its prefix. */
+        vl = VSETVL_MAX;
+        va = VFMVVF_FLOAT(0, vl);
+        for (i = 0; i < cols; i++)
+        {
+            bptr = &b[i];
+            for (j = 0; j < rows; j += vl)
+            {
+                vl = VSETVL(rows - j);
+                VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
+            }
+        }
+        return(0);
+    }
+
+    if (alpha == 1.0)
+    {
+        for (i = 0; i < cols; i++)
+        {
+            bptr = &b[i];
+            for (j = 0; j < rows; j += vl)
+            {
+                vl = VSETVL(rows - j);
+                va = VLEV_FLOAT(aptr + j, vl);
+                VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
+            }
+            aptr += lda;
+        }
+        return(0);
+    }
+
+    // General case with alpha scaling
+    for (i = 0; i < cols; i++)
+    {
+        bptr = &b[i];
+        for (j = 0; j < rows; j += vl)
+        {
+            vl = VSETVL(rows - j);
+            va = VLEV_FLOAT(aptr + j, vl);
+            va = VFMULVF_FLOAT(va, alpha, vl);
+            VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
+        }
+        aptr += lda;
+    }
+
+    return(0);
+}
diff --git a/test_omatcopy_ct b/test_omatcopy_ct
new file mode 100755
index 000000000..2420ee555
Binary files /dev/null and b/test_omatcopy_ct differ
diff --git a/test_omatcopy_ct_rvv b/test_omatcopy_ct_rvv
new file mode 100755
index 000000000..76fc94756
Binary files /dev/null and b/test_omatcopy_ct_rvv differ
diff --git a/test_omatcopy_ct_scalar b/test_omatcopy_ct_scalar
new file mode 100755
index 000000000..c42de2f8c
Binary files /dev/null and b/test_omatcopy_ct_scalar differ