Add OMATCOPY_CT performance test with RVV optimization

Co-authored-by: gong-flying <gongxiaofei24@iscas.ac.cn>
This commit is contained in:
Dayuxiaoshui
2025-09-11 19:20:26 +08:00
parent 2953c7d244
commit 708d586599
7 changed files with 180 additions and 16 deletions

View File

@@ -8,19 +8,29 @@ echo "适用于 SG2044 RISC-V 服务器"
echo
# 检查编译器
if ! command -v gcc &> /dev/null; then
echo "错误: 未找到 GCC 编译器"
if command -v riscv64-unknown-linux-gnu-gcc &> /dev/null; then
CC="riscv64-unknown-linux-gnu-gcc"
echo "使用 RISC-V 交叉编译器"
elif command -v gcc &> /dev/null; then
CC="gcc"
echo "使用系统 GCC 编译器"
else
echo "错误: 未找到合适的编译器"
exit 1
fi
# 显示 GCC 版本
echo "GCC 版本:"
gcc --version | head -1
# 显示编译器版本
echo "编译器版本:"
$CC --version | head -1
echo
# 编译标准版本无RVV
echo "[1/3] 编译标准版本(标量优化)..."
gcc -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar
if [[ "$CC" == *"riscv64"* ]]; then
$CC -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar -static
else
$CC -O3 test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar
fi
if [ $? -eq 0 ]; then
echo "✓ 标准版本编译成功: test_omatcopy_ct_scalar"
else
@@ -30,7 +40,11 @@ fi
# 编译RVV版本
echo "[2/3] 编译RVV优化版本..."
gcc -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv
if [[ "$CC" == *"riscv64"* ]]; then
$CC -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv -static
else
$CC -O3 -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv
fi
if [ $? -eq 0 ]; then
echo "✓ RVV版本编译成功: test_omatcopy_ct_rvv"
else
@@ -62,17 +76,33 @@ echo
echo "=== 开始性能测试 ==="
echo
if [ -f "test_omatcopy_ct_rvv" ]; then
echo "运行 RVV 优化版本测试:"
echo "----------------------------------------"
./test_omatcopy_ct_rvv
# 如果是交叉编译,提示用户需要在目标平台运行
if [[ "$CC" == *"riscv64"* ]]; then
echo "⚠ 检测到交叉编译环境,生成的可执行文件需要在 RISC-V 平台上运行"
echo "请将以下文件传输到目标 RISC-V 系统:"
echo " - test_omatcopy_ct_scalar (标量版本)"
if [ -f "test_omatcopy_ct_rvv" ]; then
echo " - test_omatcopy_ct_rvv (RVV优化版本)"
fi
echo
echo "在目标系统上运行:"
echo " ./test_omatcopy_ct_scalar # 运行标量版本"
if [ -f "test_omatcopy_ct_rvv" ]; then
echo " ./test_omatcopy_ct_rvv # 运行RVV版本"
fi
else
if [ -f "test_omatcopy_ct_rvv" ]; then
echo "运行 RVV 优化版本测试:"
echo "----------------------------------------"
./test_omatcopy_ct_rvv
echo
fi
echo "运行标量版本测试:"
echo "----------------------------------------"
./test_omatcopy_ct_scalar
fi
echo "运行标量版本测试:"
echo "----------------------------------------"
./test_omatcopy_ct_scalar
echo
echo "=== 测试完成 ==="
echo "文件说明:"
@@ -81,4 +111,14 @@ if [ -f "test_omatcopy_ct_rvv" ]; then
echo " test_omatcopy_ct_rvv - RVV向量化版本"
fi
echo " test_omatcopy_ct.c - 源代码文件"
echo " build_and_test.sh - 本编译脚本"
echo " build_and_test.sh - 本编译脚本"
echo
echo "编译器信息:"
echo " 使用编译器: $CC"
if [[ "$CC" == *"riscv64"* ]]; then
echo " 目标架构: RISC-V 64位"
echo " 编译模式: 交叉编译 (静态链接)"
else
echo " 目标架构: 本机架构"
echo " 编译模式: 本地编译"
fi

View File

@@ -266,3 +266,6 @@ ifndef SHGEMM_BETA
SHGEMM_BETA = gemm_beta_rvv.c
endif
endif
DOMATCOPY_CT = omatcopy_ct_rvv.c
SOMATCOPY_CT = omatcopy_ct_rvv.c

View File

@@ -219,6 +219,9 @@ COMATCOPY_CN = zomatcopy_cn_vector.c
DOMATCOPY_CN = omatcopy_cn_vector.c
SOMATCOPY_CN = omatcopy_cn_vector.c
DOMATCOPY_CT = omatcopy_ct_rvv.c
SOMATCOPY_CT = omatcopy_ct_rvv.c
ifeq ($(BUILD_BFLOAT16), 1)
SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl256b.c

View File

@@ -0,0 +1,118 @@
/***************************************************************************
Copyright (c) 2013, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/
#include "common.h"
#include <stdio.h>
#if !defined(DOUBLE)
#define VSETVL_MAX __riscv_vsetvlmax_e32m8()
#define VSETVL(n) __riscv_vsetvl_e32m8(n)
#define FLOAT_V_T vfloat32m8_t
#define VLEV_FLOAT __riscv_vle32_v_f32m8
#define VSEV_FLOAT __riscv_vse32_v_f32m8
#define VLSEV_FLOAT __riscv_vlse32_v_f32m8
#define VSSEV_FLOAT __riscv_vsse32_v_f32m8
#define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m8
#else
#define VSETVL_MAX __riscv_vsetvlmax_e64m8()
#define VSETVL(n) __riscv_vsetvl_e64m8(n)
#define FLOAT_V_T vfloat64m8_t
#define VLEV_FLOAT __riscv_vle64_v_f64m8
#define VSEV_FLOAT __riscv_vse64_v_f64m8
#define VLSEV_FLOAT __riscv_vlse64_v_f64m8
#define VSSEV_FLOAT __riscv_vsse64_v_f64m8
#define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8
#define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m8
#endif
/*****************************************************
 * omatcopy kernel: column-major order, transposed,
 * implemented with RISC-V Vector (RVV) intrinsics.
 *
 * Computes B := alpha * A^T.
 *
 * A is read as `cols` columns of `rows` contiguous
 * elements; consecutive columns are `lda` elements
 * apart. Element j of column i of A is written to
 * b[i + j * ldb], so every contiguous chunk loaded
 * from A is scattered into B with a strided store
 * whose byte stride is sizeof(FLOAT) * ldb.
 *
 * Parameters:
 *   rows, cols - extent of A; if either is <= 0 the
 *                function returns without writing.
 *   alpha      - scale factor. alpha == 0 fills the
 *                destination with zeros and never
 *                reads A; alpha == 1 copies without
 *                a multiply.
 *   a, lda     - source matrix and the element
 *                distance between its columns.
 *   b, ldb     - destination matrix and the element
 *                distance between its columns.
 *
 * Returns 0 in every case.
 ******************************************************/
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
{
    BLASLONG i, j;
    FLOAT *aptr, *bptr;
    size_t vl;
    FLOAT_V_T va;

    if (rows <= 0) return(0);
    if (cols <= 0) return(0);

    aptr = a;

    if (alpha == 0.0)
    {
        /* Build the zero vector once at the maximum vector length; each
           store below uses only its first vl elements. */
        vl = VSETVL_MAX;
        va = VFMVVF_FLOAT(0, vl);

        for (i = 0; i < cols; i++)
        {
            bptr = &b[i];
            /* vl is assigned inside the body before the `j += vl` step
               runs, so the step always uses the current chunk length. */
            for (j = 0; j < rows; j += vl)
            {
                vl = VSETVL(rows - j);
                VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
            }
        }
        return(0);
    }

    if (alpha == 1.0)
    {
        /* Plain transposed copy: unit-stride load from the current column
           of A, strided store into B. */
        for (i = 0; i < cols; i++)
        {
            bptr = &b[i];
            for (j = 0; j < rows; j += vl)
            {
                vl = VSETVL(rows - j);
                va = VLEV_FLOAT(aptr + j, vl);
                VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
            }
            aptr += lda;  /* advance to the next column of A */
        }
        return(0);
    }

    /* General case: scale each loaded chunk by alpha before the store. */
    for (i = 0; i < cols; i++)
    {
        bptr = &b[i];
        for (j = 0; j < rows; j += vl)
        {
            vl = VSETVL(rows - j);
            va = VLEV_FLOAT(aptr + j, vl);
            va = VFMULVF_FLOAT(va, alpha, vl);
            VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
        }
        aptr += lda;  /* advance to the next column of A */
    }

    return(0);
}

BIN
test_omatcopy_ct Executable file

Binary file not shown.

BIN
test_omatcopy_ct_rvv Executable file

Binary file not shown.

BIN
test_omatcopy_ct_scalar Executable file

Binary file not shown.