mirror of
https://github.com/OpenMathLib/OpenBLAS
synced 2026-06-08 01:15:39 +08:00
Add OMATCOPY_CT performance test with RVV optimization
Co-authored-by: gong-flying <gongxiaofei24@iscas.ac.cn>
This commit is contained in:
@@ -8,19 +8,29 @@ echo "适用于 SG2044 RISC-V 服务器"
|
||||
echo
|
||||
|
||||
# 检查编译器
|
||||
if ! command -v gcc &> /dev/null; then
|
||||
echo "错误: 未找到 GCC 编译器"
|
||||
if command -v riscv64-unknown-linux-gnu-gcc &> /dev/null; then
|
||||
CC="riscv64-unknown-linux-gnu-gcc"
|
||||
echo "使用 RISC-V 交叉编译器"
|
||||
elif command -v gcc &> /dev/null; then
|
||||
CC="gcc"
|
||||
echo "使用系统 GCC 编译器"
|
||||
else
|
||||
echo "错误: 未找到合适的编译器"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 显示 GCC 版本
|
||||
echo "GCC 版本:"
|
||||
gcc --version | head -1
|
||||
# 显示编译器版本
|
||||
echo "编译器版本:"
|
||||
$CC --version | head -1
|
||||
echo
|
||||
|
||||
# 编译标准版本(无RVV)
|
||||
echo "[1/3] 编译标准版本(标量优化)..."
|
||||
gcc -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar
|
||||
if [[ "$CC" == *"riscv64"* ]]; then
|
||||
$CC -O3 -march=rv64gc test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar -static
|
||||
else
|
||||
$CC -O3 test_omatcopy_ct.c -lm -o test_omatcopy_ct_scalar
|
||||
fi
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "✓ 标准版本编译成功: test_omatcopy_ct_scalar"
|
||||
else
|
||||
@@ -30,7 +40,11 @@ fi
|
||||
|
||||
# 编译RVV版本
|
||||
echo "[2/3] 编译RVV优化版本..."
|
||||
gcc -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv
|
||||
if [[ "$CC" == *"riscv64"* ]]; then
|
||||
$CC -O3 -march=rv64gcv -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv -static
|
||||
else
|
||||
$CC -O3 -DUSE_RVV test_omatcopy_ct.c -lm -o test_omatcopy_ct_rvv
|
||||
fi
|
||||
if [ $? -eq 0 ]; then
|
||||
echo "✓ RVV版本编译成功: test_omatcopy_ct_rvv"
|
||||
else
|
||||
@@ -62,17 +76,33 @@ echo
|
||||
echo "=== 开始性能测试 ==="
|
||||
echo
|
||||
|
||||
if [ -f "test_omatcopy_ct_rvv" ]; then
|
||||
echo "运行 RVV 优化版本测试:"
|
||||
echo "----------------------------------------"
|
||||
./test_omatcopy_ct_rvv
|
||||
# 如果是交叉编译,提示用户需要在目标平台运行
|
||||
if [[ "$CC" == *"riscv64"* ]]; then
|
||||
echo "⚠ 检测到交叉编译环境,生成的可执行文件需要在 RISC-V 平台上运行"
|
||||
echo "请将以下文件传输到目标 RISC-V 系统:"
|
||||
echo " - test_omatcopy_ct_scalar (标量版本)"
|
||||
if [ -f "test_omatcopy_ct_rvv" ]; then
|
||||
echo " - test_omatcopy_ct_rvv (RVV优化版本)"
|
||||
fi
|
||||
echo
|
||||
echo "在目标系统上运行:"
|
||||
echo " ./test_omatcopy_ct_scalar # 运行标量版本"
|
||||
if [ -f "test_omatcopy_ct_rvv" ]; then
|
||||
echo " ./test_omatcopy_ct_rvv # 运行RVV版本"
|
||||
fi
|
||||
else
|
||||
if [ -f "test_omatcopy_ct_rvv" ]; then
|
||||
echo "运行 RVV 优化版本测试:"
|
||||
echo "----------------------------------------"
|
||||
./test_omatcopy_ct_rvv
|
||||
echo
|
||||
fi
|
||||
|
||||
echo "运行标量版本测试:"
|
||||
echo "----------------------------------------"
|
||||
./test_omatcopy_ct_scalar
|
||||
fi
|
||||
|
||||
echo "运行标量版本测试:"
|
||||
echo "----------------------------------------"
|
||||
./test_omatcopy_ct_scalar
|
||||
|
||||
echo
|
||||
echo "=== 测试完成 ==="
|
||||
echo "文件说明:"
|
||||
@@ -81,4 +111,14 @@ if [ -f "test_omatcopy_ct_rvv" ]; then
|
||||
echo " test_omatcopy_ct_rvv - RVV向量化版本"
|
||||
fi
|
||||
echo " test_omatcopy_ct.c - 源代码文件"
|
||||
echo " build_and_test.sh - 本编译脚本"
|
||||
echo " build_and_test.sh - 本编译脚本"
|
||||
echo
|
||||
echo "编译器信息:"
|
||||
echo " 使用编译器: $CC"
|
||||
if [[ "$CC" == *"riscv64"* ]]; then
|
||||
echo " 目标架构: RISC-V 64位"
|
||||
echo " 编译模式: 交叉编译 (静态链接)"
|
||||
else
|
||||
echo " 目标架构: 本机架构"
|
||||
echo " 编译模式: 本地编译"
|
||||
fi
|
||||
@@ -266,3 +266,6 @@ ifndef SHGEMM_BETA
|
||||
SHGEMM_BETA = gemm_beta_rvv.c
|
||||
endif
|
||||
endif
|
||||
|
||||
DOMATCOPY_CT = omatcopy_ct_rvv.c
|
||||
SOMATCOPY_CT = omatcopy_ct_rvv.c
|
||||
|
||||
@@ -219,6 +219,9 @@ COMATCOPY_CN = zomatcopy_cn_vector.c
|
||||
DOMATCOPY_CN = omatcopy_cn_vector.c
|
||||
SOMATCOPY_CN = omatcopy_cn_vector.c
|
||||
|
||||
DOMATCOPY_CT = omatcopy_ct_rvv.c
|
||||
SOMATCOPY_CT = omatcopy_ct_rvv.c
|
||||
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16), 1)
|
||||
SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl256b.c
|
||||
|
||||
118
kernel/riscv64/omatcopy_ct_rvv.c
Normal file
118
kernel/riscv64/omatcopy_ct_rvv.c
Normal file
@@ -0,0 +1,118 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include <stdio.h>
|
||||
|
||||
/*
 * Precision-neutral aliases for the RVV intrinsics used in this file.
 * All aliases select the m8 register-group variants; the element width is
 * 64 bits when DOUBLE is defined and 32 bits otherwise.
 */
#if defined(DOUBLE)

/* Double precision: e64 / f64 intrinsics. */
#define VSETVL_MAX      __riscv_vsetvlmax_e64m8()
#define VSETVL(n)       __riscv_vsetvl_e64m8(n)
#define FLOAT_V_T       vfloat64m8_t
#define VLEV_FLOAT      __riscv_vle64_v_f64m8
#define VSEV_FLOAT      __riscv_vse64_v_f64m8
#define VLSEV_FLOAT     __riscv_vlse64_v_f64m8
#define VSSEV_FLOAT     __riscv_vsse64_v_f64m8
#define VFMULVF_FLOAT   __riscv_vfmul_vf_f64m8
#define VFMVVF_FLOAT    __riscv_vfmv_v_f_f64m8

#else

/* Single precision: e32 / f32 intrinsics. */
#define VSETVL_MAX      __riscv_vsetvlmax_e32m8()
#define VSETVL(n)       __riscv_vsetvl_e32m8(n)
#define FLOAT_V_T       vfloat32m8_t
#define VLEV_FLOAT      __riscv_vle32_v_f32m8
#define VSEV_FLOAT      __riscv_vse32_v_f32m8
#define VLSEV_FLOAT     __riscv_vlse32_v_f32m8
#define VSSEV_FLOAT     __riscv_vsse32_v_f32m8
#define VFMULVF_FLOAT   __riscv_vfmul_vf_f32m8
#define VFMVVF_FLOAT    __riscv_vfmv_v_f_f32m8

#endif /* DOUBLE */
|
||||
|
||||
/*****************************************************
|
||||
* Order ColMajor
|
||||
* Trans with RVV optimization
|
||||
*
|
||||
******************************************************/
|
||||
|
||||
int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG ldb)
|
||||
{
|
||||
BLASLONG i, j;
|
||||
FLOAT *aptr, *bptr;
|
||||
size_t vl;
|
||||
FLOAT_V_T va, vb;
|
||||
|
||||
if (rows <= 0) return(0);
|
||||
if (cols <= 0) return(0);
|
||||
|
||||
aptr = a;
|
||||
|
||||
if (alpha == 0.0)
|
||||
{
|
||||
vl = VSETVL_MAX;
|
||||
va = VFMVVF_FLOAT(0, vl);
|
||||
for (i = 0; i < cols; i++)
|
||||
{
|
||||
bptr = &b[i];
|
||||
for (j = 0; j < rows; j += vl)
|
||||
{
|
||||
vl = VSETVL(rows - j);
|
||||
VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
|
||||
}
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
if (alpha == 1.0)
|
||||
{
|
||||
for (i = 0; i < cols; i++)
|
||||
{
|
||||
bptr = &b[i];
|
||||
for (j = 0; j < rows; j += vl)
|
||||
{
|
||||
vl = VSETVL(rows - j);
|
||||
va = VLEV_FLOAT(aptr + j, vl);
|
||||
VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
|
||||
}
|
||||
aptr += lda;
|
||||
}
|
||||
return(0);
|
||||
}
|
||||
|
||||
// General case with alpha scaling
|
||||
for (i = 0; i < cols; i++)
|
||||
{
|
||||
bptr = &b[i];
|
||||
for (j = 0; j < rows; j += vl)
|
||||
{
|
||||
vl = VSETVL(rows - j);
|
||||
va = VLEV_FLOAT(aptr + j, vl);
|
||||
va = VFMULVF_FLOAT(va, alpha, vl);
|
||||
VSSEV_FLOAT(bptr + j * ldb, sizeof(FLOAT) * ldb, va, vl);
|
||||
}
|
||||
aptr += lda;
|
||||
}
|
||||
|
||||
return(0);
|
||||
}
|
||||
BIN
test_omatcopy_ct
Executable file
BIN
test_omatcopy_ct
Executable file
Binary file not shown.
BIN
test_omatcopy_ct_rvv
Executable file
BIN
test_omatcopy_ct_rvv
Executable file
Binary file not shown.
BIN
test_omatcopy_ct_scalar
Executable file
BIN
test_omatcopy_ct_scalar
Executable file
Binary file not shown.
Reference in New Issue
Block a user