mirror of
https://github.com/OpenMathLib/OpenBLAS
synced 2026-05-31 00:45:48 +08:00
Merge pull request #5381 from Mousius/bgemv-infrastructure
Add infrastructure for BGEMV
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -81,7 +81,9 @@ test/ZBLAT2.SUMM
|
||||
test/ZBLAT3.SUMM
|
||||
test/ZBLAT3_3M.SUMM
|
||||
test/SHBLAT3.SUMM
|
||||
test/SBBLAT2.SUMM
|
||||
test/SBBLAT3.SUMM
|
||||
test/BBLAT2.SUMM
|
||||
test/BBLAT3.SUMM
|
||||
test/cblat1
|
||||
test/cblat2
|
||||
@@ -97,7 +99,9 @@ test/sblat3
|
||||
test/sblat3_3m
|
||||
test/test_shgemm
|
||||
test/test_sbgemm
|
||||
test/test_sbgemv
|
||||
test/test_bgemm
|
||||
test/test_bgemv
|
||||
test/zblat1
|
||||
test/zblat2
|
||||
test/zblat3
|
||||
|
||||
1
cblas.h
1
cblas.h
@@ -465,6 +465,7 @@ void cblas_sbdtobf16(OPENBLAS_CONST blasint n, OPENBLAS_CONST double *in, OPEN
|
||||
void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, float *out, OPENBLAS_CONST blasint incout);
|
||||
/* convert BFLOAT16 array to double array */
|
||||
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
|
||||
void cblas_bgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 beta, bfloat16 *y, OPENBLAS_CONST blasint incy);
|
||||
/* dot product of BFLOAT16 input arrays, and output as float */
|
||||
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
|
||||
void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
|
||||
|
||||
@@ -110,6 +110,7 @@ macro(SetDefaultL1)
|
||||
SetFallback(SROTMKERNEL rotm.S)
|
||||
SetFallback(DROTMKERNEL rotm.S)
|
||||
SetFallback(QROTMKERNEL rotm.S)
|
||||
SetFallback(BSCALKERNEL ../generic/scal.c)
|
||||
SetFallback(SSCALKERNEL scal.S)
|
||||
SetFallback(DSCALKERNEL scal.S)
|
||||
SetFallback(CSCALKERNEL zscal.S)
|
||||
@@ -169,6 +170,8 @@ if (BUILD_BFLOAT16)
|
||||
SetFallback(SHSWAPKERNEL ../arm/swap.c)
|
||||
SetFallback(TOBF16KERNEL ../x86_64/tobf16.c)
|
||||
SetFallback(BF16TOKERNEL ../x86_64/bf16to.c)
|
||||
SetFallback(BGEMVNKERNEL ../generic/gemv_n.c)
|
||||
SetFallback(BGEMVTKERNEL ../generic/gemv_t.c)
|
||||
SetFallback(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
|
||||
SetFallback(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
|
||||
endif ()
|
||||
@@ -221,6 +224,8 @@ macro(SetDefaultL2)
|
||||
SetFallback(XHEMV_V_KERNEL ../generic/zhemv_k.c)
|
||||
SetFallback(XHEMV_M_KERNEL ../generic/zhemv_k.c)
|
||||
if (BUILD_BFLOAT16)
|
||||
SetFallback(BGEMVNKERNEL ../generic/gemv_n.c)
|
||||
SetFallback(BGEMVTKERNEL ../generic/gemv_t.c)
|
||||
SetFallback(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
|
||||
SetFallback(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
|
||||
SetFallback(SHGERKERNEL ../generic/ger.c)
|
||||
|
||||
@@ -375,7 +375,7 @@ function(GenerateNamedObjects sources_in)
|
||||
if (NOT no_float_type)
|
||||
string(SUBSTRING ${float_type} 0 1 float_char)
|
||||
string(TOLOWER ${float_char} float_char)
|
||||
if (${float_type} STREQUAL "BFLOAT16" AND NOT "${defines_in}" MATCHES "BGEMM")
|
||||
if (${float_type} STREQUAL "BFLOAT16" AND NOT "${defines_in}" MATCHES "BGEM")
|
||||
set (float_char "sb")
|
||||
endif ()
|
||||
endif ()
|
||||
|
||||
@@ -30,6 +30,11 @@
|
||||
#define COMMON_B_H
|
||||
|
||||
#ifndef DYNAMIC_ARCH
|
||||
#define BGEMV_N_K bgemv_n
|
||||
#define BGEMV_T_K bgemv_t
|
||||
|
||||
#define BSCAL_K bscal_k
|
||||
|
||||
#define BGEMM_ONCOPY bgemm_oncopy
|
||||
#define BGEMM_OTCOPY bgemm_otcopy
|
||||
|
||||
@@ -45,6 +50,10 @@
|
||||
#define BGEMM_KERNEL bgemm_kernel
|
||||
|
||||
#else
|
||||
#define BGEMV_N_K gotoblas->bgemv_n
|
||||
#define BGEMV_T_K gotoblas->bgemv_t
|
||||
|
||||
#define BSCAL_K gotoblas->bscal_k
|
||||
|
||||
#define BGEMM_ONCOPY gotoblas->bgemm_oncopy
|
||||
#define BGEMM_OTCOPY gotoblas->bgemm_otcopy
|
||||
|
||||
@@ -60,6 +60,7 @@ double BLASFUNC(dsdot) (blasint *, float *, blasint *, float *, blasint *);
|
||||
double BLASFUNC(ddot) (blasint *, double *, blasint *, double *, blasint *);
|
||||
xdouble BLASFUNC(qdot) (blasint *, xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(bscal) (blasint *, bfloat16 *, bfloat16 *, blasint *);
|
||||
float BLASFUNC(sbdot) (blasint *, bfloat16 *, blasint *, bfloat16 *, blasint *);
|
||||
void BLASFUNC(sbstobf16) (blasint *, float *, blasint *, bfloat16 *, blasint *);
|
||||
void BLASFUNC(sbdtobf16) (blasint *, double *, blasint *, bfloat16 *, blasint *);
|
||||
@@ -256,6 +257,8 @@ void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
|
||||
xdouble *, blasint *, xdouble *, blasint *);
|
||||
|
||||
void BLASFUNC(bgemv)(char *, blasint *, blasint *, bfloat16 *, bfloat16 *, blasint *,
|
||||
bfloat16 *, blasint *, bfloat16 *, bfloat16 *, blasint *);
|
||||
void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *,
|
||||
bfloat16 *, blasint *, float *, float *, blasint *);
|
||||
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2025 The OpenBLAS Project. */
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
@@ -169,6 +170,9 @@ BLASLONG icmin_k(BLASLONG, float *, BLASLONG);
|
||||
BLASLONG izmin_k(BLASLONG, double *, BLASLONG);
|
||||
BLASLONG ixmin_k(BLASLONG, xdouble *, BLASLONG);
|
||||
|
||||
|
||||
int bscal_k(BLASLONG, BLASLONG, BLASLONG, bfloat16,
|
||||
bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
int sscal_k(BLASLONG, BLASLONG, BLASLONG, float,
|
||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int dscal_k(BLASLONG, BLASLONG, BLASLONG, double,
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2025 The OpenBLAS Project */
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
@@ -44,6 +45,11 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
int bgemv_n(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG);
|
||||
int bgemv_t(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG);
|
||||
int bgemv_thread_n(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG, int);
|
||||
int bgemv_thread_t(BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG, int);
|
||||
int sbgemv_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||
int sbgemv_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||
int sbgemv_thread_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
|
||||
|
||||
@@ -705,6 +705,11 @@
|
||||
|
||||
|
||||
#elif defined(BFLOAT16) && defined(BGEMM)
|
||||
#define SCAL_K BSCAL_K
|
||||
|
||||
#define GEMV_N BGEMV_N_K
|
||||
#define GEMV_T BGEMV_T_K
|
||||
|
||||
#define GEMM_BETA BGEMM_BETA
|
||||
#define GEMM_KERNEL_N BGEMM_KERNEL
|
||||
#define GEMM_KERNEL_L BGEMM_KERNEL
|
||||
@@ -754,8 +759,8 @@
|
||||
#define D_BF16_TO_K DBF16TOD_K
|
||||
#define S_TO_BF16_K SBSTOBF16_K
|
||||
#define S_BF16_TO_K SBF16TOS_K
|
||||
#define SBGEMV_N SBGEMV_N_K
|
||||
#define SBGEMV_T SBGEMV_T_K
|
||||
#define GEMV_N SBGEMV_N_K
|
||||
#define GEMV_T SBGEMV_T_K
|
||||
|
||||
#define AMAX_K SAMAX_K
|
||||
#define AMIN_K SAMIN_K
|
||||
@@ -773,8 +778,6 @@
|
||||
#define AXPYC_K SAXPYC_K
|
||||
#define AXPBY_K SAXPBY_K
|
||||
#define SCAL_K SSCAL_K
|
||||
#define GEMV_N SGEMV_N
|
||||
#define GEMV_T SGEMV_T
|
||||
#define SYMV_U SSYMV_U
|
||||
#define SYMV_L SSYMV_L
|
||||
#define GERU_K SGERU_K
|
||||
|
||||
@@ -98,10 +98,14 @@ int (*shgemm_otcopy )(BLASLONG, BLASLONG, hfloat16 *, BLASLONG, hfloat16 *);
|
||||
int (*sbrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float);
|
||||
int (*sbrotm_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
|
||||
int (*bscal_k) (BLASLONG, BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG);
|
||||
int (*sbaxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
|
||||
|
||||
int (*bgemv_n) (BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG);
|
||||
int (*bgemv_t) (BLASLONG, BLASLONG, bfloat16, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, bfloat16, bfloat16 *, BLASLONG);
|
||||
|
||||
int (*sbgemv_n) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||
int (*sbgemv_t) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
|
||||
int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
|
||||
|
||||
@@ -1,3 +1,31 @@
|
||||
###############################################################################
|
||||
# Copyright (c) 2025 The OpenBLAS Project
|
||||
# All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||
# its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###############################################################################
|
||||
|
||||
TOPDIR = ../..
|
||||
include ../../Makefile.system
|
||||
|
||||
@@ -423,6 +451,9 @@ XBLASOBJS += \
|
||||
xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
BBLASOBJS += \
|
||||
bgemv_thread_n$(TSUFFIX).$(SUFFIX) \
|
||||
bgemv_thread_t$(TSUFFIX).$(SUFFIX)
|
||||
SBBLASOBJS += \
|
||||
sbgemv_thread_n$(TSUFFIX).$(SUFFIX) \
|
||||
sbgemv_thread_t$(TSUFFIX).$(SUFFIX)
|
||||
@@ -3707,6 +3738,10 @@ xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
|
||||
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
bgemv_thread_n.$(SUFFIX) bgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h
|
||||
$(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
|
||||
bgemv_thread_t.$(SUFFIX) bgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h
|
||||
$(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
|
||||
sbgemv_thread_n.$(SUFFIX) sbgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
|
||||
sbgemv_thread_t.$(SUFFIX) sbgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2025 The OpenBLAS Project. */
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
@@ -41,21 +42,21 @@
|
||||
#include "common.h"
|
||||
|
||||
#ifndef TRANSA
|
||||
#define SBGEMV SBGEMV_N
|
||||
#define GEMV GEMV_N
|
||||
#else
|
||||
#define SBGEMV SBGEMV_T
|
||||
#define GEMV GEMV_T
|
||||
#endif
|
||||
|
||||
static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *dummy2, BLASLONG dummy3){
|
||||
|
||||
bfloat16 *a, *x;
|
||||
float *y;
|
||||
IFLOAT *a, *x;
|
||||
FLOAT *y;
|
||||
BLASLONG lda, incx, incy;
|
||||
BLASLONG m_from, m_to, n_from, n_to;
|
||||
|
||||
a = (bfloat16 *)args->a;
|
||||
x = (bfloat16 *)args->b;
|
||||
y = (float *)args->c;
|
||||
a = (IFLOAT *)args->a;
|
||||
x = (IFLOAT *)args->b;
|
||||
y = (FLOAT *)args->c;
|
||||
|
||||
lda = args->lda;
|
||||
incx = args->ldb;
|
||||
@@ -77,12 +78,12 @@ static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
|
||||
y += n_from * incy;
|
||||
#endif
|
||||
|
||||
SBGEMV(m_to - m_from, n_to - n_from, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy);
|
||||
GEMV(m_to - m_from, n_to - n_from, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int CNAME(BLASLONG m, BLASLONG n, float alpha, bfloat16 *a, BLASLONG lda, bfloat16 *x, BLASLONG incx, float beta, float *y, BLASLONG incy, int threads)
|
||||
int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG incx, FLOAT beta, FLOAT *y, BLASLONG incy, int threads)
|
||||
{
|
||||
blas_arg_t args;
|
||||
blas_queue_t queue[MAX_CPU_NUMBER];
|
||||
|
||||
@@ -1,5 +1,33 @@
|
||||
#!/bin/sh
|
||||
|
||||
###############################################################################
|
||||
# Copyright (c) 2025, The OpenBLAS Project
|
||||
# All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||
# its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###############################################################################
|
||||
|
||||
# Changelog
|
||||
# 2017/09/03 staticfloat
|
||||
# Added zsymv and csymv into @lapackobjs2 so they are properly renamed
|
||||
@@ -51,7 +79,7 @@ blasobjsz="
|
||||
zgeadd dzsum zgemmt zgemmtr"
|
||||
|
||||
blasobjs="lsame xerbla"
|
||||
bfblasobjs="bgemm sbgemm sbgemmt sbgemmtr sbgemv sbdot sbstobf16 sbdtobf16 sbf16tos dbf16tod"
|
||||
bfblasobjs="bgemm bgemv sbgemm sbgemmt sbgemmtr sbgemv sbdot sbstobf16 sbdtobf16 sbf16tos dbf16tod"
|
||||
hfblasobjs="shgemm"
|
||||
cblasobjsc="
|
||||
cblas_caxpy cblas_ccopy cblas_cdotc cblas_cdotu cblas_cgbmv cblas_cgemm cblas_cgemv
|
||||
|
||||
@@ -1,5 +1,33 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
###############################################################################
|
||||
# Copyright (c) 2025, The OpenBLAS Project
|
||||
# All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||
# its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###############################################################################
|
||||
|
||||
# Changelog
|
||||
# 2017/09/03 staticfloat
|
||||
# Added zsymv and csymv into @lapackobjs2 so they are properly renamed
|
||||
@@ -51,7 +79,7 @@
|
||||
zgeadd, dzsum, zgemmt,zgemmtr);
|
||||
|
||||
@blasobjs = (lsame, xerbla);
|
||||
@bfblasobjs = (bgemm, sbgemm, sbgemmt, sbgemmtr, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
|
||||
@bfblasobjs = (bgemm, bgemv, sbgemm, sbgemmt, sbgemmtr, sbgemv, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
|
||||
@hfblasobjs = (shgemm);
|
||||
@cblasobjsc = (
|
||||
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
|
||||
|
||||
@@ -150,11 +150,13 @@ endif ()
|
||||
GenerateNamedObjects("imax.c" "USE_MIN" "i*min" ${CBLAS_FLAG})
|
||||
|
||||
if (BUILD_BFLOAT16)
|
||||
GenerateNamedObjects("scal.c" "BGEMM" "bscal" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("bf16dot.c" "" "sbdot" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("gemm.c" "BGEMM" "bgemm" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("gemm.c" "" "sbgemm" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("sbgemmt.c" "" "sbgemmt" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("sbgemmt.c" "RNAME" "sbgemmtr" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("sbgemv.c" "BGEMM" "bgemv" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("sbgemv.c" "" "sbgemv" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("tobf16.c" "SINGLE_PREC" "sbstobf16" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
GenerateNamedObjects("tobf16.c" "DOUBLE_PREC" "sbdtobf16" ${CBLAS_FLAG} "" "" true "BFLOAT16")
|
||||
|
||||
@@ -75,7 +75,9 @@ SBLAS3OBJS = \
|
||||
sgeadd.$(SUFFIX) sgemmt.$(SUFFIX) sgemmtr.$(SUFFIX)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
BBLAS3OBJ = bgemm.$(SUFFIX)
|
||||
BBLAS3OBJS = bgemm.$(SUFFIX)
|
||||
BBLAS2OBJS = bgemv.$(SUFFIX)
|
||||
BBLAS1OBJS = bscal.$(SUFFIX)
|
||||
SBBLAS1OBJS = sbdot.$(SUFFIX)
|
||||
SBBLAS2OBJS = sbgemv.$(SUFFIX)
|
||||
SBBLAS3OBJS = sbgemm.$(SUFFIX) sbgemmt.$(SUFFIX) sbgemmtr.$(SUFFIX)
|
||||
@@ -319,6 +321,8 @@ CSBLAS3OBJS = \
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
CBBLAS3OBJS = cblas_bgemm.$(SUFFIX)
|
||||
CBBLAS2OBJS = cblas_bgemv.$(SUFFIX)
|
||||
CBBLAS1OBJS = cblas_bscal.$(SUFFIX)
|
||||
CSBBLAS1OBJS = cblas_sbdot.$(SUFFIX)
|
||||
CSBBLAS2OBJS = cblas_sbgemv.$(SUFFIX)
|
||||
CSBBLAS3OBJS = cblas_sbgemm.$(SUFFIX) cblas_sbgemmt.$(SUFFIX) cblas_sbgemmtr.$(SUFFIX) cblas_sbgemm_batch.$(SUFFIX)
|
||||
@@ -423,7 +427,9 @@ override CFLAGS += -I.
|
||||
SBLAS1OBJS += $(CSBLAS1OBJS)
|
||||
SBLAS2OBJS += $(CSBLAS2OBJS)
|
||||
SBLAS3OBJS += $(CSBLAS3OBJS)
|
||||
BBLAS3OBJ += $(CBBLAS3OBJS)
|
||||
BBLAS3OBJS += $(CBBLAS3OBJS)
|
||||
BBLAS2OBJS += $(CBBLAS2OBJS)
|
||||
BBLAS1OBJS += $(CBBLAS1OBJS)
|
||||
SBBLAS1OBJS += $(CSBBLAS1OBJS)
|
||||
SBBLAS2OBJS += $(CSBBLAS2OBJS)
|
||||
SBBLAS3OBJS += $(CSBBLAS3OBJS)
|
||||
@@ -443,7 +449,7 @@ SBEXTOBJS += $(CSBEXTOBJS)
|
||||
CBAUXOBJS += $(CXERBLAOBJ)
|
||||
endif
|
||||
|
||||
BBLASOBJS = $(BBLAS3OBJ)
|
||||
BBLASOBJS = $(BBLAS3OBJS) $(BBLAS2OBJS) $(BBLAS1OBJS)
|
||||
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS)
|
||||
SBBLASOBJS = $(SBBLAS1OBJS) $(SBBLAS2OBJS) $(SBBLAS3OBJS)
|
||||
SHBLASOBJS = $(SHBLAS3OBJS)
|
||||
@@ -589,7 +595,7 @@ clean ::
|
||||
level1 : $(SBEXTOBJS) $(SBBLAS1OBJS) $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
||||
level2 : $(SBBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
|
||||
level2 : $(SBBLAS2OBJS) $(BBLAS2OBJS) $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS)
|
||||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^
|
||||
|
||||
level3 : $(SBBLAS3OBJS) $(BBLAS3OBJ) $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(SHBLAS3OBJS)
|
||||
@@ -824,6 +830,8 @@ dsdot.$(SUFFIX) dsdot.$(PSUFFIX) : dsdot.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
bscal.$(SUFFIX) bscal.$(PSUFFIX) : scal.c
|
||||
$(CC) $(CFLAGS) -DBGEMM -c $< -o $(@F)
|
||||
sbdot.$(SUFFIX) sbdot.$(PSUFFIX) : bf16dot.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
sbstobf16.$(SUFFIX) sbstobf16.$(PSUFFIX) : tobf16.c
|
||||
@@ -981,6 +989,8 @@ xgerc.$(SUFFIX) xgerc.$(PSUFFIX) : zger.c
|
||||
$(CC) -c $(CFLAGS) -DCONJ $< -o $(@F)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
bgemv.$(SUFFIX) bgemv.$(PSUFFIX) : sbgemv.c
|
||||
$(CC) $(CFLAGS) -DBGEMM -c $< -o $(@F)
|
||||
sbgemv.$(SUFFIX) sbgemv.$(PSUFFIX) : sbgemv.c
|
||||
$(CC) $(CFLAGS) -c $< -o $(@F)
|
||||
endif
|
||||
@@ -1653,6 +1663,8 @@ cblas_dsdot.$(SUFFIX) cblas_dsdot.$(PSUFFIX) : dsdot.c
|
||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
cblas_bscal.$(SUFFIX) cblas_bscal.$(PSUFFIX) : scal.c
|
||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||
cblas_sbdot.$(SUFFIX) cblas_sbdot.$(PSUFFIX) : bf16dot.c
|
||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||
cblas_sbstobf16.$(SUFFIX) cblas_sbstobf16.$(PSUFFIX) : tobf16.c
|
||||
@@ -1807,6 +1819,8 @@ cblas_zdrot.$(SUFFIX) cblas_zdrot.$(PSUFFIX) : zrot.c
|
||||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
cblas_bgemv.$(SUFFIX) cblas_bgemv.$(PSUFFIX) : sbgemv.c
|
||||
$(CC) -DCBLAS -DBGEMM -c $(CFLAGS) $< -o $(@F)
|
||||
cblas_sbgemv.$(SUFFIX) cblas_sbgemv.$(PSUFFIX) : sbgemv.c
|
||||
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
|
||||
endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2024, The OpenBLAS Project. */
|
||||
/* Copyright 2024-2025 The OpenBLAS Project. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
/* Redistribution and use in source and binary forms, with or */
|
||||
@@ -305,7 +305,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
|
||||
#endif
|
||||
int (*gemv[]) (BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG,
|
||||
IFLOAT *, BLASLONG, FLOAT, FLOAT *, BLASLONG) = {
|
||||
SBGEMV_N, SBGEMV_T,};
|
||||
GEMV_N, GEMV_T,};
|
||||
|
||||
|
||||
if (m == 0)
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2025 The OpenBLAS Project. */
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
@@ -43,17 +44,25 @@
|
||||
#include "functable.h"
|
||||
#endif
|
||||
|
||||
#ifdef BGEMM
|
||||
#define GEMV_THREAD_N bgemv_thread_n
|
||||
#define GEMV_THREAD_T bgemv_thread_t
|
||||
#define ERROR_NAME "BGEMV "
|
||||
#else
|
||||
#define GEMV_THREAD_N sbgemv_thread_n
|
||||
#define GEMV_THREAD_T sbgemv_thread_t
|
||||
#define ERROR_NAME "SBGEMV "
|
||||
#endif
|
||||
|
||||
#ifdef SMP
|
||||
static int (*sbgemv_thread[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG, int) = {
|
||||
sbgemv_thread_n, sbgemv_thread_t,
|
||||
static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG, IFLOAT * , BLASLONG, FLOAT, FLOAT *, BLASLONG, int) = {
|
||||
GEMV_THREAD_N, GEMV_THREAD_T,
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifndef CBLAS
|
||||
|
||||
void NAME(char *TRANS, blasint *M, blasint *N, float *ALPHA, bfloat16 *a, blasint *LDA, bfloat16 *x, blasint *INCX, float *BETA, float *y, blasint *INCY)
|
||||
void NAME(char *TRANS, blasint *M, blasint *N, FLOAT *ALPHA, IFLOAT *a, blasint *LDA, IFLOAT *x, blasint *INCX, FLOAT *BETA, FLOAT *y, blasint *INCY)
|
||||
{
|
||||
char trans = *TRANS;
|
||||
blasint m = *M;
|
||||
@@ -61,14 +70,14 @@ void NAME(char *TRANS, blasint *M, blasint *N, float *ALPHA, bfloat16 *a, blasin
|
||||
blasint lda = *LDA;
|
||||
blasint incx = *INCX;
|
||||
blasint incy = *INCY;
|
||||
float alpha = *ALPHA;
|
||||
float beta = *BETA;
|
||||
FLOAT alpha = *ALPHA;
|
||||
FLOAT beta = *BETA;
|
||||
#ifdef SMP
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
int (*sbgemv[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG) = {
|
||||
SBGEMV_N, SBGEMV_T,
|
||||
int (*gemv[])(BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG, IFLOAT * , BLASLONG, FLOAT, FLOAT *, BLASLONG) = {
|
||||
GEMV_N, GEMV_T,
|
||||
};
|
||||
|
||||
blasint info;
|
||||
@@ -104,7 +113,7 @@ void NAME(char *TRANS, blasint *M, blasint *N, float *ALPHA, bfloat16 *a, blasin
|
||||
|
||||
#else
|
||||
|
||||
void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasint n, float alpha, bfloat16 *a, blasint lda, bfloat16 *x, blasint incx, float beta, float *y, blasint incy)
|
||||
void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasint n, FLOAT alpha, IFLOAT *a, blasint lda, IFLOAT *x, blasint incx, FLOAT beta, FLOAT *y, blasint incy)
|
||||
{
|
||||
blasint lenx, leny;
|
||||
int trans;
|
||||
@@ -113,8 +122,8 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasi
|
||||
int nthreads;
|
||||
#endif
|
||||
|
||||
int (*sbgemv[])(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 * , BLASLONG, float, float *, BLASLONG) = {
|
||||
SBGEMV_N, SBGEMV_T,
|
||||
int (*gemv[])(BLASLONG, BLASLONG, FLOAT, IFLOAT *, BLASLONG, IFLOAT * , BLASLONG, FLOAT, FLOAT *, BLASLONG) = {
|
||||
GEMV_N, GEMV_T,
|
||||
};
|
||||
|
||||
PRINT_DEBUG_CNAME;
|
||||
@@ -166,8 +175,17 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasi
|
||||
leny = m;
|
||||
}
|
||||
|
||||
if (alpha == ZERO) {
|
||||
if (beta != ONE) SCAL_K(leny, 0, 0, beta, y, blasabs(incy), NULL, 0, NULL, 0);
|
||||
#ifdef BGEMM
|
||||
float alpha_float, beta_float;
|
||||
SBF16TOS_K(1, &alpha, 1, &alpha_float, 1);
|
||||
SBF16TOS_K(1, &beta, 1, &beta_float, 1);
|
||||
#else
|
||||
float alpha_float = alpha;
|
||||
float beta_float = beta;
|
||||
#endif
|
||||
|
||||
if (alpha_float == ZERO) {
|
||||
if (beta_float != ONE) SCAL_K(leny, 0, 0, beta, y, blasabs(incy), NULL, 0, NULL, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -185,10 +203,10 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint m, blasi
|
||||
|
||||
if (nthreads == 1) {
|
||||
#endif
|
||||
(sbgemv[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy);
|
||||
(gemv[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy);
|
||||
#ifdef SMP
|
||||
} else {
|
||||
(sbgemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy, nthreads);
|
||||
(gemv_thread[(int)trans])(m, n, alpha, a, lda, x, incx, beta, y, incy, nthreads);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2025 The OpenBLAS Project. */
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
@@ -68,7 +69,14 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx){
|
||||
|
||||
if (incx <= 0 || n <= 0) return;
|
||||
|
||||
if (alpha == ONE) return;
|
||||
#ifdef BGEMM
|
||||
float alpha_float;
|
||||
SBF16TOS_K(1, &alpha, 1, &alpha_float, 1);
|
||||
#else
|
||||
float alpha_float = alpha;
|
||||
#endif
|
||||
|
||||
if (alpha_float == ONE) return;
|
||||
|
||||
IDEBUG_START;
|
||||
|
||||
|
||||
@@ -121,6 +121,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||
|
||||
# bfloat16 kernels: bscal, sbdot, bf16 conversions
|
||||
if (BUILD_BFLOAT16)
|
||||
GenerateNamedObjects("${KERNELDIR}/${BSCALKERNEL}" "BGEMM" "scal_k" false "" "" false "BFLOAT16")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SBDOTKERNEL}" "SBDOT" "dot_k" false "" "" false "BFLOAT16")
|
||||
GenerateNamedObjects("${KERNELDIR}/${BF16TOKERNEL}" "SINGLE" "f16tos_k" false "" "" false "BFLOAT16")
|
||||
GenerateNamedObjects("${KERNELDIR}/${BF16TOKERNEL}" "DOUBLE" "bf16tod_k" false "" "" false "DOUBLE")
|
||||
@@ -222,6 +223,8 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
|
||||
GenerateNamedObjects("${KERNELDIR}/${SGEMVTKERNEL}" "TRANS" "gemv_t" false "" "" false "SINGLE")
|
||||
endif ()
|
||||
if (BUILD_BFLOAT16)
|
||||
GenerateNamedObjects("${KERNELDIR}/${BGEMVNKERNEL}" "BGEMM" "gemv_n" false "" "" false "BFLOAT16")
|
||||
GenerateNamedObjects("${KERNELDIR}/${BGEMVTKERNEL}" "BGEMM" "gemv_t" false "" "" false "BFLOAT16")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMVNKERNEL}" "" "gemv_n" false "" "" false "BFLOAT16")
|
||||
GenerateNamedObjects("${KERNELDIR}/${SBGEMVTKERNEL}" "" "gemv_t" false "" "" false "BFLOAT16")
|
||||
endif ()
|
||||
|
||||
@@ -1,3 +1,31 @@
|
||||
###############################################################################
|
||||
# Copyright (c) 2025 The OpenBLAS Project
|
||||
# All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||
# its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###############################################################################
|
||||
|
||||
FMAFLAG=
|
||||
ifndef OLDGCC
|
||||
ifdef HAVE_FMA3
|
||||
@@ -271,6 +299,10 @@ XDOTKERNEL = zdot.S
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
ifndef BSCALKERNEL
|
||||
BSCALKERNEL = ../generic/scal.c
|
||||
endif
|
||||
|
||||
ifndef SBDOTKERNEL
|
||||
SBDOTKERNEL = ../x86_64/sbdot.c
|
||||
endif
|
||||
@@ -551,6 +583,8 @@ XBLASOBJS += \
|
||||
xscal_k$(TSUFFIX).$(SUFFIX) xswap_k$(TSUFFIX).$(SUFFIX) xsum_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
BBLASOBJS += \
|
||||
bscal_k$(TSUFFIX).$(SUFFIX)
|
||||
SBBLASOBJS += \
|
||||
sbdot_k$(TSUFFIX).$(SUFFIX)
|
||||
SBEXTOBJS += \
|
||||
@@ -778,6 +812,8 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
$(KDIR)bscal_k$(TSUFFIX).$(SUFFIX) $(KDIR)bscal_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(BSCALKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@
|
||||
$(KDIR)sbdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sbdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBDOTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@
|
||||
$(KDIR)sbstobf16_k$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(TOBF16KERNEL)
|
||||
|
||||
@@ -1,3 +1,31 @@
|
||||
###############################################################################
|
||||
# Copyright (c) 2025 The OpenBLAS Project
|
||||
# All rights reserved.
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in
|
||||
# the documentation and/or other materials provided with the
|
||||
# distribution.
|
||||
# 3. Neither the name of the OpenBLAS project nor the names of
|
||||
# its contributors may be used to endorse or promote products
|
||||
# derived from this software without specific prior written permission.
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
###############################################################################
|
||||
|
||||
FMAFLAG=
|
||||
ifndef OLDGCC
|
||||
ifdef HAVE_FMA3
|
||||
@@ -56,6 +84,14 @@ XGEMVTKERNEL = zgemv_t.S
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
ifndef BGEMVNKERNEL
|
||||
BGEMVNKERNEL = ../generic/gemv_n.c
|
||||
endif
|
||||
|
||||
ifndef BGEMVTKERNEL
|
||||
BGEMVTKERNEL = ../generic/gemv_t.c
|
||||
endif
|
||||
|
||||
ifndef SBGEMVNKERNEL
|
||||
SBGEMVNKERNEL = ../x86_64/sbgemv_n.c
|
||||
endif
|
||||
@@ -255,6 +291,9 @@ XBLASOBJS += \
|
||||
xgeru_k$(TSUFFIX).$(SUFFIX) xgerc_k$(TSUFFIX).$(SUFFIX) xgerv_k$(TSUFFIX).$(SUFFIX) xgerd_k$(TSUFFIX).$(SUFFIX)
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
BBLASOBJS += \
|
||||
bgemv_n$(TSUFFIX).$(SUFFIX) \
|
||||
bgemv_t$(TSUFFIX).$(SUFFIX)
|
||||
SBBLASOBJS += \
|
||||
sbgemv_n$(TSUFFIX).$(SUFFIX) \
|
||||
sbgemv_t$(TSUFFIX).$(SUFFIX)
|
||||
@@ -513,5 +552,9 @@ $(KDIR)sbgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)sbgemv_n$(TPSUFFIX).$(PSUFFIX) : $(KE
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@
|
||||
$(KDIR)sbgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)sbgemv_t$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMVTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -UCOMPLEX $< -o $@
|
||||
$(KDIR)bgemv_n$(TSUFFIX).$(SUFFIX) $(KDIR)bgemv_n$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(BGEMVNKERNEL)
|
||||
$(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX $< -o $@
|
||||
$(KDIR)bgemv_t$(TSUFFIX).$(SUFFIX) $(KDIR)bgemv_t$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(BGEMVTKERNEL)
|
||||
$(CC) -c $(CFLAGS) -DBGEMM -UCOMPLEX $< -o $@
|
||||
endif
|
||||
|
||||
|
||||
64
kernel/generic/bf16_macros.h
Normal file
64
kernel/generic/bf16_macros.h
Normal file
@@ -0,0 +1,64 @@
|
||||
/***************************************************************************
|
||||
* Copyright (c) 2025, The OpenBLAS Project
|
||||
* All rights reserved.
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* 3. Neither the name of the OpenBLAS project nor the names of
|
||||
* its contributors may be used to endorse or promote products
|
||||
* derived from this software without specific prior written permission.
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
* POSSIBILITY OF SUCH DAMAGE.
|
||||
* *****************************************************************************/
|
||||
|
||||
/*
 * Shared helpers for kernels that may operate on bfloat16 data.
 *
 * Depending on the build flags this header provides:
 *   ALPHA / BETA   - the scalar arguments `alpha` / `beta` of the including
 *                    kernel, widened to float when they are stored as bfloat16
 *                    (BGEMM builds with conversion enabled).
 *   BF16TOF32(x)   - widen an input element to float (identity when no
 *                    bfloat16 conversion is required).
 *   F32TOBF16(x)   - narrow a float result to the output element type
 *                    (identity unless the output is bfloat16, i.e. BGEMM).
 *
 * The include guard matters because the header defines static functions;
 * including it twice in one translation unit would otherwise redefine them.
 */
#ifndef BF16_MACROS_H
#define BF16_MACROS_H

#if defined(BFLOAT16) && defined(BFLOAT16CONVERSION)

/* Convert a single bfloat16 value to float through sbf16tos_. */
static float
bfloat16tof32 (bfloat16 value)
{
  blasint one = 1;
  float result;
  sbf16tos_(&one, &value, &one, &result, &one);
  return result;
}

#ifdef BGEMM

/* Convert a single float value to bfloat16 through sbstobf16_. */
static bfloat16 f32tobfloat16(float value) {
  blasint one = 1;
  bfloat16 result;
  sbstobf16_(&one, &value, &one, &result, &one);
  return result;
}

/* bfloat16 in, bfloat16 out: scalars and results need conversion too. */
#define ALPHA bfloat16tof32(alpha)
#define BETA bfloat16tof32(beta)
#define BF16TOF32(x) (bfloat16tof32(x))
#define F32TOBF16(x) (f32tobfloat16(x))

#else /* !BGEMM */

/* bfloat16 in, non-bfloat16 out: only the inputs are widened. */
#define ALPHA alpha
#define BETA beta
#define BF16TOF32(x) (bfloat16tof32(x))
#define F32TOBF16(x) (x)

#endif /* BGEMM */

#else /* no bfloat16 conversion requested */

#define ALPHA alpha
#define BETA beta
#define BF16TOF32(x) (x)
#define F32TOBF16(x) (x)

#endif /* BFLOAT16 && BFLOAT16CONVERSION */

#endif /* BF16_MACROS_H */
|
||||
@@ -27,39 +27,8 @@
|
||||
* *****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#if defined(BFLOAT16) && defined(BFLOAT16CONVERSION)
|
||||
static float
|
||||
bfloat16tof32 (bfloat16 value)
|
||||
{
|
||||
blasint one = 1;
|
||||
float result;
|
||||
sbf16tos_(&one, &value, &one, &result, &one);
|
||||
return result;
|
||||
}
|
||||
#include "bf16_macros.h"
|
||||
|
||||
#ifdef BGEMM
|
||||
static bfloat16 f32tobfloat16(float value) {
|
||||
blasint one = 1;
|
||||
bfloat16 result;
|
||||
sbstobf16_(&one, &value, &one, &result, &one);
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef BGEMM
|
||||
#define ALPHA bfloat16tof32(alpha)
|
||||
#define BF16TOF32(x) (bfloat16tof32(x))
|
||||
#define F32TOBF16(x) (f32tobfloat16(x))
|
||||
#else
|
||||
#define ALPHA alpha
|
||||
#define BF16TOF32(x) (bfloat16tof32(x))
|
||||
#define F32TOBF16(x) x
|
||||
#endif
|
||||
#else
|
||||
#define ALPHA alpha
|
||||
#define BF16TOF32(x) x
|
||||
#define F32TOBF16(x) x
|
||||
#endif
|
||||
int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,IFLOAT* ba,IFLOAT* bb,FLOAT* C,BLASLONG ldc
|
||||
#ifdef TRMMKERNEL
|
||||
,BLASLONG offset
|
||||
|
||||
70
kernel/generic/gemv_n.c
Normal file
70
kernel/generic/gemv_n.c
Normal file
@@ -0,0 +1,70 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013-2014, 2025 The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include "bf16_macros.h"
|
||||
|
||||
/*
 * Generic (reference) non-transposed matrix-vector product:
 *
 *     y := alpha * A * x + beta * y
 *
 * m, n      number of rows / columns of A; y has m elements, x has n.
 * alpha     scalar applied to the product A * x.
 * a, lda    matrix A; element (i, j) is read as a[i + j * lda].
 * x, inc_x  input vector and its element stride.
 * beta      scalar applied to the previous contents of y.
 * y, inc_y  in/out vector and its element stride.
 *
 * When beta is zero, y is overwritten without being read.  In BGEMM builds
 * the dot products are accumulated in float and narrowed on store.
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
{
  BLASLONG ix, iy;
  IFLOAT *a_ptr; /* walks the input matrix, so it must use the input type */
#ifdef BGEMM
  float temp, alpha_val, beta_val;
#else
  FLOAT temp, alpha_val, beta_val;
#endif

  /* The scalar conversions are loop-invariant; do them once. */
  alpha_val = ALPHA;
  beta_val = BETA;

  iy = 0;
  for (BLASLONG i = 0; i < m; i++)
  {
    temp = 0.0;

    /* Dot product of row i of A with x: step one column (lda) per j. */
    ix = 0;
    a_ptr = a;
    for (BLASLONG j = 0; j < n; j++)
    {
      temp += BF16TOF32(a_ptr[i]) * BF16TOF32(x[ix]);
      ix += inc_x;
      a_ptr += lda;
    }

    if (beta_val == ZERO)
    {
      y[iy] = F32TOBF16(alpha_val * temp);
    }
    else
    {
      y[iy] = F32TOBF16(alpha_val * temp + beta_val * BF16TOF32(y[iy]));
    }

    iy += inc_y;
  }

  return (0);
}
|
||||
60
kernel/generic/gemv_t.c
Normal file
60
kernel/generic/gemv_t.c
Normal file
@@ -0,0 +1,60 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, 2025 The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
#include "bf16_macros.h"
|
||||
|
||||
/*
 * Generic (reference) transposed matrix-vector product:
 *
 *     y := alpha * A^T * x + beta * y
 *
 * m, n      number of rows / columns of A; x has m elements, y has n.
 * alpha     scalar applied to the product A^T * x.
 * a, lda    matrix A; element (i, j) is read as a[i + j * lda].
 * x, inc_x  input vector and its element stride.
 * beta      scalar applied to the previous contents of y.
 * y, inc_y  in/out vector and its element stride.
 *
 * When beta is zero, y is overwritten without being read.  The update is
 * computed in the accumulator type and narrowed once on store: adding two
 * already-narrowed bfloat16 values with `+=` would add their raw bit
 * patterns, not their numeric values.
 * Always returns 0.
 */
int CNAME(BLASLONG m, BLASLONG n, FLOAT alpha, IFLOAT *a, BLASLONG lda, IFLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *y, BLASLONG inc_y)
{
  BLASLONG i, j;
  BLASLONG ix, iy;
  IFLOAT *a_ptr; /* walks the input matrix, so it must use the input type */
#ifdef BGEMM
  float temp;
#else
  FLOAT temp;
#endif

  iy = 0;
  a_ptr = a;

  for (j = 0; j < n; j++)
  {
    /* Dot product of column j of A (contiguous at a_ptr) with x. */
    temp = 0.0;
    ix = 0;
    for (i = 0; i < m; i++)
    {
      temp += BF16TOF32(a_ptr[i]) * BF16TOF32(x[ix]);
      ix += inc_x;
    }

    if (BETA == ZERO)
    {
      y[iy] = F32TOBF16(ALPHA * temp);
    }
    else
    {
      y[iy] = F32TOBF16(ALPHA * temp + BETA * BF16TOF32(y[iy]));
    }

    iy += inc_y;
    a_ptr += lda;
  }

  return (0);
}
|
||||
106
kernel/generic/scal.c
Normal file
106
kernel/generic/scal.c
Normal file
@@ -0,0 +1,106 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2013, 2025 The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
|
||||
#include "common.h"
|
||||
|
||||
/*
 * Generic (reference) vector scale: x[k * inc_x] := da * x[k * inc_x]
 * for k = 0 .. n-1.
 *
 * n        number of elements to scale; nothing is done when n <= 0.
 * da       scale factor (converted to float first in BFLOAT16 builds).
 * x, inc_x vector to scale in place and its stride; nothing is done when
 *          inc_x <= 0.
 * dummy2   selects the behaviour for a zero scale factor:
 *            == 0 : every element is set to zero without reading x;
 *            != 0 : finite elements become zero, NaN/Inf elements become NaN.
 * dummy0, dummy1, y, inc_y, dummy are not used by this kernel.
 *
 * Each element is widened to float, scaled, and narrowed again on store.
 * Always returns 0.
 */
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{
  BLASLONG i = 0, j = 0;
  float x_float, da_float;

#if defined(BFLOAT16)
  SBF16TOS_K(1, &da, 1, &da_float, 1);
#else
  da_float = da;
#endif

  if ((n <= 0) || (inc_x <= 0))
    return (0);

  if (dummy2 == 0)
  {
    while (j < n)
    {
      if (da_float == 0.0)
      {
        x_float = 0.0;
      }
      else
      {
        /* Load into the function-level x_float: a fresh declaration here
           would shadow it and the scaled value would never be stored. */
#if defined(BFLOAT16)
        SBF16TOS_K(1, &x[i], 1, &x_float, 1);
#else
        x_float = x[i];
#endif
        x_float = da_float * x_float;
      }

#if defined(BFLOAT16)
      SBSTOBF16_K(1, &x_float, 1, &x[i], 1);
#else
      x[i] = x_float;
#endif

      i += inc_x;
      j++;
    }
  }
  else
  {
    while (j < n)
    {
#if defined(BFLOAT16)
      SBF16TOS_K(1, &x[i], 1, &x_float, 1);
#else
      x_float = x[i];
#endif

      /* Test the converted scale factor, as the branch above does; the raw
         storage value of da is not a numeric zero test in BFLOAT16 builds. */
      if (da_float == 0.0)
      {
        if (!isnan(x_float) && !isinf(x_float))
        {
          x_float = 0.0;
        }
        else
        {
          x_float = NAN;
        }
      }
      else
      {
        x_float = da_float * x_float;
      }

#if defined(BFLOAT16)
      SBSTOBF16_K(1, &x_float, 1, &x[i], 1);
#else
      x[i] = x_float;
#endif

      i += inc_x;
      j++;
    }
  }

  return 0;
}
|
||||
@@ -83,8 +83,8 @@ gotoblas_t TABLE_NAME = {
|
||||
isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS,
|
||||
snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS,
|
||||
dsdot_kTS,
|
||||
srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
|
||||
sbgemv_nTS, sbgemv_tTS, sger_kTS,
|
||||
srot_kTS, srotm_kTS, bscal_kTS, saxpy_kTS, sscal_kTS, sswap_kTS,
|
||||
bgemv_nTS, bgemv_tTS, sbgemv_nTS, sbgemv_tTS, sger_kTS,
|
||||
ssymv_LTS, ssymv_UTS,
|
||||
|
||||
bgemm_kernelTS, bgemm_betaTS,
|
||||
|
||||
@@ -119,6 +119,10 @@ endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16), 1)
|
||||
BB2 = test_bgemv
|
||||
B2 = test_sbgemv
|
||||
endif
|
||||
ifeq ($(BUILD_SINGLE),1)
|
||||
S2=sblat2
|
||||
endif
|
||||
@@ -132,11 +136,17 @@ ifeq ($(BUILD_COMPLEX16),1)
|
||||
Z2=zblat2
|
||||
endif
|
||||
|
||||
level2: $(S2) $(D2) $(C2) $(Z2)
|
||||
level2: $(BB2) $(B2) $(S2) $(D2) $(C2) $(Z2)
|
||||
|
||||
|
||||
ifneq ($(CROSS), 1)
|
||||
rm -f ?BLAT2.SUMM
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_bgemv > BBLAT2.SUMM
|
||||
@$(GREP) -q FATAL BBLAT2.SUMM && cat BBLAT2.SUMM || exit 0
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./test_sbgemv > SBBLAT2.SUMM
|
||||
@$(GREP) -q FATAL SBBLAT2.SUMM && cat SBBLAT2.SUMM || exit 0
|
||||
endif
|
||||
ifeq ($(BUILD_SINGLE),1)
|
||||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 ./sblat2 < ./sblat2.dat
|
||||
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
|
||||
@@ -156,6 +166,12 @@ endif
|
||||
ifdef SMP
|
||||
rm -f ?BLAT2.SUMM
|
||||
ifeq ($(USE_OPENMP), 1)
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
OMP_NUM_THREADS=2 ./test_bgemv > BBLAT2.SUMM
|
||||
@$(GREP) -q FATAL BBLAT2.SUMM && cat BBLAT2.SUMM || exit 0
|
||||
OMP_NUM_THREADS=2 ./test_sbgemv > SBBLAT2.SUMM
|
||||
@$(GREP) -q FATAL SBBLAT2.SUMM && cat SBBLAT2.SUMM || exit 0
|
||||
endif
|
||||
ifeq ($(BUILD_SINGLE),1)
|
||||
OMP_NUM_THREADS=2 ./sblat2 < ./sblat2.dat
|
||||
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
|
||||
@@ -173,6 +189,12 @@ ifeq ($(BUILD_COMPLEX16),1)
|
||||
@$(GREP) -q FATAL ZBLAT2.SUMM && cat ZBLAT2.SUMM || exit 0
|
||||
endif
|
||||
else
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
OMP_NUM_THREADS=2 ./test_bgemv > BBLAT2.SUMM
|
||||
@$(GREP) -q FATAL BBLAT2.SUMM && cat BBLAT2.SUMM || exit 0
|
||||
OMP_NUM_THREADS=2 ./test_sbgemv > SBBLAT2.SUMM
|
||||
@$(GREP) -q FATAL SBBLAT2.SUMM && cat SBBLAT2.SUMM || exit 0
|
||||
endif
|
||||
ifeq ($(BUILD_SINGLE),1)
|
||||
OPENBLAS_NUM_THREADS=2 ./sblat2 < ./sblat2.dat
|
||||
@$(GREP) -q FATAL SBLAT2.SUMM && cat SBLAT2.SUMM || exit 0
|
||||
@@ -195,7 +217,7 @@ endif
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
BF3= test_bgemm
|
||||
B3= test_sbgemm
|
||||
B3 = test_sbgemm
|
||||
endif
|
||||
ifeq ($(BUILD_SINGLE),1)
|
||||
S3=sblat3
|
||||
@@ -404,10 +426,16 @@ endif
|
||||
|
||||
ifeq ($(BUILD_BFLOAT16),1)
|
||||
test_bgemm : compare_sgemm_bgemm.c test_helpers.h ../$(LIBNAME)
|
||||
$(CC) $(CLDFLAGS) -o test_bgemm compare_sgemm_bgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
|
||||
$(CC) $(CLDFLAGS) -DIBFLOAT16 -DOBFLOAT16 -o test_bgemm compare_sgemm_bgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
test_bgemv : compare_sgemv_bgemv.c ../$(LIBNAME)
|
||||
$(CC) $(CLDFLAGS) -DIBFLOAT16 -DOBFLOAT16 -o test_bgemv compare_sgemv_bgemv.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
test_sbgemm : compare_sgemm_sbgemm.c test_helpers.h ../$(LIBNAME)
|
||||
$(CC) $(CLDFLAGS) -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
|
||||
$(CC) $(CLDFLAGS) -DIBFLOAT16 -o test_sbgemm compare_sgemm_sbgemm.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
|
||||
|
||||
test_sbgemv : compare_sgemv_sbgemv.c ../$(LIBNAME)
|
||||
$(CC) $(CLDFLAGS) -DIBFLOAT16 -o test_sbgemv compare_sgemv_sbgemv.c ../$(LIBNAME) $(EXTRALIB) $(CEXTRALIB)
|
||||
endif
|
||||
|
||||
ifeq ($(BUILD_COMPLEX),1)
|
||||
@@ -426,7 +454,7 @@ clean:
|
||||
@rm -f *.$(SUFFIX) *.$(PSUFFIX) gmon.$(SUFFIX)ut *.SUMM *.cxml *.exe *.pdb *.dwf \
|
||||
sblat1 dblat1 cblat1 zblat1 \
|
||||
sblat2 dblat2 cblat2 zblat2 \
|
||||
test_bgemm test_sbgemm sblat3 dblat3 cblat3 zblat3 \
|
||||
test_bgemm test_bgemv test_sbgemm test_sbgemv sblat3 dblat3 cblat3 zblat3 \
|
||||
sblat1p dblat1p cblat1p zblat1p \
|
||||
sblat2p dblat2p cblat2p zblat2p \
|
||||
sblat3p dblat3p cblat3p zblat3p \
|
||||
|
||||
@@ -34,15 +34,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#define BGEMM BLASFUNC(bgemm)
|
||||
#define BGEMM_LARGEST 256
|
||||
|
||||
static float truncate_float32_to_bfloat16(float value) {
|
||||
blasint one = 1;
|
||||
bfloat16 tmp;
|
||||
float result;
|
||||
sbstobf16_(&one, &value, &one, &tmp, &one);
|
||||
sbf16tos_(&one, &tmp, &one, &result, &one);
|
||||
return result;
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
@@ -158,6 +149,7 @@ main (int argc, char *argv[])
|
||||
|
||||
if (ret != 0) {
|
||||
fprintf (stderr, "FATAL ERROR BGEMM - Return code: %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -141,87 +141,7 @@ main (int argc, char *argv[])
|
||||
|
||||
if (ret != 0) {
|
||||
fprintf (stderr, "FATAL ERROR SBGEMM - Return code: %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
for (beta = 0; beta < 3; beta += 1) {
|
||||
for (alpha = 0; alpha < 3; alpha += 1) {
|
||||
for (l = 0; l < 2; l++) { // l = 1 to test inc_x & inc_y not equal to one.
|
||||
for (x = 1; x <= loop; x++)
|
||||
{
|
||||
k = (x == 0) ? 0 : l + 1;
|
||||
float *A = (float *)malloc_safe(x * x * sizeof(FLOAT));
|
||||
float *B = (float *)malloc_safe(x * sizeof(FLOAT) << l);
|
||||
float *C = (float *)malloc_safe(x * sizeof(FLOAT) << l);
|
||||
bfloat16 *AA = (bfloat16 *)malloc_safe(x * x * sizeof(bfloat16));
|
||||
bfloat16 *BB = (bfloat16 *)malloc_safe(x * sizeof(bfloat16) << l);
|
||||
float *DD = (float *)malloc_safe(x * sizeof(FLOAT));
|
||||
float *CC = (float *)malloc_safe(x * sizeof(FLOAT) << l);
|
||||
if ((A == NULL) || (B == NULL) || (C == NULL) || (AA == NULL) || (BB == NULL) ||
|
||||
(DD == NULL) || (CC == NULL))
|
||||
return 1;
|
||||
blasint one = 1;
|
||||
|
||||
for (j = 0; j < x; j++)
|
||||
{
|
||||
for (i = 0; i < x; i++)
|
||||
{
|
||||
A[j * x + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||
sbstobf16_(&one, &A[j*x+i], &one, &AA[j * x + i], &one);
|
||||
}
|
||||
B[j << l] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||
sbstobf16_(&one, &B[j << l], &one, &BB[j << l], &one);
|
||||
|
||||
CC[j << l] = C[j << l] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||
}
|
||||
|
||||
for (y = 0; y < 2; y++)
|
||||
{
|
||||
if (y == 0) {
|
||||
transA = 'N';
|
||||
} else {
|
||||
transA = 'T';
|
||||
}
|
||||
|
||||
memset(CC, 0, x * sizeof(FLOAT) << l);
|
||||
memset(DD, 0, x * sizeof(FLOAT));
|
||||
memset(C, 0, x * sizeof(FLOAT) << l);
|
||||
|
||||
SGEMV (&transA, &x, &x, &alpha, A, &x, B, &k, &beta, C, &k);
|
||||
SBGEMV (&transA, &x, &x, &alpha, (bfloat16*) AA, &x, (bfloat16*) BB, &k, &beta, CC, &k);
|
||||
|
||||
for (int i = 0; i < x; i ++) DD[i] *= beta;
|
||||
|
||||
for (j = 0; j < x; j++)
|
||||
for (i = 0; i < x; i++)
|
||||
if (transA == 'N') {
|
||||
DD[i] += alpha * float16to32 (AA[j * x + i]) * float16to32 (BB[j << l]);
|
||||
} else if (transA == 'T') {
|
||||
DD[j] += alpha * float16to32 (AA[j * x + i]) * float16to32 (BB[i << l]);
|
||||
}
|
||||
|
||||
for (j = 0; j < x; j++) {
|
||||
if (!is_close(CC[j << l], C[j << l], 0.01, 0.001)) {
|
||||
ret++;
|
||||
}
|
||||
if (!is_close(CC[j << l], DD[j], 0.001, 0.0001)) {
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
free(AA);
|
||||
free(BB);
|
||||
free(DD);
|
||||
free(CC);
|
||||
} // x
|
||||
} // l
|
||||
} // alpha
|
||||
} // beta
|
||||
|
||||
if (ret != 0)
|
||||
fprintf (stderr, "FATAL ERROR SBGEMV - Return code: %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
149
test/compare_sgemv_bgemv.c
Normal file
149
test/compare_sgemv_bgemv.c
Normal file
@@ -0,0 +1,149 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2020,2025 The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include "../common.h"
|
||||
|
||||
#include "test_helpers.h"
|
||||
|
||||
#define SGEMV BLASFUNC(sgemv)
|
||||
#define BGEMV BLASFUNC(bgemv)
|
||||
#define BGEMV_LARGEST 256
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
blasint k;
|
||||
int i, j, l;
|
||||
blasint x, y;
|
||||
blasint one = 1;
|
||||
int ret = 0;
|
||||
int loop = BGEMV_LARGEST;
|
||||
char transA = 'N';
|
||||
float alpha = 1.0, beta = 0.0;
|
||||
bfloat16 alpha_bf16, beta_bf16;
|
||||
|
||||
for (beta = 0; beta < 3; beta += 1)
|
||||
{
|
||||
for (alpha = 0; alpha < 3; alpha += 1)
|
||||
{
|
||||
for (l = 0; l < 2; l++)
|
||||
{ // l = 1 to test inc_x & inc_y not equal to one.
|
||||
for (x = 1; x <= loop; x++)
|
||||
{
|
||||
k = (x == 0) ? 0 : l + 1;
|
||||
float *A = (float *)malloc_safe(x * x * sizeof(FLOAT));
|
||||
float *B = (float *)malloc_safe(x * sizeof(FLOAT) << l);
|
||||
float *C = (float *)malloc_safe(x * sizeof(FLOAT) << l);
|
||||
bfloat16 *AA = (bfloat16 *)malloc_safe(x * x * sizeof(bfloat16));
|
||||
bfloat16 *BB = (bfloat16 *)malloc_safe(x * sizeof(bfloat16) << l);
|
||||
bfloat16 *CC = (bfloat16 *)malloc_safe(x * sizeof(bfloat16) << l);
|
||||
float *DD = (float *)malloc_safe(x * sizeof(FLOAT));
|
||||
if ((A == NULL) || (B == NULL) || (C == NULL) || (AA == NULL) || (BB == NULL) ||
|
||||
(CC == NULL) || (DD == NULL))
|
||||
return 1;
|
||||
|
||||
for (j = 0; j < x; j++)
|
||||
{
|
||||
for (i = 0; i < x; i++)
|
||||
{
|
||||
A[j * x + i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) + 0.5;
|
||||
sbstobf16_(&one, &A[j * x + i], &one, &AA[j * x + i], &one);
|
||||
}
|
||||
B[j << l] = ((FLOAT)rand() / (FLOAT)RAND_MAX) + 0.5;
|
||||
sbstobf16_(&one, &B[j << l], &one, &BB[j << l], &one);
|
||||
|
||||
C[j << l] = ((FLOAT)rand() / (FLOAT)RAND_MAX) + 0.5;
|
||||
sbstobf16_(&one, &B[j << l], &one, &CC[j << l], &one);
|
||||
}
|
||||
|
||||
for (y = 0; y < 2; y++)
|
||||
{
|
||||
if (y == 0)
|
||||
{
|
||||
transA = 'N';
|
||||
}
|
||||
else
|
||||
{
|
||||
transA = 'T';
|
||||
}
|
||||
|
||||
memset(C, 0, x * sizeof(FLOAT) << l);
|
||||
memset(CC, 0, x * sizeof(bfloat16) << l);
|
||||
memset(DD, 0, x * sizeof(FLOAT));
|
||||
|
||||
sbstobf16_(&one, &alpha, &one, &alpha_bf16, &one);
|
||||
sbstobf16_(&one, &beta, &one, &beta_bf16, &one);
|
||||
SGEMV(&transA, &x, &x, &alpha, A, &x, B, &k, &beta, C, &k);
|
||||
BGEMV(&transA, &x, &x, &alpha_bf16, AA, &x, BB, &k, &beta_bf16, CC, &k);
|
||||
|
||||
for (int i = 0; i < x; i++)
|
||||
DD[i] *= beta;
|
||||
|
||||
for (j = 0; j < x; j++)
|
||||
for (i = 0; i < x; i++)
|
||||
if (transA == 'N')
|
||||
{
|
||||
DD[i] += alpha * float16to32(AA[j * x + i]) * float16to32(BB[j << l]);
|
||||
}
|
||||
else if (transA == 'T')
|
||||
{
|
||||
DD[j] += alpha * float16to32(AA[j * x + i]) * float16to32(BB[i << l]);
|
||||
}
|
||||
|
||||
for (j = 0; j < x; j++)
|
||||
{
|
||||
if (!is_close(float16to32(CC[j << l]), truncate_float32_to_bfloat16(C[j << l]), 0.01, 0.001))
|
||||
{
|
||||
printf("Mismatch at trans=%c, alpha=%.2f, beta=%.2f, i=%d, j=%d, k=%d: CC=%.6f, C=%.6f\n",
|
||||
transA, alpha, beta, i, j, k, float16to32(CC[j << l]), truncate_float32_to_bfloat16(C[j << l]));
|
||||
ret++;
|
||||
}
|
||||
if (!is_close(float16to32(CC[j << l]), truncate_float32_to_bfloat16(DD[j]), 0.001, 0.0001))
|
||||
{
|
||||
printf("Mismatch at trans=%c, alpha=%.2f, beta=%.2f, i=%d, j=%d, k=%d: CC=%.6f, C=%.6f\n",
|
||||
transA, alpha, beta, i, j, k, float16to32(CC[j << l]), truncate_float32_to_bfloat16(DD[j]));
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
free(AA);
|
||||
free(BB);
|
||||
free(CC);
|
||||
free(DD);
|
||||
} // x
|
||||
} // l
|
||||
} // alpha
|
||||
} // beta
|
||||
|
||||
if (ret != 0)
|
||||
fprintf(stderr, "FATAL ERROR BGEMV - Return code: %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
128
test/compare_sgemv_sbgemv.c
Normal file
128
test/compare_sgemv_sbgemv.c
Normal file
@@ -0,0 +1,128 @@
|
||||
/***************************************************************************
|
||||
Copyright (c) 2020,2025 The OpenBLAS Project
|
||||
All rights reserved.
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
1. Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
2. Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in
|
||||
the documentation and/or other materials provided with the
|
||||
distribution.
|
||||
3. Neither the name of the OpenBLAS project nor the names of
|
||||
its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
|
||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*****************************************************************************/
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include "../common.h"
|
||||
|
||||
#include "test_helpers.h"
|
||||
|
||||
#define SGEMV BLASFUNC(sgemv)
|
||||
#define SBGEMV BLASFUNC(sbgemv)
|
||||
#define SBGEMV_LARGEST 256
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
blasint k;
|
||||
int i, j, l;
|
||||
blasint x, y;
|
||||
int ret = 0;
|
||||
int loop = SBGEMV_LARGEST;
|
||||
char transA = 'N';
|
||||
float alpha = 1.0, beta = 0.0;
|
||||
|
||||
for (beta = 0; beta < 3; beta += 1) {
|
||||
for (alpha = 0; alpha < 3; alpha += 1) {
|
||||
for (l = 0; l < 2; l++) { // l = 1 to test inc_x & inc_y not equal to one.
|
||||
for (x = 1; x <= loop; x++)
|
||||
{
|
||||
k = (x == 0) ? 0 : l + 1;
|
||||
float *A = (float *)malloc_safe(x * x * sizeof(FLOAT));
|
||||
float *B = (float *)malloc_safe(x * sizeof(FLOAT) << l);
|
||||
float *C = (float *)malloc_safe(x * sizeof(FLOAT) << l);
|
||||
bfloat16 *AA = (bfloat16 *)malloc_safe(x * x * sizeof(bfloat16));
|
||||
bfloat16 *BB = (bfloat16 *)malloc_safe(x * sizeof(bfloat16) << l);
|
||||
float *CC = (float *)malloc_safe(x * sizeof(FLOAT) << l);
|
||||
float *DD = (float *)malloc_safe(x * sizeof(FLOAT));
|
||||
if ((A == NULL) || (B == NULL) || (C == NULL) || (AA == NULL) || (BB == NULL) ||
|
||||
(DD == NULL) || (CC == NULL))
|
||||
return 1;
|
||||
blasint one = 1;
|
||||
|
||||
for (j = 0; j < x; j++)
|
||||
{
|
||||
for (i = 0; i < x; i++)
|
||||
{
|
||||
A[j * x + i] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||
sbstobf16_(&one, &A[j*x+i], &one, &AA[j * x + i], &one);
|
||||
}
|
||||
B[j << l] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||
sbstobf16_(&one, &B[j << l], &one, &BB[j << l], &one);
|
||||
|
||||
CC[j << l] = C[j << l] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
|
||||
}
|
||||
|
||||
for (y = 0; y < 2; y++)
|
||||
{
|
||||
if (y == 0) {
|
||||
transA = 'N';
|
||||
} else {
|
||||
transA = 'T';
|
||||
}
|
||||
|
||||
memset(CC, 0, x * sizeof(FLOAT) << l);
|
||||
memset(DD, 0, x * sizeof(FLOAT));
|
||||
memset(C, 0, x * sizeof(FLOAT) << l);
|
||||
|
||||
SGEMV (&transA, &x, &x, &alpha, A, &x, B, &k, &beta, C, &k);
|
||||
SBGEMV (&transA, &x, &x, &alpha, (bfloat16*) AA, &x, (bfloat16*) BB, &k, &beta, CC, &k);
|
||||
|
||||
for (int i = 0; i < x; i ++) DD[i] *= beta;
|
||||
|
||||
for (j = 0; j < x; j++)
|
||||
for (i = 0; i < x; i++)
|
||||
if (transA == 'N') {
|
||||
DD[i] += alpha * float16to32 (AA[j * x + i]) * float16to32 (BB[j << l]);
|
||||
} else if (transA == 'T') {
|
||||
DD[j] += alpha * float16to32 (AA[j * x + i]) * float16to32 (BB[i << l]);
|
||||
}
|
||||
|
||||
for (j = 0; j < x; j++) {
|
||||
if (!is_close(CC[j << l], C[j << l], 0.01, 0.001)) {
|
||||
ret++;
|
||||
}
|
||||
if (!is_close(CC[j << l], DD[j], 0.001, 0.0001)) {
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
}
|
||||
free(A);
|
||||
free(B);
|
||||
free(C);
|
||||
free(AA);
|
||||
free(BB);
|
||||
free(DD);
|
||||
free(CC);
|
||||
} // x
|
||||
} // l
|
||||
} // alpha
|
||||
} // beta
|
||||
|
||||
if (ret != 0)
|
||||
fprintf (stderr, "FATAL ERROR SBGEMV - Return code: %d\n", ret);
|
||||
return ret;
|
||||
}
|
||||
@@ -31,7 +31,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#include "../common.h"
|
||||
|
||||
#if IFLOAT == bfloat16
|
||||
#ifdef IBFLOAT16
|
||||
static float float16to32(bfloat16 value)
|
||||
{
|
||||
blasint one = 1;
|
||||
@@ -41,6 +41,17 @@ static float float16to32(bfloat16 value)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef OBFLOAT16
|
||||
static float truncate_float32_to_bfloat16(float value) {
|
||||
blasint one = 1;
|
||||
bfloat16 tmp;
|
||||
float result;
|
||||
sbstobf16_(&one, &value, &one, &tmp, &one);
|
||||
sbf16tos_(&one, &tmp, &one, &result, &one);
|
||||
return result;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void *malloc_safe(size_t size) {
|
||||
if (size == 0)
|
||||
return malloc(1);
|
||||
|
||||
Reference in New Issue
Block a user