mirror of
https://github.com/OpenMathLib/OpenBLAS
synced 2026-06-12 00:41:42 +08:00
Add thread throttling profile for SGEMM on NEOVERSEV1
This commit is contained in:
@@ -232,3 +232,6 @@ In chronological order:
|
||||
|
||||
* Aniket P. Garade <https://github.com/garadeaniket> Sushil Pratap Singh <https://github.com/SushilPratap04> Juliya James <https://github.com/Juliya32>
|
||||
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE
|
||||
|
||||
* Annop Wongwathanarat <annop.wongwathanarat@arm.com>
|
||||
* [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1
|
||||
@@ -1,5 +1,5 @@
|
||||
/*********************************************************************/
|
||||
/* Copyright 2024 The OpenBLAS Project */
|
||||
/* Copyright 2024, 2025 The OpenBLAS Project */
|
||||
/* Copyright 2009, 2010 The University of Texas at Austin. */
|
||||
/* All rights reserved. */
|
||||
/* */
|
||||
@@ -177,6 +177,49 @@ static int init_amxtile_permission() {
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef DYNAMIC_ARCH
|
||||
extern char* gotoblas_corename(void);
|
||||
#endif
|
||||
|
||||
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
|
||||
static inline int get_gemm_optimal_nthreads_neoversev1(double MNK, int ncpu) {
|
||||
return
|
||||
MNK < 262144L ? 1
|
||||
: MNK < 1124864L ? MIN(ncpu, 6)
|
||||
: MNK < 7880599L ? MIN(ncpu, 12)
|
||||
: MNK < 17173512L ? MIN(ncpu, 16)
|
||||
: MNK < 33386248L ? MIN(ncpu, 20)
|
||||
: MNK < 57066625L ? MIN(ncpu, 24)
|
||||
: MNK < 91733851L ? MIN(ncpu, 32)
|
||||
: MNK < 265847707L ? MIN(ncpu, 40)
|
||||
: MNK < 458314011L ? MIN(ncpu, 48)
|
||||
: MNK < 729000000L ? MIN(ncpu, 56)
|
||||
: ncpu;
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline int get_gemm_optimal_nthreads(double MNK) {
|
||||
int ncpu = num_cpu_avail(3);
|
||||
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
|
||||
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
|
||||
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
|
||||
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
|
||||
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
|
||||
}
|
||||
#endif
|
||||
if ( MNK <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) {
|
||||
return 1;
|
||||
}
|
||||
else {
|
||||
if (MNK/ncpu < SMP_THRESHOLD_MIN*(double)GEMM_MULTITHREAD_THRESHOLD) {
|
||||
return MNK/(SMP_THRESHOLD_MIN*(double)GEMM_MULTITHREAD_THRESHOLD);
|
||||
}
|
||||
else {
|
||||
return ncpu;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef CBLAS
|
||||
|
||||
void NAME(char *TRANSA, char *TRANSB,
|
||||
@@ -310,7 +353,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
||||
FLOAT *beta = (FLOAT*) vbeta;
|
||||
FLOAT *a = (FLOAT*) va;
|
||||
FLOAT *b = (FLOAT*) vb;
|
||||
FLOAT *c = (FLOAT*) vc;
|
||||
FLOAT *c = (FLOAT*) vc;
|
||||
#endif
|
||||
|
||||
blas_arg_t args;
|
||||
@@ -352,7 +395,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
||||
#if !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) && defined(USE_SGEMM_KERNEL_DIRECT)
|
||||
#ifdef DYNAMIC_ARCH
|
||||
if (support_avx512() )
|
||||
#endif
|
||||
#endif
|
||||
if (beta == 0 && alpha == 1.0 && order == CblasRowMajor && TransA == CblasNoTrans && TransB == CblasNoTrans && SGEMM_DIRECT_PERFORMANT(m,n,k)) {
|
||||
SGEMM_DIRECT(m, n, k, a, lda, b, ldb, c, ldc);
|
||||
return;
|
||||
@@ -604,13 +647,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
|
||||
#endif
|
||||
|
||||
MNK = (double) args.m * (double) args.n * (double) args.k;
|
||||
if ( MNK <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) )
|
||||
args.nthreads = 1;
|
||||
else {
|
||||
args.nthreads = num_cpu_avail(3);
|
||||
if (MNK/args.nthreads < SMP_THRESHOLD_MIN*(double)GEMM_MULTITHREAD_THRESHOLD)
|
||||
args.nthreads = MNK/(SMP_THRESHOLD_MIN*(double)GEMM_MULTITHREAD_THRESHOLD);
|
||||
}
|
||||
args.nthreads = get_gemm_optimal_nthreads(MNK);
|
||||
|
||||
args.common = NULL;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user