Serialize accesses to parallelized syrk functions from multiple callers, like it was already done for GEMM in level3_thread.c and GEMM3M in level3_gemm3m_thread.c

This commit is contained in:
lujiaweics
2026-01-20 21:23:02 +08:00
parent 5ffbf38b41
commit 1f3b81e562

View File

@@ -504,6 +504,33 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLOAT *sb, BLASLONG mypos){
#ifdef USE_OPENMP
static omp_lock_t level3_lock, critical_section_lock;
static volatile BLASULONG init_lock = 0, omp_lock_initialized = 0,
parallel_section_left = MAX_PARALLEL_NUMBER;
// Lock initialization; Todo : Maybe this part can be moved to blas_init() in blas_server_omp.c
while(omp_lock_initialized == 0)
{
blas_lock(&init_lock);
{
if(omp_lock_initialized == 0)
{
omp_init_lock(&level3_lock);
omp_init_lock(&critical_section_lock);
omp_lock_initialized = 1;
WMB;
}
blas_unlock(&init_lock);
}
}
#elif defined(OS_WINDOWS)
CRITICAL_SECTION level3_lock;
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
#endif
blas_arg_t newarg;
#ifndef USE_ALLOC_HEAP
@@ -560,6 +587,30 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
#endif
#endif
#ifdef USE_OPENMP
omp_set_lock(&level3_lock);
omp_set_lock(&critical_section_lock);
parallel_section_left--;
/*
How OpenMP locks works with NUM_PARALLEL
1) parallel_section_left = Number of available concurrent executions of OpenBLAS - Number of currently executing OpenBLAS executions
2) level3_lock is acting like a master lock or barrier which stops OpenBLAS calls when all the parallel_section are currently busy executing other OpenBLAS calls
3) critical_section_lock is used for updating variables shared between threads executing OpenBLAS calls concurrently and for unlocking of master lock whenever required
4) Unlock master lock only when we have not already exhausted all the parallel_sections and allow another thread with a OpenBLAS call to enter
*/
if(parallel_section_left != 0)
omp_unset_lock(&level3_lock);
omp_unset_lock(&critical_section_lock);
#elif defined(OS_WINDOWS)
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
pthread_mutex_lock(&level3_lock);
#endif
newarg.m = args -> m;
newarg.n = args -> n;
newarg.k = args -> k;
@@ -706,5 +757,25 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO
free(job);
#endif
#ifdef USE_OPENMP
omp_set_lock(&critical_section_lock);
parallel_section_left++;
/*
Unlock master lock only when all the parallel_sections are already exhausted and one of the thread has completed its OpenBLAS call
otherwise just increment the parallel_section_left
The master lock is only locked when we have exhausted all the parallel_sections, So only unlock it then and otherwise just increment the count
*/
if(parallel_section_left == 1)
omp_unset_lock(&level3_lock);
omp_unset_lock(&critical_section_lock);
#elif defined(OS_WINDOWS)
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
pthread_mutex_unlock(&level3_lock);
#endif
return 0;
}