mirror of
https://github.com/OpenMathLib/OpenBLAS
synced 2026-05-31 00:45:48 +08:00
435 lines
11 KiB
Makefile
435 lines
11 KiB
Makefile
###############################################################################
|
|
# Copyright (c) 2025, The OpenBLAS Project
|
|
# All rights reserved.
|
|
# Redistribution and use in source and binary forms, with or without
|
|
# modification, are permitted provided that the following conditions are
|
|
# met:
|
|
# 1. Redistributions of source code must retain the above copyright
|
|
# notice, this list of conditions and the following disclaimer.
|
|
# 2. Redistributions in binary form must reproduce the above copyright
|
|
# notice, this list of conditions and the following disclaimer in
|
|
# the documentation and/or other materials provided with the
|
|
# distribution.
|
|
# 3. Neither the name of the OpenBLAS project nor the names of
|
|
# its contributors may be used to endorse or promote products
|
|
# derived from this software without specific prior written permission.
|
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
|
|
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
# POSSIBILITY OF SUCH DAMAGE.
|
|
###############################################################################
|
|
|
|
ifneq ($(C_COMPILER), PGI)
|
|
|
|
# Mark Clang-like C compilers. Both Clang and the Fujitsu compiler set
# ISCLANG, which the per-core checks below accept as an alternative to a
# GCC version flag.
ifeq ($(C_COMPILER), CLANG)
  ISCLANG = 1
endif
ifeq ($(C_COMPILER), FUJITSU)
  ISCLANG = 1
endif
|
|
# Fallback for compilers that set neither GCCVERSIONGT4 nor ISCLANG: only
# the baseline architecture is requested and no per-core tuning is applied.
# The test checks that the $(filter) result is empty instead of comparing it
# with a single "1": $(filter) keeps every matching word, so two set flags
# expand to "1 1", which never equals "1".
ifeq ($(filter 1,$(GCCVERSIONGT4) $(ISCLANG)),)
CCOMMON_OPT += -march=armv8-a
# The Fortran flags are skipped when the Fortran compiler is NAG.
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a
endif
|
|
|
|
|
|
else
|
|
|
|
|
|
# Generic ARMV8 target: baseline architecture, no -mtune.
ifeq ($(CORE), ARMV8)
  CCOMMON_OPT += -march=armv8-a
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8-a
  endif
endif
|
|
|
|
# Generic ARMV8SVE target: baseline architecture with the SVE extension.
ifeq ($(CORE), ARMV8SVE)
  CCOMMON_OPT += -march=armv8-a+sve
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8-a+sve
  endif
endif
|
|
|
|
# Generic ARMV9SME target. The C flags request SVE2 and SME; the Fortran
# flags request SVE2 only.
ifeq ($(CORE), ARMV9SME)
  CCOMMON_OPT += -march=armv9-a+sve2+sme
  # Skip the Fortran flags for NAG, consistent with every other core block
  # in this file (the original appended them unconditionally).
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv9-a+sve2
  endif
endif
|
|
|
|
# Cortex-A53: baseline architecture tuned for cortex-a53.
ifeq ($(CORE), CORTEXA53)
  CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
  endif
endif
|
|
|
|
# Cortex-A57: baseline architecture tuned for cortex-a57.
ifeq ($(CORE), CORTEXA57)
  CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
  endif
endif
|
|
|
|
# Cortex-A72: baseline architecture tuned for cortex-a72.
ifeq ($(CORE), CORTEXA72)
  CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
  endif
endif
|
|
|
|
# Cortex-A73: baseline architecture tuned for cortex-a73.
ifeq ($(CORE), CORTEXA73)
  CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
  endif
endif
|
|
|
|
# Cortex-A76: armv8.2-a tuned for cortex-a76.
ifeq ($(CORE), CORTEXA76)
  CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
  endif
endif
|
|
|
|
# FT2000: built with the same flags as Cortex-A72 (baseline architecture,
# cortex-a72 tuning).
ifeq ($(CORE), FT2000)
  CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
  endif
endif
|
|
|
|
# Neoverse N1. -mtune=neoverse-n1 is only available in GCC >= 9, so older
# compilers use Cortex-A72 tuning instead; without GCCVERSIONGTEQ7 the
# architecture also drops to the armv8-a baseline. ISCLANG satisfies every
# version check.
# Each check tests for a non-empty $(filter) result. Comparing the result
# with "1" would fail whenever two flags are set, because $(filter) keeps
# every match and expands to "1 1".
ifeq ($(CORE), NEOVERSEN1)
  ifneq ($(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)),)
    ifneq ($(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)),)
      CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
      endif
    else
      CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
      endif
    endif
  else
    CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
    endif
  endif
endif
|
|
|
|
# Neoverse V1. -mtune=neoverse-v1 is only available in GCC >= 10.4, so
# older compilers fall back to -mtune=native (native builds only) or to
# Cortex-A72 tuning. ISCLANG satisfies every version check.
# Each check tests for a non-empty $(filter) result. The original compared
# the result with "1", which is false when several flags are set: e.g.
# GCCMINORVERSIONGTEQ4 and GCCVERSIONGTEQ11 together expand to "1 1", so
# such compilers were wrongly sent to the -mtune=native fallback.
ifeq ($(CORE), NEOVERSEV1)
  ifneq ($(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)),)
    ifneq ($(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)),)
      ifneq ($(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)),)
        CCOMMON_OPT += -march=armv8.4-a+sve+bf16
        # Clang-like C compilers are tuned for cortex-x1 instead of
        # neoverse-v1.
        ifeq (1, $(ISCLANG))
          CCOMMON_OPT += -mtune=cortex-x1
        else
          CCOMMON_OPT += -mtune=neoverse-v1
        endif
        ifneq ($(F_COMPILER), NAG)
          FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
        endif
      else
        CCOMMON_OPT += -march=armv8.4-a+sve+bf16
        # -mtune=native is only added when not cross-compiling.
        ifneq ($(CROSS), 1)
          CCOMMON_OPT += -mtune=native
        endif
        ifneq ($(F_COMPILER), NAG)
          FCOMMON_OPT += -march=armv8.4-a
          ifneq ($(CROSS), 1)
            FCOMMON_OPT += -mtune=native
          endif
        endif
      endif
    else
      CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
      endif
    endif
  else
    CCOMMON_OPT += -march=armv8-a+sve -mtune=cortex-a72
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
    endif
  endif
endif
|
|
|
|
# Neoverse N2. -mtune=neoverse-n2 is only available in GCC >= 10.4, so
# older compilers fall back to -mtune=native (native builds only) or to
# Cortex-A72 tuning. ISCLANG satisfies every version check.
# Each check tests for a non-empty $(filter) result. The original compared
# the result with "1", which is false when several flags are set: e.g.
# GCCMINORVERSIONGTEQ4 and GCCVERSIONGTEQ11 together expand to "1 1", so
# such compilers were wrongly sent to the -mtune=native fallback.
ifeq ($(CORE), NEOVERSEN2)
  ifneq ($(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)),)
    ifneq ($(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)),)
      ifneq ($(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)),)
        # On Darwin the C compiler gets the armv8.2-a / cortex-a72 flags
        # instead of the neoverse-n2 ones.
        ifneq ($(OSNAME), Darwin)
          CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
        else
          CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=cortex-a72
        endif
        # NOTE(review): the Fortran flags are not adjusted for Darwin the
        # way the C flags are - confirm this is intended.
        ifneq ($(F_COMPILER), NAG)
          FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
        endif
      else
        CCOMMON_OPT += -march=armv8.5-a+sve+bf16
        # -mtune=native is only added when not cross-compiling.
        ifneq ($(CROSS), 1)
          CCOMMON_OPT += -mtune=native
        endif
        ifneq ($(F_COMPILER), NAG)
          FCOMMON_OPT += -march=armv8.5-a
          ifneq ($(CROSS), 1)
            FCOMMON_OPT += -mtune=native
          endif
        endif
      endif
    else
      CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=cortex-a72
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
      endif
    endif
  else
    CCOMMON_OPT += -march=armv8-a+sve+bf16 -mtune=cortex-a72
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
    endif
  endif
endif
|
|
|
|
# Flags for ARM Neoverse V2. -mcpu=neoverse-v2 is used when
# GCCVERSIONGTEQ13 or ISCLANG is set; otherwise the build falls back to
# armv8.2-a with Neoverse N1 tuning.
# The check tests for a non-empty $(filter) result: comparing it with "1"
# would fail if both flags were set, since $(filter) would return "1 1".
ifeq ($(CORE), NEOVERSEV2)
  ifneq ($(filter 1,$(GCCVERSIONGTEQ13) $(ISCLANG)),)
    CCOMMON_OPT += -mcpu=neoverse-v2
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -mcpu=neoverse-v2
    endif
  else
    CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=neoverse-n1
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
    endif
  endif
endif
|
|
|
|
# Flags for Ampere AmpereOne (ampere1, ampere1a) processors. They are only
# added when GCCVERSIONGTEQ12 or ISCLANG is set; other compilers get no
# core-specific flags from this block.
# The check tests for a non-empty $(filter) result: comparing it with "1"
# would fail if both flags were set, since $(filter) would return "1 1".
ifeq ($(CORE), AMPERE1)
  ifneq ($(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)),)
    CCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
    endif
  endif
endif
|
|
|
|
# Cortex-A55. -mtune=cortex-a55 is only available in GCC >= 8.1, so older
# compilers use Cortex-A53 tuning; without GCCVERSIONGTEQ7 the architecture
# also drops to the armv8-a baseline. ISCLANG satisfies every version check.
# Each check tests for a non-empty $(filter) result: comparing it with "1"
# would fail if both flags were set, since $(filter) would return "1 1".
ifeq ($(CORE), CORTEXA55)
  ifneq ($(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)),)
    ifneq ($(filter 1,$(GCCVERSIONGTEQ8) $(ISCLANG)),)
      CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
      endif
    else
      CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
      endif
    endif
  else
    CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
    endif
  endif
endif
|
|
|
|
# ThunderX: baseline architecture tuned for thunderx.
ifeq ($(CORE), THUNDERX)
  CCOMMON_OPT += -march=armv8-a -mtune=thunderx
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8-a -mtune=thunderx
  endif
endif
|
|
|
|
# Falkor: baseline architecture tuned for falkor.
ifeq ($(CORE), FALKOR)
  CCOMMON_OPT += -march=armv8-a -mtune=falkor
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8-a -mtune=falkor
  endif
endif
|
|
|
|
# ThunderX2 T99: armv8.1-a tuned for thunderx2t99.
ifeq ($(CORE), THUNDERX2T99)
  CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
  endif
endif
|
|
|
|
# ThunderX3 T110. With GCCVERSIONGTEQ10 or ISCLANG set the build targets
# armv8.3-a; otherwise it reuses the ThunderX2 T99 flags.
# The version check tests for a non-empty $(filter) result: comparing it
# with "1" would fail if both flags were set ("1 1").
ifeq ($(CORE), THUNDERX3T110)
  ifneq ($(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)),)
    CCOMMON_OPT += -march=armv8.3-a
    # Non-Clang C compilers tune for thunderx3t110; Clang-like ones use
    # thunderx2t99. ISCLANG is either 1 or unset in this file, so it is
    # tested against 1: the original comparison with 0 never matched and
    # always selected the thunderx2t99 tuning.
    ifneq ($(ISCLANG), 1)
      CCOMMON_OPT += -mtune=thunderx3t110
    else
      CCOMMON_OPT += -mtune=thunderx2t99
    endif
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
    endif
  else
    CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
    endif
  endif
endif
|
|
|
|
# Vortex: armv8.3-a, no -mtune.
ifeq ($(CORE), VORTEX)
  CCOMMON_OPT += -march=armv8.3-a
  # Fortran gets the same flags unless the Fortran compiler is NAG.
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8.3-a
  endif
endif
|
|
|
|
# Vortex M4: armv8.4-a with the SME extension, no -mtune.
ifeq ($(CORE), VORTEXM4)
  CCOMMON_OPT += -march=armv8.4-a+sme
  # Skip the Fortran flags for NAG, consistent with every other core block
  # in this file (the original appended them unconditionally).
  ifneq ($(F_COMPILER), NAG)
    FCOMMON_OPT += -march=armv8.4-a+sme
  endif
endif
|
|
|
|
# TSV110: flags are only added when GCCVERSIONGTEQ9 or ISCLANG is set.
# The check tests for a non-empty $(filter) result: comparing it with "1"
# would fail if both flags were set, since $(filter) would return "1 1".
ifneq ($(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)),)
  ifeq ($(CORE), TSV110)
    CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
    endif
  endif
endif
|
|
|
|
# eMAG 8180: flags are only added when GCCVERSIONGTEQ9 or ISCLANG is set.
# The check tests for a non-empty $(filter) result: comparing it with "1"
# would fail if both flags were set, since $(filter) would return "1 1".
ifneq ($(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)),)
  ifeq ($(CORE), EMAG8180)
    CCOMMON_OPT += -march=armv8-a
    # -mtune=emag is passed to the C compiler only when it is not
    # Clang-like. ISCLANG is either 1 or unset in this file, so it is tested
    # against 1: the original comparison with 0 never matched and the
    # tuning flag was never added.
    ifneq ($(ISCLANG), 1)
      CCOMMON_OPT += -mtune=emag
    endif
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8-a -mtune=emag
    endif
  endif
endif
|
|
|
|
# A64FX: flags are only added when GCCVERSIONGTEQ10 or ISCLANG is set.
# -mtune=a64fx is used when GCCMINORVERSIONGTEQ3, GCCVERSIONGTEQ11 or
# ISCLANG is set; otherwise the build uses armv8.4-a with Neoverse N1
# tuning.
# Each check tests for a non-empty $(filter) result. The original compared
# the result with "1", which is false when several flags are set: e.g.
# GCCMINORVERSIONGTEQ3 and GCCVERSIONGTEQ11 together expand to "1 1", so
# such compilers were wrongly sent to the neoverse-n1 fallback.
ifeq ($(CORE), A64FX)
  ifneq ($(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)),)
    ifneq ($(filter 1,$(GCCMINORVERSIONGTEQ3) $(GCCVERSIONGTEQ11) $(ISCLANG)),)
      CCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx
      endif
    else
      CCOMMON_OPT += -march=armv8.4-a+sve -mtune=neoverse-n1
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-n1
      endif
    endif
  endif
endif
|
|
|
|
# Cortex-X1: flags are only added when GCCVERSIONGTEQ11 or ISCLANG is set.
# -mtune=cortex-x1 is used when GCCMINORVERSIONGTEQ4, GCCVERSIONGTEQ12 or
# ISCLANG is set; otherwise Cortex-A72 tuning is used.
# Each check tests for a non-empty $(filter) result. The original compared
# the result with "1", which is false when several flags are set: e.g.
# GCCMINORVERSIONGTEQ4 and GCCVERSIONGTEQ12 together expand to "1 1", so
# such compilers were wrongly given the cortex-a72 tuning.
ifneq ($(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)),)
  ifeq ($(CORE), CORTEXX1)
    CCOMMON_OPT += -march=armv8.2-a
    ifneq ($(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ12) $(ISCLANG)),)
      CCOMMON_OPT += -mtune=cortex-x1
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-x1
      endif
    else
      CCOMMON_OPT += -mtune=cortex-a72
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
      endif
    endif
  endif
endif
|
|
|
|
# Cortex-X2: flags are only added when GCCVERSIONGTEQ11 or ISCLANG is set;
# -mtune=cortex-x2 additionally requires GCCVERSIONGTEQ12 or ISCLANG.
# Each check tests for a non-empty $(filter) result: comparing it with "1"
# would fail if both flags were set, since $(filter) would return "1 1".
ifneq ($(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)),)
  ifeq ($(CORE), CORTEXX2)
    CCOMMON_OPT += -march=armv8.4-a+sve
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8.4-a+sve
    endif
    ifneq ($(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)),)
      CCOMMON_OPT += -mtune=cortex-x2
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -mtune=cortex-x2
      endif
    endif
  endif
endif
|
|
|
|
# Cortex-A510: flags are only added when GCCVERSIONGTEQ11 or ISCLANG is
# set; no -mtune is passed.
# The check tests for a non-empty $(filter) result: comparing it with "1"
# would fail if both flags were set, since $(filter) would return "1 1".
# (A commented-out Clang-only variant of this check was removed.)
ifneq ($(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)),)
  ifeq ($(CORE), CORTEXA510)
    CCOMMON_OPT += -march=armv8.4-a+sve
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8.4-a+sve
    endif
  endif
endif
|
|
|
|
# Cortex-A710: flags are only added when GCCVERSIONGTEQ11 or ISCLANG is
# set; -mtune=cortex-a710 additionally requires GCCVERSIONGTEQ12 or ISCLANG.
# Each check tests for a non-empty $(filter) result: comparing it with "1"
# would fail if both flags were set, since $(filter) would return "1 1".
ifneq ($(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)),)
  ifeq ($(CORE), CORTEXA710)
    CCOMMON_OPT += -march=armv8.4-a+sve
    ifneq ($(F_COMPILER), NAG)
      FCOMMON_OPT += -march=armv8.4-a+sve
    endif
    ifneq ($(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)),)
      CCOMMON_OPT += -mtune=cortex-a710
      ifneq ($(F_COMPILER), NAG)
        FCOMMON_OPT += -mtune=cortex-a710
      endif
    endif
  endif
endif
|
|
|
|
endif
|
|
|
|
else
|
|
# NVIDIA HPC compiler: -tp options necessary to enable SVE in the compiler.
# The same option is appended to the C and the Fortran flags for each core.
ifeq ($(CORE), THUNDERX2T99)
  CCOMMON_OPT += -tp=thunderx2t99
  FCOMMON_OPT += -tp=thunderx2t99
endif

ifeq ($(CORE), NEOVERSEN1)
  CCOMMON_OPT += -tp=neoverse-n1
  FCOMMON_OPT += -tp=neoverse-n1
endif

ifeq ($(CORE), NEOVERSEV1)
  CCOMMON_OPT += -tp=neoverse-v1
  FCOMMON_OPT += -tp=neoverse-v1
endif

ifeq ($(CORE), NEOVERSEV2)
  CCOMMON_OPT += -tp=neoverse-v2
  FCOMMON_OPT += -tp=neoverse-v2
endif

# The generic SVE targets reuse the neoverse-v2 target processor.
ifeq ($(CORE), ARMV8SVE)
  CCOMMON_OPT += -tp=neoverse-v2
  FCOMMON_OPT += -tp=neoverse-v2
endif

# NOTE(review): the non-PGI branch of this file handles ARMV9SME, not
# ARMV9SVE - confirm this core name is the intended one.
ifeq ($(CORE), ARMV9SVE)
  CCOMMON_OPT += -tp=neoverse-v2
  FCOMMON_OPT += -tp=neoverse-v2
endif
|
|
|
|
endif
|