mirror of
https://github.com/OpenMathLib/OpenBLAS
synced 2026-05-31 00:45:48 +08:00
-Ofast enables possibly unsafe optimizations in addition to those of -O3. It appears to have been added once and then copied into each later Power architecture section, and it was not included in the CMake build system when that was introduced. Replace it with -O3 so that the same level of optimization is done by the compiler.
225 lines
5.7 KiB
Makefile
225 lines
5.7 KiB
Makefile
|
|
# OpenMP selection: enabled unless USE_THREAD is explicitly set to 0.
# An unset or empty USE_THREAD also selects OpenMP.
ifneq ($(USE_THREAD), 0)
USE_OPENMP = 1
else
USE_OPENMP = 0
endif

# Compiler flags for CORE=POWER10. Nothing is added here when the C compiler
# is PGI (the Fortran flags below are inside the same non-PGI guard).
ifeq ($(CORE), POWER10)
ifneq ($(C_COMPILER), PGI)
# C flags: every variant uses -O3 -mvsx -fno-fast-math; only the -mcpu/-mtune
# level differs, depending on which GCC version flags are set.
ifneq ($(C_COMPILER), GCC)
CCOMMON_OPT += -O3 -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
else ifeq ($(GCCVERSIONGTEQ10), 1)
CCOMMON_OPT += -O3 -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
else ifeq ($(GCCVERSIONGT4), 1)
# GCCVERSIONGT4 is set but GCCVERSIONGTEQ10 is not: target power9.
$(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended)
CCOMMON_OPT += -O3 -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
else
# Neither version flag is set: target power8.
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
CCOMMON_OPT += -O3 -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
endif
# Fortran flags: IBM-style options for F_COMPILER=IBM, GCC-style otherwise.
ifneq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
else
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
endif
endif
endif

# Compiler flags for CORE=POWER9.
ifeq ($(CORE), POWER9)
# C flags. Non-PGI compilers get -O3 -mvsx -fno-fast-math plus a CPU level:
# power8 for a GCC without GCCVERSIONGT4 set, power9 otherwise.
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -O3 -mvsx -fno-fast-math
ifeq ($(C_COMPILER), GCC)
ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
CCOMMON_OPT += -mcpu=power8 -mtune=power8
else
CCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
CCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
endif
# Fortran flags.
ifneq ($(F_COMPILER), PGI)
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr9 -qtune=pwr9 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -fno-fast-math
# Choose exactly one -mcpu/-mtune pair. Previously power9 was appended
# unconditionally before the gfortran version check, so an old gfortran still
# received -mcpu=power9 ahead of the power8 fallback, and a newer gfortran
# received the power9 pair twice.
ifeq ($(F_COMPILER), GFORTRAN)
ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
FCOMMON_OPT += -mcpu=power8 -mtune=power8
else
FCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
# Other non-IBM, non-PGI Fortran compilers keep the power9 target as before.
FCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
endif
else
FCOMMON_OPT += -O2 -Mrecursive
endif
endif

# Compiler flags for CORE=POWER8.
ifeq ($(CORE), POWER8)
# C flags: PGI uses its own option set, everything else GCC-style options.
ifeq ($(C_COMPILER), PGI)
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
else
CCOMMON_OPT += -O3 -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
endif
# Fortran flags: PGI, then IBM (same options on every OS), then GCC-style
# options, which use -O1 on AIX and -O2 elsewhere.
ifeq ($(F_COMPILER), PGI)
FCOMMON_OPT += -O2 -Mrecursive
else ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
else ifeq ($(OSNAME), AIX)
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
endif
endif

# OpenMP build: define USE_OPENMP for both languages and add the
# compiler-specific OpenMP switch.
ifeq ($(USE_OPENMP), 1)
# C compiler: -mp for PGI, -fopenmp for everything else.
ifeq ($(C_COMPILER), PGI)
CCOMMON_OPT += -DUSE_OPENMP -mp
else
CCOMMON_OPT += -DUSE_OPENMP -fopenmp
endif
# Fortran compiler: -mp for PGI, no extra switch for IBM, -fopenmp otherwise.
ifeq ($(F_COMPILER), PGI)
FCOMMON_OPT += -DUSE_OPENMP -mp
else ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -DUSE_OPENMP
else
FCOMMON_OPT += -DUSE_OPENMP -fopenmp
endif
endif

# Clang: disable the integrated assembler.
ifeq "$(C_COMPILER)" "CLANG"
CCOMMON_OPT += -fno-integrated-as
endif

# workaround for C->FORTRAN ABI violation in LAPACKE
ifeq "$(F_COMPILER)" "GFORTRAN"
FCOMMON_OPT += -fno-optimize-sibling-calls
endif

# Directory searched for the libflame libraries (see LIBFLAME).
FLAMEPATH = ${HOME}/flame/lib

#ifeq ($(CORE), CELL)
#CELL_SDK_ROOT = /opt/IBM/cell-sdk-1.1/sysroot/usr
#SPU_CC = spu-gcc
#EXTRALIB += -lspe
#endif

# Cross-compiler prefixes for Linux are currently disabled: both branches
# contain only commented-out suggestions and therefore set nothing.
ifeq "$(OSNAME)" "Linux"
ifndef BINARY64
# COMPILER_PREFIX = powerpc-linux-
else
# COMPILER_PREFIX = powerpc64-linux-
endif
endif

# Either uncomment the line below or run make with `USE_MASS=1` to enable
# support for the MASS library.
#USE_MASS = 1

ifeq "$(USE_MASS)" "1"
# Path to the MASS libs; change it if they are installed somewhere else.
MASSPATH = /opt/ibm/xlmass/8.1.5/lib
EXTRALIB += -L${MASSPATH} -lmass -lmassvp8 -lmass_simdp8
COMMON_OPT += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations -DUSE_MASS
endif

# 64-bit builds: stop early on compiler pairings that are not supported on AIX.
ifdef BINARY64
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), GCCIBMAIX)
$(error Using GCC and XLF on AIX is not a supported combination.)
endif
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), CLANGGFORTRANAIX)
$(error Using Clang and gFortran on AIX is not a supported combination.)
endif
endif

# AIX: select 32- or 64-bit mode for the compilers, the archiver and the
# assembler. Other operating systems get no extra flags here.
ifeq ($(OSNAME), AIX)
ifdef BINARY64
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mpowerpc64 -maix64
else
CCOMMON_OPT += -m64
endif
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mpowerpc64 -maix64
endif
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -mpowerpc64 -maix64
endif
ifeq ($(COMPILER_F77), xlf)
FCOMMON_OPT += -q64
endif
ARFLAGS = -X 64
ASFLAGS = -a64
else
CCOMMON_OPT += -Wa,-a32
ARFLAGS = -X 32
ASFLAGS = -a32
endif
endif

# CCOMMON_OPT += -maltivec -mabi=altivec

# Link line for the libflame libraries found under FLAMEPATH.
LIBFLAME = -L${FLAMEPATH} -llapack2flame -lflame-lapack -lflame-base ${LIBS}

# Darwin only: extra CPU-subtype switch for the C compiler.
ifeq "$(OSNAME)" "Darwin"
CCOMMON_OPT += -force_cpusubtype_ALL
endif

# Link lines for the ESSL, vecLib and ATLAS libraries, chosen by word size
# (BINARY64) and, for ATLAS/ESSL, by whether SMP is defined.
ifdef BINARY64

# 64-bit variants.
ifneq ($(OSNAME), Linux)
ESSLPATH = -lxlf90_r
else
ESSLPATH = -L/opt/ibmcmp/lib64 -Wl,-rpath,/opt/ibmcmp/lib64 -L/opt/ibmcmp/xlf/11.1/lib64 -Wl,-rpath,/opt/ibmcmp/xlf/11.1/lib64 -lxlf90_r -lxlomp_ser
endif

LIBVECLIB = /System/Library/Frameworks/vecLib.framework/Versions/Current/vecLib

ifdef SMP
LIBATLAS = -L/usr/lib64/atlas3.7.11p -lptf77blas -latlas -lm -lpthread
LIBESSL = -lesslsmp $(ESSLPATH) -lxlsmp -lm
else
LIBATLAS = -L/usr/lib64/atlas3.7.11 -lf77blas -latlas -lg2c -lm
LIBESSL = -lessl $(ESSLPATH) -lm
endif

else

# 32-bit variants.
ifneq ($(OSNAME), Linux)
ESSLPATH = -lxlf90_r
else
ESSLPATH = -L/opt/ibmcmp/lib -L/opt/ibmcmp/xlf/11.1/lib -Wl,-rpath,/opt/ibmcmp/lib -Wl,-rpath,/opt/ibmcmp/xlf/11.1/lib -lxlf90_r -lxlomp_ser -lxlfmath -lxl -lpthread
endif

LIBVECLIB = -framework VecLib

ifdef SMP
LIBATLAS = -L/usr/lib/atlas3.7.11p -lptf77blas -latlas -lm -lpthread
LIBESSL = -lesslsmp $(ESSLPATH) ../../level1/others/libmisc.a -lm
else
LIBATLAS = -L/usr/lib/atlas3.7.11 -lf77blas -latlas -lg2c -lm
LIBESSL = -lessl $(ESSLPATH) ../../level1/others/libmisc.a -lm
endif

endif