Merge branch 'OpenMathLib:develop' into issue5414

This commit is contained in:
Martin Kroeker
2026-01-11 17:45:11 +01:00
committed by GitHub
3 changed files with 19 additions and 45 deletions

View File

@@ -712,9 +712,10 @@ fully working OpenBLAS for this platform.
Go to the directory where you unpacked OpenBLAS, and enter the following commands:
```bash
CC=/Applications/Xcode_12.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
CC="/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
CFLAGS= -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS14.4.sdk -arch arm64 -miphoneos-version-min=10.0
SDKROOT="$(xcrun --sdk iphoneos --show-sdk-path)"
CFLAGS="-O2 -Wno-macro-redefined -isysroot $SDKROOT -arch arm64 -miphoneos-version-min=10.0"
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
```

View File

@@ -35,16 +35,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define I x3
#if !defined(DOUBLE)
#define SSQ s0
#define SCALE s1
#define REGZERO s5
#define REGONE s6
#else
#define SSQF s0
#endif
#define SSQ d0
#define SCALE d1
#define REGZERO d5
#define REGONE d6
#endif
/*******************************************************************************
* Macro definitions
@@ -53,22 +50,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F1
#if !defined(DOUBLE)
ldr s4, [X], #4
fcmp s4, REGZERO
beq 2f /* KERNEL_F1_NEXT_\@ */
fabs s4, s4
fcmp SCALE, s4
bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */
fdiv s2, SCALE, s4
fmul s2, s2, s2
fmul s3, SSQ, s2
fadd SSQ, REGONE, s3
fmov SCALE, s4
b 2f /* KERNEL_F1_NEXT_\@ */
1: /* KERNEL_F1_SCALE_GE_X_\@: */
fdiv s2, s4, SCALE
fmla SSQ, s2, v2.s[0]
fcvt d4, s4
#else
ldr d4, [X], #8
#endif
fcmp d4, REGZERO
beq 2f /* KERNEL_F1_NEXT_\@ */
fabs d4, d4
@@ -83,29 +68,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
1: /* KERNEL_F1_SCALE_GE_X_\@: */
fdiv d2, d4, SCALE
fmla SSQ, d2, v2.d[0]
#endif
2: /* KERNEL_F1_NEXT_\@: */
.endm
.macro KERNEL_S1
#if !defined(DOUBLE)
ldr s4, [X]
fcmp s4, REGZERO
beq KERNEL_S1_NEXT
fabs s4, s4
fcmp SCALE, s4
bge KERNEL_S1_SCALE_GE_X
fdiv s2, SCALE, s4
fmul s2, s2, s2
fmul s3, SSQ, s2
fadd SSQ, REGONE, s3
fmov SCALE, s4
b KERNEL_S1_NEXT
KERNEL_S1_SCALE_GE_X:
fdiv s2, s4, SCALE
fmla SSQ, s2, v2.s[0]
fcvt d4, s4
#else
ldr d4, [X]
#endif
fcmp d4, REGZERO
beq KERNEL_S1_NEXT
fabs d4, d4
@@ -120,7 +92,6 @@ KERNEL_S1_SCALE_GE_X:
KERNEL_S1_SCALE_GE_X:
fdiv d2, d4, SCALE
fmla SSQ, d2, v2.d[0]
#endif
KERNEL_S1_NEXT:
add X, X, INC_X
.endm
@@ -218,7 +189,9 @@ KERNEL_S1_NEXT:
.Lnrm2_kernel_L999:
fsqrt SSQ, SSQ
fmul SSQ, SCALE, SSQ
#if !defined(DOUBLE)
fcvt SSQF, SSQ
#endif
ret
EPILOGUE

12
param.h
View File

@@ -3778,18 +3778,18 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
#define ZGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_MN 16
#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 160
#define SGEMM_DEFAULT_P 128
#define DGEMM_DEFAULT_P 128
#define CGEMM_DEFAULT_P 128
#define ZGEMM_DEFAULT_P 128
#define SGEMM_DEFAULT_Q 352
#define DGEMM_DEFAULT_Q 128
#define SGEMM_DEFAULT_Q 896
#define DGEMM_DEFAULT_Q 448
#define CGEMM_DEFAULT_Q 224
#define ZGEMM_DEFAULT_Q 112
#define SGEMM_DEFAULT_R 4096
#define DGEMM_DEFAULT_R 4096
#define SGEMM_DEFAULT_R 3072
#define DGEMM_DEFAULT_R 3072
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 4096