From 5f0735832bbad726ad8f814090abdc058eb5c162 Mon Sep 17 00:00:00 2001 From: h-motoki Date: Fri, 28 Nov 2025 13:27:23 +0900 Subject: [PATCH 1/4] fix param.h: turn [sd]gemm_default_[pqr] parameters for a64fx --- param.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/param.h b/param.h index 8e598d8a0..70855ff86 100644 --- a/param.h +++ b/param.h @@ -3778,18 +3778,18 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout #define ZGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_MN 16 -#define SGEMM_DEFAULT_P 128 -#define DGEMM_DEFAULT_P 160 +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 128 #define CGEMM_DEFAULT_P 128 #define ZGEMM_DEFAULT_P 128 -#define SGEMM_DEFAULT_Q 352 -#define DGEMM_DEFAULT_Q 128 +#define SGEMM_DEFAULT_Q 896 +#define DGEMM_DEFAULT_Q 448 #define CGEMM_DEFAULT_Q 224 #define ZGEMM_DEFAULT_Q 112 -#define SGEMM_DEFAULT_R 4096 -#define DGEMM_DEFAULT_R 4096 +#define SGEMM_DEFAULT_R 3072 +#define DGEMM_DEFAULT_R 3072 #define CGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096 From d7d1088d21a3014cee470581846bfceb92584bd2 Mon Sep 17 00:00:00 2001 From: moluopro Date: Fri, 9 Jan 2026 23:35:58 +0800 Subject: [PATCH 2/4] docs: fix iOS build script & use xcrun SDK path --- docs/install.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/install.md b/docs/install.md index e6a70ac37..0a63ffa80 100644 --- a/docs/install.md +++ b/docs/install.md @@ -712,11 +712,12 @@ fully working OpenBLAS for this platform. Go to the directory where you unpacked OpenBLAS,and enter the following commands: ```bash -CC=/Applications/Xcode_12.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang +CC="/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" -CFLAGS= -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS14.4.sdk -arch arm64 -miphoneos-version-min=10.0 +SDKROOT="$(xcrun --sdk iphoneos --show-sdk-path)" +CFLAGS="-O2 -Wno-macro-redefined -isysroot $SDKROOT -arch arm64 -miphoneos-version-min=10.0" -make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 +make libs TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 ``` Adjust `MIN_IOS_VERSION` as necessary for your installation. E.g., change the version number to the minimum iOS version you want to target and execute this file to build the library. From a514760e06ef4472f5054bbf98448fb5cd5a90f9 Mon Sep 17 00:00:00 2001 From: moluopro Date: Sat, 10 Jan 2026 21:14:05 +0800 Subject: [PATCH 3/4] Change 'make libs' back to 'make' --- docs/install.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/install.md b/docs/install.md index 0a63ffa80..5e31b5066 100644 --- a/docs/install.md +++ b/docs/install.md @@ -717,7 +717,7 @@ CC="/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolcha SDKROOT="$(xcrun --sdk iphoneos --show-sdk-path)" CFLAGS="-O2 -Wno-macro-redefined -isysroot $SDKROOT -arch arm64 -miphoneos-version-min=10.0" -make libs TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 +make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 ``` Adjust `MIN_IOS_VERSION` as necessary for your installation. E.g., change the version number to the minimum iOS version you want to target and execute this file to build the library. From d1de282a4e7733fa31683d05c0994a835060ef6c Mon Sep 17 00:00:00 2001 From: Martin Kroeker Date: Sun, 11 Jan 2026 13:04:00 +0100 Subject: [PATCH 4/4] Improve the precision of S/CNRM2 by summing in double precision --- kernel/arm64/nrm2.S | 47 ++++++++++----------------------------------- 1 file changed, 10 insertions(+), 37 deletions(-) diff --git a/kernel/arm64/nrm2.S b/kernel/arm64/nrm2.S index 0e5a8eed1..93218ff6e 100644 --- a/kernel/arm64/nrm2.S +++ b/kernel/arm64/nrm2.S @@ -35,16 +35,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I x3 #if !defined(DOUBLE) -#define SSQ s0 -#define SCALE s1 -#define REGZERO s5 -#define REGONE s6 -#else +#define SSQF s0 +#endif + #define SSQ d0 #define SCALE d1 #define REGZERO d5 #define REGONE d6 -#endif /******************************************************************************* * Macro definitions @@ -53,22 +50,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro KERNEL_F1 #if !defined(DOUBLE) ldr s4, [X], #4 - fcmp s4, REGZERO - beq 2f /* KERNEL_F1_NEXT_\@ */ - fabs s4, s4 - fcmp SCALE, s4 - bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */ - fdiv s2, SCALE, s4 - fmul s2, s2, s2 - fmul s3, SSQ, s2 - fadd SSQ, REGONE, s3 - fmov SCALE, s4 - b 2f /* KERNEL_F1_NEXT_\@ */ -1: /* KERNEL_F1_SCALE_GE_X_\@: */ - fdiv s2, s4, SCALE - fmla SSQ, s2, v2.s[0] + fcvt d4, s4 #else ldr d4, [X], #8 +#endif fcmp d4, REGZERO beq 2f /* KERNEL_F1_NEXT_\@ */ fabs d4, d4 @@ -83,29 +68,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1: /* KERNEL_F1_SCALE_GE_X_\@: */ fdiv d2, d4, SCALE fmla SSQ, d2, v2.d[0] -#endif 2: /* KERNEL_F1_NEXT_\@: */ .endm .macro KERNEL_S1 #if !defined(DOUBLE) ldr s4, [X] - fcmp s4, REGZERO - beq KERNEL_S1_NEXT - fabs s4, s4 - fcmp SCALE, s4 - bge KERNEL_S1_SCALE_GE_X - fdiv s2, SCALE, s4 - fmul s2, s2, s2 - fmul s3, SSQ, s2 - fadd SSQ, REGONE, s3 - fmov SCALE, s4 - b KERNEL_S1_NEXT -KERNEL_S1_SCALE_GE_X: - fdiv s2, s4, SCALE - fmla SSQ, s2, v2.s[0] + fcvt d4, s4 #else ldr d4, [X] +#endif fcmp d4, REGZERO beq KERNEL_S1_NEXT fabs d4, d4 @@ -120,7 +92,6 @@ KERNEL_S1_SCALE_GE_X: KERNEL_S1_SCALE_GE_X: fdiv d2, d4, SCALE fmla SSQ, d2, v2.d[0] -#endif KERNEL_S1_NEXT: add X, X, INC_X .endm @@ -218,7 +189,9 @@ KERNEL_S1_NEXT: .Lnrm2_kernel_L999: fsqrt SSQ, SSQ fmul SSQ, SCALE, SSQ - +#if !defined(DOUBLE) + fcvt SSQF, SSQ +#endif ret EPILOGUE