diff --git a/docs/install.md b/docs/install.md index e6a70ac37..5e31b5066 100644 --- a/docs/install.md +++ b/docs/install.md @@ -712,9 +712,10 @@ fully working OpenBLAS for this platform. Go to the directory where you unpacked OpenBLAS,and enter the following commands: ```bash -CC=/Applications/Xcode_12.4.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang +CC="/Applications/Xcode.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" -CFLAGS= -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_12.4.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS14.4.sdk -arch arm64 -miphoneos-version-min=10.0 +SDKROOT="$(xcrun --sdk iphoneos --show-sdk-path)" +CFLAGS="-O2 -Wno-macro-redefined -isysroot $SDKROOT -arch arm64 -miphoneos-version-min=10.0" make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 ``` diff --git a/kernel/arm64/nrm2.S b/kernel/arm64/nrm2.S index 0e5a8eed1..93218ff6e 100644 --- a/kernel/arm64/nrm2.S +++ b/kernel/arm64/nrm2.S @@ -35,16 +35,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define I x3 #if !defined(DOUBLE) -#define SSQ s0 -#define SCALE s1 -#define REGZERO s5 -#define REGONE s6 -#else +#define SSQF s0 +#endif + #define SSQ d0 #define SCALE d1 #define REGZERO d5 #define REGONE d6 -#endif /******************************************************************************* * Macro definitions @@ -53,22 +50,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. .macro KERNEL_F1 #if !defined(DOUBLE) ldr s4, [X], #4 - fcmp s4, REGZERO - beq 2f /* KERNEL_F1_NEXT_\@ */ - fabs s4, s4 - fcmp SCALE, s4 - bge 1f /* KERNEL_F1_SCALE_GE_X_\@ */ - fdiv s2, SCALE, s4 - fmul s2, s2, s2 - fmul s3, SSQ, s2 - fadd SSQ, REGONE, s3 - fmov SCALE, s4 - b 2f /* KERNEL_F1_NEXT_\@ */ -1: /* KERNEL_F1_SCALE_GE_X_\@: */ - fdiv s2, s4, SCALE - fmla SSQ, s2, v2.s[0] + fcvt d4, s4 #else ldr d4, [X], #8 +#endif fcmp d4, REGZERO beq 2f /* KERNEL_F1_NEXT_\@ */ fabs d4, d4 @@ -83,29 +68,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 1: /* KERNEL_F1_SCALE_GE_X_\@: */ fdiv d2, d4, SCALE fmla SSQ, d2, v2.d[0] -#endif 2: /* KERNEL_F1_NEXT_\@: */ .endm .macro KERNEL_S1 #if !defined(DOUBLE) ldr s4, [X] - fcmp s4, REGZERO - beq KERNEL_S1_NEXT - fabs s4, s4 - fcmp SCALE, s4 - bge KERNEL_S1_SCALE_GE_X - fdiv s2, SCALE, s4 - fmul s2, s2, s2 - fmul s3, SSQ, s2 - fadd SSQ, REGONE, s3 - fmov SCALE, s4 - b KERNEL_S1_NEXT -KERNEL_S1_SCALE_GE_X: - fdiv s2, s4, SCALE - fmla SSQ, s2, v2.s[0] + fcvt d4, s4 #else ldr d4, [X] +#endif fcmp d4, REGZERO beq KERNEL_S1_NEXT fabs d4, d4 @@ -120,7 +92,6 @@ KERNEL_S1_SCALE_GE_X: KERNEL_S1_SCALE_GE_X: fdiv d2, d4, SCALE fmla SSQ, d2, v2.d[0] -#endif KERNEL_S1_NEXT: add X, X, INC_X .endm @@ -218,7 +189,9 @@ KERNEL_S1_NEXT: .Lnrm2_kernel_L999: fsqrt SSQ, SSQ fmul SSQ, SCALE, SSQ - +#if !defined(DOUBLE) + fcvt SSQF, SSQ +#endif ret EPILOGUE diff --git a/param.h b/param.h index 490b7c472..8ff59920e 100644 --- a/param.h +++ b/param.h @@ -3778,18 +3778,18 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout #define ZGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_MN 16 -#define SGEMM_DEFAULT_P 128 -#define DGEMM_DEFAULT_P 160 +#define SGEMM_DEFAULT_P 128 +#define DGEMM_DEFAULT_P 128 #define CGEMM_DEFAULT_P 128 #define ZGEMM_DEFAULT_P 128 -#define SGEMM_DEFAULT_Q 352 -#define DGEMM_DEFAULT_Q 128 +#define SGEMM_DEFAULT_Q 896 +#define DGEMM_DEFAULT_Q 448 #define CGEMM_DEFAULT_Q 224 #define ZGEMM_DEFAULT_Q 112 -#define SGEMM_DEFAULT_R 4096 -#define DGEMM_DEFAULT_R 4096 +#define SGEMM_DEFAULT_R 3072 +#define DGEMM_DEFAULT_R 3072 #define CGEMM_DEFAULT_R 4096 #define ZGEMM_DEFAULT_R 4096