From c5941a2731516c365ec30db0b0723c4edbb3f9c7 Mon Sep 17 00:00:00 2001 From: rsdy Date: Fri, 8 Oct 2021 12:45:04 +0100 Subject: [PATCH] Make the C implementation default to using NEON on aarch64 --- c/Makefile.testing | 4 ++++ c/README.md | 15 ++++++++++++--- c/blake3_impl.h | 4 ++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/c/Makefile.testing b/c/Makefile.testing index 41e6b82..b85aedf 100644 --- a/c/Makefile.testing +++ b/c/Makefile.testing @@ -42,6 +42,10 @@ EXTRAFLAGS += -DBLAKE3_USE_NEON TARGETS += blake3_neon.o endif +ifdef BLAKE3_NO_NEON +EXTRAFLAGS += -DBLAKE3_NO_NEON +endif + all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS) $(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $(NAME) $(LDFLAGS) diff --git a/c/README.md b/c/README.md index 5268818..418a5ab 100644 --- a/c/README.md +++ b/c/README.md @@ -250,15 +250,24 @@ gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_A ## ARM NEON -The NEON implementation is not enabled by default on ARM, since not all -ARM targets support it. To enable it, set `BLAKE3_USE_NEON=1`. Here's an -example of building a shared library on ARM Linux with NEON support: +The NEON implementation is enabled by default on AARCH64, but not on +other ARM targets, since not all of them support it. To enable it there, set +`BLAKE3_USE_NEON=1`. Here's an example of building a shared library on +ARM Linux with NEON support: ```bash gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON blake3.c blake3_dispatch.c \ blake3_portable.c blake3_neon.c ``` +To explicitly disable using NEON instructions on AARCH64, set +`BLAKE3_NO_NEON=1`. + +```bash +gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_NEON blake3.c blake3_dispatch.c \ + blake3_portable.c +``` + Note that on some targets (ARMv7 in particular), extra flags may be required to activate NEON support in the compiler. If you see an error like... 
diff --git a/c/blake3_impl.h b/c/blake3_impl.h index 86ab6aa..8688479 100644 --- a/c/blake3_impl.h +++ b/c/blake3_impl.h @@ -45,6 +45,10 @@ enum blake3_flags { #include <immintrin.h> #endif +#if defined(__aarch64__) && !defined(BLAKE3_NO_NEON) && !defined(BLAKE3_USE_NEON) +#define BLAKE3_USE_NEON +#endif + #if defined(IS_X86) #define MAX_SIMD_DEGREE 16 #elif defined(BLAKE3_USE_NEON)