From c5941a2731516c365ec30db0b0723c4edbb3f9c7 Mon Sep 17 00:00:00 2001 From: rsdy
Date: Fri, 8 Oct 2021 12:45:04 +0100
Subject: [PATCH] Make the C implementation default to using NEON on aarch64
---
c/Makefile.testing | 4 ++++
c/README.md | 15 ++++++++++++---
c/blake3_impl.h | 4 ++++
3 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/c/Makefile.testing b/c/Makefile.testing
index 41e6b82..b85aedf 100644
--- a/c/Makefile.testing
+++ b/c/Makefile.testing
@@ -42,6 +42,10 @@ EXTRAFLAGS += -DBLAKE3_USE_NEON
TARGETS += blake3_neon.o
endif
+ifdef BLAKE3_NO_NEON
+EXTRAFLAGS += -DBLAKE3_NO_NEON
+endif
+
all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS)
$(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $(NAME) $(LDFLAGS)
diff --git a/c/README.md b/c/README.md
index 5268818..418a5ab 100644
--- a/c/README.md
+++ b/c/README.md
@@ -250,15 +250,24 @@ gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_A
## ARM NEON
-The NEON implementation is not enabled by default on ARM, since not all
-ARM targets support it. To enable it, set `BLAKE3_USE_NEON=1`. Here's an
-example of building a shared library on ARM Linux with NEON support:
+The NEON implementation is enabled by default on AARCH64, but not on
+other ARM targets, since not all of them support it. To enable it, set
+`BLAKE3_USE_NEON=1`. Here's an example of building a shared library on
+ARM Linux with NEON support:
```bash
gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON blake3.c blake3_dispatch.c \
blake3_portable.c blake3_neon.c
```
+To explicitly disable using NEON instructions on AARCH64, set
+`BLAKE3_NO_NEON=1`.
+
+```bash
+gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_NEON blake3.c blake3_dispatch.c \
+ blake3_portable.c
+```
+
Note that on some targets (ARMv7 in particular), extra flags may be
required to activate NEON support in the compiler. If you see an error
like...
diff --git a/c/blake3_impl.h b/c/blake3_impl.h
index 86ab6aa..8688479 100644
--- a/c/blake3_impl.h
+++ b/c/blake3_impl.h
@@ -45,6 +45,10 @@ enum blake3_flags {
#include