From 0a0bb7126e9207d3bb3d9af0f0b5ae646d532cf2 Mon Sep 17 00:00:00 2001 From: rsdy
Date: Thu, 7 Oct 2021 12:23:36 +0100
Subject: [PATCH 1/8] Implement better target detection for NEON
---
build.rs | 11 ++++++++++-
src/lib.rs | 2 +-
src/platform.rs | 18 +++++++++---------
3 files changed, 20 insertions(+), 11 deletions(-)
diff --git a/build.rs b/build.rs
index 4fd3bae..c5677f6 100644
--- a/build.rs
+++ b/build.rs
@@ -44,6 +44,14 @@ fn is_x86_32() -> bool {
arch == "i386" || arch == "i586" || arch == "i686"
}
+fn is_arm() -> bool {
+ is_armv7() || is_aarch64() || target_components()[0] == "arm"
+}
+
+fn is_aarch64() -> bool {
+ target_components()[0] == "aarch64"
+}
+
fn is_armv7() -> bool {
target_components()[0] == "armv7"
}
@@ -237,7 +245,8 @@ fn main() -> Result<(), Box
Date: Fri, 8 Oct 2021 11:34:35 +0100
Subject: [PATCH 2/8] Add `no_neon` feature to disable NEON on aarch64
---
Cargo.toml | 1 +
build.rs | 10 +++++++++-
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/Cargo.toml b/Cargo.toml
index a43f000..b2d8d0c 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -73,6 +73,7 @@ no_sse2 = []
no_sse41 = []
no_avx2 = []
no_avx512 = []
+no_neon = []
[package.metadata.docs.rs]
# Document Hasher::update_rayon on docs.rs.
diff --git a/build.rs b/build.rs
index c5677f6..ac1d6a6 100644
--- a/build.rs
+++ b/build.rs
@@ -17,6 +17,10 @@ fn is_neon() -> bool {
defined("CARGO_FEATURE_NEON")
}
+fn is_no_neon() -> bool {
+ defined("CARGO_FEATURE_NO_NEON")
+}
+
fn is_ci() -> bool {
defined("BLAKE3_CI")
}
@@ -226,6 +230,10 @@ fn main() -> Result<(), Box
Date: Fri, 8 Oct 2021 11:51:18 +0100
Subject: [PATCH 3/8] Add no_neon feature tests to CI
---
.github/workflows/ci.yml | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 7ce4b59..a7641de 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -149,12 +149,17 @@ jobs:
# Test the NEON implementation on ARM targets.
- run: cross test --target ${{ matrix.arch }} --features=neon
if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-')
+ # NEON is enabled by default on aarch64, disabling it through the no_neon feature.
+ - run: cross test --target ${{ matrix.arch }} --features=no_neon
+ if: startsWith(matrix.arch, 'aarch64-')
# Test vectors. Note that this uses a hacky script due to path dependency limitations.
- run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }}
# C code. Same issue with the hacky script.
- run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }}
- run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }} --features=neon
if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-')
+ - run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }} --features=no_neon
+ if: startsWith(matrix.arch, 'aarch64-')
# Currently only on x86.
c_tests:
From c5941a2731516c365ec30db0b0723c4edbb3f9c7 Mon Sep 17 00:00:00 2001
From: rsdy
Date: Fri, 8 Oct 2021 12:45:04 +0100
Subject: [PATCH 4/8] Make the C implementation default to using NEON on
aarch64
---
c/Makefile.testing | 4 ++++
c/README.md | 15 ++++++++++++---
c/blake3_impl.h | 4 ++++
3 files changed, 20 insertions(+), 3 deletions(-)
diff --git a/c/Makefile.testing b/c/Makefile.testing
index 41e6b82..b85aedf 100644
--- a/c/Makefile.testing
+++ b/c/Makefile.testing
@@ -42,6 +42,10 @@ EXTRAFLAGS += -DBLAKE3_USE_NEON
TARGETS += blake3_neon.o
endif
+ifdef BLAKE3_NO_NEON
+EXTRAFLAGS += -DBLAKE3_NO_NEON
+endif
+
all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS)
$(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $(NAME) $(LDFLAGS)
diff --git a/c/README.md b/c/README.md
index 5268818..418a5ab 100644
--- a/c/README.md
+++ b/c/README.md
@@ -250,15 +250,24 @@ gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_A
## ARM NEON
-The NEON implementation is not enabled by default on ARM, since not all
-ARM targets support it. To enable it, set `BLAKE3_USE_NEON=1`. Here's an
-example of building a shared library on ARM Linux with NEON support:
+The NEON implementation is enabled by default on AARCH64, but not on
+other ARM targets, since not all of them support it. To enable it, set
+`BLAKE3_USE_NEON=1`. Here's an example of building a shared library on
+ARM Linux with NEON support:
```bash
gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON blake3.c blake3_dispatch.c \
blake3_portable.c blake3_neon.c
```
+To explicitiy disable using NEON instructions on AARCH64, set
+`BLAKE3_NO_NEON=1`.
+
+```bash
+gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_NEON blake3.c blake3_dispatch.c \
+ blake3_portable.c
+```
+
Note that on some targets (ARMv7 in particular), extra flags may be
required to activate NEON support in the compiler. If you see an error
like...
diff --git a/c/blake3_impl.h b/c/blake3_impl.h
index 86ab6aa..8688479 100644
--- a/c/blake3_impl.h
+++ b/c/blake3_impl.h
@@ -45,6 +45,10 @@ enum blake3_flags {
#include
Date: Sat, 9 Oct 2021 19:46:07 +0100
Subject: [PATCH 5/8] Match the C binding's target arch detection with the root
crate's
---
c/blake3_c_rust_bindings/build.rs | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/c/blake3_c_rust_bindings/build.rs b/c/blake3_c_rust_bindings/build.rs
index d5dc47a..98f8396 100644
--- a/c/blake3_c_rust_bindings/build.rs
+++ b/c/blake3_c_rust_bindings/build.rs
@@ -22,6 +22,10 @@ fn is_armv7() -> bool {
target_components()[0] == "armv7"
}
+fn is_aarch64() -> bool {
+ target_components()[0] == "aarch64"
+}
+
// Windows targets may be using the MSVC toolchain or the GNU toolchain. The
// right compiler flags to use depend on the toolchain. (And we don't want to
// use flag_if_supported, because we don't want features to be silently
@@ -148,10 +152,14 @@ fn main() -> Result<(), Box
Date: Tue, 12 Oct 2021 16:23:28 +0100
Subject: [PATCH 6/8] Include MSVC naming of aarch64 arch
---
c/blake3_impl.h | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)
diff --git a/c/blake3_impl.h b/c/blake3_impl.h
index 8688479..18f201f 100644
--- a/c/blake3_impl.h
+++ b/c/blake3_impl.h
@@ -38,6 +38,10 @@ enum blake3_flags {
#define IS_X86_32
#endif
+#if defined(__aarch64__) || defined(_M_ARM64)
+#define IS_AARCH64
+#endif
+
#if defined(IS_X86)
#if defined(_MSC_VER)
#include
Date: Tue, 12 Oct 2021 16:54:12 +0100
Subject: [PATCH 7/8] Disable no_neon feature for C bindings as we can't
propagate from cargo build
---
.github/workflows/ci.yml | 2 --
1 file changed, 2 deletions(-)
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a7641de..715fe15 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -158,8 +158,6 @@ jobs:
- run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }}
- run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }} --features=neon
if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-')
- - run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }} --features=no_neon
- if: startsWith(matrix.arch, 'aarch64-')
# Currently only on x86.
c_tests:
From 2aa7c963be41eaee47f3ba5af1d5a7d7f1a9658d Mon Sep 17 00:00:00 2001
From: rsdy
Date: Tue, 12 Oct 2021 23:23:25 +0100
Subject: [PATCH 8/8] Use BLAKE3_USE_NEON=0 instead of BLAKE3_NO_NEON def
---
c/Makefile.testing | 4 ++--
c/README.md | 6 +++---
c/blake3_dispatch.c | 4 ++--
c/blake3_impl.h | 13 +++++++++----
4 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/c/Makefile.testing b/c/Makefile.testing
index b85aedf..b540528 100644
--- a/c/Makefile.testing
+++ b/c/Makefile.testing
@@ -38,12 +38,12 @@ ASM_TARGETS += blake3_avx512_x86-64_unix.S
endif
ifdef BLAKE3_USE_NEON
-EXTRAFLAGS += -DBLAKE3_USE_NEON
+EXTRAFLAGS += -DBLAKE3_USE_NEON=1
TARGETS += blake3_neon.o
endif
ifdef BLAKE3_NO_NEON
-EXTRAFLAGS += -DBLAKE3_NO_NEON
+EXTRAFLAGS += -DBLAKE3_USE_NEON=0
endif
all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS)
diff --git a/c/README.md b/c/README.md
index 418a5ab..eacdc54 100644
--- a/c/README.md
+++ b/c/README.md
@@ -256,15 +256,15 @@ other ARM targets, since not all of them support it. To enable it, set
ARM Linux with NEON support:
```bash
-gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON blake3.c blake3_dispatch.c \
+gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=1 blake3.c blake3_dispatch.c \
blake3_portable.c blake3_neon.c
```
To explicitiy disable using NEON instructions on AARCH64, set
-`BLAKE3_NO_NEON=1`.
+`BLAKE3_USE_NEON=0`.
```bash
-gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_NEON blake3.c blake3_dispatch.c \
+gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=0 blake3.c blake3_dispatch.c \
blake3_portable.c
```
diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c
index 6518478..b498058 100644
--- a/c/blake3_dispatch.c
+++ b/c/blake3_dispatch.c
@@ -232,7 +232,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
#endif
#endif
-#if defined(BLAKE3_USE_NEON)
+#if BLAKE3_USE_NEON == 1
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
return;
@@ -269,7 +269,7 @@ size_t blake3_simd_degree(void) {
}
#endif
#endif
-#if defined(BLAKE3_USE_NEON)
+#if BLAKE3_USE_NEON == 1
return 4;
#endif
return 1;
diff --git a/c/blake3_impl.h b/c/blake3_impl.h
index 18f201f..ba2e91c 100644
--- a/c/blake3_impl.h
+++ b/c/blake3_impl.h
@@ -49,13 +49,18 @@ enum blake3_flags {
#include