From 0a0bb7126e9207d3bb3d9af0f0b5ae646d532cf2 Mon Sep 17 00:00:00 2001 From: rsdy Date: Thu, 7 Oct 2021 12:23:36 +0100 Subject: [PATCH 1/8] Implement better target detection for NEON --- build.rs | 11 ++++++++++- src/lib.rs | 2 +- src/platform.rs | 18 +++++++++--------- 3 files changed, 20 insertions(+), 11 deletions(-) diff --git a/build.rs b/build.rs index 4fd3bae..c5677f6 100644 --- a/build.rs +++ b/build.rs @@ -44,6 +44,14 @@ fn is_x86_32() -> bool { arch == "i386" || arch == "i586" || arch == "i686" } +fn is_arm() -> bool { + is_armv7() || is_aarch64() || target_components()[0] == "arm" +} + +fn is_aarch64() -> bool { + target_components()[0] == "aarch64" +} + fn is_armv7() -> bool { target_components()[0] == "armv7" } @@ -237,7 +245,8 @@ fn main() -> Result<(), Box> { } } - if is_neon() { + if (is_arm() && is_neon()) || (is_aarch64() && !is_pure()) { + println!("cargo:rustc-cfg=blake3_neon"); build_neon_c_intrinsics(); } diff --git a/src/lib.rs b/src/lib.rs index 4123a0b..31e5cd6 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,7 +94,7 @@ mod avx2; #[cfg(blake3_avx512_ffi)] #[path = "ffi_avx512.rs"] mod avx512; -#[cfg(feature = "neon")] +#[cfg(blake3_neon)] #[path = "ffi_neon.rs"] mod neon; mod portable; diff --git a/src/platform.rs b/src/platform.rs index 1a732c5..00058b1 100644 --- a/src/platform.rs +++ b/src/platform.rs @@ -10,7 +10,7 @@ cfg_if::cfg_if! { pub const MAX_SIMD_DEGREE: usize = 8; } } - } else if #[cfg(feature = "neon")] { + } else if #[cfg(blake3_neon)] { pub const MAX_SIMD_DEGREE: usize = 4; } else { pub const MAX_SIMD_DEGREE: usize = 1; @@ -30,7 +30,7 @@ cfg_if::cfg_if! { pub const MAX_SIMD_DEGREE_OR_2: usize = 8; } } - } else if #[cfg(feature = "neon")] { + } else if #[cfg(blake3_neon)] { pub const MAX_SIMD_DEGREE_OR_2: usize = 4; } else { pub const MAX_SIMD_DEGREE_OR_2: usize = 2; @@ -49,7 +49,7 @@ pub enum Platform { #[cfg(blake3_avx512_ffi)] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] AVX512, - #[cfg(feature = "neon")] + #[cfg(blake3_neon)] NEON, } @@ -76,7 +76,7 @@ impl Platform { } // We don't use dynamic feature detection for NEON. If the "neon" // feature is on, NEON is assumed to be supported. - #[cfg(feature = "neon")] + #[cfg(blake3_neon)] { return Platform::NEON; } @@ -95,7 +95,7 @@ impl Platform { #[cfg(blake3_avx512_ffi)] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX512 => 16, - #[cfg(feature = "neon")] + #[cfg(blake3_neon)] Platform::NEON => 4, }; debug_assert!(degree <= MAX_SIMD_DEGREE); @@ -129,7 +129,7 @@ impl Platform { crate::avx512::compress_in_place(cv, block, block_len, counter, flags) }, // No NEON compress_in_place() implementation yet. - #[cfg(feature = "neon")] + #[cfg(blake3_neon)] Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags), } } @@ -161,7 +161,7 @@ impl Platform { crate::avx512::compress_xof(cv, block, block_len, counter, flags) }, // No NEON compress_xof() implementation yet. - #[cfg(feature = "neon")] + #[cfg(blake3_neon)] Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags), } } @@ -256,7 +256,7 @@ impl Platform { ) }, // Assumed to be safe if the "neon" feature is on. - #[cfg(feature = "neon")] + #[cfg(blake3_neon)] Platform::NEON => unsafe { crate::neon::hash_many( inputs, @@ -315,7 +315,7 @@ impl Platform { } } - #[cfg(feature = "neon")] + #[cfg(blake3_neon)] pub fn neon() -> Option { // Assumed to be safe if the "neon" feature is on. Some(Self::NEON) From 20fd56ac0fe8c5f824cb7ce0375c6a63102a1ff1 Mon Sep 17 00:00:00 2001 From: rsdy Date: Fri, 8 Oct 2021 11:34:35 +0100 Subject: [PATCH 2/8] Add `no_neon` feature to disable NEON on aarch64 --- Cargo.toml | 1 + build.rs | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Cargo.toml b/Cargo.toml index a43f000..b2d8d0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -73,6 +73,7 @@ no_sse2 = [] no_sse41 = [] no_avx2 = [] no_avx512 = [] +no_neon = [] [package.metadata.docs.rs] # Document Hasher::update_rayon on docs.rs. diff --git a/build.rs b/build.rs index c5677f6..ac1d6a6 100644 --- a/build.rs +++ b/build.rs @@ -17,6 +17,10 @@ fn is_neon() -> bool { defined("CARGO_FEATURE_NEON") } +fn is_no_neon() -> bool { + defined("CARGO_FEATURE_NO_NEON") +} + fn is_ci() -> bool { defined("BLAKE3_CI") } @@ -226,6 +230,10 @@ fn main() -> Result<(), Box> { panic!("It doesn't make sense to enable both \"pure\" and \"neon\"."); } + if is_no_neon() && is_neon() { + panic!("It doesn't make sense to enable both \"no_neon\" and \"neon\"."); + } + if is_x86_64() || is_x86_32() { let support = c_compiler_support(); if is_x86_32() || should_prefer_intrinsics() || is_pure() || support == NoCompiler { @@ -245,7 +253,7 @@ fn main() -> Result<(), Box> { } } - if (is_arm() && is_neon()) || (is_aarch64() && !is_pure()) { + if (is_arm() && is_neon()) || (!is_no_neon() && !is_pure() && is_aarch64()) { println!("cargo:rustc-cfg=blake3_neon"); build_neon_c_intrinsics(); } From faddc5af5ce9db25a21a3041fd4c5df63f49c875 Mon Sep 17 00:00:00 2001 From: rsdy Date: Fri, 8 Oct 2021 11:51:18 +0100 Subject: [PATCH 3/8] Add no_neon feature tests to CI --- .github/workflows/ci.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 7ce4b59..a7641de 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -149,12 +149,17 @@ jobs: # Test the NEON implementation on ARM targets. - run: cross test --target ${{ matrix.arch }} --features=neon if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-') + # NEON is enabled by default on aarch64, disabling it through the no_neon feature. + - run: cross test --target ${{ matrix.arch }} --features=no_neon + if: startsWith(matrix.arch, 'aarch64-') # Test vectors. Note that this uses a hacky script due to path dependency limitations. - run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }} # C code. Same issue with the hacky script. - run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }} - run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }} --features=neon if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-') + - run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }} --features=no_neon + if: startsWith(matrix.arch, 'aarch64-') # Currently only on x86. c_tests: From c5941a2731516c365ec30db0b0723c4edbb3f9c7 Mon Sep 17 00:00:00 2001 From: rsdy Date: Fri, 8 Oct 2021 12:45:04 +0100 Subject: [PATCH 4/8] Make the C implementation default to using NEON on aarch64 --- c/Makefile.testing | 4 ++++ c/README.md | 15 ++++++++++++--- c/blake3_impl.h | 4 ++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/c/Makefile.testing b/c/Makefile.testing index 41e6b82..b85aedf 100644 --- a/c/Makefile.testing +++ b/c/Makefile.testing @@ -42,6 +42,10 @@ EXTRAFLAGS += -DBLAKE3_USE_NEON TARGETS += blake3_neon.o endif +ifdef BLAKE3_NO_NEON +EXTRAFLAGS += -DBLAKE3_NO_NEON +endif + all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS) $(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $(NAME) $(LDFLAGS) diff --git a/c/README.md b/c/README.md index 5268818..418a5ab 100644 --- a/c/README.md +++ b/c/README.md @@ -250,15 +250,24 @@ gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_A ## ARM NEON -The NEON implementation is not enabled by default on ARM, since not all -ARM targets support it. To enable it, set `BLAKE3_USE_NEON=1`. Here's an -example of building a shared library on ARM Linux with NEON support: +The NEON implementation is enabled by default on AARCH64, but not on +other ARM targets, since not all of them support it. To enable it, set +`BLAKE3_USE_NEON=1`. Here's an example of building a shared library on +ARM Linux with NEON support: ```bash gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON blake3.c blake3_dispatch.c \ blake3_portable.c blake3_neon.c ``` +To explicitiy disable using NEON instructions on AARCH64, set +`BLAKE3_NO_NEON=1`. + +```bash +gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_NEON blake3.c blake3_dispatch.c \ + blake3_portable.c +``` + Note that on some targets (ARMv7 in particular), extra flags may be required to activate NEON support in the compiler. If you see an error like... diff --git a/c/blake3_impl.h b/c/blake3_impl.h index 86ab6aa..8688479 100644 --- a/c/blake3_impl.h +++ b/c/blake3_impl.h @@ -45,6 +45,10 @@ enum blake3_flags { #include #endif +#if defined(__aarch64__) && !defined(BLAKE3_NO_NEON) && !defined(BLAKE3_USE_NEON) +#define BLAKE3_USE_NEON +#endif + #if defined(IS_X86) #define MAX_SIMD_DEGREE 16 #elif defined(BLAKE3_USE_NEON) From 6b9cbe5e23443eb05d5cbdf645cb54b3706ea0c6 Mon Sep 17 00:00:00 2001 From: rsdy Date: Sat, 9 Oct 2021 19:46:07 +0100 Subject: [PATCH 5/8] Match the C binding's target arch detection with the root crate's --- c/blake3_c_rust_bindings/build.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/c/blake3_c_rust_bindings/build.rs b/c/blake3_c_rust_bindings/build.rs index d5dc47a..98f8396 100644 --- a/c/blake3_c_rust_bindings/build.rs +++ b/c/blake3_c_rust_bindings/build.rs @@ -22,6 +22,10 @@ fn is_armv7() -> bool { target_components()[0] == "armv7" } +fn is_aarch64() -> bool { + target_components()[0] == "aarch64" +} + // Windows targets may be using the MSVC toolchain or the GNU toolchain. The // right compiler flags to use depend on the toolchain. (And we don't want to // use flag_if_supported, because we don't want features to be silently @@ -148,10 +152,14 @@ fn main() -> Result<(), Box> { avx512_build.compile("blake3_avx512"); } - // We only build NEON code here if 1) it's requested and 2) the root crate - // is not already building it. The only time this will really happen is if - // you build this crate by hand with the "neon" feature for some reason. - if defined("CARGO_FEATURE_NEON") { + // We only build NEON code here if + // 1) it's requested + // and 2) the root crate is not already building it. + // The only time this will really happen is if you build this + // crate by hand with the "neon" feature for some reason. + // + // In addition, 3) if the target is aarch64, NEON is on by default. + if defined("CARGO_FEATURE_NEON") || is_aarch64() { let mut neon_build = new_build(); neon_build.file(c_dir_path("blake3_neon.c")); // ARMv7 platforms that support NEON generally need the following From ed09e45e7aaeec5d62763dbf70e9c82b5b48a376 Mon Sep 17 00:00:00 2001 From: rsdy Date: Tue, 12 Oct 2021 16:23:28 +0100 Subject: [PATCH 6/8] Include MSVC naming of aarch64 arch --- c/blake3_impl.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/c/blake3_impl.h b/c/blake3_impl.h index 8688479..18f201f 100644 --- a/c/blake3_impl.h +++ b/c/blake3_impl.h @@ -38,6 +38,10 @@ enum blake3_flags { #define IS_X86_32 #endif +#if defined(__aarch64__) || defined(_M_ARM64) +#define IS_AARCH64 +#endif + #if defined(IS_X86) #if defined(_MSC_VER) #include @@ -45,7 +49,7 @@ enum blake3_flags { #include #endif -#if defined(__aarch64__) && !defined(BLAKE3_NO_NEON) && !defined(BLAKE3_USE_NEON) +#if defined(IS_AARCH64) && !defined(BLAKE3_NO_NEON) && !defined(BLAKE3_USE_NEON) #define BLAKE3_USE_NEON #endif From f4d5c6e78546d3c8e3e159b1f10ff624bec9352c Mon Sep 17 00:00:00 2001 From: rsdy Date: Tue, 12 Oct 2021 16:54:12 +0100 Subject: [PATCH 7/8] Disable no_neon feature for C bindings as we can't propagate from cargo build --- .github/workflows/ci.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a7641de..715fe15 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -158,8 +158,6 @@ jobs: - run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }} - run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }} --features=neon if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-') - - run: ./c/blake3_c_rust_bindings/cross_test.sh --target ${{ matrix.arch }} --features=no_neon - if: startsWith(matrix.arch, 'aarch64-') # Currently only on x86. c_tests: From 2aa7c963be41eaee47f3ba5af1d5a7d7f1a9658d Mon Sep 17 00:00:00 2001 From: rsdy Date: Tue, 12 Oct 2021 23:23:25 +0100 Subject: [PATCH 8/8] Use BLAKE3_USE_NEON=0 instead of BLAKE3_NO_NEON def --- c/Makefile.testing | 4 ++-- c/README.md | 6 +++--- c/blake3_dispatch.c | 4 ++-- c/blake3_impl.h | 13 +++++++++---- 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/c/Makefile.testing b/c/Makefile.testing index b85aedf..b540528 100644 --- a/c/Makefile.testing +++ b/c/Makefile.testing @@ -38,12 +38,12 @@ ASM_TARGETS += blake3_avx512_x86-64_unix.S endif ifdef BLAKE3_USE_NEON -EXTRAFLAGS += -DBLAKE3_USE_NEON +EXTRAFLAGS += -DBLAKE3_USE_NEON=1 TARGETS += blake3_neon.o endif ifdef BLAKE3_NO_NEON -EXTRAFLAGS += -DBLAKE3_NO_NEON +EXTRAFLAGS += -DBLAKE3_USE_NEON=0 endif all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS) diff --git a/c/README.md b/c/README.md index 418a5ab..eacdc54 100644 --- a/c/README.md +++ b/c/README.md @@ -256,15 +256,15 @@ other ARM targets, since not all of them support it. To enable it, set ARM Linux with NEON support: ```bash -gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON blake3.c blake3_dispatch.c \ +gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=1 blake3.c blake3_dispatch.c \ blake3_portable.c blake3_neon.c ``` To explicitiy disable using NEON instructions on AARCH64, set -`BLAKE3_NO_NEON=1`. +`BLAKE3_USE_NEON=0`. ```bash -gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_NEON blake3.c blake3_dispatch.c \ +gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=0 blake3.c blake3_dispatch.c \ blake3_portable.c ``` diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c index 6518478..b498058 100644 --- a/c/blake3_dispatch.c +++ b/c/blake3_dispatch.c @@ -232,7 +232,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, #endif #endif -#if defined(BLAKE3_USE_NEON) +#if BLAKE3_USE_NEON == 1 blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out); return; @@ -269,7 +269,7 @@ size_t blake3_simd_degree(void) { } #endif #endif -#if defined(BLAKE3_USE_NEON) +#if BLAKE3_USE_NEON == 1 return 4; #endif return 1; diff --git a/c/blake3_impl.h b/c/blake3_impl.h index 18f201f..ba2e91c 100644 --- a/c/blake3_impl.h +++ b/c/blake3_impl.h @@ -49,13 +49,18 @@ enum blake3_flags { #include #endif -#if defined(IS_AARCH64) && !defined(BLAKE3_NO_NEON) && !defined(BLAKE3_USE_NEON) -#define BLAKE3_USE_NEON +#if !defined(BLAKE3_USE_NEON) + // If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness + #if defined(IS_AARCH64) + #define BLAKE3_USE_NEON 1 + #else + #define BLAKE3_USE_NEON 0 + #endif #endif #if defined(IS_X86) #define MAX_SIMD_DEGREE 16 -#elif defined(BLAKE3_USE_NEON) +#elif BLAKE3_USE_NEON == 1 #define MAX_SIMD_DEGREE 4 #else #define MAX_SIMD_DEGREE 1 @@ -265,7 +270,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs, #endif #endif -#if defined(BLAKE3_USE_NEON) +#if BLAKE3_USE_NEON == 1 void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, bool increment_counter,