1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-06-07 19:36:03 +02:00

Merge pull request #201 from symmetree-labs/master

Improve compile-time target detection for NEON
This commit is contained in:
Jack O'Connor 2021-10-14 21:48:02 -04:00 committed by GitHub
commit 5957d7d48f
Signed by: GitHub
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 79 additions and 24 deletions

View File

@ -149,6 +149,9 @@ jobs:
# Test the NEON implementation on ARM targets. # Test the NEON implementation on ARM targets.
- run: cross test --target ${{ matrix.arch }} --features=neon - run: cross test --target ${{ matrix.arch }} --features=neon
if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-') if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-')
# NEON is enabled by default on aarch64, disabling it through the no_neon feature.
- run: cross test --target ${{ matrix.arch }} --features=no_neon
if: startsWith(matrix.arch, 'aarch64-')
# Test vectors. Note that this uses a hacky script due to path dependency limitations. # Test vectors. Note that this uses a hacky script due to path dependency limitations.
- run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }} - run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }}
# C code. Same issue with the hacky script. # C code. Same issue with the hacky script.

View File

@ -73,6 +73,7 @@ no_sse2 = []
no_sse41 = [] no_sse41 = []
no_avx2 = [] no_avx2 = []
no_avx512 = [] no_avx512 = []
no_neon = []
[package.metadata.docs.rs] [package.metadata.docs.rs]
# Document Hasher::update_rayon on docs.rs. # Document Hasher::update_rayon on docs.rs.

View File

@ -17,6 +17,10 @@ fn is_neon() -> bool {
defined("CARGO_FEATURE_NEON") defined("CARGO_FEATURE_NEON")
} }
fn is_no_neon() -> bool {
defined("CARGO_FEATURE_NO_NEON")
}
fn is_ci() -> bool { fn is_ci() -> bool {
defined("BLAKE3_CI") defined("BLAKE3_CI")
} }
@ -44,6 +48,14 @@ fn is_x86_32() -> bool {
arch == "i386" || arch == "i586" || arch == "i686" arch == "i386" || arch == "i586" || arch == "i686"
} }
fn is_arm() -> bool {
is_armv7() || is_aarch64() || target_components()[0] == "arm"
}
fn is_aarch64() -> bool {
target_components()[0] == "aarch64"
}
fn is_armv7() -> bool { fn is_armv7() -> bool {
target_components()[0] == "armv7" target_components()[0] == "armv7"
} }
@ -218,6 +230,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
panic!("It doesn't make sense to enable both \"pure\" and \"neon\"."); panic!("It doesn't make sense to enable both \"pure\" and \"neon\".");
} }
if is_no_neon() && is_neon() {
panic!("It doesn't make sense to enable both \"no_neon\" and \"neon\".");
}
if is_x86_64() || is_x86_32() { if is_x86_64() || is_x86_32() {
let support = c_compiler_support(); let support = c_compiler_support();
if is_x86_32() || should_prefer_intrinsics() || is_pure() || support == NoCompiler { if is_x86_32() || should_prefer_intrinsics() || is_pure() || support == NoCompiler {
@ -237,7 +253,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
} }
} }
if is_neon() { if (is_arm() && is_neon()) || (!is_no_neon() && !is_pure() && is_aarch64()) {
println!("cargo:rustc-cfg=blake3_neon");
build_neon_c_intrinsics(); build_neon_c_intrinsics();
} }

View File

@ -38,10 +38,14 @@ ASM_TARGETS += blake3_avx512_x86-64_unix.S
endif endif
ifdef BLAKE3_USE_NEON ifdef BLAKE3_USE_NEON
EXTRAFLAGS += -DBLAKE3_USE_NEON EXTRAFLAGS += -DBLAKE3_USE_NEON=1
TARGETS += blake3_neon.o TARGETS += blake3_neon.o
endif endif
ifdef BLAKE3_NO_NEON
EXTRAFLAGS += -DBLAKE3_USE_NEON=0
endif
all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS) all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS)
$(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $(NAME) $(LDFLAGS) $(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $(NAME) $(LDFLAGS)

View File

@ -250,15 +250,24 @@ gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_A
## ARM NEON ## ARM NEON
The NEON implementation is not enabled by default on ARM, since not all The NEON implementation is enabled by default on AARCH64, but not on
ARM targets support it. To enable it, set `BLAKE3_USE_NEON=1`. Here's an other ARM targets, since not all of them support it. To enable it, set
example of building a shared library on ARM Linux with NEON support: `BLAKE3_USE_NEON=1`. Here's an example of building a shared library on
ARM Linux with NEON support:
```bash ```bash
gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON blake3.c blake3_dispatch.c \ gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=1 blake3.c blake3_dispatch.c \
blake3_portable.c blake3_neon.c blake3_portable.c blake3_neon.c
``` ```
To explicitiy disable using NEON instructions on AARCH64, set
`BLAKE3_USE_NEON=0`.
```bash
gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=0 blake3.c blake3_dispatch.c \
blake3_portable.c
```
Note that on some targets (ARMv7 in particular), extra flags may be Note that on some targets (ARMv7 in particular), extra flags may be
required to activate NEON support in the compiler. If you see an error required to activate NEON support in the compiler. If you see an error
like... like...

View File

@ -22,6 +22,10 @@ fn is_armv7() -> bool {
target_components()[0] == "armv7" target_components()[0] == "armv7"
} }
fn is_aarch64() -> bool {
target_components()[0] == "aarch64"
}
// Windows targets may be using the MSVC toolchain or the GNU toolchain. The // Windows targets may be using the MSVC toolchain or the GNU toolchain. The
// right compiler flags to use depend on the toolchain. (And we don't want to // right compiler flags to use depend on the toolchain. (And we don't want to
// use flag_if_supported, because we don't want features to be silently // use flag_if_supported, because we don't want features to be silently
@ -148,10 +152,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
avx512_build.compile("blake3_avx512"); avx512_build.compile("blake3_avx512");
} }
// We only build NEON code here if 1) it's requested and 2) the root crate // We only build NEON code here if
// is not already building it. The only time this will really happen is if // 1) it's requested
// you build this crate by hand with the "neon" feature for some reason. // and 2) the root crate is not already building it.
if defined("CARGO_FEATURE_NEON") { // The only time this will really happen is if you build this
// crate by hand with the "neon" feature for some reason.
//
// In addition, 3) if the target is aarch64, NEON is on by default.
if defined("CARGO_FEATURE_NEON") || is_aarch64() {
let mut neon_build = new_build(); let mut neon_build = new_build();
neon_build.file(c_dir_path("blake3_neon.c")); neon_build.file(c_dir_path("blake3_neon.c"));
// ARMv7 platforms that support NEON generally need the following // ARMv7 platforms that support NEON generally need the following

View File

@ -232,7 +232,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
#endif #endif
#endif #endif
#if defined(BLAKE3_USE_NEON) #if BLAKE3_USE_NEON == 1
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter, blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out); increment_counter, flags, flags_start, flags_end, out);
return; return;
@ -269,7 +269,7 @@ size_t blake3_simd_degree(void) {
} }
#endif #endif
#endif #endif
#if defined(BLAKE3_USE_NEON) #if BLAKE3_USE_NEON == 1
return 4; return 4;
#endif #endif
return 1; return 1;

View File

@ -38,6 +38,10 @@ enum blake3_flags {
#define IS_X86_32 #define IS_X86_32
#endif #endif
#if defined(__aarch64__) || defined(_M_ARM64)
#define IS_AARCH64
#endif
#if defined(IS_X86) #if defined(IS_X86)
#if defined(_MSC_VER) #if defined(_MSC_VER)
#include <intrin.h> #include <intrin.h>
@ -45,9 +49,18 @@ enum blake3_flags {
#include <immintrin.h> #include <immintrin.h>
#endif #endif
#if !defined(BLAKE3_USE_NEON)
// If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
#if defined(IS_AARCH64)
#define BLAKE3_USE_NEON 1
#else
#define BLAKE3_USE_NEON 0
#endif
#endif
#if defined(IS_X86) #if defined(IS_X86)
#define MAX_SIMD_DEGREE 16 #define MAX_SIMD_DEGREE 16
#elif defined(BLAKE3_USE_NEON) #elif BLAKE3_USE_NEON == 1
#define MAX_SIMD_DEGREE 4 #define MAX_SIMD_DEGREE 4
#else #else
#define MAX_SIMD_DEGREE 1 #define MAX_SIMD_DEGREE 1
@ -257,7 +270,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
#endif #endif
#endif #endif
#if defined(BLAKE3_USE_NEON) #if BLAKE3_USE_NEON == 1
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs, void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8], size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter, uint64_t counter, bool increment_counter,

View File

@ -94,7 +94,7 @@ mod avx2;
#[cfg(blake3_avx512_ffi)] #[cfg(blake3_avx512_ffi)]
#[path = "ffi_avx512.rs"] #[path = "ffi_avx512.rs"]
mod avx512; mod avx512;
#[cfg(feature = "neon")] #[cfg(blake3_neon)]
#[path = "ffi_neon.rs"] #[path = "ffi_neon.rs"]
mod neon; mod neon;
mod portable; mod portable;

View File

@ -10,7 +10,7 @@ cfg_if::cfg_if! {
pub const MAX_SIMD_DEGREE: usize = 8; pub const MAX_SIMD_DEGREE: usize = 8;
} }
} }
} else if #[cfg(feature = "neon")] { } else if #[cfg(blake3_neon)] {
pub const MAX_SIMD_DEGREE: usize = 4; pub const MAX_SIMD_DEGREE: usize = 4;
} else { } else {
pub const MAX_SIMD_DEGREE: usize = 1; pub const MAX_SIMD_DEGREE: usize = 1;
@ -30,7 +30,7 @@ cfg_if::cfg_if! {
pub const MAX_SIMD_DEGREE_OR_2: usize = 8; pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
} }
} }
} else if #[cfg(feature = "neon")] { } else if #[cfg(blake3_neon)] {
pub const MAX_SIMD_DEGREE_OR_2: usize = 4; pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
} else { } else {
pub const MAX_SIMD_DEGREE_OR_2: usize = 2; pub const MAX_SIMD_DEGREE_OR_2: usize = 2;
@ -49,7 +49,7 @@ pub enum Platform {
#[cfg(blake3_avx512_ffi)] #[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
AVX512, AVX512,
#[cfg(feature = "neon")] #[cfg(blake3_neon)]
NEON, NEON,
} }
@ -76,7 +76,7 @@ impl Platform {
} }
// We don't use dynamic feature detection for NEON. If the "neon" // We don't use dynamic feature detection for NEON. If the "neon"
// feature is on, NEON is assumed to be supported. // feature is on, NEON is assumed to be supported.
#[cfg(feature = "neon")] #[cfg(blake3_neon)]
{ {
return Platform::NEON; return Platform::NEON;
} }
@ -95,7 +95,7 @@ impl Platform {
#[cfg(blake3_avx512_ffi)] #[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => 16, Platform::AVX512 => 16,
#[cfg(feature = "neon")] #[cfg(blake3_neon)]
Platform::NEON => 4, Platform::NEON => 4,
}; };
debug_assert!(degree <= MAX_SIMD_DEGREE); debug_assert!(degree <= MAX_SIMD_DEGREE);
@ -129,7 +129,7 @@ impl Platform {
crate::avx512::compress_in_place(cv, block, block_len, counter, flags) crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
}, },
// No NEON compress_in_place() implementation yet. // No NEON compress_in_place() implementation yet.
#[cfg(feature = "neon")] #[cfg(blake3_neon)]
Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags), Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags),
} }
} }
@ -161,7 +161,7 @@ impl Platform {
crate::avx512::compress_xof(cv, block, block_len, counter, flags) crate::avx512::compress_xof(cv, block, block_len, counter, flags)
}, },
// No NEON compress_xof() implementation yet. // No NEON compress_xof() implementation yet.
#[cfg(feature = "neon")] #[cfg(blake3_neon)]
Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags), Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags),
} }
} }
@ -256,7 +256,7 @@ impl Platform {
) )
}, },
// Assumed to be safe if the "neon" feature is on. // Assumed to be safe if the "neon" feature is on.
#[cfg(feature = "neon")] #[cfg(blake3_neon)]
Platform::NEON => unsafe { Platform::NEON => unsafe {
crate::neon::hash_many( crate::neon::hash_many(
inputs, inputs,
@ -315,7 +315,7 @@ impl Platform {
} }
} }
#[cfg(feature = "neon")] #[cfg(blake3_neon)]
pub fn neon() -> Option<Self> { pub fn neon() -> Option<Self> {
// Assumed to be safe if the "neon" feature is on. // Assumed to be safe if the "neon" feature is on.
Some(Self::NEON) Some(Self::NEON)