mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-06-07 19:36:03 +02:00
Merge pull request #201 from symmetree-labs/master
Improve compile-time target detection for NEON
This commit is contained in:
commit
5957d7d48f
|
@ -149,6 +149,9 @@ jobs:
|
||||||
# Test the NEON implementation on ARM targets.
|
# Test the NEON implementation on ARM targets.
|
||||||
- run: cross test --target ${{ matrix.arch }} --features=neon
|
- run: cross test --target ${{ matrix.arch }} --features=neon
|
||||||
if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-')
|
if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-')
|
||||||
|
# NEON is enabled by default on aarch64, disabling it through the no_neon feature.
|
||||||
|
- run: cross test --target ${{ matrix.arch }} --features=no_neon
|
||||||
|
if: startsWith(matrix.arch, 'aarch64-')
|
||||||
# Test vectors. Note that this uses a hacky script due to path dependency limitations.
|
# Test vectors. Note that this uses a hacky script due to path dependency limitations.
|
||||||
- run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }}
|
- run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }}
|
||||||
# C code. Same issue with the hacky script.
|
# C code. Same issue with the hacky script.
|
||||||
|
|
|
@ -73,6 +73,7 @@ no_sse2 = []
|
||||||
no_sse41 = []
|
no_sse41 = []
|
||||||
no_avx2 = []
|
no_avx2 = []
|
||||||
no_avx512 = []
|
no_avx512 = []
|
||||||
|
no_neon = []
|
||||||
|
|
||||||
[package.metadata.docs.rs]
|
[package.metadata.docs.rs]
|
||||||
# Document Hasher::update_rayon on docs.rs.
|
# Document Hasher::update_rayon on docs.rs.
|
||||||
|
|
19
build.rs
19
build.rs
|
@ -17,6 +17,10 @@ fn is_neon() -> bool {
|
||||||
defined("CARGO_FEATURE_NEON")
|
defined("CARGO_FEATURE_NEON")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_no_neon() -> bool {
|
||||||
|
defined("CARGO_FEATURE_NO_NEON")
|
||||||
|
}
|
||||||
|
|
||||||
fn is_ci() -> bool {
|
fn is_ci() -> bool {
|
||||||
defined("BLAKE3_CI")
|
defined("BLAKE3_CI")
|
||||||
}
|
}
|
||||||
|
@ -44,6 +48,14 @@ fn is_x86_32() -> bool {
|
||||||
arch == "i386" || arch == "i586" || arch == "i686"
|
arch == "i386" || arch == "i586" || arch == "i686"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_arm() -> bool {
|
||||||
|
is_armv7() || is_aarch64() || target_components()[0] == "arm"
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_aarch64() -> bool {
|
||||||
|
target_components()[0] == "aarch64"
|
||||||
|
}
|
||||||
|
|
||||||
fn is_armv7() -> bool {
|
fn is_armv7() -> bool {
|
||||||
target_components()[0] == "armv7"
|
target_components()[0] == "armv7"
|
||||||
}
|
}
|
||||||
|
@ -218,6 +230,10 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
panic!("It doesn't make sense to enable both \"pure\" and \"neon\".");
|
panic!("It doesn't make sense to enable both \"pure\" and \"neon\".");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if is_no_neon() && is_neon() {
|
||||||
|
panic!("It doesn't make sense to enable both \"no_neon\" and \"neon\".");
|
||||||
|
}
|
||||||
|
|
||||||
if is_x86_64() || is_x86_32() {
|
if is_x86_64() || is_x86_32() {
|
||||||
let support = c_compiler_support();
|
let support = c_compiler_support();
|
||||||
if is_x86_32() || should_prefer_intrinsics() || is_pure() || support == NoCompiler {
|
if is_x86_32() || should_prefer_intrinsics() || is_pure() || support == NoCompiler {
|
||||||
|
@ -237,7 +253,8 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if is_neon() {
|
if (is_arm() && is_neon()) || (!is_no_neon() && !is_pure() && is_aarch64()) {
|
||||||
|
println!("cargo:rustc-cfg=blake3_neon");
|
||||||
build_neon_c_intrinsics();
|
build_neon_c_intrinsics();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,10 +38,14 @@ ASM_TARGETS += blake3_avx512_x86-64_unix.S
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifdef BLAKE3_USE_NEON
|
ifdef BLAKE3_USE_NEON
|
||||||
EXTRAFLAGS += -DBLAKE3_USE_NEON
|
EXTRAFLAGS += -DBLAKE3_USE_NEON=1
|
||||||
TARGETS += blake3_neon.o
|
TARGETS += blake3_neon.o
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef BLAKE3_NO_NEON
|
||||||
|
EXTRAFLAGS += -DBLAKE3_USE_NEON=0
|
||||||
|
endif
|
||||||
|
|
||||||
all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS)
|
all: blake3.c blake3_dispatch.c blake3_portable.c main.c $(TARGETS)
|
||||||
$(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $(NAME) $(LDFLAGS)
|
$(CC) $(CFLAGS) $(EXTRAFLAGS) $^ -o $(NAME) $(LDFLAGS)
|
||||||
|
|
||||||
|
|
17
c/README.md
17
c/README.md
|
@ -250,15 +250,24 @@ gcc -shared -O3 -o libblake3.so -DBLAKE3_NO_SSE2 -DBLAKE3_NO_SSE41 -DBLAKE3_NO_A
|
||||||
|
|
||||||
## ARM NEON
|
## ARM NEON
|
||||||
|
|
||||||
The NEON implementation is not enabled by default on ARM, since not all
|
The NEON implementation is enabled by default on AARCH64, but not on
|
||||||
ARM targets support it. To enable it, set `BLAKE3_USE_NEON=1`. Here's an
|
other ARM targets, since not all of them support it. To enable it, set
|
||||||
example of building a shared library on ARM Linux with NEON support:
|
`BLAKE3_USE_NEON=1`. Here's an example of building a shared library on
|
||||||
|
ARM Linux with NEON support:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON blake3.c blake3_dispatch.c \
|
gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=1 blake3.c blake3_dispatch.c \
|
||||||
blake3_portable.c blake3_neon.c
|
blake3_portable.c blake3_neon.c
|
||||||
```
|
```
|
||||||
|
|
||||||
|
To explicitiy disable using NEON instructions on AARCH64, set
|
||||||
|
`BLAKE3_USE_NEON=0`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
gcc -shared -O3 -o libblake3.so -DBLAKE3_USE_NEON=0 blake3.c blake3_dispatch.c \
|
||||||
|
blake3_portable.c
|
||||||
|
```
|
||||||
|
|
||||||
Note that on some targets (ARMv7 in particular), extra flags may be
|
Note that on some targets (ARMv7 in particular), extra flags may be
|
||||||
required to activate NEON support in the compiler. If you see an error
|
required to activate NEON support in the compiler. If you see an error
|
||||||
like...
|
like...
|
||||||
|
|
|
@ -22,6 +22,10 @@ fn is_armv7() -> bool {
|
||||||
target_components()[0] == "armv7"
|
target_components()[0] == "armv7"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_aarch64() -> bool {
|
||||||
|
target_components()[0] == "aarch64"
|
||||||
|
}
|
||||||
|
|
||||||
// Windows targets may be using the MSVC toolchain or the GNU toolchain. The
|
// Windows targets may be using the MSVC toolchain or the GNU toolchain. The
|
||||||
// right compiler flags to use depend on the toolchain. (And we don't want to
|
// right compiler flags to use depend on the toolchain. (And we don't want to
|
||||||
// use flag_if_supported, because we don't want features to be silently
|
// use flag_if_supported, because we don't want features to be silently
|
||||||
|
@ -148,10 +152,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
avx512_build.compile("blake3_avx512");
|
avx512_build.compile("blake3_avx512");
|
||||||
}
|
}
|
||||||
|
|
||||||
// We only build NEON code here if 1) it's requested and 2) the root crate
|
// We only build NEON code here if
|
||||||
// is not already building it. The only time this will really happen is if
|
// 1) it's requested
|
||||||
// you build this crate by hand with the "neon" feature for some reason.
|
// and 2) the root crate is not already building it.
|
||||||
if defined("CARGO_FEATURE_NEON") {
|
// The only time this will really happen is if you build this
|
||||||
|
// crate by hand with the "neon" feature for some reason.
|
||||||
|
//
|
||||||
|
// In addition, 3) if the target is aarch64, NEON is on by default.
|
||||||
|
if defined("CARGO_FEATURE_NEON") || is_aarch64() {
|
||||||
let mut neon_build = new_build();
|
let mut neon_build = new_build();
|
||||||
neon_build.file(c_dir_path("blake3_neon.c"));
|
neon_build.file(c_dir_path("blake3_neon.c"));
|
||||||
// ARMv7 platforms that support NEON generally need the following
|
// ARMv7 platforms that support NEON generally need the following
|
||||||
|
|
|
@ -232,7 +232,7 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(BLAKE3_USE_NEON)
|
#if BLAKE3_USE_NEON == 1
|
||||||
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
|
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
|
||||||
increment_counter, flags, flags_start, flags_end, out);
|
increment_counter, flags, flags_start, flags_end, out);
|
||||||
return;
|
return;
|
||||||
|
@ -269,7 +269,7 @@ size_t blake3_simd_degree(void) {
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
#if defined(BLAKE3_USE_NEON)
|
#if BLAKE3_USE_NEON == 1
|
||||||
return 4;
|
return 4;
|
||||||
#endif
|
#endif
|
||||||
return 1;
|
return 1;
|
||||||
|
|
|
@ -38,6 +38,10 @@ enum blake3_flags {
|
||||||
#define IS_X86_32
|
#define IS_X86_32
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||||
|
#define IS_AARCH64
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(IS_X86)
|
#if defined(IS_X86)
|
||||||
#if defined(_MSC_VER)
|
#if defined(_MSC_VER)
|
||||||
#include <intrin.h>
|
#include <intrin.h>
|
||||||
|
@ -45,9 +49,18 @@ enum blake3_flags {
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if !defined(BLAKE3_USE_NEON)
|
||||||
|
// If BLAKE3_USE_NEON not manually set, autodetect based on AArch64ness
|
||||||
|
#if defined(IS_AARCH64)
|
||||||
|
#define BLAKE3_USE_NEON 1
|
||||||
|
#else
|
||||||
|
#define BLAKE3_USE_NEON 0
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#if defined(IS_X86)
|
#if defined(IS_X86)
|
||||||
#define MAX_SIMD_DEGREE 16
|
#define MAX_SIMD_DEGREE 16
|
||||||
#elif defined(BLAKE3_USE_NEON)
|
#elif BLAKE3_USE_NEON == 1
|
||||||
#define MAX_SIMD_DEGREE 4
|
#define MAX_SIMD_DEGREE 4
|
||||||
#else
|
#else
|
||||||
#define MAX_SIMD_DEGREE 1
|
#define MAX_SIMD_DEGREE 1
|
||||||
|
@ -257,7 +270,7 @@ void blake3_hash_many_avx512(const uint8_t *const *inputs, size_t num_inputs,
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(BLAKE3_USE_NEON)
|
#if BLAKE3_USE_NEON == 1
|
||||||
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
|
void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
|
||||||
size_t blocks, const uint32_t key[8],
|
size_t blocks, const uint32_t key[8],
|
||||||
uint64_t counter, bool increment_counter,
|
uint64_t counter, bool increment_counter,
|
||||||
|
|
|
@ -94,7 +94,7 @@ mod avx2;
|
||||||
#[cfg(blake3_avx512_ffi)]
|
#[cfg(blake3_avx512_ffi)]
|
||||||
#[path = "ffi_avx512.rs"]
|
#[path = "ffi_avx512.rs"]
|
||||||
mod avx512;
|
mod avx512;
|
||||||
#[cfg(feature = "neon")]
|
#[cfg(blake3_neon)]
|
||||||
#[path = "ffi_neon.rs"]
|
#[path = "ffi_neon.rs"]
|
||||||
mod neon;
|
mod neon;
|
||||||
mod portable;
|
mod portable;
|
||||||
|
|
|
@ -10,7 +10,7 @@ cfg_if::cfg_if! {
|
||||||
pub const MAX_SIMD_DEGREE: usize = 8;
|
pub const MAX_SIMD_DEGREE: usize = 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if #[cfg(feature = "neon")] {
|
} else if #[cfg(blake3_neon)] {
|
||||||
pub const MAX_SIMD_DEGREE: usize = 4;
|
pub const MAX_SIMD_DEGREE: usize = 4;
|
||||||
} else {
|
} else {
|
||||||
pub const MAX_SIMD_DEGREE: usize = 1;
|
pub const MAX_SIMD_DEGREE: usize = 1;
|
||||||
|
@ -30,7 +30,7 @@ cfg_if::cfg_if! {
|
||||||
pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if #[cfg(feature = "neon")] {
|
} else if #[cfg(blake3_neon)] {
|
||||||
pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
|
||||||
} else {
|
} else {
|
||||||
pub const MAX_SIMD_DEGREE_OR_2: usize = 2;
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 2;
|
||||||
|
@ -49,7 +49,7 @@ pub enum Platform {
|
||||||
#[cfg(blake3_avx512_ffi)]
|
#[cfg(blake3_avx512_ffi)]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
AVX512,
|
AVX512,
|
||||||
#[cfg(feature = "neon")]
|
#[cfg(blake3_neon)]
|
||||||
NEON,
|
NEON,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ impl Platform {
|
||||||
}
|
}
|
||||||
// We don't use dynamic feature detection for NEON. If the "neon"
|
// We don't use dynamic feature detection for NEON. If the "neon"
|
||||||
// feature is on, NEON is assumed to be supported.
|
// feature is on, NEON is assumed to be supported.
|
||||||
#[cfg(feature = "neon")]
|
#[cfg(blake3_neon)]
|
||||||
{
|
{
|
||||||
return Platform::NEON;
|
return Platform::NEON;
|
||||||
}
|
}
|
||||||
|
@ -95,7 +95,7 @@ impl Platform {
|
||||||
#[cfg(blake3_avx512_ffi)]
|
#[cfg(blake3_avx512_ffi)]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX512 => 16,
|
Platform::AVX512 => 16,
|
||||||
#[cfg(feature = "neon")]
|
#[cfg(blake3_neon)]
|
||||||
Platform::NEON => 4,
|
Platform::NEON => 4,
|
||||||
};
|
};
|
||||||
debug_assert!(degree <= MAX_SIMD_DEGREE);
|
debug_assert!(degree <= MAX_SIMD_DEGREE);
|
||||||
|
@ -129,7 +129,7 @@ impl Platform {
|
||||||
crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
|
crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
|
||||||
},
|
},
|
||||||
// No NEON compress_in_place() implementation yet.
|
// No NEON compress_in_place() implementation yet.
|
||||||
#[cfg(feature = "neon")]
|
#[cfg(blake3_neon)]
|
||||||
Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags),
|
Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -161,7 +161,7 @@ impl Platform {
|
||||||
crate::avx512::compress_xof(cv, block, block_len, counter, flags)
|
crate::avx512::compress_xof(cv, block, block_len, counter, flags)
|
||||||
},
|
},
|
||||||
// No NEON compress_xof() implementation yet.
|
// No NEON compress_xof() implementation yet.
|
||||||
#[cfg(feature = "neon")]
|
#[cfg(blake3_neon)]
|
||||||
Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags),
|
Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -256,7 +256,7 @@ impl Platform {
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
// Assumed to be safe if the "neon" feature is on.
|
// Assumed to be safe if the "neon" feature is on.
|
||||||
#[cfg(feature = "neon")]
|
#[cfg(blake3_neon)]
|
||||||
Platform::NEON => unsafe {
|
Platform::NEON => unsafe {
|
||||||
crate::neon::hash_many(
|
crate::neon::hash_many(
|
||||||
inputs,
|
inputs,
|
||||||
|
@ -315,7 +315,7 @@ impl Platform {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "neon")]
|
#[cfg(blake3_neon)]
|
||||||
pub fn neon() -> Option<Self> {
|
pub fn neon() -> Option<Self> {
|
||||||
// Assumed to be safe if the "neon" feature is on.
|
// Assumed to be safe if the "neon" feature is on.
|
||||||
Some(Self::NEON)
|
Some(Self::NEON)
|
||||||
|
|
Loading…
Reference in New Issue