mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-05-21 11:06:04 +02:00
refactor the Cargo feature set
The biggest change here is that assembly implementations are enabled by default. Added features: - "pure" (Pure Rust, with no C or assembly implementations.) Removed features: - "c" (Now basically the default.) Renamed features: - "c_prefer_intrinsics" -> "prefer_intrinsics" - "c_neon" -> "neon" Unchanged: - "rayon" - "std" (Still the only feature on by default.)
This commit is contained in:
parent
7caf1ad4bb
commit
e06a0f255a
|
@ -30,27 +30,27 @@ jobs:
|
||||||
- run: cargo test --features=rayon
|
- run: cargo test --features=rayon
|
||||||
# no_std tests.
|
# no_std tests.
|
||||||
- run: cargo test --no-default-features
|
- run: cargo test --no-default-features
|
||||||
# Test the x86 assembly implementations. Use -vv to log compiler commands.
|
# Test the intrinsics implementations.
|
||||||
- run: cargo test --features=c -vv
|
- run: cargo test --features=prefer_intrinsics
|
||||||
# Test the C intrinsics implementations. Use -vv to log compiler commands.
|
# Test the pure Rust build.
|
||||||
- run: cargo test --features=c,c_prefer_intrinsics -vv
|
- run: cargo test --features=pure
|
||||||
# Test release mode. This does more iterations in test_fuzz_hasher.
|
# Test release mode. This does more iterations in test_fuzz_hasher.
|
||||||
- run: cargo test --release
|
- run: cargo test --release
|
||||||
- run: cargo test --release --features=c
|
- run: cargo test --release --features=prefer_intrinsics
|
||||||
- run: cargo test --release --features=c,c_prefer_intrinsics
|
- run: cargo test --release --features=pure
|
||||||
# Test benchmarks. RUSTC_BOOTSTRAP=1 lets this run on non-nightly toolchains.
|
# Test benchmarks. RUSTC_BOOTSTRAP=1 lets this run on non-nightly toolchains.
|
||||||
- run: cargo test --benches --features=c
|
- run: cargo test --benches
|
||||||
env:
|
env:
|
||||||
RUSTC_BOOTSTRAP: 1
|
RUSTC_BOOTSTRAP: 1
|
||||||
# Test vectors.
|
# Test vectors.
|
||||||
- name: test vectors
|
- name: test vectors
|
||||||
run: cargo test
|
run: cargo test
|
||||||
working-directory: ./test_vectors
|
working-directory: ./test_vectors
|
||||||
- name: test vectors C assembly
|
- name: test vectors intrinsics
|
||||||
run: cargo test --features=c
|
run: cargo test --features=prefer_intrinsics
|
||||||
working-directory: ./test_vectors
|
working-directory: ./test_vectors
|
||||||
- name: test vectors C intrinsics
|
- name: test vectors pure
|
||||||
run: cargo test --features=c,c_prefer_intrinsics
|
run: cargo test --features=pure
|
||||||
working-directory: ./test_vectors
|
working-directory: ./test_vectors
|
||||||
# Test b3sum.
|
# Test b3sum.
|
||||||
- name: test b3sum
|
- name: test b3sum
|
||||||
|
@ -93,7 +93,7 @@ jobs:
|
||||||
# Test the portable implementation on everything.
|
# Test the portable implementation on everything.
|
||||||
- run: cross test --target ${{ matrix.arch }}
|
- run: cross test --target ${{ matrix.arch }}
|
||||||
# Test the NEON implementation on ARM targets.
|
# Test the NEON implementation on ARM targets.
|
||||||
- run: cross test --target ${{ matrix.arch }} --features=c_neon
|
- run: cross test --target ${{ matrix.arch }} --features=neon
|
||||||
if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-')
|
if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-')
|
||||||
# Test vectors. Note that this uses a hacky script due to path dependency limitations.
|
# Test vectors. Note that this uses a hacky script due to path dependency limitations.
|
||||||
- run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }}
|
- run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }}
|
||||||
|
|
59
Cargo.toml
59
Cargo.toml
|
@ -11,27 +11,43 @@ edition = "2018"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["std"]
|
default = ["std"]
|
||||||
# The "c" feature includes C and assembly SIMD implementations of the
|
|
||||||
# compression function for x86 platforms, called via FFI. (Currently it has no
|
# By default on x86_64, this crate uses Samuel Neves' hand-written assembly
|
||||||
# effect on other platforms.) This requires a C toolchain on the build machine.
|
# implementations for SSE4.1, AVX2, and AVX512. (These provide both the best
|
||||||
# This is necessary for AVX-512 support, which is not yet stable in Rust, and
|
# runtime performance, and the fastest build times.) And by default on 32-bit
|
||||||
# the assembly implementations also perform better than those using Rust/LLVM
|
# x86, this crate uses Rust intrinsics implementations for SSE4.1 and AVX2, and
|
||||||
# intrinsics. As with the Rust implementations, these C and assembly
|
# a C intrinsics implementation for AVX-512. Enabling the "pure" feature
|
||||||
# implementations participate in runtime CPU feature detection, and the
|
# disables all FFI to C and assembly implementations, leaving only the Rust
|
||||||
# resulting binary is portable.
|
# intrinsics implementations for SSE4.1 and AVX2. This removes the dependency
|
||||||
c = []
|
# on a C compiler/assembler, which can be helpful for certain applications.
|
||||||
# Normally x86-64 builds prefer assembly implementations over C intrinsics. The
|
# Library crates should generally avoid this feature, so that each binary crate
|
||||||
# assembly implementations perform better, perform most consistently across
|
# is free to make its own decision about build dependencies.
|
||||||
# compilers, and are much faster to build. However, this feature makes the
|
pure = []
|
||||||
# build use the C intrinsics implementations instead. This is mainly for
|
|
||||||
# testing purposes, and most callers will not want to use it.
|
# As described above, on x86_64 this crate uses assembly implementations by
|
||||||
c_prefer_intrinsics = []
|
# default. Enabling the "prefer_intrinsics" feature makes this crate use
|
||||||
|
# intrinsics implementations on both 32-bit and 64-bit x86. This is mainly for
|
||||||
|
# testing, and calling crates should not need it.
|
||||||
|
prefer_intrinsics = []
|
||||||
|
|
||||||
# The NEON implementation does not participate in dynamic feature detection,
|
# The NEON implementation does not participate in dynamic feature detection,
|
||||||
# which is currently x86-only. If "c_neon" is on, NEON support is assumed. Note
|
# which is currently x86-only. If "neon" is on, NEON support is assumed. Note
|
||||||
# that AArch64 always supports NEON, but support on ARMv7 varies.
|
# that AArch64 always supports NEON, but support on ARMv7 varies. The NEON
|
||||||
c_neon = []
|
# implementation uses C intrinsics and requires a C compiler.
|
||||||
|
neon = []
|
||||||
|
|
||||||
|
# This crate uses libstd for std::io trait implementations, and also for
|
||||||
|
# runtime CPU feature detection. This feature is enabled by default. If you use
|
||||||
|
# --no-default-features, the only way to use the SIMD implementations in this
|
||||||
|
# crate is to enable the corresponding instruction sets statically for the
|
||||||
|
# entire build, with e.g. RUSTFLAGS="-C target-cpu=native".
|
||||||
std = ["digest/std"]
|
std = ["digest/std"]
|
||||||
|
|
||||||
|
# The "rayon" feature (defined below as an optional dependency) enables the
|
||||||
|
# join::RayonJoin type, which can be used with Hasher::update_with_join to
|
||||||
|
# perform multi-threaded hashing. However, even if this feature is enabled, all
|
||||||
|
# other APIs remain single-threaded.
|
||||||
|
|
||||||
[package.metadata.docs.rs]
|
[package.metadata.docs.rs]
|
||||||
# Document blake3::join::RayonJoin on docs.rs.
|
# Document blake3::join::RayonJoin on docs.rs.
|
||||||
features = ["rayon"]
|
features = ["rayon"]
|
||||||
|
@ -40,13 +56,6 @@ features = ["rayon"]
|
||||||
arrayref = "0.3.5"
|
arrayref = "0.3.5"
|
||||||
arrayvec = { version = "0.5.1", default-features = false, features = ["array-sizes-33-128"] }
|
arrayvec = { version = "0.5.1", default-features = false, features = ["array-sizes-33-128"] }
|
||||||
constant_time_eq = "0.1.5"
|
constant_time_eq = "0.1.5"
|
||||||
# A performance note for the "rayon" feature: Multi-threading can have
|
|
||||||
# significant overhead for small inputs, particularly on x86 where individual
|
|
||||||
# cores are very fast. On the other hand, on slower platforms like ARM,
|
|
||||||
# multi-threading can be beneficial for all inputs. There's no one input size
|
|
||||||
# threshold that would work well everywhere, and this crate doesn't try to be
|
|
||||||
# clever. If you're going to enable the "rayon" feature, you should benchmark
|
|
||||||
# it for your specific use case.
|
|
||||||
rayon = { version = "1.2.1", optional = true }
|
rayon = { version = "1.2.1", optional = true }
|
||||||
cfg-if = "0.1.10"
|
cfg-if = "0.1.10"
|
||||||
digest = "0.8.1"
|
digest = "0.8.1"
|
||||||
|
|
31
README.md
31
README.md
|
@ -1,4 +1,4 @@
|
||||||
# <a href="#"><img src="media/BLAKE3.svg" alt="BLAKE3" height=50></a>
|
# <a href="#"><img src="media/BLAKE3.svg" alt="BLAKE3" height=50></a> [![Actions Status](https://github.com/BLAKE3-team/BLAKE3/workflows/tests/badge.svg)](https://github.com/BLAKE3-team/BLAKE3/actions)
|
||||||
|
|
||||||
BLAKE3 is a cryptographic hash function that is:
|
BLAKE3 is a cryptographic hash function that is:
|
||||||
|
|
||||||
|
@ -33,23 +33,19 @@ with BLAKE3.
|
||||||
This repository is the official implementation of BLAKE3. It includes:
|
This repository is the official implementation of BLAKE3. It includes:
|
||||||
|
|
||||||
* The [`blake3`](https://crates.io/crates/blake3) Rust crate, which
|
* The [`blake3`](https://crates.io/crates/blake3) Rust crate, which
|
||||||
includes optimized SIMD implementations, with runtime CPU feature
|
includes optimized SIMD implementations for SSE4.1, AVX2, AVX-512, and
|
||||||
detection on x86. SSE4.1 and AVX2 are supported in pure Rust. The `c`
|
NEON, with automatic runtime CPU feature detection on x86. The
|
||||||
feature enables C/assembly implementations and AVX-512 support. The
|
optional `rayon` feature also enables multi-threading.
|
||||||
`c_neon` feature enables ARM NEON support. Multi-threading is also
|
|
||||||
supported, and the `rayon` feature provides a
|
|
||||||
[Rayon](https://github.com/rayon-rs/rayon)-based implementation.
|
|
||||||
|
|
||||||
* The [`b3sum`](https://crates.io/crates/b3sum) Rust crate, which
|
* The [`b3sum`](https://crates.io/crates/b3sum) Rust crate, which
|
||||||
provides a command line interface. You can install it from
|
provides a command line interface. It uses multi-threading by default,
|
||||||
[crates.io](https://crates.io/crates/b3sum) with `cargo install
|
making it an order of magnitude faster than e.g. `sha256sum` on
|
||||||
b3sum`. It enables the `rayon` and `c` features of the `blake3` crate
|
typical desktop hardware.
|
||||||
by default.
|
|
||||||
|
|
||||||
* The [C implementation](c), which like the Rust implementation includes
|
* The [C implementation](c), which like the Rust implementation includes
|
||||||
SIMD code and dynamic CPU feature detection on x86. Unlike the Rust
|
SIMD code and runtime CPU feature detection on x86. Unlike the Rust
|
||||||
implementation, it's not currently multi-threaded. The
|
implementation, it's not currently multi-threaded. See
|
||||||
[README](c/README.md) provides build examples.
|
[`c/README.md`](c/README.md).
|
||||||
|
|
||||||
* The [reference implementation](reference_impl/reference_impl.rs),
|
* The [reference implementation](reference_impl/reference_impl.rs),
|
||||||
which is discussed in Section 5.1 of the [BLAKE3
|
which is discussed in Section 5.1 of the [BLAKE3
|
||||||
|
@ -59,9 +55,6 @@ This repository is the official implementation of BLAKE3. It includes:
|
||||||
port that doesn't need multi-threading or SIMD optimizations, start
|
port that doesn't need multi-threading or SIMD optimizations, start
|
||||||
here.
|
here.
|
||||||
|
|
||||||
* [![Actions
|
|
||||||
Status](https://github.com/BLAKE3-team/BLAKE3/workflows/tests/badge.svg)](https://github.com/BLAKE3-team/BLAKE3/actions)
|
|
||||||
|
|
||||||
BLAKE3 was designed by:
|
BLAKE3 was designed by:
|
||||||
|
|
||||||
* [@oconnor663 ](https://github.com/oconnor663) (Jack O'Connor)
|
* [@oconnor663 ](https://github.com/oconnor663) (Jack O'Connor)
|
||||||
|
@ -108,7 +101,9 @@ time b3sum /tmp/bigfile
|
||||||
### The `blake3` crate
|
### The `blake3` crate
|
||||||
|
|
||||||
To use BLAKE3 from Rust code, add a dependency on the `blake3` crate to
|
To use BLAKE3 from Rust code, add a dependency on the `blake3` crate to
|
||||||
your `Cargo.toml`. Here's an example of hashing some input bytes:
|
your `Cargo.toml`. Note that by default, unless the `pure` feature is
|
||||||
|
enabled, building `blake3` requires a C compiler. Here's an example of
|
||||||
|
hashing some input bytes:
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
// Hash an input all at once.
|
// Hash an input all at once.
|
||||||
|
|
|
@ -9,9 +9,9 @@ readme = "README.md"
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["c"]
|
neon = ["blake3/neon"]
|
||||||
c = ["blake3/c"]
|
prefer_intrinsics = ["blake3/prefer_intrinsics"]
|
||||||
c_neon = ["blake3/c_neon"]
|
pure = ["blake3/pure"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0.25"
|
anyhow = "1.0.25"
|
||||||
|
|
|
@ -69,7 +69,7 @@ fn bench_single_compression_sse41(b: &mut Bencher) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(feature = "c")]
|
#[cfg(not(feature = "pure"))]
|
||||||
fn bench_single_compression_avx512(b: &mut Bencher) {
|
fn bench_single_compression_avx512(b: &mut Bencher) {
|
||||||
if let Some(platform) = Platform::avx512() {
|
if let Some(platform) = Platform::avx512() {
|
||||||
bench_single_compression_fn(b, platform);
|
bench_single_compression_fn(b, platform);
|
||||||
|
@ -119,7 +119,7 @@ fn bench_many_chunks_avx2(b: &mut Bencher) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(feature = "c")]
|
#[cfg(not(feature = "pure"))]
|
||||||
fn bench_many_chunks_avx512(b: &mut Bencher) {
|
fn bench_many_chunks_avx512(b: &mut Bencher) {
|
||||||
if let Some(platform) = Platform::avx512() {
|
if let Some(platform) = Platform::avx512() {
|
||||||
bench_many_chunks_fn(b, platform);
|
bench_many_chunks_fn(b, platform);
|
||||||
|
@ -127,7 +127,7 @@ fn bench_many_chunks_avx512(b: &mut Bencher) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "neon")]
|
||||||
fn bench_many_chunks_neon(b: &mut Bencher) {
|
fn bench_many_chunks_neon(b: &mut Bencher) {
|
||||||
if let Some(platform) = Platform::neon() {
|
if let Some(platform) = Platform::neon() {
|
||||||
bench_many_chunks_fn(b, platform);
|
bench_many_chunks_fn(b, platform);
|
||||||
|
@ -178,7 +178,7 @@ fn bench_many_parents_avx2(b: &mut Bencher) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(feature = "c")]
|
#[cfg(not(feature = "pure"))]
|
||||||
fn bench_many_parents_avx512(b: &mut Bencher) {
|
fn bench_many_parents_avx512(b: &mut Bencher) {
|
||||||
if let Some(platform) = Platform::avx512() {
|
if let Some(platform) = Platform::avx512() {
|
||||||
bench_many_parents_fn(b, platform);
|
bench_many_parents_fn(b, platform);
|
||||||
|
@ -186,7 +186,7 @@ fn bench_many_parents_avx512(b: &mut Bencher) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "neon")]
|
||||||
fn bench_many_parents_neon(b: &mut Bencher) {
|
fn bench_many_parents_neon(b: &mut Bencher) {
|
||||||
if let Some(platform) = Platform::neon() {
|
if let Some(platform) = Platform::neon() {
|
||||||
bench_many_parents_fn(b, platform);
|
bench_many_parents_fn(b, platform);
|
||||||
|
|
72
build.rs
72
build.rs
|
@ -49,21 +49,16 @@ fn new_build() -> cc::Build {
|
||||||
}
|
}
|
||||||
|
|
||||||
const WINDOWS_MSVC_ERROR: &str = r#"
|
const WINDOWS_MSVC_ERROR: &str = r#"
|
||||||
The "c" feature is enabled, but your version of the MSVC C compiler does not
|
Your version of the MSVC C compiler does not support the "/arch:AVX512" flag.
|
||||||
support the "/arch:AVX512" flag. If you are building the "b3sum" or "bao_bin"
|
If you're building the "b3sum" or "bao_bin" crates, you can disable AVX-512
|
||||||
crates, you can disable AVX-512 with Cargo's "--no-default-features" flag.
|
with "--features=pure". Other crates might or might not support this
|
||||||
(Note that this also disables other default features like Rayon-based
|
workaround.
|
||||||
multithreading, which you can re-enable with "--features=rayon".) Other crates
|
|
||||||
might or might not support this workaround.
|
|
||||||
"#;
|
"#;
|
||||||
|
|
||||||
const GNU_ERROR: &str = r#"
|
const GNU_ERROR: &str = r#"
|
||||||
The "c" feature is enabled, but your C compiler does not support the
|
Your C compiler does not support the "-mavx512f" flag. If you are building the
|
||||||
"-mavx512f" flag. If you are building the "b3sum" or "bao_bin" crates, you can
|
"b3sum" or "bao_bin" crates, you can disable AVX-512 with "--features=pure".
|
||||||
disable AVX-512 with Cargo's "--no-default-features" flag. (Note that this also
|
Other crates might or might not support this workaround.
|
||||||
disables other default features like Rayon-based multithreading, which you can
|
|
||||||
re-enable with "--features=rayon".) Other crates might or might not support
|
|
||||||
this workaround.
|
|
||||||
"#;
|
"#;
|
||||||
|
|
||||||
fn check_for_avx512_compiler_support() {
|
fn check_for_avx512_compiler_support() {
|
||||||
|
@ -82,11 +77,15 @@ fn check_for_avx512_compiler_support() {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
if defined("CARGO_FEATURE_C") {
|
if defined("CARGO_FEATURE_PURE") && defined("CARGO_FEATURE_NEON") {
|
||||||
|
panic!("It doesn't make sense to enable both \"pure\" and \"neon\".");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_x86_64() || is_x86_32()) && !defined("CARGO_FEATURE_PURE") {
|
||||||
check_for_avx512_compiler_support();
|
check_for_avx512_compiler_support();
|
||||||
if is_x86_64() && !defined("CARGO_FEATURE_C_PREFER_INTRINSICS") {
|
if is_x86_64() && !defined("CARGO_FEATURE_PREFER_INTRINSICS") {
|
||||||
// On 64-bit, use the assembly implementations, unless the
|
// On 64-bit, use the assembly implementations, unless the
|
||||||
// "c_prefer_intrinsics" feature is enabled.
|
// "prefer_intrinsics" feature is enabled.
|
||||||
if is_windows_msvc() {
|
if is_windows_msvc() {
|
||||||
let mut build = new_build();
|
let mut build = new_build();
|
||||||
build.file("c/blake3_sse41_x86-64_windows_msvc.asm");
|
build.file("c/blake3_sse41_x86-64_windows_msvc.asm");
|
||||||
|
@ -109,40 +108,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
build.file("c/blake3_avx512_x86-64_unix.S");
|
build.file("c/blake3_avx512_x86-64_unix.S");
|
||||||
build.compile("blake3_asm");
|
build.compile("blake3_asm");
|
||||||
}
|
}
|
||||||
} else if is_x86_64() || is_x86_32() {
|
|
||||||
// Assembly implementations are only for 64-bit. On 32-bit, or if
|
|
||||||
// the "c_prefer_intrinsics" feature is enabled, use the
|
|
||||||
// intrinsics-based C implementations. These each need to be
|
|
||||||
// compiled separately, with the corresponding instruction set
|
|
||||||
// extension explicitly enabled in the compiler.
|
|
||||||
|
|
||||||
let mut sse41_build = new_build();
|
|
||||||
sse41_build.file("c/blake3_sse41.c");
|
|
||||||
if is_windows_msvc() {
|
|
||||||
// /arch:SSE2 is the default on x86 and undefined on x86_64:
|
|
||||||
// https://docs.microsoft.com/en-us/cpp/build/reference/arch-x86
|
|
||||||
// It also includes SSE4.1 intrinsics:
|
|
||||||
// https://stackoverflow.com/a/32183222/823869
|
|
||||||
} else {
|
} else {
|
||||||
sse41_build.flag("-msse4.1");
|
// Assembly implementations are only for x86_64. On 32-bit x86, or
|
||||||
}
|
// if the "prefer_intrinsics" feature is enabled, use the Rust
|
||||||
sse41_build.compile("blake3_sse41");
|
// intrinsics implementations for SSE4.1 and AVX2, and the C
|
||||||
|
// intrinsics implementation for AVX-512. (Stable Rust does not yet
|
||||||
let mut avx2_build = new_build();
|
// support AVX-512.)
|
||||||
avx2_build.file("c/blake3_avx2.c");
|
|
||||||
if is_windows_msvc() {
|
|
||||||
avx2_build.flag("/arch:AVX2");
|
|
||||||
} else {
|
|
||||||
avx2_build.flag("-mavx2");
|
|
||||||
}
|
|
||||||
avx2_build.compile("blake3_avx2");
|
|
||||||
|
|
||||||
let mut avx512_build = new_build();
|
let mut avx512_build = new_build();
|
||||||
avx512_build.file("c/blake3_avx512.c");
|
avx512_build.file("c/blake3_avx512.c");
|
||||||
if is_windows_msvc() {
|
if is_windows_msvc() {
|
||||||
// Note that a lot of versions of MSVC don't support /arch:AVX512,
|
|
||||||
// and they'll discard it with a warning, hopefully leading to a
|
|
||||||
// build error.
|
|
||||||
avx512_build.flag("/arch:AVX512");
|
avx512_build.flag("/arch:AVX512");
|
||||||
} else {
|
} else {
|
||||||
avx512_build.flag("-mavx512f");
|
avx512_build.flag("-mavx512f");
|
||||||
|
@ -153,16 +127,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
avx512_build.flag("-fno-asynchronous-unwind-tables");
|
avx512_build.flag("-fno-asynchronous-unwind-tables");
|
||||||
}
|
}
|
||||||
avx512_build.compile("blake3_avx512");
|
avx512_build.compile("blake3_avx512");
|
||||||
} else {
|
|
||||||
// Currently no effect for non-x86 platforms.
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if defined("CARGO_FEATURE_C_NEON") {
|
if defined("CARGO_FEATURE_NEON") {
|
||||||
let mut build = new_build();
|
let mut build = new_build();
|
||||||
// Note that blake3_neon.c normally depends on the blake3_portable.c
|
// Note that blake3_neon.c normally depends on the blake3_portable.c
|
||||||
// for the single-instance compression function, but we expose
|
// for the single-instance compression function, but we expose
|
||||||
// portable.rs over FFI instead. See c_neon.rs.
|
// portable.rs over FFI instead. See ffi_neon.rs.
|
||||||
build.file("c/blake3_neon.c");
|
build.file("c/blake3_neon.c");
|
||||||
// ARMv7 platforms that support NEON generally need the following
|
// ARMv7 platforms that support NEON generally need the following
|
||||||
// flags. AArch64 supports NEON by default and does not support -mfpu.
|
// flags. AArch64 supports NEON by default and does not support -mfpu.
|
||||||
|
@ -173,7 +145,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
build.compile("blake3_neon");
|
build.compile("blake3_neon");
|
||||||
}
|
}
|
||||||
|
|
||||||
// The `cc` crate does not automatically emit rerun-if directives for the
|
// The `cc` crate doesn't automatically emit rerun-if directives for the
|
||||||
// environment variables it supports, in particular for $CC. We expect to
|
// environment variables it supports, in particular for $CC. We expect to
|
||||||
// do a lot of benchmarking across different compilers, so we explicitly
|
// do a lot of benchmarking across different compilers, so we explicitly
|
||||||
// add the variables that we're likely to need.
|
// add the variables that we're likely to need.
|
||||||
|
|
|
@ -53,7 +53,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
base_build.file("../blake3.c");
|
base_build.file("../blake3.c");
|
||||||
base_build.file("../blake3_dispatch.c");
|
base_build.file("../blake3_dispatch.c");
|
||||||
base_build.file("../blake3_portable.c");
|
base_build.file("../blake3_portable.c");
|
||||||
base_build.compile("blake3_c_base");
|
base_build.compile("blake3_base");
|
||||||
|
|
||||||
if is_x86_64() && !defined("CARGO_FEATURE_PREFER_INTRINSICS") {
|
if is_x86_64() && !defined("CARGO_FEATURE_PREFER_INTRINSICS") {
|
||||||
// On 64-bit, use the assembly implementations, unless the
|
// On 64-bit, use the assembly implementations, unless the
|
||||||
|
@ -134,7 +134,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
neon_build.flag("-mfpu=neon-vfpv4");
|
neon_build.flag("-mfpu=neon-vfpv4");
|
||||||
neon_build.flag("-mfloat-abi=hard");
|
neon_build.flag("-mfloat-abi=hard");
|
||||||
}
|
}
|
||||||
neon_build.compile("blake3_c_neon");
|
neon_build.compile("blake3_neon");
|
||||||
}
|
}
|
||||||
|
|
||||||
// The `cc` crate does not automatically emit rerun-if directives for the
|
// The `cc` crate does not automatically emit rerun-if directives for the
|
||||||
|
|
|
@ -75,7 +75,7 @@ mod test {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_hash_many() {
|
fn test_hash_many() {
|
||||||
// This entire file is gated on feature="c_neon", so NEON support is
|
// This entire file is gated on feature="neon", so NEON support is
|
||||||
// assumed here.
|
// assumed here.
|
||||||
crate::test::test_hash_many_fn(hash_many, hash_many);
|
crate::test::test_hash_many_fn(hash_many, hash_many);
|
||||||
}
|
}
|
49
src/lib.rs
49
src/lib.rs
|
@ -29,13 +29,15 @@
|
||||||
//!
|
//!
|
||||||
//! # Cargo Features
|
//! # Cargo Features
|
||||||
//!
|
//!
|
||||||
//! The `c` feature provides optimized assembly implementations and also
|
|
||||||
//! AVX-512 support. It is off by default. If activated, a C compiler for the
|
|
||||||
//! target platform is required.
|
|
||||||
//!
|
|
||||||
//! The `rayon` feature provides [Rayon]-based multi-threading, in particular
|
//! The `rayon` feature provides [Rayon]-based multi-threading, in particular
|
||||||
//! the [`join::RayonJoin`] type for use with [`Hasher::update_with_join`]. It
|
//! the [`join::RayonJoin`] type for use with [`Hasher::update_with_join`]. It
|
||||||
//! is also off by default, but on for [docs.rs].
|
//! is disabled by default, but enabled for [docs.rs].
|
||||||
|
//!
|
||||||
|
//! The `pure` feature disables all FFI to C and assembly implementations,
|
||||||
|
//! leaving only the Rust intrinsics implementations for SSE4.1 and AVX2. This
|
||||||
|
//! removes the dependency on a C compiler/assembler. Library crates should
|
||||||
|
//! generally avoid this feature, so that each binary crate is free to make its
|
||||||
|
//! own decision about build dependencies.
|
||||||
//!
|
//!
|
||||||
//! [BLAKE3]: https://blake3.io
|
//! [BLAKE3]: https://blake3.io
|
||||||
//! [Rayon]: https://github.com/rayon-rs/rayon
|
//! [Rayon]: https://github.com/rayon-rs/rayon
|
||||||
|
@ -63,23 +65,38 @@ pub mod platform;
|
||||||
mod portable;
|
mod portable;
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
cfg_if::cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(feature = "c")] {
|
if #[cfg(feature = "pure")] {
|
||||||
#[path = "c_sse41.rs"]
|
// When "pure" is enabled, use only Rust intrinsics. Stable Rust
|
||||||
mod sse41;
|
// doesn't currently support AVX-512.
|
||||||
#[path = "c_avx2.rs"]
|
|
||||||
mod avx2;
|
|
||||||
#[path = "c_avx512.rs"]
|
|
||||||
mod avx512;
|
|
||||||
} else {
|
|
||||||
#[path = "rust_sse41.rs"]
|
#[path = "rust_sse41.rs"]
|
||||||
mod sse41;
|
mod sse41;
|
||||||
#[path = "rust_avx2.rs"]
|
#[path = "rust_avx2.rs"]
|
||||||
mod avx2;
|
mod avx2;
|
||||||
// Stable Rust does not currently support AVX-512.
|
} else if #[cfg(any(target_arch = "x86", feature = "prefer_intrinsics"))] {
|
||||||
|
// When "prefer_intrinsics" is enabled, or on 32-bit x86 (which our
|
||||||
|
// assembly implementations don't support), use Rust intrinsics for
|
||||||
|
// SSE4.1 and AVX2, and use C intrinsics for AVX-512. In this case,
|
||||||
|
// build.rs will compile and link c/blake3_avx512.c.
|
||||||
|
#[path = "rust_sse41.rs"]
|
||||||
|
mod sse41;
|
||||||
|
#[path = "rust_avx2.rs"]
|
||||||
|
mod avx2;
|
||||||
|
#[path = "ffi_avx512.rs"]
|
||||||
|
mod avx512;
|
||||||
|
} else {
|
||||||
|
// Otherwise on x86_64, use assembly implementations for everything. In
|
||||||
|
// this case, build.rs will compile and link all the assembly files for
|
||||||
|
// the target platform (Unix, Windows MSVC, or Windows GNU).
|
||||||
|
#[path = "ffi_sse41.rs"]
|
||||||
|
mod sse41;
|
||||||
|
#[path = "ffi_avx2.rs"]
|
||||||
|
mod avx2;
|
||||||
|
#[path = "ffi_avx512.rs"]
|
||||||
|
mod avx512;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "neon")]
|
||||||
#[path = "c_neon.rs"]
|
#[path = "ffi_neon.rs"]
|
||||||
mod neon;
|
mod neon;
|
||||||
|
|
||||||
pub mod traits;
|
pub mod traits;
|
||||||
|
|
|
@ -4,13 +4,13 @@ use arrayref::{array_mut_ref, array_ref};
|
||||||
cfg_if::cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
|
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
|
||||||
cfg_if::cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(feature = "c")] {
|
if #[cfg(feature = "pure")] {
|
||||||
pub const MAX_SIMD_DEGREE: usize = 16;
|
|
||||||
} else {
|
|
||||||
pub const MAX_SIMD_DEGREE: usize = 8;
|
pub const MAX_SIMD_DEGREE: usize = 8;
|
||||||
|
} else {
|
||||||
|
pub const MAX_SIMD_DEGREE: usize = 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if #[cfg(feature = "c_neon")] {
|
} else if #[cfg(feature = "neon")] {
|
||||||
pub const MAX_SIMD_DEGREE: usize = 4;
|
pub const MAX_SIMD_DEGREE: usize = 4;
|
||||||
} else {
|
} else {
|
||||||
pub const MAX_SIMD_DEGREE: usize = 1;
|
pub const MAX_SIMD_DEGREE: usize = 1;
|
||||||
|
@ -24,13 +24,13 @@ cfg_if::cfg_if! {
|
||||||
cfg_if::cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
|
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
|
||||||
cfg_if::cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(feature = "c")] {
|
if #[cfg(feature = "pure")] {
|
||||||
pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
|
|
||||||
} else {
|
|
||||||
pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
|
||||||
|
} else {
|
||||||
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if #[cfg(feature = "c_neon")] {
|
} else if #[cfg(feature = "neon")] {
|
||||||
pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
|
||||||
} else {
|
} else {
|
||||||
pub const MAX_SIMD_DEGREE_OR_2: usize = 2;
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 2;
|
||||||
|
@ -44,10 +44,10 @@ pub enum Platform {
|
||||||
SSE41,
|
SSE41,
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
AVX2,
|
AVX2,
|
||||||
#[cfg(feature = "c")]
|
#[cfg(not(feature = "pure"))]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
AVX512,
|
AVX512,
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "neon")]
|
||||||
NEON,
|
NEON,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -56,7 +56,7 @@ impl Platform {
|
||||||
pub fn detect() -> Self {
|
pub fn detect() -> Self {
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
{
|
{
|
||||||
#[cfg(feature = "c")]
|
#[cfg(not(feature = "pure"))]
|
||||||
{
|
{
|
||||||
if avx512_detected() {
|
if avx512_detected() {
|
||||||
return Platform::AVX512;
|
return Platform::AVX512;
|
||||||
|
@ -69,9 +69,9 @@ impl Platform {
|
||||||
return Platform::SSE41;
|
return Platform::SSE41;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// We don't use dynamic feature detection for NEON. If the "c_neon"
|
// We don't use dynamic feature detection for NEON. If the "neon"
|
||||||
// feature is on, NEON is assumed to be supported.
|
// feature is on, NEON is assumed to be supported.
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "neon")]
|
||||||
{
|
{
|
||||||
return Platform::NEON;
|
return Platform::NEON;
|
||||||
}
|
}
|
||||||
|
@ -85,10 +85,10 @@ impl Platform {
|
||||||
Platform::SSE41 => 4,
|
Platform::SSE41 => 4,
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX2 => 8,
|
Platform::AVX2 => 8,
|
||||||
#[cfg(feature = "c")]
|
#[cfg(not(feature = "pure"))]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX512 => 16,
|
Platform::AVX512 => 16,
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "neon")]
|
||||||
Platform::NEON => 4,
|
Platform::NEON => 4,
|
||||||
};
|
};
|
||||||
debug_assert!(degree <= MAX_SIMD_DEGREE);
|
debug_assert!(degree <= MAX_SIMD_DEGREE);
|
||||||
|
@ -111,13 +111,13 @@ impl Platform {
|
||||||
crate::sse41::compress_in_place(cv, block, block_len, counter, flags)
|
crate::sse41::compress_in_place(cv, block, block_len, counter, flags)
|
||||||
},
|
},
|
||||||
// Safe because detect() checked for platform support.
|
// Safe because detect() checked for platform support.
|
||||||
#[cfg(feature = "c")]
|
#[cfg(not(feature = "pure"))]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX512 => unsafe {
|
Platform::AVX512 => unsafe {
|
||||||
crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
|
crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
|
||||||
},
|
},
|
||||||
// No NEON compress_in_place() implementation yet.
|
// No NEON compress_in_place() implementation yet.
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "neon")]
|
||||||
Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags),
|
Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -138,13 +138,13 @@ impl Platform {
|
||||||
crate::sse41::compress_xof(cv, block, block_len, counter, flags)
|
crate::sse41::compress_xof(cv, block, block_len, counter, flags)
|
||||||
},
|
},
|
||||||
// Safe because detect() checked for platform support.
|
// Safe because detect() checked for platform support.
|
||||||
#[cfg(feature = "c")]
|
#[cfg(not(feature = "pure"))]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX512 => unsafe {
|
Platform::AVX512 => unsafe {
|
||||||
crate::avx512::compress_xof(cv, block, block_len, counter, flags)
|
crate::avx512::compress_xof(cv, block, block_len, counter, flags)
|
||||||
},
|
},
|
||||||
// No NEON compress_xof() implementation yet.
|
// No NEON compress_xof() implementation yet.
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "neon")]
|
||||||
Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags),
|
Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -210,7 +210,7 @@ impl Platform {
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
// Safe because detect() checked for platform support.
|
// Safe because detect() checked for platform support.
|
||||||
#[cfg(feature = "c")]
|
#[cfg(not(feature = "pure"))]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX512 => unsafe {
|
Platform::AVX512 => unsafe {
|
||||||
crate::avx512::hash_many(
|
crate::avx512::hash_many(
|
||||||
|
@ -224,8 +224,8 @@ impl Platform {
|
||||||
out,
|
out,
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
// Assumed to be safe if the "c_neon" feature is on.
|
// Assumed to be safe if the "neon" feature is on.
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "neon")]
|
||||||
Platform::NEON => unsafe {
|
Platform::NEON => unsafe {
|
||||||
crate::neon::hash_many(
|
crate::neon::hash_many(
|
||||||
inputs,
|
inputs,
|
||||||
|
@ -265,7 +265,7 @@ impl Platform {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "c")]
|
#[cfg(not(feature = "pure"))]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
pub fn avx512() -> Option<Self> {
|
pub fn avx512() -> Option<Self> {
|
||||||
if avx512_detected() {
|
if avx512_detected() {
|
||||||
|
@ -275,17 +275,17 @@ impl Platform {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "neon")]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
pub fn neon() -> Option<Self> {
|
pub fn neon() -> Option<Self> {
|
||||||
// Assumed to be safe if the "c_neon" feature is on.
|
// Assumed to be safe if the "neon" feature is on.
|
||||||
Some(Self::NEON)
|
Some(Self::NEON)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note that AVX-512 is divided into multiple featuresets, and we use two of
|
// Note that AVX-512 is divided into multiple featuresets, and we use two of
|
||||||
// them, F and VL.
|
// them, F and VL.
|
||||||
#[cfg(feature = "c")]
|
#[cfg(not(feature = "pure"))]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn avx512_detected() -> bool {
|
pub fn avx512_detected() -> bool {
|
||||||
|
|
|
@ -4,10 +4,9 @@ version = "0.0.0"
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = []
|
neon = ["blake3/neon"]
|
||||||
c = ["blake3/c"]
|
prefer_intrinsics = ["blake3/prefer_intrinsics"]
|
||||||
c_prefer_intrinsics = ["blake3/c_prefer_intrinsics"]
|
pure = ["blake3/pure"]
|
||||||
c_neon = ["blake3/c_neon"]
|
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
# If you ever change these path dependencies, you'll probably need to update
|
# If you ever change these path dependencies, you'll probably need to update
|
||||||
|
|
Loading…
Reference in New Issue