1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-05-04 02:36:08 +02:00

refactor the Cargo feature set

The biggest change here is that assembly implementations are enabled by
default.

Added features:
- "pure" (Pure Rust, with no C or assembly implementations.)

Removed features:
- "c" (Now basically the default.)

Renamed features:
- "c_prefer_intrinsics" -> "prefer_intrinsics"
- "c_neon" -> "neon"

Unchanged:
- "rayon"
- "std" (Still the only feature on by default.)
This commit is contained in:
Jack O'Connor 2020-03-28 17:27:31 -04:00
parent 7caf1ad4bb
commit e06a0f255a
14 changed files with 155 additions and 163 deletions

View File

@ -30,27 +30,27 @@ jobs:
- run: cargo test --features=rayon - run: cargo test --features=rayon
# no_std tests. # no_std tests.
- run: cargo test --no-default-features - run: cargo test --no-default-features
# Test the x86 assembly implementations. Use -vv to log compiler commands. # Test the intrinsics implementations.
- run: cargo test --features=c -vv - run: cargo test --features=prefer_intrinsics
# Test the C intrinsics implementations. Use -vv to log compiler commands. # Test the pure Rust build.
- run: cargo test --features=c,c_prefer_intrinsics -vv - run: cargo test --features=pure
# Test release mode. This does more iterations in test_fuzz_hasher. # Test release mode. This does more iterations in test_fuzz_hasher.
- run: cargo test --release - run: cargo test --release
- run: cargo test --release --features=c - run: cargo test --release --features=prefer_intrinsics
- run: cargo test --release --features=c,c_prefer_intrinsics - run: cargo test --release --features=pure
# Test benchmarks. RUSTC_BOOTSTRAP=1 lets this run on non-nightly toolchains. # Test benchmarks. RUSTC_BOOTSTRAP=1 lets this run on non-nightly toolchains.
- run: cargo test --benches --features=c - run: cargo test --benches
env: env:
RUSTC_BOOTSTRAP: 1 RUSTC_BOOTSTRAP: 1
# Test vectors. # Test vectors.
- name: test vectors - name: test vectors
run: cargo test run: cargo test
working-directory: ./test_vectors working-directory: ./test_vectors
- name: test vectors C assembly - name: test vectors intrinsics
run: cargo test --features=c run: cargo test --features=prefer_intrinsics
working-directory: ./test_vectors working-directory: ./test_vectors
- name: test vectors C intrinsics - name: test vectors pure
run: cargo test --features=c,c_prefer_intrinsics run: cargo test --features=pure
working-directory: ./test_vectors working-directory: ./test_vectors
# Test b3sum. # Test b3sum.
- name: test b3sum - name: test b3sum
@ -93,7 +93,7 @@ jobs:
# Test the portable implementation on everything. # Test the portable implementation on everything.
- run: cross test --target ${{ matrix.arch }} - run: cross test --target ${{ matrix.arch }}
# Test the NEON implementation on ARM targets. # Test the NEON implementation on ARM targets.
- run: cross test --target ${{ matrix.arch }} --features=c_neon - run: cross test --target ${{ matrix.arch }} --features=neon
if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-') if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-')
# Test vectors. Note that this uses a hacky script due to path dependency limitations. # Test vectors. Note that this uses a hacky script due to path dependency limitations.
- run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }} - run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }}

View File

@ -11,27 +11,43 @@ edition = "2018"
[features] [features]
default = ["std"] default = ["std"]
# The "c" feature includes C and assembly SIMD implementations of the
# compression function for x86 platforms, called via FFI. (Currently it has no # By default on x86_64, this crate uses Samuel Neves' hand-written assembly
# effect on other platforms.) This requires a C toolchain on the build machine. # implementations for SSE4.1, AVX2, and AVX512. (These provide both the best
# This is necessary for AVX-512 support, which is not yet stable in Rust, and # runtime performance, and the fastest build times.) And by default on 32-bit
# the assembly implementations also perform better than those using Rust/LLVM # x86, this crate uses Rust intrinsics implementations for SSE4.1 and AVX2, and
# intrinsics. As with the Rust implementations, these C and assembly # a C intrinsics implementation for AVX-512. Enabling the "pure" feature
# implementations participate in runtime CPU feature detection, and the # disables all FFI to C and assembly implementations, leaving only the Rust
# resulting binary is portable. # intrinsics implementations for SSE4.1 and AVX2. This removes the dependency
c = [] # on a C compiler/assembler, which can be helpful for certain applications.
# Normally x86-64 builds prefer assembly implementations over C intrinsics. The # Library crates should generally avoid this feature, so that each binary crate
# assembly implementations perform better, perform most consistently across # is free to make its own decision about build dependencies.
# compilers, and are much faster to build. However, this feature makes the pure = []
# build use the C intrinsics implementations instead. This is mainly for
# testing purposes, and most callers will not want to use it. # As described above, on x86_64 this crate uses assembly implementations by
c_prefer_intrinsics = [] # default. Enabling the "prefer_intrinsics" feature makes this crate use
# intrinsics implementations on both 32-bit and 64-bit x86. This is mainly for
# testing, and calling crates should not need it.
prefer_intrinsics = []
# The NEON implementation does not participate in dynamic feature detection, # The NEON implementation does not participate in dynamic feature detection,
# which is currently x86-only. If "c_neon" is on, NEON support is assumed. Note # which is currently x86-only. If "neon" is on, NEON support is assumed. Note
# that AArch64 always supports NEON, but support on ARMv7 varies. # that AArch64 always supports NEON, but support on ARMv7 varies. The NEON
c_neon = [] # implementation uses C intrinsics and requires a C compiler.
neon = []
# This crate uses libstd for std::io trait implementations, and also for
# runtime CPU feature detection. This feature is enabled by default. If you use
# --no-default-features, the only way to use the SIMD implementations in this
# crate is to enable the corresponding instruction sets statically for the
# entire build, with e.g. RUSTFLAGS="-C target-cpu=native".
std = ["digest/std"] std = ["digest/std"]
# The "rayon" feature (defined below as an optional dependency) enables the
# join::RayonJoin type, which can be used with Hasher::update_with_join to
# perform multi-threaded hashing. However, even if this feature is enabled, all
# other APIs remain single-threaded.
[package.metadata.docs.rs] [package.metadata.docs.rs]
# Document blake3::join::RayonJoin on docs.rs. # Document blake3::join::RayonJoin on docs.rs.
features = ["rayon"] features = ["rayon"]
@ -40,13 +56,6 @@ features = ["rayon"]
arrayref = "0.3.5" arrayref = "0.3.5"
arrayvec = { version = "0.5.1", default-features = false, features = ["array-sizes-33-128"] } arrayvec = { version = "0.5.1", default-features = false, features = ["array-sizes-33-128"] }
constant_time_eq = "0.1.5" constant_time_eq = "0.1.5"
# A performance note for the "rayon" feature: Multi-threading can have
# significant overhead for small inputs, particularly on x86 where individual
# cores are very fast. On the other hand, on slower platforms like ARM,
# multi-threading can be beneficial for all inputs. There's no one input size
# threshold that would work well everywhere, and this crate doesn't try to be
# clever. If you're going to enable the "rayon" feature, you should benchmark
# it for your specific use case.
rayon = { version = "1.2.1", optional = true } rayon = { version = "1.2.1", optional = true }
cfg-if = "0.1.10" cfg-if = "0.1.10"
digest = "0.8.1" digest = "0.8.1"

View File

@ -1,4 +1,4 @@
# <a href="#"><img src="media/BLAKE3.svg" alt="BLAKE3" height=50></a> # <a href="#"><img src="media/BLAKE3.svg" alt="BLAKE3" height=50></a>&ensp;[![Actions Status](https://github.com/BLAKE3-team/BLAKE3/workflows/tests/badge.svg)](https://github.com/BLAKE3-team/BLAKE3/actions)
BLAKE3 is a cryptographic hash function that is: BLAKE3 is a cryptographic hash function that is:
@ -33,23 +33,19 @@ with BLAKE3.
This repository is the official implementation of BLAKE3. It includes: This repository is the official implementation of BLAKE3. It includes:
* The [`blake3`](https://crates.io/crates/blake3) Rust crate, which * The [`blake3`](https://crates.io/crates/blake3) Rust crate, which
includes optimized SIMD implementations, with runtime CPU feature includes optimized SIMD implementations for SSE4.1, AVX2, AVX-512, and
detection on x86. SSE4.1 and AVX2 are supported in pure Rust. The `c` NEON, with automatic runtime CPU feature detection on x86. The
feature enables C/assembly implementations and AVX-512 support. The optional `rayon` feature also enables multi-threading.
`c_neon` feature enables ARM NEON support. Multi-threading is also
supported, and the `rayon` feature provides a
[Rayon](https://github.com/rayon-rs/rayon)-based implementation.
* The [`b3sum`](https://crates.io/crates/b3sum) Rust crate, which * The [`b3sum`](https://crates.io/crates/b3sum) Rust crate, which
provides a command line interface. You can install it from provides a command line interface. It uses multi-threading by default,
[crates.io](https://crates.io/crates/b3sum) with `cargo install making it an order of magnitude faster than e.g. `sha256sum` on
b3sum`. It enables the `rayon` and `c` features of the `blake3` crate typical desktop hardware.
by default.
* The [C implementation](c), which like the Rust implementation includes * The [C implementation](c), which like the Rust implementation includes
SIMD code and dynamic CPU feature detection on x86. Unlike the Rust SIMD code and runtime CPU feature detection on x86. Unlike the Rust
implementation, it's not currently multi-threaded. The implementation, it's not currently multi-threaded. See
[README](c/README.md) provides build examples. [`c/README.md`](c/README.md).
* The [reference implementation](reference_impl/reference_impl.rs), * The [reference implementation](reference_impl/reference_impl.rs),
which is discussed in Section 5.1 of the [BLAKE3 which is discussed in Section 5.1 of the [BLAKE3
@ -59,9 +55,6 @@ This repository is the official implementation of BLAKE3. It includes:
port that doesn't need multi-threading or SIMD optimizations, start port that doesn't need multi-threading or SIMD optimizations, start
here. here.
* [![Actions
Status](https://github.com/BLAKE3-team/BLAKE3/workflows/tests/badge.svg)](https://github.com/BLAKE3-team/BLAKE3/actions)
BLAKE3 was designed by: BLAKE3 was designed by:
* [@oconnor663 ](https://github.com/oconnor663) (Jack O'Connor) * [@oconnor663 ](https://github.com/oconnor663) (Jack O'Connor)
@ -108,7 +101,9 @@ time b3sum /tmp/bigfile
### The `blake3` crate ### The `blake3` crate
To use BLAKE3 from Rust code, add a dependency on the `blake3` crate to To use BLAKE3 from Rust code, add a dependency on the `blake3` crate to
your `Cargo.toml`. Here's an example of hashing some input bytes: your `Cargo.toml`. Note that by default, unless the `pure` feature is
enabled, building `blake3` requires a C compiler. Here's an example of
hashing some input bytes:
```rust ```rust
// Hash an input all at once. // Hash an input all at once.

View File

@ -9,9 +9,9 @@ readme = "README.md"
edition = "2018" edition = "2018"
[features] [features]
default = ["c"] neon = ["blake3/neon"]
c = ["blake3/c"] prefer_intrinsics = ["blake3/prefer_intrinsics"]
c_neon = ["blake3/c_neon"] pure = ["blake3/pure"]
[dependencies] [dependencies]
anyhow = "1.0.25" anyhow = "1.0.25"

View File

@ -69,7 +69,7 @@ fn bench_single_compression_sse41(b: &mut Bencher) {
} }
#[bench] #[bench]
#[cfg(feature = "c")] #[cfg(not(feature = "pure"))]
fn bench_single_compression_avx512(b: &mut Bencher) { fn bench_single_compression_avx512(b: &mut Bencher) {
if let Some(platform) = Platform::avx512() { if let Some(platform) = Platform::avx512() {
bench_single_compression_fn(b, platform); bench_single_compression_fn(b, platform);
@ -119,7 +119,7 @@ fn bench_many_chunks_avx2(b: &mut Bencher) {
} }
#[bench] #[bench]
#[cfg(feature = "c")] #[cfg(not(feature = "pure"))]
fn bench_many_chunks_avx512(b: &mut Bencher) { fn bench_many_chunks_avx512(b: &mut Bencher) {
if let Some(platform) = Platform::avx512() { if let Some(platform) = Platform::avx512() {
bench_many_chunks_fn(b, platform); bench_many_chunks_fn(b, platform);
@ -127,7 +127,7 @@ fn bench_many_chunks_avx512(b: &mut Bencher) {
} }
#[bench] #[bench]
#[cfg(feature = "c_neon")] #[cfg(feature = "neon")]
fn bench_many_chunks_neon(b: &mut Bencher) { fn bench_many_chunks_neon(b: &mut Bencher) {
if let Some(platform) = Platform::neon() { if let Some(platform) = Platform::neon() {
bench_many_chunks_fn(b, platform); bench_many_chunks_fn(b, platform);
@ -178,7 +178,7 @@ fn bench_many_parents_avx2(b: &mut Bencher) {
} }
#[bench] #[bench]
#[cfg(feature = "c")] #[cfg(not(feature = "pure"))]
fn bench_many_parents_avx512(b: &mut Bencher) { fn bench_many_parents_avx512(b: &mut Bencher) {
if let Some(platform) = Platform::avx512() { if let Some(platform) = Platform::avx512() {
bench_many_parents_fn(b, platform); bench_many_parents_fn(b, platform);
@ -186,7 +186,7 @@ fn bench_many_parents_avx512(b: &mut Bencher) {
} }
#[bench] #[bench]
#[cfg(feature = "c_neon")] #[cfg(feature = "neon")]
fn bench_many_parents_neon(b: &mut Bencher) { fn bench_many_parents_neon(b: &mut Bencher) {
if let Some(platform) = Platform::neon() { if let Some(platform) = Platform::neon() {
bench_many_parents_fn(b, platform); bench_many_parents_fn(b, platform);

View File

@ -49,21 +49,16 @@ fn new_build() -> cc::Build {
} }
const WINDOWS_MSVC_ERROR: &str = r#" const WINDOWS_MSVC_ERROR: &str = r#"
The "c" feature is enabled, but your version of the MSVC C compiler does not Your version of the MSVC C compiler does not support the "/arch:AVX512" flag.
support the "/arch:AVX512" flag. If you are building the "b3sum" or "bao_bin" If you're building the "b3sum" or "bao_bin" crates, you can disable AVX-512
crates, you can disable AVX-512 with Cargo's "--no-default-features" flag. with "--features=pure". Other crates might or might not support this
(Note that this also disables other default features like Rayon-based workaround.
multithreading, which you can re-enable with "--features=rayon".) Other crates
might or might not support this workaround.
"#; "#;
const GNU_ERROR: &str = r#" const GNU_ERROR: &str = r#"
The "c" feature is enabled, but your C compiler does not support the Your C compiler does not support the "-mavx512f" flag. If you are building the
"-mavx512f" flag. If you are building the "b3sum" or "bao_bin" crates, you can "b3sum" or "bao_bin" crates, you can disable AVX-512 with "--features=pure".
disable AVX-512 with Cargo's "--no-default-features" flag. (Note that this also Other crates might or might not support this workaround.
disables other default features like Rayon-based multithreading, which you can
re-enable with "--features=rayon".) Other crates might or might not support
this workaround.
"#; "#;
fn check_for_avx512_compiler_support() { fn check_for_avx512_compiler_support() {
@ -82,11 +77,15 @@ fn check_for_avx512_compiler_support() {
} }
fn main() -> Result<(), Box<dyn std::error::Error>> { fn main() -> Result<(), Box<dyn std::error::Error>> {
if defined("CARGO_FEATURE_C") { if defined("CARGO_FEATURE_PURE") && defined("CARGO_FEATURE_NEON") {
panic!("It doesn't make sense to enable both \"pure\" and \"neon\".");
}
if (is_x86_64() || is_x86_32()) && !defined("CARGO_FEATURE_PURE") {
check_for_avx512_compiler_support(); check_for_avx512_compiler_support();
if is_x86_64() && !defined("CARGO_FEATURE_C_PREFER_INTRINSICS") { if is_x86_64() && !defined("CARGO_FEATURE_PREFER_INTRINSICS") {
// On 64-bit, use the assembly implementations, unless the // On 64-bit, use the assembly implementations, unless the
// "c_prefer_intrinsics" feature is enabled. // "prefer_intrinsics" feature is enabled.
if is_windows_msvc() { if is_windows_msvc() {
let mut build = new_build(); let mut build = new_build();
build.file("c/blake3_sse41_x86-64_windows_msvc.asm"); build.file("c/blake3_sse41_x86-64_windows_msvc.asm");
@ -109,40 +108,15 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
build.file("c/blake3_avx512_x86-64_unix.S"); build.file("c/blake3_avx512_x86-64_unix.S");
build.compile("blake3_asm"); build.compile("blake3_asm");
} }
} else if is_x86_64() || is_x86_32() { } else {
// Assembly implementations are only for 64-bit. On 32-bit, or if // Assembly implementations are only for x86_64. On 32-bit x86, or
// the "c_prefer_intrinsics" feature is enabled, use the // if the "prefer_intrinsics" feature is enabled, use the Rust
// intrinsics-based C implementations. These each need to be // intrinsics implementations for SSE4.1 and AVX2, and the C
// compiled separately, with the corresponding instruction set // intrinsics implementation for AVX-512. (Stable Rust does not yet
// extension explicitly enabled in the compiler. // support AVX-512.)
let mut sse41_build = new_build();
sse41_build.file("c/blake3_sse41.c");
if is_windows_msvc() {
// /arch:SSE2 is the default on x86 and undefined on x86_64:
// https://docs.microsoft.com/en-us/cpp/build/reference/arch-x86
// It also includes SSE4.1 intrinsics:
// https://stackoverflow.com/a/32183222/823869
} else {
sse41_build.flag("-msse4.1");
}
sse41_build.compile("blake3_sse41");
let mut avx2_build = new_build();
avx2_build.file("c/blake3_avx2.c");
if is_windows_msvc() {
avx2_build.flag("/arch:AVX2");
} else {
avx2_build.flag("-mavx2");
}
avx2_build.compile("blake3_avx2");
let mut avx512_build = new_build(); let mut avx512_build = new_build();
avx512_build.file("c/blake3_avx512.c"); avx512_build.file("c/blake3_avx512.c");
if is_windows_msvc() { if is_windows_msvc() {
// Note that a lot of versions of MSVC don't support /arch:AVX512,
// and they'll discard it with a warning, hopefully leading to a
// build error.
avx512_build.flag("/arch:AVX512"); avx512_build.flag("/arch:AVX512");
} else { } else {
avx512_build.flag("-mavx512f"); avx512_build.flag("-mavx512f");
@ -153,16 +127,14 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
avx512_build.flag("-fno-asynchronous-unwind-tables"); avx512_build.flag("-fno-asynchronous-unwind-tables");
} }
avx512_build.compile("blake3_avx512"); avx512_build.compile("blake3_avx512");
} else {
// Currently no effect for non-x86 platforms.
} }
} }
if defined("CARGO_FEATURE_C_NEON") { if defined("CARGO_FEATURE_NEON") {
let mut build = new_build(); let mut build = new_build();
// Note that blake3_neon.c normally depends on the blake3_portable.c // Note that blake3_neon.c normally depends on the blake3_portable.c
// for the single-instance compression function, but we expose // for the single-instance compression function, but we expose
// portable.rs over FFI instead. See c_neon.rs. // portable.rs over FFI instead. See ffi_neon.rs.
build.file("c/blake3_neon.c"); build.file("c/blake3_neon.c");
// ARMv7 platforms that support NEON generally need the following // ARMv7 platforms that support NEON generally need the following
// flags. AArch64 supports NEON by default and does not support -mfpu. // flags. AArch64 supports NEON by default and does not support -mfpu.
@ -173,7 +145,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
build.compile("blake3_neon"); build.compile("blake3_neon");
} }
// The `cc` crate does not automatically emit rerun-if directives for the // The `cc` crate doesn't automatically emit rerun-if directives for the
// environment variables it supports, in particular for $CC. We expect to // environment variables it supports, in particular for $CC. We expect to
// do a lot of benchmarking across different compilers, so we explicitly // do a lot of benchmarking across different compilers, so we explicitly
// add the variables that we're likely to need. // add the variables that we're likely to need.

View File

@ -53,7 +53,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
base_build.file("../blake3.c"); base_build.file("../blake3.c");
base_build.file("../blake3_dispatch.c"); base_build.file("../blake3_dispatch.c");
base_build.file("../blake3_portable.c"); base_build.file("../blake3_portable.c");
base_build.compile("blake3_c_base"); base_build.compile("blake3_base");
if is_x86_64() && !defined("CARGO_FEATURE_PREFER_INTRINSICS") { if is_x86_64() && !defined("CARGO_FEATURE_PREFER_INTRINSICS") {
// On 64-bit, use the assembly implementations, unless the // On 64-bit, use the assembly implementations, unless the
@ -134,7 +134,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
neon_build.flag("-mfpu=neon-vfpv4"); neon_build.flag("-mfpu=neon-vfpv4");
neon_build.flag("-mfloat-abi=hard"); neon_build.flag("-mfloat-abi=hard");
} }
neon_build.compile("blake3_c_neon"); neon_build.compile("blake3_neon");
} }
// The `cc` crate does not automatically emit rerun-if directives for the // The `cc` crate does not automatically emit rerun-if directives for the

View File

@ -75,7 +75,7 @@ mod test {
#[test] #[test]
fn test_hash_many() { fn test_hash_many() {
// This entire file is gated on feature="c_neon", so NEON support is // This entire file is gated on feature="neon", so NEON support is
// assumed here. // assumed here.
crate::test::test_hash_many_fn(hash_many, hash_many); crate::test::test_hash_many_fn(hash_many, hash_many);
} }

View File

@ -29,13 +29,15 @@
//! //!
//! # Cargo Features //! # Cargo Features
//! //!
//! The `c` feature provides optimized assembly implementations and also
//! AVX-512 support. It is off by default. If activated, a C compiler for the
//! target platform is required.
//!
//! The `rayon` feature provides [Rayon]-based multi-threading, in particular //! The `rayon` feature provides [Rayon]-based multi-threading, in particular
//! the [`join::RayonJoin`] type for use with [`Hasher::update_with_join`]. It //! the [`join::RayonJoin`] type for use with [`Hasher::update_with_join`]. It
//! is also off by default, but on for [docs.rs]. //! is disabled by default, but enabled for [docs.rs].
//!
//! The `pure` feature disables all FFI to C and assembly implementations,
//! leaving only the Rust intrinsics implementations for SSE4.1 and AVX2. This
//! removes the dependency on a C compiler/assembler. Library crates should
//! generally avoid this feature, so that each binary crate is free to make its
//! own decision about build dependencies.
//! //!
//! [BLAKE3]: https://blake3.io //! [BLAKE3]: https://blake3.io
//! [Rayon]: https://github.com/rayon-rs/rayon //! [Rayon]: https://github.com/rayon-rs/rayon
@ -63,23 +65,38 @@ pub mod platform;
mod portable; mod portable;
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
cfg_if::cfg_if! { cfg_if::cfg_if! {
if #[cfg(feature = "c")] { if #[cfg(feature = "pure")] {
#[path = "c_sse41.rs"] // When "pure" is enabled, use only Rust intrinsics. Stable Rust
mod sse41; // doesn't currently support AVX-512.
#[path = "c_avx2.rs"]
mod avx2;
#[path = "c_avx512.rs"]
mod avx512;
} else {
#[path = "rust_sse41.rs"] #[path = "rust_sse41.rs"]
mod sse41; mod sse41;
#[path = "rust_avx2.rs"] #[path = "rust_avx2.rs"]
mod avx2; mod avx2;
// Stable Rust does not currently support AVX-512. } else if #[cfg(any(target_arch = "x86", feature = "prefer_intrinsics"))] {
// When "prefer_intrinsics" is enabled, or on 32-bit x86 (which our
// assembly implementations don't support), use Rust intrinsics for
// SSE4.1 and AVX2, and use C intrinsics for AVX-512. In this case,
// build.rs will compile and link c/blake3_avx512.c.
#[path = "rust_sse41.rs"]
mod sse41;
#[path = "rust_avx2.rs"]
mod avx2;
#[path = "ffi_avx512.rs"]
mod avx512;
} else {
// Otherwise on x86_64, use assembly implementations for everything. In
// this case, build.rs will compile and link all the assembly files for
// the target platform (Unix, Windows MSVC, or Windows GNU).
#[path = "ffi_sse41.rs"]
mod sse41;
#[path = "ffi_avx2.rs"]
mod avx2;
#[path = "ffi_avx512.rs"]
mod avx512;
} }
} }
#[cfg(feature = "c_neon")] #[cfg(feature = "neon")]
#[path = "c_neon.rs"] #[path = "ffi_neon.rs"]
mod neon; mod neon;
pub mod traits; pub mod traits;

View File

@ -4,13 +4,13 @@ use arrayref::{array_mut_ref, array_ref};
cfg_if::cfg_if! { cfg_if::cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
cfg_if::cfg_if! { cfg_if::cfg_if! {
if #[cfg(feature = "c")] { if #[cfg(feature = "pure")] {
pub const MAX_SIMD_DEGREE: usize = 16;
} else {
pub const MAX_SIMD_DEGREE: usize = 8; pub const MAX_SIMD_DEGREE: usize = 8;
} else {
pub const MAX_SIMD_DEGREE: usize = 16;
} }
} }
} else if #[cfg(feature = "c_neon")] { } else if #[cfg(feature = "neon")] {
pub const MAX_SIMD_DEGREE: usize = 4; pub const MAX_SIMD_DEGREE: usize = 4;
} else { } else {
pub const MAX_SIMD_DEGREE: usize = 1; pub const MAX_SIMD_DEGREE: usize = 1;
@ -24,13 +24,13 @@ cfg_if::cfg_if! {
cfg_if::cfg_if! { cfg_if::cfg_if! {
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
cfg_if::cfg_if! { cfg_if::cfg_if! {
if #[cfg(feature = "c")] { if #[cfg(feature = "pure")] {
pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
} else {
pub const MAX_SIMD_DEGREE_OR_2: usize = 8; pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
} else {
pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
} }
} }
} else if #[cfg(feature = "c_neon")] { } else if #[cfg(feature = "neon")] {
pub const MAX_SIMD_DEGREE_OR_2: usize = 4; pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
} else { } else {
pub const MAX_SIMD_DEGREE_OR_2: usize = 2; pub const MAX_SIMD_DEGREE_OR_2: usize = 2;
@ -44,10 +44,10 @@ pub enum Platform {
SSE41, SSE41,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
AVX2, AVX2,
#[cfg(feature = "c")] #[cfg(not(feature = "pure"))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
AVX512, AVX512,
#[cfg(feature = "c_neon")] #[cfg(feature = "neon")]
NEON, NEON,
} }
@ -56,7 +56,7 @@ impl Platform {
pub fn detect() -> Self { pub fn detect() -> Self {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
{ {
#[cfg(feature = "c")] #[cfg(not(feature = "pure"))]
{ {
if avx512_detected() { if avx512_detected() {
return Platform::AVX512; return Platform::AVX512;
@ -69,9 +69,9 @@ impl Platform {
return Platform::SSE41; return Platform::SSE41;
} }
} }
// We don't use dynamic feature detection for NEON. If the "c_neon" // We don't use dynamic feature detection for NEON. If the "neon"
// feature is on, NEON is assumed to be supported. // feature is on, NEON is assumed to be supported.
#[cfg(feature = "c_neon")] #[cfg(feature = "neon")]
{ {
return Platform::NEON; return Platform::NEON;
} }
@ -85,10 +85,10 @@ impl Platform {
Platform::SSE41 => 4, Platform::SSE41 => 4,
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX2 => 8, Platform::AVX2 => 8,
#[cfg(feature = "c")] #[cfg(not(feature = "pure"))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => 16, Platform::AVX512 => 16,
#[cfg(feature = "c_neon")] #[cfg(feature = "neon")]
Platform::NEON => 4, Platform::NEON => 4,
}; };
debug_assert!(degree <= MAX_SIMD_DEGREE); debug_assert!(degree <= MAX_SIMD_DEGREE);
@ -111,13 +111,13 @@ impl Platform {
crate::sse41::compress_in_place(cv, block, block_len, counter, flags) crate::sse41::compress_in_place(cv, block, block_len, counter, flags)
}, },
// Safe because detect() checked for platform support. // Safe because detect() checked for platform support.
#[cfg(feature = "c")] #[cfg(not(feature = "pure"))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe { Platform::AVX512 => unsafe {
crate::avx512::compress_in_place(cv, block, block_len, counter, flags) crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
}, },
// No NEON compress_in_place() implementation yet. // No NEON compress_in_place() implementation yet.
#[cfg(feature = "c_neon")] #[cfg(feature = "neon")]
Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags), Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags),
} }
} }
@ -138,13 +138,13 @@ impl Platform {
crate::sse41::compress_xof(cv, block, block_len, counter, flags) crate::sse41::compress_xof(cv, block, block_len, counter, flags)
}, },
// Safe because detect() checked for platform support. // Safe because detect() checked for platform support.
#[cfg(feature = "c")] #[cfg(not(feature = "pure"))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe { Platform::AVX512 => unsafe {
crate::avx512::compress_xof(cv, block, block_len, counter, flags) crate::avx512::compress_xof(cv, block, block_len, counter, flags)
}, },
// No NEON compress_xof() implementation yet. // No NEON compress_xof() implementation yet.
#[cfg(feature = "c_neon")] #[cfg(feature = "neon")]
Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags), Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags),
} }
} }
@ -210,7 +210,7 @@ impl Platform {
) )
}, },
// Safe because detect() checked for platform support. // Safe because detect() checked for platform support.
#[cfg(feature = "c")] #[cfg(not(feature = "pure"))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
Platform::AVX512 => unsafe { Platform::AVX512 => unsafe {
crate::avx512::hash_many( crate::avx512::hash_many(
@ -224,8 +224,8 @@ impl Platform {
out, out,
) )
}, },
// Assumed to be safe if the "c_neon" feature is on. // Assumed to be safe if the "neon" feature is on.
#[cfg(feature = "c_neon")] #[cfg(feature = "neon")]
Platform::NEON => unsafe { Platform::NEON => unsafe {
crate::neon::hash_many( crate::neon::hash_many(
inputs, inputs,
@ -265,7 +265,7 @@ impl Platform {
} }
} }
#[cfg(feature = "c")] #[cfg(not(feature = "pure"))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn avx512() -> Option<Self> { pub fn avx512() -> Option<Self> {
if avx512_detected() { if avx512_detected() {
@ -275,17 +275,17 @@ impl Platform {
} }
} }
#[cfg(feature = "c_neon")] #[cfg(feature = "neon")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn neon() -> Option<Self> { pub fn neon() -> Option<Self> {
// Assumed to be safe if the "c_neon" feature is on. // Assumed to be safe if the "neon" feature is on.
Some(Self::NEON) Some(Self::NEON)
} }
} }
// Note that AVX-512 is divided into multiple featuresets, and we use two of // Note that AVX-512 is divided into multiple featuresets, and we use two of
// them, F and VL. // them, F and VL.
#[cfg(feature = "c")] #[cfg(not(feature = "pure"))]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)] #[inline(always)]
pub fn avx512_detected() -> bool { pub fn avx512_detected() -> bool {

View File

@ -4,10 +4,9 @@ version = "0.0.0"
edition = "2018" edition = "2018"
[features] [features]
default = [] neon = ["blake3/neon"]
c = ["blake3/c"] prefer_intrinsics = ["blake3/prefer_intrinsics"]
c_prefer_intrinsics = ["blake3/c_prefer_intrinsics"] pure = ["blake3/pure"]
c_neon = ["blake3/c_neon"]
[dependencies] [dependencies]
# If you ever change these path dependencies, you'll probably need to update # If you ever change these path dependencies, you'll probably need to update