From e06a0f255ae22449e96b62d0d733943c6a86cb71 Mon Sep 17 00:00:00 2001 From: Jack O'Connor Date: Sat, 28 Mar 2020 17:27:31 -0400 Subject: [PATCH] refactor the Cargo feature set The biggest change here is that assembly implementations are enabled by default. Added features: - "pure" (Pure Rust, with no C or assembly implementations.) Removed features: - "c" (Now basically the default.) Renamed features: - "c_prefer_intrinsics" -> "prefer_intrinsics" - "c_neon" -> "neon" Unchanged: - "rayon" - "std" (Still the only feature on by default.) --- .github/workflows/ci.yml | 24 +++++----- Cargo.toml | 59 ++++++++++++++---------- README.md | 31 ++++++------- b3sum/Cargo.toml | 6 +-- benches/bench.rs | 10 ++-- build.rs | 74 ++++++++++-------------------- c/blake3_c_rust_bindings/build.rs | 4 +- src/{c_avx2.rs => ffi_avx2.rs} | 0 src/{c_avx512.rs => ffi_avx512.rs} | 0 src/{c_neon.rs => ffi_neon.rs} | 2 +- src/{c_sse41.rs => ffi_sse41.rs} | 0 src/lib.rs | 49 +++++++++++++------- src/platform.rs | 52 ++++++++++----------- test_vectors/Cargo.toml | 7 ++- 14 files changed, 155 insertions(+), 163 deletions(-) rename src/{c_avx2.rs => ffi_avx2.rs} (100%) rename src/{c_avx512.rs => ffi_avx512.rs} (100%) rename src/{c_neon.rs => ffi_neon.rs} (96%) rename src/{c_sse41.rs => ffi_sse41.rs} (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f9162cb..4cdd644 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -30,27 +30,27 @@ jobs: - run: cargo test --features=rayon # no_std tests. - run: cargo test --no-default-features - # Test the x86 assembly implementations. Use -vv to log compiler commands. - - run: cargo test --features=c -vv - # Test the C intrinsics implementations. Use -vv to log compiler commands. - - run: cargo test --features=c,c_prefer_intrinsics -vv + # Test the intrinsics implementations. + - run: cargo test --features=prefer_intrinsics + # Test the pure Rust build. + - run: cargo test --features=pure # Test release mode. 
This does more iteratations in test_fuzz_hasher. - run: cargo test --release - - run: cargo test --release --features=c - - run: cargo test --release --features=c,c_prefer_intrinsics + - run: cargo test --release --features=prefer_intrinsics + - run: cargo test --release --features=pure # Test benchmarks. RUSTC_BOOTSTRAP=1 lets this run on non-nightly toolchains. - - run: cargo test --benches --features=c + - run: cargo test --benches env: RUSTC_BOOTSTRAP: 1 # Test vectors. - name: test vectors run: cargo test working-directory: ./test_vectors - - name: test vectors C assembly - run: cargo test --features=c + - name: test vectors intrinsics + run: cargo test --features=prefer_intrinsics working-directory: ./test_vectors - - name: test vectors C intrinsics - run: cargo test --features=c,c_prefer_intrinsics + - name: test vectors pure + run: cargo test --features=pure working-directory: ./test_vectors # Test b3sum. - name: test b3sum @@ -93,7 +93,7 @@ jobs: # Test the portable implementation on everything. - run: cross test --target ${{ matrix.arch }} # Test the NEON implementation on ARM targets. - - run: cross test --target ${{ matrix.arch }} --features=c_neon + - run: cross test --target ${{ matrix.arch }} --features=neon if: startsWith(matrix.arch, 'armv7-') || startsWith(matrix.arch, 'aarch64-') # Test vectors. Note that this uses a hacky script due to path dependency limitations. - run: ./test_vectors/cross_test.sh --target ${{ matrix.arch }} diff --git a/Cargo.toml b/Cargo.toml index d9440fa..dffaf7c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,27 +11,43 @@ edition = "2018" [features] default = ["std"] -# The "c" feature includes C and assembly SIMD implementations of the -# compression function for x86 platforms, called via FFI. (Currently it has no -# effect on other platforms.) This requires a C toolchain on the build machine. 
-# This is necessary for AVX-512 support, which is not yet stable in Rust, and -# the assembly implementations also perform better than those using Rust/LLVM -# intrinsics. As with the Rust implementations, these C and assembly -# implementations participate in runtime CPU feature detection, and the -# resulting binary is portable. -c = [] -# Normally x86-64 builds prefer assembly implementations over C intrinsics. The -# assembly implementations perform better, perform most consistently across -# compilers, and are much faster to build. However, this feature makes the -# build use the C intrinsics implementations instead. This is mainly for -# testing purposes, and most callers will not want to use it. -c_prefer_intrinsics = [] + +# By default on x86_64, this crate uses Samuel Neves' hand-written assembly +# implementations for SSE4.1, AVX2, and AVX-512. (These provide both the best +# runtime performance, and the fastest build times.) And by default on 32-bit +# x86, this crate uses Rust intrinsics implementations for SSE4.1 and AVX2, and +# a C intrinsics implementation for AVX-512. Enabling the "pure" feature +# disables all FFI to C and assembly implementations, leaving only the Rust +# intrinsics implementations for SSE4.1 and AVX2. This removes the dependency +# on a C compiler/assembler, which can be helpful for certain applications. +# Library crates should generally avoid this feature, so that each binary crate +# is free to make its own decision about build dependencies. +pure = [] + +# As described above, on x86_64 this crate uses assembly implementations by +# default. Enabling the "prefer_intrinsics" feature makes this crate use +# intrinsics implementations on both 32-bit and 64-bit x86. This is mainly for +# testing, and calling crates should not need it. +prefer_intrinsics = [] + # The NEON implementation does not participate in dynamic feature detection, -# which is currently x86-only. If "c_neon" is on, NEON support is assumed. 
Note -# that AArch64 always supports NEON, but support on ARMv7 varies. -c_neon = [] +# which is currently x86-only. If "neon" is on, NEON support is assumed. Note +# that AArch64 always supports NEON, but support on ARMv7 varies. The NEON +# implementation uses C intrinsics and requires a C compiler. +neon = [] + +# This crate uses libstd for std::io trait implementations, and also for +# runtime CPU feature detection. This feature is enabled by default. If you use +# --no-default-features, the only way to use the SIMD implementations in this +# crate is to enable the corresponding instruction sets statically for the +# entire build, with e.g. RUSTFLAGS="-C target-cpu=native". std = ["digest/std"] +# The "rayon" feature (defined below as an optional dependency) enables the +# join::RayonJoin type, which can be used with Hasher::update_with_join to +# perform multi-threaded hashing. However, even if this feature is enabled, all +# other APIs remain single-threaded. + [package.metadata.docs.rs] # Document blake3::join::RayonJoin on docs.rs. features = ["rayon"] @@ -40,13 +56,6 @@ features = ["rayon"] arrayref = "0.3.5" arrayvec = { version = "0.5.1", default-features = false, features = ["array-sizes-33-128"] } constant_time_eq = "0.1.5" -# A performance note for the "rayon" feature: Multi-threading can have -# significant overhead for small inputs, particularly on x86 where individual -# cores are very fast. On the other hand, on slower platforms like ARM, -# multi-threading can be beneficial for all inputs. There's no one input size -# threshold that would work well everywhere, and this crate doesn't try to be -# clever. If you're going to enable the "rayon" feature, you should benchmark -# it for your specific use case. 
rayon = { version = "1.2.1", optional = true } cfg-if = "0.1.10" digest = "0.8.1" diff --git a/README.md b/README.md index ab2e36c..1cb6f87 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# BLAKE3 +# BLAKE3 [![Actions Status](https://github.com/BLAKE3-team/BLAKE3/workflows/tests/badge.svg)](https://github.com/BLAKE3-team/BLAKE3/actions) BLAKE3 is a cryptographic hash function that is: @@ -33,23 +33,19 @@ with BLAKE3. This repository is the official implementation of BLAKE3. It includes: * The [`blake3`](https://crates.io/crates/blake3) Rust crate, which - includes optimized SIMD implementations, with runtime CPU feature - detection on x86. SSE4.1 and AVX2 are supported in pure Rust. The `c` - feature enables C/assembly implementations and AVX-512 support. The - `c_neon` feature enables ARM NEON support. Multi-threading is also - supported, and the `rayon` feature provides a - [Rayon](https://github.com/rayon-rs/rayon)-based implementation. + includes optimized SIMD implementations for SSE4.1, AVX2, AVX-512, and + NEON, with automatic runtime CPU feature detection on x86. The + optional `rayon` feature also enables multi-threading. * The [`b3sum`](https://crates.io/crates/b3sum) Rust crate, which - provides a command line interface. You can install it from - [crates.io](https://crates.io/crates/b3sum) with `cargo install - b3sum`. It enables the `rayon` and `c` features of the `blake3` crate - by default. + provides a command line interface. It uses multi-threading by default, + making it an order of magnitude faster than e.g. `sha256sum` on + typical desktop hardware. * The [C implementation](c), which like the Rust implementation includes - SIMD code and dynamic CPU feature detection on x86. Unlike the Rust - implementation, it's not currently multi-threaded. The - [README](c/README.md) provides build examples. + SIMD code and runtime CPU feature detection on x86. Unlike the Rust + implementation, it's not currently multi-threaded. 
See + [`c/README.md`](c/README.md). * The [reference implementation](reference_impl/reference_impl.rs), which is discussed in Section 5.1 of the [BLAKE3 @@ -59,9 +55,6 @@ This repository is the official implementation of BLAKE3. It includes: port that doesn't need multi-threading or SIMD optimizations, start here. -* [![Actions - Status](https://github.com/BLAKE3-team/BLAKE3/workflows/tests/badge.svg)](https://github.com/BLAKE3-team/BLAKE3/actions) - BLAKE3 was designed by: * [@oconnor663 ](https://github.com/oconnor663) (Jack O'Connor) @@ -108,7 +101,9 @@ time b3sum /tmp/bigfile ### The `blake3` crate To use BLAKE3 from Rust code, add a dependency on the `blake3` crate to -your `Cargo.toml`. Here's an example of hashing some input bytes: +your `Cargo.toml`. Note that by default, unless the `pure` feature is +enabled, building `blake3` requires a C compiler. Here's an example of +hashing some input bytes: ```rust // Hash an input all at once. diff --git a/b3sum/Cargo.toml b/b3sum/Cargo.toml index a8b83f4..0e12bfb 100644 --- a/b3sum/Cargo.toml +++ b/b3sum/Cargo.toml @@ -9,9 +9,9 @@ readme = "README.md" edition = "2018" [features] -default = ["c"] -c = ["blake3/c"] -c_neon = ["blake3/c_neon"] +neon = ["blake3/neon"] +prefer_intrinsics = ["blake3/prefer_intrinsics"] +pure = ["blake3/pure"] [dependencies] anyhow = "1.0.25" diff --git a/benches/bench.rs b/benches/bench.rs index 263f81e..a6cd97a 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -69,7 +69,7 @@ fn bench_single_compression_sse41(b: &mut Bencher) { } #[bench] -#[cfg(feature = "c")] +#[cfg(not(feature = "pure"))] fn bench_single_compression_avx512(b: &mut Bencher) { if let Some(platform) = Platform::avx512() { bench_single_compression_fn(b, platform); @@ -119,7 +119,7 @@ fn bench_many_chunks_avx2(b: &mut Bencher) { } #[bench] -#[cfg(feature = "c")] +#[cfg(not(feature = "pure"))] fn bench_many_chunks_avx512(b: &mut Bencher) { if let Some(platform) = Platform::avx512() { bench_many_chunks_fn(b, platform); 
@@ -127,7 +127,7 @@ fn bench_many_chunks_avx512(b: &mut Bencher) { } #[bench] -#[cfg(feature = "c_neon")] +#[cfg(feature = "neon")] fn bench_many_chunks_neon(b: &mut Bencher) { if let Some(platform) = Platform::neon() { bench_many_chunks_fn(b, platform); @@ -178,7 +178,7 @@ fn bench_many_parents_avx2(b: &mut Bencher) { } #[bench] -#[cfg(feature = "c")] +#[cfg(not(feature = "pure"))] fn bench_many_parents_avx512(b: &mut Bencher) { if let Some(platform) = Platform::avx512() { bench_many_parents_fn(b, platform); @@ -186,7 +186,7 @@ fn bench_many_parents_avx512(b: &mut Bencher) { } #[bench] -#[cfg(feature = "c_neon")] +#[cfg(feature = "neon")] fn bench_many_parents_neon(b: &mut Bencher) { if let Some(platform) = Platform::neon() { bench_many_parents_fn(b, platform); diff --git a/build.rs b/build.rs index b7f7f7b..5459e84 100644 --- a/build.rs +++ b/build.rs @@ -49,21 +49,16 @@ fn new_build() -> cc::Build { } const WINDOWS_MSVC_ERROR: &str = r#" -The "c" feature is enabled, but your version of the MSVC C compiler does not -support the "/arch:AVX512" flag. If you are building the "b3sum" or "bao_bin" -crates, you can disable AVX-512 with Cargo's "--no-default-features" flag. -(Note that this also disables other default features like Rayon-based -multithreading, which you can re-enable with "--features=rayon".) Other crates -might or might not support this workaround. +Your version of the MSVC C compiler does not support the "/arch:AVX512" flag. +If you're building the "b3sum" or "bao_bin" crates, you can disable AVX-512 +with "--features=pure". Other crates might or might not support this +workaround. "#; const GNU_ERROR: &str = r#" -The "c" feature is enabled, but your C compiler does not support the -"-mavx512f" flag. If you are building the "b3sum" or "bao_bin" crates, you can -disable AVX-512 with Cargo's "--no-default-features" flag. 
(Note that this also -disables other default features like Rayon-based multithreading, which you can -re-enable with "--features=rayon".) Other crates might or might not support -this workaround. +Your C compiler does not support the "-mavx512f" flag. If you are building the +"b3sum" or "bao_bin" crates, you can disable AVX-512 with "--features=pure". +Other crates might or might not support this workaround. "#; fn check_for_avx512_compiler_support() { @@ -82,11 +77,15 @@ fn check_for_avx512_compiler_support() { } fn main() -> Result<(), Box> { - if defined("CARGO_FEATURE_C") { + if defined("CARGO_FEATURE_PURE") && defined("CARGO_FEATURE_NEON") { + panic!("It doesn't make sense to enable both \"pure\" and \"neon\"."); + } + + if (is_x86_64() || is_x86_32()) && !defined("CARGO_FEATURE_PURE") { check_for_avx512_compiler_support(); - if is_x86_64() && !defined("CARGO_FEATURE_C_PREFER_INTRINSICS") { + if is_x86_64() && !defined("CARGO_FEATURE_PREFER_INTRINSICS") { // On 64-bit, use the assembly implementations, unless the - // "c_prefer_intrinsics" feature is enabled. + // "prefer_intrinsics" feature is enabled. if is_windows_msvc() { let mut build = new_build(); build.file("c/blake3_sse41_x86-64_windows_msvc.asm"); @@ -109,40 +108,15 @@ fn main() -> Result<(), Box> { build.file("c/blake3_avx512_x86-64_unix.S"); build.compile("blake3_asm"); } - } else if is_x86_64() || is_x86_32() { - // Assembly implementations are only for 64-bit. On 32-bit, or if - // the "c_prefer_intrinsics" feature is enabled, use the - // intrinsics-based C implementations. These each need to be - // compiled separately, with the corresponding instruction set - // extension explicitly enabled in the compiler. 
- - let mut sse41_build = new_build(); - sse41_build.file("c/blake3_sse41.c"); - if is_windows_msvc() { - // /arch:SSE2 is the default on x86 and undefined on x86_64: - // https://docs.microsoft.com/en-us/cpp/build/reference/arch-x86 - // It also includes SSE4.1 intrisincs: - // https://stackoverflow.com/a/32183222/823869 - } else { - sse41_build.flag("-msse4.1"); - } - sse41_build.compile("blake3_sse41"); - - let mut avx2_build = new_build(); - avx2_build.file("c/blake3_avx2.c"); - if is_windows_msvc() { - avx2_build.flag("/arch:AVX2"); - } else { - avx2_build.flag("-mavx2"); - } - avx2_build.compile("blake3_avx2"); - + } else { + // Assembly implementations are only for x86_64. On 32-bit x86, or + // if the "prefer_intrinsics" feature is enabled, use the Rust + // intrinsics implementations for SSE4.1 and AVX2, and the C + // intrinsics implementation for AVX-512. (Stable Rust does not yet + // support AVX-512.) let mut avx512_build = new_build(); avx512_build.file("c/blake3_avx512.c"); if is_windows_msvc() { - // Note that a lot of versions of MSVC don't support /arch:AVX512, - // and they'll discard it with a warning, hopefully leading to a - // build error. avx512_build.flag("/arch:AVX512"); } else { avx512_build.flag("-mavx512f"); @@ -153,16 +127,14 @@ fn main() -> Result<(), Box> { avx512_build.flag("-fno-asynchronous-unwind-tables"); } avx512_build.compile("blake3_avx512"); - } else { - // Currently no effect for non-x86 platforms. } } - if defined("CARGO_FEATURE_C_NEON") { + if defined("CARGO_FEATURE_NEON") { let mut build = new_build(); // Note that blake3_neon.c normally depends on the blake3_portable.c // for the single-instance compression function, but we expose - // portable.rs over FFI instead. See c_neon.rs. + // portable.rs over FFI instead. See ffi_neon.rs. build.file("c/blake3_neon.c"); // ARMv7 platforms that support NEON generally need the following // flags. AArch64 supports NEON by default and does not support -mpfu. 
@@ -173,7 +145,7 @@ fn main() -> Result<(), Box> { build.compile("blake3_neon"); } - // The `cc` crate does not automatically emit rerun-if directives for the + // The `cc` crate doesn't automatically emit rerun-if directives for the // environment variables it supports, in particular for $CC. We expect to // do a lot of benchmarking across different compilers, so we explicitly // add the variables that we're likely to need. diff --git a/c/blake3_c_rust_bindings/build.rs b/c/blake3_c_rust_bindings/build.rs index 125f3f7..85d8170 100644 --- a/c/blake3_c_rust_bindings/build.rs +++ b/c/blake3_c_rust_bindings/build.rs @@ -53,7 +53,7 @@ fn main() -> Result<(), Box> { base_build.file("../blake3.c"); base_build.file("../blake3_dispatch.c"); base_build.file("../blake3_portable.c"); - base_build.compile("blake3_c_base"); + base_build.compile("blake3_base"); if is_x86_64() && !defined("CARGO_FEATURE_PREFER_INTRINSICS") { // On 64-bit, use the assembly implementations, unless the @@ -134,7 +134,7 @@ fn main() -> Result<(), Box> { neon_build.flag("-mfpu=neon-vfpv4"); neon_build.flag("-mfloat-abi=hard"); } - neon_build.compile("blake3_c_neon"); + neon_build.compile("blake3_neon"); } // The `cc` crate does not automatically emit rerun-if directives for the diff --git a/src/c_avx2.rs b/src/ffi_avx2.rs similarity index 100% rename from src/c_avx2.rs rename to src/ffi_avx2.rs diff --git a/src/c_avx512.rs b/src/ffi_avx512.rs similarity index 100% rename from src/c_avx512.rs rename to src/ffi_avx512.rs diff --git a/src/c_neon.rs b/src/ffi_neon.rs similarity index 96% rename from src/c_neon.rs rename to src/ffi_neon.rs index 77b9654..8899742 100644 --- a/src/c_neon.rs +++ b/src/ffi_neon.rs @@ -75,7 +75,7 @@ mod test { #[test] fn test_hash_many() { - // This entire file is gated on feature="c_neon", so NEON support is + // This entire file is gated on feature="neon", so NEON support is // assumed here. 
crate::test::test_hash_many_fn(hash_many, hash_many); } diff --git a/src/c_sse41.rs b/src/ffi_sse41.rs similarity index 100% rename from src/c_sse41.rs rename to src/ffi_sse41.rs diff --git a/src/lib.rs b/src/lib.rs index c0915ee..0a0d640 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,13 +29,15 @@ //! //! # Cargo Features //! -//! The `c` feature provides optimized assembly implementations and also -//! AVX-512 support. It is off by default. If activated, a C compiler for the -//! target platform is required. -//! //! The `rayon` feature provides [Rayon]-based multi-threading, in particular //! the [`join::RayonJoin`] type for use with [`Hasher::update_with_join`]. It -//! is also off by default, but on for [docs.rs]. +//! is disabled by default, but enabled for [docs.rs]. +//! +//! The `pure` feature disables all FFI to C and assembly implementations, +//! leaving only the Rust intrinsics implementations for SSE4.1 and AVX2. This +//! removes the dependency on a C compiler/assembler. Library crates should +//! generally avoid this feature, so that each binary crate is free to make its +//! own decision about build dependencies. //! //! [BLAKE3]: https://blake3.io //! [Rayon]: https://github.com/rayon-rs/rayon @@ -63,23 +65,38 @@ pub mod platform; mod portable; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] cfg_if::cfg_if! { - if #[cfg(feature = "c")] { - #[path = "c_sse41.rs"] - mod sse41; - #[path = "c_avx2.rs"] - mod avx2; - #[path = "c_avx512.rs"] - mod avx512; - } else { + if #[cfg(feature = "pure")] { + // When "pure" is enabled, use only Rust intrinsics. Stable Rust + // doesn't currently support AVX-512. #[path = "rust_sse41.rs"] mod sse41; #[path = "rust_avx2.rs"] mod avx2; - // Stable Rust does not currently support AVX-512. 
+ } else if #[cfg(any(target_arch = "x86", feature = "prefer_intrinsics"))] { + // When "prefer_intrinsics" is enabled, or on 32-bit x86 (which our + // assembly implementations don't support), use Rust intrinsics for + // SSE4.1 and AVX2, and use C intrinsics for AVX-512. In this case, + // build.rs will compile and link c/blake3_avx512.c. + #[path = "rust_sse41.rs"] + mod sse41; + #[path = "rust_avx2.rs"] + mod avx2; + #[path = "ffi_avx512.rs"] + mod avx512; + } else { + // Otherwise on x86_64, use assembly implementations for everything. In + // this case, build.rs will compile and link all the assembly files for + // the target platform (Unix, Windows MSVC, or Windows GNU). + #[path = "ffi_sse41.rs"] + mod sse41; + #[path = "ffi_avx2.rs"] + mod avx2; + #[path = "ffi_avx512.rs"] + mod avx512; } } -#[cfg(feature = "c_neon")] -#[path = "c_neon.rs"] +#[cfg(feature = "neon")] +#[path = "ffi_neon.rs"] mod neon; pub mod traits; diff --git a/src/platform.rs b/src/platform.rs index 163cbbb..b1b9dad 100644 --- a/src/platform.rs +++ b/src/platform.rs @@ -4,13 +4,13 @@ use arrayref::{array_mut_ref, array_ref}; cfg_if::cfg_if! { if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { cfg_if::cfg_if! { - if #[cfg(feature = "c")] { - pub const MAX_SIMD_DEGREE: usize = 16; - } else { + if #[cfg(feature = "pure")] { pub const MAX_SIMD_DEGREE: usize = 8; + } else { + pub const MAX_SIMD_DEGREE: usize = 16; } } - } else if #[cfg(feature = "c_neon")] { + } else if #[cfg(feature = "neon")] { pub const MAX_SIMD_DEGREE: usize = 4; } else { pub const MAX_SIMD_DEGREE: usize = 1; @@ -24,13 +24,13 @@ cfg_if::cfg_if! { cfg_if::cfg_if! { if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { cfg_if::cfg_if! 
{ - if #[cfg(feature = "c")] { - pub const MAX_SIMD_DEGREE_OR_2: usize = 16; - } else { + if #[cfg(feature = "pure")] { pub const MAX_SIMD_DEGREE_OR_2: usize = 8; + } else { + pub const MAX_SIMD_DEGREE_OR_2: usize = 16; } } - } else if #[cfg(feature = "c_neon")] { + } else if #[cfg(feature = "neon")] { pub const MAX_SIMD_DEGREE_OR_2: usize = 4; } else { pub const MAX_SIMD_DEGREE_OR_2: usize = 2; @@ -44,10 +44,10 @@ pub enum Platform { SSE41, #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] AVX2, - #[cfg(feature = "c")] + #[cfg(not(feature = "pure"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] AVX512, - #[cfg(feature = "c_neon")] + #[cfg(feature = "neon")] NEON, } @@ -56,7 +56,7 @@ impl Platform { pub fn detect() -> Self { #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { - #[cfg(feature = "c")] + #[cfg(not(feature = "pure"))] { if avx512_detected() { return Platform::AVX512; @@ -69,9 +69,9 @@ impl Platform { return Platform::SSE41; } } - // We don't use dynamic feature detection for NEON. If the "c_neon" + // We don't use dynamic feature detection for NEON. If the "neon" // feature is on, NEON is assumed to be supported. - #[cfg(feature = "c_neon")] + #[cfg(feature = "neon")] { return Platform::NEON; } @@ -85,10 +85,10 @@ impl Platform { Platform::SSE41 => 4, #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX2 => 8, - #[cfg(feature = "c")] + #[cfg(not(feature = "pure"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX512 => 16, - #[cfg(feature = "c_neon")] + #[cfg(feature = "neon")] Platform::NEON => 4, }; debug_assert!(degree <= MAX_SIMD_DEGREE); @@ -111,13 +111,13 @@ impl Platform { crate::sse41::compress_in_place(cv, block, block_len, counter, flags) }, // Safe because detect() checked for platform support. 
- #[cfg(feature = "c")] + #[cfg(not(feature = "pure"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX512 => unsafe { crate::avx512::compress_in_place(cv, block, block_len, counter, flags) }, // No NEON compress_in_place() implementation yet. - #[cfg(feature = "c_neon")] + #[cfg(feature = "neon")] Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags), } } @@ -138,13 +138,13 @@ impl Platform { crate::sse41::compress_xof(cv, block, block_len, counter, flags) }, // Safe because detect() checked for platform support. - #[cfg(feature = "c")] + #[cfg(not(feature = "pure"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX512 => unsafe { crate::avx512::compress_xof(cv, block, block_len, counter, flags) }, // No NEON compress_xof() implementation yet. - #[cfg(feature = "c_neon")] + #[cfg(feature = "neon")] Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags), } } @@ -210,7 +210,7 @@ impl Platform { ) }, // Safe because detect() checked for platform support. - #[cfg(feature = "c")] + #[cfg(not(feature = "pure"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] Platform::AVX512 => unsafe { crate::avx512::hash_many( @@ -224,8 +224,8 @@ impl Platform { out, ) }, - // Assumed to be safe if the "c_neon" feature is on. - #[cfg(feature = "c_neon")] + // Assumed to be safe if the "neon" feature is on. + #[cfg(feature = "neon")] Platform::NEON => unsafe { crate::neon::hash_many( inputs, @@ -265,7 +265,7 @@ impl Platform { } } - #[cfg(feature = "c")] + #[cfg(not(feature = "pure"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn avx512() -> Option { if avx512_detected() { @@ -275,17 +275,17 @@ impl Platform { } } - #[cfg(feature = "c_neon")] + #[cfg(feature = "neon")] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] pub fn neon() -> Option { - // Assumed to be safe if the "c_neon" feature is on. + // Assumed to be safe if the "neon" feature is on. 
Some(Self::NEON) } } // Note that AVX-512 is divided into multiple featuresets, and we use two of // them, F and VL. -#[cfg(feature = "c")] +#[cfg(not(feature = "pure"))] #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[inline(always)] pub fn avx512_detected() -> bool { diff --git a/test_vectors/Cargo.toml b/test_vectors/Cargo.toml index 2a90e39..cd74a9d 100644 --- a/test_vectors/Cargo.toml +++ b/test_vectors/Cargo.toml @@ -4,10 +4,9 @@ version = "0.0.0" edition = "2018" [features] -default = [] -c = ["blake3/c"] -c_prefer_intrinsics = ["blake3/c_prefer_intrinsics"] -c_neon = ["blake3/c_neon"] +neon = ["blake3/neon"] +prefer_intrinsics = ["blake3/prefer_intrinsics"] +pure = ["blake3/pure"] [dependencies] # If you ever change these path dependencies, you'll probably need to update