mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-05-08 19:06:02 +02:00
integrate assembly implementations into the blake3 crate
This commit is contained in:
parent
b6b3c27824
commit
efbfa0463c
|
@ -24,22 +24,30 @@ jobs:
|
||||||
toolchain: ${{ format('{0}-{1}', matrix.channel, matrix.target.toolchain) }}
|
toolchain: ${{ format('{0}-{1}', matrix.channel, matrix.target.toolchain) }}
|
||||||
profile: minimal
|
profile: minimal
|
||||||
override: true
|
override: true
|
||||||
# Default tests.
|
# Default tests plus Rayon.
|
||||||
- run: cargo test
|
- run: cargo test --features=rayon
|
||||||
# No-default-features tests.
|
# no_std tests.
|
||||||
- run: cargo test --no-default-features
|
- run: cargo test --no-default-features
|
||||||
# More features tests. Note that "c_avx512" participates in dynamic feature
|
# Test the x86 assembly implementations. Use -vv to log compiler commands.
|
||||||
# detection, so it'll be built, but it probably won't run.
|
- run: cargo test --features=c -vv
|
||||||
- run: cargo test --features=c_avx512,rayon
|
# Test the C intrinsics implementations. Use -vv to log compiler commands.
|
||||||
|
- run: cargo test --features=c,c_prefer_intrinsics -vv
|
||||||
# Test release mode. This does more iterations in test_fuzz_hasher.
|
# Test release mode. This does more iterations in test_fuzz_hasher.
|
||||||
- run: cargo test --release
|
- run: cargo test --release
|
||||||
# Test benchmarks. Nightly only.
|
# Test benchmarks. RUSTC_BOOTSTRAP=1 lets this run on non-nightly toolchains.
|
||||||
- run: cargo test --benches
|
- run: cargo test --benches --features=c
|
||||||
if: matrix.rust_version == 'nightly'
|
env:
|
||||||
|
RUSTC_BOOTSTRAP: 1
|
||||||
# Test vectors.
|
# Test vectors.
|
||||||
- name: test vectors
|
- name: test vectors
|
||||||
run: cargo test
|
run: cargo test
|
||||||
working-directory: ./test_vectors
|
working-directory: ./test_vectors
|
||||||
|
- name: test vectors
|
||||||
|
run: cargo test --features=c
|
||||||
|
working-directory: ./test_vectors
|
||||||
|
- name: test vectors
|
||||||
|
run: cargo test --features=c,c_prefer_intrinsics
|
||||||
|
working-directory: ./test_vectors
|
||||||
# Test b3sum.
|
# Test b3sum.
|
||||||
- name: test b3sum
|
- name: test b3sum
|
||||||
run: cargo test
|
run: cargo test
|
||||||
|
|
19
Cargo.toml
19
Cargo.toml
|
@ -11,10 +11,21 @@ edition = "2018"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["std"]
|
default = ["std"]
|
||||||
# Like SSE4.1 and AVX2, the AVX-512 implementation participates in dynamic CPU
|
# The "c" feature includes C and assembly SIMD implementations of the
|
||||||
# feature detection. A binary with "c_avx512" on is still cross-platform. This
|
# compression function for x86 platforms, called via FFI. (Currently it has no
|
||||||
# feature has no effect on non-x86.
|
# effect on other platforms.) This requires a C toolchain on the build machine.
|
||||||
c_avx512 = []
|
# This is necessary for AVX-512 support, which is not yet stable in Rust, and
|
||||||
|
# the assembly implementations also perform better than those using Rust/LLVM
|
||||||
|
# intrinsics. As with the Rust implementations, these C and assembly
|
||||||
|
# implementations participate in runtime CPU feature detection, and the
|
||||||
|
# resulting binary is portable.
|
||||||
|
c = []
|
||||||
|
# Normally x86-64 builds prefer assembly implementations over C intrinsics. The
|
||||||
|
# assembly implementations perform better, perform most consistently across
|
||||||
|
# compilers, and are much faster to build. However, this feature makes the
|
||||||
|
# build use the C intrinsics implementations instead. This is mainly for
|
||||||
|
# testing purposes, and most callers will not want to use it.
|
||||||
|
c_prefer_intrinsics = []
|
||||||
# The NEON implementation does not participate in dynamic feature detection,
|
# The NEON implementation does not participate in dynamic feature detection,
|
||||||
# which is currently x86-only. If "c_neon" is on, NEON support is assumed. Note
|
# which is currently x86-only. If "c_neon" is on, NEON support is assumed. Note
|
||||||
# that AArch64 always supports NEON, but support on ARMv7 varies.
|
# that AArch64 always supports NEON, but support on ARMv7 varies.
|
||||||
|
|
20
README.md
20
README.md
|
@ -33,19 +33,18 @@ with BLAKE3.
|
||||||
This repository is the official implementation of BLAKE3. It includes:
|
This repository is the official implementation of BLAKE3. It includes:
|
||||||
|
|
||||||
* The [`blake3`](https://crates.io/crates/blake3) Rust crate, which
|
* The [`blake3`](https://crates.io/crates/blake3) Rust crate, which
|
||||||
includes optimized SIMD implementations, with dynamic CPU feature
|
includes optimized SIMD implementations, with runtime CPU feature
|
||||||
detection on x86. SSE4.1 and AVX2 support are implemented in Rust,
|
detection on x86. SSE4.1 and AVX2 are supported in pure Rust. The `c`
|
||||||
while AVX-512 and ARM NEON support are imported from the C
|
feature enables C/assembly implementations and AVX-512 support. The
|
||||||
implementation and controlled by the `c_avx512` and `c_neon` features.
|
`c_neon` feature enables ARM NEON support. Multi-threading is also
|
||||||
Multi-threading is implemented with
|
supported, and the `rayon` feature provides a
|
||||||
[Rayon](https://github.com/rayon-rs/rayon) and controlled by the
|
[Rayon](https://github.com/rayon-rs/rayon)-based implementation.
|
||||||
`rayon` feature.
|
|
||||||
|
|
||||||
* The [`b3sum`](https://crates.io/crates/b3sum) Rust crate, which
|
* The [`b3sum`](https://crates.io/crates/b3sum) Rust crate, which
|
||||||
provides a command line interface. You can install it from
|
provides a command line interface. You can install it from
|
||||||
[crates.io](https://crates.io/crates/b3sum) with `cargo install
|
[crates.io](https://crates.io/crates/b3sum) with `cargo install
|
||||||
b3sum`. It enables the multi-threading and AVX-512 features of the
|
b3sum`. It enables the `rayon` and `c` features of the `blake3` crate
|
||||||
`blake3` crate by default.
|
by default.
|
||||||
|
|
||||||
* The [C implementation](c), which like the Rust implementation includes
|
* The [C implementation](c), which like the Rust implementation includes
|
||||||
SIMD code and dynamic CPU feature detection on x86. Unlike the Rust
|
SIMD code and dynamic CPU feature detection on x86. Unlike the Rust
|
||||||
|
@ -80,9 +79,6 @@ we recommend [Argon2](https://github.com/P-H-C/phc-winner-argon2).*
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
This repository provides the `b3sum` command line utility and the
|
|
||||||
`blake3` Rust crate.
|
|
||||||
|
|
||||||
### The `b3sum` utility
|
### The `b3sum` utility
|
||||||
|
|
||||||
The `b3sum` utility allows you to process files and data from standard
|
The `b3sum` utility allows you to process files and data from standard
|
||||||
|
|
|
@ -9,8 +9,8 @@ readme = "README.md"
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["c_avx512", "rayon"]
|
default = ["c", "rayon"]
|
||||||
c_avx512 = ["blake3/c_avx512"]
|
c = ["blake3/c"]
|
||||||
c_neon = ["blake3/c_neon"]
|
c_neon = ["blake3/c_neon"]
|
||||||
rayon = ["blake3/rayon", "memmap"]
|
rayon = ["blake3/rayon", "memmap"]
|
||||||
|
|
||||||
|
|
162
benches/bench.rs
162
benches/bench.rs
|
@ -4,7 +4,7 @@ extern crate test;
|
||||||
|
|
||||||
use arrayref::array_ref;
|
use arrayref::array_ref;
|
||||||
use arrayvec::ArrayVec;
|
use arrayvec::ArrayVec;
|
||||||
use blake3::platform::MAX_SIMD_DEGREE;
|
use blake3::platform::{Platform, MAX_SIMD_DEGREE};
|
||||||
use blake3::{BLOCK_LEN, CHUNK_LEN, OUT_LEN};
|
use blake3::{BLOCK_LEN, CHUNK_LEN, OUT_LEN};
|
||||||
use rand::prelude::*;
|
use rand::prelude::*;
|
||||||
use test::Bencher;
|
use test::Bencher;
|
||||||
|
@ -48,173 +48,149 @@ impl RandomInput {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type CompressInPlaceFn =
|
fn bench_single_compression_fn(b: &mut Bencher, platform: Platform) {
|
||||||
unsafe fn(cv: &mut [u32; 8], block: &[u8; BLOCK_LEN], block_len: u8, counter: u64, flags: u8);
|
|
||||||
|
|
||||||
fn bench_single_compression_fn(b: &mut Bencher, f: CompressInPlaceFn) {
|
|
||||||
let mut state = [1u32; 8];
|
let mut state = [1u32; 8];
|
||||||
let mut r = RandomInput::new(b, 64);
|
let mut r = RandomInput::new(b, 64);
|
||||||
let input = array_ref!(r.get(), 0, 64);
|
let input = array_ref!(r.get(), 0, 64);
|
||||||
unsafe {
|
b.iter(|| platform.compress_in_place(&mut state, input, 64 as u8, 0, 0));
|
||||||
b.iter(|| f(&mut state, input, 64 as u8, 0, 0));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn bench_single_compression_portable(b: &mut Bencher) {
|
fn bench_single_compression_portable(b: &mut Bencher) {
|
||||||
bench_single_compression_fn(b, blake3::portable::compress_in_place);
|
bench_single_compression_fn(b, Platform::portable());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
fn bench_single_compression_sse41(b: &mut Bencher) {
|
fn bench_single_compression_sse41(b: &mut Bencher) {
|
||||||
if !blake3::platform::sse41_detected() {
|
if let Some(platform) = Platform::sse41() {
|
||||||
return;
|
bench_single_compression_fn(b, platform);
|
||||||
}
|
}
|
||||||
bench_single_compression_fn(b, blake3::sse41::compress_in_place);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(feature = "c_avx512")]
|
#[cfg(feature = "c")]
|
||||||
fn bench_single_compression_avx512(b: &mut Bencher) {
|
fn bench_single_compression_avx512(b: &mut Bencher) {
|
||||||
if !blake3::platform::avx512_detected() {
|
if let Some(platform) = Platform::avx512() {
|
||||||
return;
|
bench_single_compression_fn(b, platform);
|
||||||
}
|
}
|
||||||
bench_single_compression_fn(b, blake3::c_avx512::compress_in_place);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
type HashManyFn<A> = unsafe fn(
|
fn bench_many_chunks_fn(b: &mut Bencher, platform: Platform) {
|
||||||
inputs: &[&A],
|
let degree = platform.simd_degree();
|
||||||
key: &[u32; 8],
|
|
||||||
counter: u64,
|
|
||||||
increment_counter: blake3::IncrementCounter,
|
|
||||||
flags: u8,
|
|
||||||
flags_start: u8,
|
|
||||||
flags_end: u8,
|
|
||||||
out: &mut [u8],
|
|
||||||
);
|
|
||||||
|
|
||||||
fn bench_many_chunks_fn(b: &mut Bencher, f: HashManyFn<[u8; CHUNK_LEN]>, degree: usize) {
|
|
||||||
let mut inputs = Vec::new();
|
let mut inputs = Vec::new();
|
||||||
for _ in 0..degree {
|
for _ in 0..degree {
|
||||||
inputs.push(RandomInput::new(b, CHUNK_LEN));
|
inputs.push(RandomInput::new(b, CHUNK_LEN));
|
||||||
}
|
}
|
||||||
unsafe {
|
b.iter(|| {
|
||||||
b.iter(|| {
|
let input_arrays: ArrayVec<[&[u8; CHUNK_LEN]; MAX_SIMD_DEGREE]> = inputs
|
||||||
let input_arrays: ArrayVec<[&[u8; CHUNK_LEN]; MAX_SIMD_DEGREE]> = inputs
|
.iter_mut()
|
||||||
.iter_mut()
|
.take(degree)
|
||||||
.take(degree)
|
.map(|i| array_ref!(i.get(), 0, CHUNK_LEN))
|
||||||
.map(|i| array_ref!(i.get(), 0, CHUNK_LEN))
|
.collect();
|
||||||
.collect();
|
let mut out = [0; MAX_SIMD_DEGREE * OUT_LEN];
|
||||||
let mut out = [0; MAX_SIMD_DEGREE * OUT_LEN];
|
platform.hash_many(
|
||||||
f(
|
&input_arrays[..],
|
||||||
&input_arrays[..],
|
&[0; 8],
|
||||||
&[0; 8],
|
0,
|
||||||
0,
|
blake3::IncrementCounter::Yes,
|
||||||
blake3::IncrementCounter::Yes,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
&mut out,
|
||||||
&mut out,
|
);
|
||||||
);
|
});
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
fn bench_many_chunks_sse41(b: &mut Bencher) {
|
fn bench_many_chunks_sse41(b: &mut Bencher) {
|
||||||
if !blake3::platform::sse41_detected() {
|
if let Some(platform) = Platform::sse41() {
|
||||||
return;
|
bench_many_chunks_fn(b, platform);
|
||||||
}
|
}
|
||||||
bench_many_chunks_fn(b, blake3::sse41::hash_many, blake3::sse41::DEGREE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
fn bench_many_chunks_avx2(b: &mut Bencher) {
|
fn bench_many_chunks_avx2(b: &mut Bencher) {
|
||||||
if !blake3::platform::avx2_detected() {
|
if let Some(platform) = Platform::avx2() {
|
||||||
return;
|
bench_many_chunks_fn(b, platform);
|
||||||
}
|
}
|
||||||
bench_many_chunks_fn(b, blake3::avx2::hash_many, blake3::avx2::DEGREE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(feature = "c_avx512")]
|
#[cfg(feature = "c")]
|
||||||
fn bench_many_chunks_avx512(b: &mut Bencher) {
|
fn bench_many_chunks_avx512(b: &mut Bencher) {
|
||||||
if !blake3::platform::avx512_detected() {
|
if let Some(platform) = Platform::avx512() {
|
||||||
return;
|
bench_many_chunks_fn(b, platform);
|
||||||
}
|
}
|
||||||
bench_many_chunks_fn(b, blake3::c_avx512::hash_many, blake3::c_avx512::DEGREE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "c_neon")]
|
||||||
fn bench_many_chunks_neon(b: &mut Bencher) {
|
fn bench_many_chunks_neon(b: &mut Bencher) {
|
||||||
// When "c_neon" is on, NEON support is assumed.
|
if let Some(platform) = Platform::neon() {
|
||||||
bench_many_chunks_fn(b, blake3::c_neon::hash_many, blake3::c_neon::DEGREE);
|
bench_many_chunks_fn(b, platform);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: When we get const generics we can unify this with the chunks code.
|
// TODO: When we get const generics we can unify this with the chunks code.
|
||||||
fn bench_many_parents_fn(b: &mut Bencher, f: HashManyFn<[u8; BLOCK_LEN]>, degree: usize) {
|
fn bench_many_parents_fn(b: &mut Bencher, platform: Platform) {
|
||||||
|
let degree = platform.simd_degree();
|
||||||
let mut inputs = Vec::new();
|
let mut inputs = Vec::new();
|
||||||
for _ in 0..degree {
|
for _ in 0..degree {
|
||||||
inputs.push(RandomInput::new(b, BLOCK_LEN));
|
inputs.push(RandomInput::new(b, BLOCK_LEN));
|
||||||
}
|
}
|
||||||
unsafe {
|
b.iter(|| {
|
||||||
b.iter(|| {
|
let input_arrays: ArrayVec<[&[u8; BLOCK_LEN]; MAX_SIMD_DEGREE]> = inputs
|
||||||
let input_arrays: ArrayVec<[&[u8; BLOCK_LEN]; MAX_SIMD_DEGREE]> = inputs
|
.iter_mut()
|
||||||
.iter_mut()
|
.take(degree)
|
||||||
.take(degree)
|
.map(|i| array_ref!(i.get(), 0, BLOCK_LEN))
|
||||||
.map(|i| array_ref!(i.get(), 0, BLOCK_LEN))
|
.collect();
|
||||||
.collect();
|
let mut out = [0; MAX_SIMD_DEGREE * OUT_LEN];
|
||||||
let mut out = [0; MAX_SIMD_DEGREE * OUT_LEN];
|
platform.hash_many(
|
||||||
f(
|
&input_arrays[..],
|
||||||
&input_arrays[..],
|
&[0; 8],
|
||||||
&[0; 8],
|
0,
|
||||||
0,
|
blake3::IncrementCounter::No,
|
||||||
blake3::IncrementCounter::No,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
0,
|
||||||
0,
|
&mut out,
|
||||||
&mut out,
|
);
|
||||||
);
|
});
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
fn bench_many_parents_sse41(b: &mut Bencher) {
|
fn bench_many_parents_sse41(b: &mut Bencher) {
|
||||||
if !blake3::platform::sse41_detected() {
|
if let Some(platform) = Platform::sse41() {
|
||||||
return;
|
bench_many_parents_fn(b, platform);
|
||||||
}
|
}
|
||||||
bench_many_parents_fn(b, blake3::sse41::hash_many, blake3::sse41::DEGREE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
fn bench_many_parents_avx2(b: &mut Bencher) {
|
fn bench_many_parents_avx2(b: &mut Bencher) {
|
||||||
if !blake3::platform::avx2_detected() {
|
if let Some(platform) = Platform::avx2() {
|
||||||
return;
|
bench_many_parents_fn(b, platform);
|
||||||
}
|
}
|
||||||
bench_many_parents_fn(b, blake3::avx2::hash_many, blake3::avx2::DEGREE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(feature = "c_avx512")]
|
#[cfg(feature = "c")]
|
||||||
fn bench_many_parents_avx512(b: &mut Bencher) {
|
fn bench_many_parents_avx512(b: &mut Bencher) {
|
||||||
if !blake3::platform::avx512_detected() {
|
if let Some(platform) = Platform::avx512() {
|
||||||
return;
|
bench_many_parents_fn(b, platform);
|
||||||
}
|
}
|
||||||
bench_many_parents_fn(b, blake3::c_avx512::hash_many, blake3::c_avx512::DEGREE);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "c_neon")]
|
||||||
fn bench_many_parents_neon(b: &mut Bencher) {
|
fn bench_many_parents_neon(b: &mut Bencher) {
|
||||||
// When "c_neon" is on, NEON support is assumed.
|
if let Some(platform) = Platform::neon() {
|
||||||
bench_many_parents_fn(b, blake3::c_neon::hash_many, blake3::c_neon::DEGREE);
|
bench_many_parents_fn(b, platform);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn bench_atonce(b: &mut Bencher, len: usize) {
|
fn bench_atonce(b: &mut Bencher, len: usize) {
|
||||||
|
|
107
build.rs
107
build.rs
|
@ -13,6 +13,11 @@ fn is_x86_64() -> bool {
|
||||||
target_components()[0] == "x86_64"
|
target_components()[0] == "x86_64"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_x86_32() -> bool {
|
||||||
|
let arch = &target_components()[0];
|
||||||
|
arch == "i386" || arch == "i586" || arch == "i686"
|
||||||
|
}
|
||||||
|
|
||||||
fn is_armv7() -> bool {
|
fn is_armv7() -> bool {
|
||||||
target_components()[0] == "armv7"
|
target_components()[0] == "armv7"
|
||||||
}
|
}
|
||||||
|
@ -28,6 +33,13 @@ fn is_windows_msvc() -> bool {
|
||||||
&& target_components()[3] == "msvc"
|
&& target_components()[3] == "msvc"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_windows_gnu() -> bool {
|
||||||
|
// Some targets are only two components long, so check in steps.
|
||||||
|
target_components()[1] == "pc"
|
||||||
|
&& target_components()[2] == "windows"
|
||||||
|
&& target_components()[3] == "gnu"
|
||||||
|
}
|
||||||
|
|
||||||
fn new_build() -> cc::Build {
|
fn new_build() -> cc::Build {
|
||||||
let mut build = cc::Build::new();
|
let mut build = cc::Build::new();
|
||||||
if !is_windows_msvc() {
|
if !is_windows_msvc() {
|
||||||
|
@ -37,16 +49,16 @@ fn new_build() -> cc::Build {
|
||||||
}
|
}
|
||||||
|
|
||||||
const WINDOWS_MSVC_ERROR: &str = r#"
|
const WINDOWS_MSVC_ERROR: &str = r#"
|
||||||
The "c_avx512" feature is enabled, but your version of the MSVC C compiler does
|
The "c" feature is enabled, but your version of the MSVC C compiler does not
|
||||||
not support the "/arch:AVX512" flag. If you are building the "b3sum" or
|
support the "/arch:AVX512" flag. If you are building the "b3sum" or "bao_bin"
|
||||||
"bao_bin" crates, you can disable AVX-512 with Cargo's "--no-default-features"
|
crates, you can disable AVX-512 with Cargo's "--no-default-features" flag.
|
||||||
flag. (Note that this also disables other default features like Rayon-based
|
(Note that this also disables other default features like Rayon-based
|
||||||
multithreading, which you can re-enable with "--features=rayon".) Other crates
|
multithreading, which you can re-enable with "--features=rayon".) Other crates
|
||||||
might or might not support this workaround.
|
might or might not support this workaround.
|
||||||
"#;
|
"#;
|
||||||
|
|
||||||
const GNU_ERROR: &str = r#"
|
const GNU_ERROR: &str = r#"
|
||||||
The "c_avx512" feature is enabled, but your C compiler does not support the
|
The "c" feature is enabled, but your C compiler does not support the
|
||||||
"-mavx512f" flag. If you are building the "b3sum" or "bao_bin" crates, you can
|
"-mavx512f" flag. If you are building the "b3sum" or "bao_bin" crates, you can
|
||||||
disable AVX-512 with Cargo's "--no-default-features" flag. (Note that this also
|
disable AVX-512 with Cargo's "--no-default-features" flag. (Note that this also
|
||||||
disables other default features like Rayon-based multithreading, which you can
|
disables other default features like Rayon-based multithreading, which you can
|
||||||
|
@ -69,25 +81,76 @@ fn check_for_avx512_compiler_support(build: &cc::Build) {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
// "c_avx512" is a no-op for non-x86_64 targets. It also participates in
|
if defined("CARGO_FEATURE_C") {
|
||||||
// dynamic CPU feature detection, so it's generally safe to enable.
|
if is_x86_64() && !defined("CARGO_FEATURE_C_PREFER_INTRINSICS") {
|
||||||
// However, it probably won't build in some older environments without
|
// On 64-bit, use the assembly implementations, unless the
|
||||||
// AVX-512 support in the C compiler, and it's disabled by default for that
|
// "c_prefer_intrinsics" feature is enabled.
|
||||||
// reason.
|
if is_windows_msvc() {
|
||||||
if defined("CARGO_FEATURE_C_AVX512") && is_x86_64() {
|
let mut build = new_build();
|
||||||
let mut build = new_build();
|
build.file("c/blake3-sse41-x86_64-windows-msvc.asm");
|
||||||
check_for_avx512_compiler_support(&build);
|
build.file("c/blake3-avx2-x86_64-windows-msvc.asm");
|
||||||
build.file("c/blake3_avx512.c");
|
build.file("c/blake3-avx512-x86_64-windows-msvc.asm");
|
||||||
if is_windows_msvc() {
|
build.compile("blake3_asm");
|
||||||
// Note that a lot of versions of MSVC don't support /arch:AVX512,
|
} else if is_windows_gnu() {
|
||||||
// and they'll discard it with a warning, hopefully leading to a
|
let mut build = new_build();
|
||||||
// build error.
|
build.file("c/blake3-sse41-x86_64-windows-gnu.S");
|
||||||
build.flag("/arch:AVX512");
|
build.file("c/blake3-avx2-x86_64-windows-gnu.S");
|
||||||
|
build.file("c/blake3-avx512-x86_64-windows-gnu.S");
|
||||||
|
build.compile("blake3_asm");
|
||||||
|
} else {
|
||||||
|
// All non-Windows implementations are assumed to support
|
||||||
|
// Linux-style assembly. These files do contain a small
|
||||||
|
// explicit workaround for macOS also.
|
||||||
|
let mut build = new_build();
|
||||||
|
build.file("c/blake3-sse41-x86_64-unix.S");
|
||||||
|
build.file("c/blake3-avx2-x86_64-unix.S");
|
||||||
|
build.file("c/blake3-avx512-x86_64-unix.S");
|
||||||
|
build.compile("blake3_asm");
|
||||||
|
}
|
||||||
|
} else if is_x86_64() || is_x86_32() {
|
||||||
|
// Assembly implementations are only for 64-bit. On 32-bit, or if
|
||||||
|
// the "c_prefer_intrinsics" feature is enabled, use the
|
||||||
|
// intrinsics-based C implementations. These each need to be
|
||||||
|
// compiled separately, with the corresponding instruction set
|
||||||
|
// extension explicitly enabled in the compiler.
|
||||||
|
|
||||||
|
let mut sse41_build = new_build();
|
||||||
|
sse41_build.file("c/blake3_sse41.c");
|
||||||
|
if is_windows_msvc() {
|
||||||
|
// /arch:SSE2 is the default on x86 and undefined on x86_64:
|
||||||
|
// https://docs.microsoft.com/en-us/cpp/build/reference/arch-x86
|
||||||
|
// It also includes SSE4.1 intrinsics:
|
||||||
|
// https://stackoverflow.com/a/32183222/823869
|
||||||
|
} else {
|
||||||
|
sse41_build.flag("-msse4.1");
|
||||||
|
}
|
||||||
|
sse41_build.compile("blake3_sse41");
|
||||||
|
|
||||||
|
let mut avx2_build = new_build();
|
||||||
|
avx2_build.file("c/blake3_avx2.c");
|
||||||
|
if is_windows_msvc() {
|
||||||
|
avx2_build.flag("/arch:AVX2");
|
||||||
|
} else {
|
||||||
|
avx2_build.flag("-mavx2");
|
||||||
|
}
|
||||||
|
avx2_build.compile("blake3_avx2");
|
||||||
|
|
||||||
|
let mut avx512_build = new_build();
|
||||||
|
check_for_avx512_compiler_support(&avx512_build);
|
||||||
|
avx512_build.file("c/blake3_avx512.c");
|
||||||
|
if is_windows_msvc() {
|
||||||
|
// Note that a lot of versions of MSVC don't support /arch:AVX512,
|
||||||
|
// and they'll discard it with a warning, hopefully leading to a
|
||||||
|
// build error.
|
||||||
|
avx512_build.flag("/arch:AVX512");
|
||||||
|
} else {
|
||||||
|
avx512_build.flag("-mavx512f");
|
||||||
|
avx512_build.flag("-mavx512vl");
|
||||||
|
}
|
||||||
|
avx512_build.compile("blake3_avx512");
|
||||||
} else {
|
} else {
|
||||||
build.flag("-mavx512f");
|
// Currently no effect for non-x86 platforms.
|
||||||
build.flag("-mavx512vl");
|
|
||||||
}
|
}
|
||||||
build.compile("blake3_avx512");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if defined("CARGO_FEATURE_C_NEON") {
|
if defined("CARGO_FEATURE_C_NEON") {
|
||||||
|
|
|
@ -0,0 +1,63 @@
|
||||||
|
use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN};
|
||||||
|
|
||||||
|
// Note that there is no AVX2 implementation of compress_in_place or
|
||||||
|
// compress_xof.
|
||||||
|
|
||||||
|
// Unsafe because this may only be called on platforms supporting AVX2.
|
||||||
|
pub unsafe fn hash_many<A: arrayvec::Array<Item = u8>>(
|
||||||
|
inputs: &[&A],
|
||||||
|
key: &CVWords,
|
||||||
|
counter: u64,
|
||||||
|
increment_counter: IncrementCounter,
|
||||||
|
flags: u8,
|
||||||
|
flags_start: u8,
|
||||||
|
flags_end: u8,
|
||||||
|
out: &mut [u8],
|
||||||
|
) {
|
||||||
|
// The Rust hash_many implementations do bounds checking on the `out`
|
||||||
|
// array, but the C implementations don't. Even though this is an unsafe
|
||||||
|
// function, assert the bounds here.
|
||||||
|
assert!(out.len() >= inputs.len() * OUT_LEN);
|
||||||
|
ffi::blake3_hash_many_avx2(
|
||||||
|
inputs.as_ptr() as *const *const u8,
|
||||||
|
inputs.len(),
|
||||||
|
A::CAPACITY / BLOCK_LEN,
|
||||||
|
key.as_ptr(),
|
||||||
|
counter,
|
||||||
|
increment_counter.yes(),
|
||||||
|
flags,
|
||||||
|
flags_start,
|
||||||
|
flags_end,
|
||||||
|
out.as_mut_ptr(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub mod ffi {
|
||||||
|
extern "C" {
|
||||||
|
pub fn blake3_hash_many_avx2(
|
||||||
|
inputs: *const *const u8,
|
||||||
|
num_inputs: usize,
|
||||||
|
blocks: usize,
|
||||||
|
key: *const u32,
|
||||||
|
counter: u64,
|
||||||
|
increment_counter: bool,
|
||||||
|
flags: u8,
|
||||||
|
flags_start: u8,
|
||||||
|
flags_end: u8,
|
||||||
|
out: *mut u8,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_hash_many() {
|
||||||
|
if !crate::platform::avx2_detected() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
crate::test::test_hash_many_fn(hash_many, hash_many);
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,7 +1,5 @@
|
||||||
use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN};
|
use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN};
|
||||||
|
|
||||||
pub const DEGREE: usize = 16;
|
|
||||||
|
|
||||||
// Unsafe because this may only be called on platforms supporting AVX-512.
|
// Unsafe because this may only be called on platforms supporting AVX-512.
|
||||||
pub unsafe fn compress_in_place(
|
pub unsafe fn compress_in_place(
|
||||||
cv: &mut CVWords,
|
cv: &mut CVWords,
|
||||||
|
@ -91,7 +89,6 @@ pub mod ffi {
|
||||||
flags_end: u8,
|
flags_end: u8,
|
||||||
out: *mut u8,
|
out: *mut u8,
|
||||||
);
|
);
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,5 @@
|
||||||
use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN};
|
use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN};
|
||||||
|
|
||||||
pub const DEGREE: usize = 4;
|
|
||||||
|
|
||||||
// Unsafe because this may only be called on platforms supporting NEON.
|
// Unsafe because this may only be called on platforms supporting NEON.
|
||||||
pub unsafe fn hash_many<A: arrayvec::Array<Item = u8>>(
|
pub unsafe fn hash_many<A: arrayvec::Array<Item = u8>>(
|
||||||
inputs: &[&A],
|
inputs: &[&A],
|
||||||
|
|
|
@ -0,0 +1,114 @@
|
||||||
|
use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN};
|
||||||
|
|
||||||
|
// Unsafe because this may only be called on platforms supporting SSE4.1.
|
||||||
|
pub unsafe fn compress_in_place(
|
||||||
|
cv: &mut CVWords,
|
||||||
|
block: &[u8; BLOCK_LEN],
|
||||||
|
block_len: u8,
|
||||||
|
counter: u64,
|
||||||
|
flags: u8,
|
||||||
|
) {
|
||||||
|
ffi::blake3_compress_in_place_sse41(cv.as_mut_ptr(), block.as_ptr(), block_len, counter, flags)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unsafe because this may only be called on platforms supporting SSE4.1.
|
||||||
|
pub unsafe fn compress_xof(
|
||||||
|
cv: &CVWords,
|
||||||
|
block: &[u8; BLOCK_LEN],
|
||||||
|
block_len: u8,
|
||||||
|
counter: u64,
|
||||||
|
flags: u8,
|
||||||
|
) -> [u8; 64] {
|
||||||
|
let mut out = [0u8; 64];
|
||||||
|
ffi::blake3_compress_xof_sse41(
|
||||||
|
cv.as_ptr(),
|
||||||
|
block.as_ptr(),
|
||||||
|
block_len,
|
||||||
|
counter,
|
||||||
|
flags,
|
||||||
|
out.as_mut_ptr(),
|
||||||
|
);
|
||||||
|
out
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unsafe because this may only be called on platforms supporting SSE4.1.
|
||||||
|
pub unsafe fn hash_many<A: arrayvec::Array<Item = u8>>(
|
||||||
|
inputs: &[&A],
|
||||||
|
key: &CVWords,
|
||||||
|
counter: u64,
|
||||||
|
increment_counter: IncrementCounter,
|
||||||
|
flags: u8,
|
||||||
|
flags_start: u8,
|
||||||
|
flags_end: u8,
|
||||||
|
out: &mut [u8],
|
||||||
|
) {
|
||||||
|
// The Rust hash_many implementations do bounds checking on the `out`
|
||||||
|
// array, but the C implementations don't. Even though this is an unsafe
|
||||||
|
// function, assert the bounds here.
|
||||||
|
assert!(out.len() >= inputs.len() * OUT_LEN);
|
||||||
|
ffi::blake3_hash_many_sse41(
|
||||||
|
inputs.as_ptr() as *const *const u8,
|
||||||
|
inputs.len(),
|
||||||
|
A::CAPACITY / BLOCK_LEN,
|
||||||
|
key.as_ptr(),
|
||||||
|
counter,
|
||||||
|
increment_counter.yes(),
|
||||||
|
flags,
|
||||||
|
flags_start,
|
||||||
|
flags_end,
|
||||||
|
out.as_mut_ptr(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub mod ffi {
|
||||||
|
extern "C" {
|
||||||
|
pub fn blake3_compress_in_place_sse41(
|
||||||
|
cv: *mut u32,
|
||||||
|
block: *const u8,
|
||||||
|
block_len: u8,
|
||||||
|
counter: u64,
|
||||||
|
flags: u8,
|
||||||
|
);
|
||||||
|
pub fn blake3_compress_xof_sse41(
|
||||||
|
cv: *const u32,
|
||||||
|
block: *const u8,
|
||||||
|
block_len: u8,
|
||||||
|
counter: u64,
|
||||||
|
flags: u8,
|
||||||
|
out: *mut u8,
|
||||||
|
);
|
||||||
|
pub fn blake3_hash_many_sse41(
|
||||||
|
inputs: *const *const u8,
|
||||||
|
num_inputs: usize,
|
||||||
|
blocks: usize,
|
||||||
|
key: *const u32,
|
||||||
|
counter: u64,
|
||||||
|
increment_counter: bool,
|
||||||
|
flags: u8,
|
||||||
|
flags_start: u8,
|
||||||
|
flags_end: u8,
|
||||||
|
out: *mut u8,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_compress() {
|
||||||
|
if !crate::platform::sse41_detected() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
crate::test::test_compress_fn(compress_in_place, compress_xof);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_hash_many() {
|
||||||
|
if !crate::platform::sse41_detected() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
crate::test::test_hash_many_fn(hash_many, hash_many);
|
||||||
|
}
|
||||||
|
}
|
38
src/lib.rs
38
src/lib.rs
|
@ -39,24 +39,32 @@ mod test;
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
pub mod guts;
|
pub mod guts;
|
||||||
|
|
||||||
// These modules are pub for benchmarks only. They are not stable.
|
// The platform module is pub for benchmarks only. It is not stable.
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
#[doc(hidden)]
|
|
||||||
pub mod avx2;
|
|
||||||
#[cfg(feature = "c_avx512")]
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
#[doc(hidden)]
|
|
||||||
pub mod c_avx512;
|
|
||||||
#[cfg(feature = "c_neon")]
|
|
||||||
#[doc(hidden)]
|
|
||||||
pub mod c_neon;
|
|
||||||
#[doc(hidden)]
|
#[doc(hidden)]
|
||||||
pub mod platform;
|
pub mod platform;
|
||||||
#[doc(hidden)]
|
|
||||||
pub mod portable;
|
// Platform-specific implementations of the compression function.
|
||||||
|
mod portable;
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
#[doc(hidden)]
|
cfg_if::cfg_if! {
|
||||||
pub mod sse41;
|
if #[cfg(feature = "c")] {
|
||||||
|
#[path = "c_sse41.rs"]
|
||||||
|
mod sse41;
|
||||||
|
#[path = "c_avx2.rs"]
|
||||||
|
mod avx2;
|
||||||
|
#[path = "c_avx512.rs"]
|
||||||
|
mod avx512;
|
||||||
|
} else {
|
||||||
|
#[path = "rust_sse41.rs"]
|
||||||
|
mod sse41;
|
||||||
|
#[path = "rust_avx2.rs"]
|
||||||
|
mod avx2;
|
||||||
|
// Stable Rust does not currently support AVX-512.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[cfg(feature = "c_neon")]
|
||||||
|
#[path = "c_neon.rs"]
|
||||||
|
mod neon;
|
||||||
|
|
||||||
pub mod traits;
|
pub mod traits;
|
||||||
|
|
||||||
|
|
|
@ -1,18 +1,10 @@
|
||||||
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN};
|
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN};
|
||||||
use arrayref::{array_mut_ref, array_ref};
|
use arrayref::{array_mut_ref, array_ref};
|
||||||
|
|
||||||
#[cfg(feature = "c_avx512")]
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
use crate::c_avx512;
|
|
||||||
#[cfg(feature = "c_neon")]
|
|
||||||
use crate::c_neon;
|
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
||||||
use crate::{avx2, sse41};
|
|
||||||
|
|
||||||
cfg_if::cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
|
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
|
||||||
cfg_if::cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(feature = "c_avx512")] {
|
if #[cfg(feature = "c")] {
|
||||||
pub const MAX_SIMD_DEGREE: usize = 16;
|
pub const MAX_SIMD_DEGREE: usize = 16;
|
||||||
} else {
|
} else {
|
||||||
pub const MAX_SIMD_DEGREE: usize = 8;
|
pub const MAX_SIMD_DEGREE: usize = 8;
|
||||||
|
@ -32,7 +24,7 @@ cfg_if::cfg_if! {
|
||||||
cfg_if::cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
|
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
|
||||||
cfg_if::cfg_if! {
|
cfg_if::cfg_if! {
|
||||||
if #[cfg(feature = "c_avx512")] {
|
if #[cfg(feature = "c")] {
|
||||||
pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
|
||||||
} else {
|
} else {
|
||||||
pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
|
||||||
|
@ -52,7 +44,7 @@ pub enum Platform {
|
||||||
SSE41,
|
SSE41,
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
AVX2,
|
AVX2,
|
||||||
#[cfg(feature = "c_avx512")]
|
#[cfg(feature = "c")]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
AVX512,
|
AVX512,
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "c_neon")]
|
||||||
|
@ -64,7 +56,7 @@ impl Platform {
|
||||||
pub fn detect() -> Self {
|
pub fn detect() -> Self {
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
{
|
{
|
||||||
#[cfg(feature = "c_avx512")]
|
#[cfg(feature = "c")]
|
||||||
{
|
{
|
||||||
if avx512_detected() {
|
if avx512_detected() {
|
||||||
return Platform::AVX512;
|
return Platform::AVX512;
|
||||||
|
@ -93,7 +85,7 @@ impl Platform {
|
||||||
Platform::SSE41 => 4,
|
Platform::SSE41 => 4,
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX2 => 8,
|
Platform::AVX2 => 8,
|
||||||
#[cfg(feature = "c_avx512")]
|
#[cfg(feature = "c")]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX512 => 16,
|
Platform::AVX512 => 16,
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "c_neon")]
|
||||||
|
@ -103,7 +95,7 @@ impl Platform {
|
||||||
degree
|
degree
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn compress_in_place(
|
pub fn compress_in_place(
|
||||||
&self,
|
&self,
|
||||||
cv: &mut CVWords,
|
cv: &mut CVWords,
|
||||||
block: &[u8; BLOCK_LEN],
|
block: &[u8; BLOCK_LEN],
|
||||||
|
@ -116,13 +108,13 @@ impl Platform {
|
||||||
// Safe because detect() checked for platform support.
|
// Safe because detect() checked for platform support.
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::SSE41 | Platform::AVX2 => unsafe {
|
Platform::SSE41 | Platform::AVX2 => unsafe {
|
||||||
sse41::compress_in_place(cv, block, block_len, counter, flags)
|
crate::sse41::compress_in_place(cv, block, block_len, counter, flags)
|
||||||
},
|
},
|
||||||
// Safe because detect() checked for platform support.
|
// Safe because detect() checked for platform support.
|
||||||
#[cfg(feature = "c_avx512")]
|
#[cfg(feature = "c")]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX512 => unsafe {
|
Platform::AVX512 => unsafe {
|
||||||
c_avx512::compress_in_place(cv, block, block_len, counter, flags)
|
crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
|
||||||
},
|
},
|
||||||
// No NEON compress_in_place() implementation yet.
|
// No NEON compress_in_place() implementation yet.
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "c_neon")]
|
||||||
|
@ -130,7 +122,7 @@ impl Platform {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn compress_xof(
|
pub fn compress_xof(
|
||||||
&self,
|
&self,
|
||||||
cv: &CVWords,
|
cv: &CVWords,
|
||||||
block: &[u8; BLOCK_LEN],
|
block: &[u8; BLOCK_LEN],
|
||||||
|
@ -143,13 +135,13 @@ impl Platform {
|
||||||
// Safe because detect() checked for platform support.
|
// Safe because detect() checked for platform support.
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::SSE41 | Platform::AVX2 => unsafe {
|
Platform::SSE41 | Platform::AVX2 => unsafe {
|
||||||
sse41::compress_xof(cv, block, block_len, counter, flags)
|
crate::sse41::compress_xof(cv, block, block_len, counter, flags)
|
||||||
},
|
},
|
||||||
// Safe because detect() checked for platform support.
|
// Safe because detect() checked for platform support.
|
||||||
#[cfg(feature = "c_avx512")]
|
#[cfg(feature = "c")]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX512 => unsafe {
|
Platform::AVX512 => unsafe {
|
||||||
c_avx512::compress_xof(cv, block, block_len, counter, flags)
|
crate::avx512::compress_xof(cv, block, block_len, counter, flags)
|
||||||
},
|
},
|
||||||
// No NEON compress_xof() implementation yet.
|
// No NEON compress_xof() implementation yet.
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "c_neon")]
|
||||||
|
@ -167,7 +159,7 @@ impl Platform {
|
||||||
// after every block, there's a small but measurable performance loss.
|
// after every block, there's a small but measurable performance loss.
|
||||||
// Compressing chunks with a dedicated loop avoids this.
|
// Compressing chunks with a dedicated loop avoids this.
|
||||||
|
|
||||||
pub(crate) fn hash_many<A: arrayvec::Array<Item = u8>>(
|
pub fn hash_many<A: arrayvec::Array<Item = u8>>(
|
||||||
&self,
|
&self,
|
||||||
inputs: &[&A],
|
inputs: &[&A],
|
||||||
key: &CVWords,
|
key: &CVWords,
|
||||||
|
@ -192,7 +184,7 @@ impl Platform {
|
||||||
// Safe because detect() checked for platform support.
|
// Safe because detect() checked for platform support.
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::SSE41 => unsafe {
|
Platform::SSE41 => unsafe {
|
||||||
sse41::hash_many(
|
crate::sse41::hash_many(
|
||||||
inputs,
|
inputs,
|
||||||
key,
|
key,
|
||||||
counter,
|
counter,
|
||||||
|
@ -206,7 +198,7 @@ impl Platform {
|
||||||
// Safe because detect() checked for platform support.
|
// Safe because detect() checked for platform support.
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX2 => unsafe {
|
Platform::AVX2 => unsafe {
|
||||||
avx2::hash_many(
|
crate::avx2::hash_many(
|
||||||
inputs,
|
inputs,
|
||||||
key,
|
key,
|
||||||
counter,
|
counter,
|
||||||
|
@ -218,10 +210,10 @@ impl Platform {
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
// Safe because detect() checked for platform support.
|
// Safe because detect() checked for platform support.
|
||||||
#[cfg(feature = "c_avx512")]
|
#[cfg(feature = "c")]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
Platform::AVX512 => unsafe {
|
Platform::AVX512 => unsafe {
|
||||||
c_avx512::hash_many(
|
crate::avx512::hash_many(
|
||||||
inputs,
|
inputs,
|
||||||
key,
|
key,
|
||||||
counter,
|
counter,
|
||||||
|
@ -235,7 +227,7 @@ impl Platform {
|
||||||
// Assumed to be safe if the "c_neon" feature is on.
|
// Assumed to be safe if the "c_neon" feature is on.
|
||||||
#[cfg(feature = "c_neon")]
|
#[cfg(feature = "c_neon")]
|
||||||
Platform::NEON => unsafe {
|
Platform::NEON => unsafe {
|
||||||
c_neon::hash_many(
|
crate::neon::hash_many(
|
||||||
inputs,
|
inputs,
|
||||||
key,
|
key,
|
||||||
counter,
|
counter,
|
||||||
|
@ -248,11 +240,52 @@ impl Platform {
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Explicit platform constructors, for benchmarks.
|
||||||
|
|
||||||
|
pub fn portable() -> Self {
|
||||||
|
Self::Portable
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
pub fn sse41() -> Option<Self> {
|
||||||
|
if sse41_detected() {
|
||||||
|
Some(Self::SSE41)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
|
pub fn avx2() -> Option<Self> {
|
||||||
|
if avx2_detected() {
|
||||||
|
Some(Self::AVX2)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Explicit constructor for the `AVX512` platform variant.
///
/// Returns `None` when `avx512_detected()` reports that the required
/// AVX-512 support is not available, and `Some(Platform::AVX512)`
/// otherwise. Only compiled when the "c" feature is enabled on x86 and
/// x86_64 targets.
#[cfg(feature = "c")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn avx512() -> Option<Self> {
    if !avx512_detected() {
        return None;
    }
    Some(Self::AVX512)
}
|
||||||
|
|
||||||
|
/// Explicit constructor for the `NEON` platform variant.
///
/// Always returns `Some(Platform::NEON)`: no runtime detection is
/// performed, and NEON support is assumed whenever the "c_neon" feature is
/// enabled. The `Option` return type mirrors the other SIMD constructors.
///
/// Gated on the "c_neon" feature alone, matching the `Platform::NEON`
/// variant itself. An additional x86/x86_64 gate would remove this
/// constructor on exactly the ARM targets it is meant for.
#[cfg(feature = "c_neon")]
pub fn neon() -> Option<Self> {
    // Assumed to be safe if the "c_neon" feature is on.
    Some(Self::NEON)
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Note that AVX-512 is divided into multiple featuresets, and we use two of
|
// Note that AVX-512 is divided into multiple featuresets, and we use two of
|
||||||
// them, F and VL.
|
// them, F and VL.
|
||||||
#[cfg(feature = "c_avx512")]
|
#[cfg(feature = "c")]
|
||||||
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn avx512_detected() -> bool {
|
pub fn avx512_detected() -> bool {
|
||||||
|
|
|
@ -3,10 +3,16 @@ name = "test_vectors"
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
edition = "2018"
|
edition = "2018"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
default = []
|
||||||
|
c = ["blake3/c"]
|
||||||
|
c_prefer_intrinsics = ["blake3/c_prefer_intrinsics"]
|
||||||
|
c_neon = ["blake3/c_neon"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
# If you ever change these path dependencies, you'll probably need to update
|
# If you ever change these path dependencies, you'll probably need to update
|
||||||
# cross_test.sh, or CI will break. I'm sorry >.<
|
# cross_test.sh, or CI will break. I'm sorry >.<
|
||||||
blake3 = { path = "../", features=["c_avx512"] }
|
blake3 = { path = "../" }
|
||||||
hex = "0.4.0"
|
hex = "0.4.0"
|
||||||
reference_impl = { path = "../reference_impl" }
|
reference_impl = { path = "../reference_impl" }
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
|
|
|
@ -19,7 +19,7 @@ mv blake3/test_vectors .
|
||||||
mv blake3/reference_impl test_vectors
|
mv blake3/reference_impl test_vectors
|
||||||
mv blake3 test_vectors
|
mv blake3 test_vectors
|
||||||
cd test_vectors
|
cd test_vectors
|
||||||
sed -i 's|blake3 = { path = "../", features=\["c_avx512"\] }|blake3 = { path = "./blake3" }|' Cargo.toml
|
sed -i 's|blake3 = { path = "../" }|blake3 = { path = "./blake3" }|' Cargo.toml
|
||||||
sed -i 's|reference_impl = { path = "../reference_impl" }|reference_impl = { path = "reference_impl" }|' Cargo.toml
|
sed -i 's|reference_impl = { path = "../reference_impl" }|reference_impl = { path = "reference_impl" }|' Cargo.toml
|
||||||
|
|
||||||
cross test "$@"
|
cross test "$@"
|
||||||
|
|
Loading…
Reference in New Issue