1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-05-04 19:16:27 +02:00

add blake3_c_rust_bindings for testing and benchmarking

This commit is contained in:
Jack O'Connor 2020-01-13 18:11:07 -05:00
parent 33a9bee51f
commit 84c26670bf
8 changed files with 1192 additions and 15 deletions

View File

@ -40,6 +40,10 @@ jobs:
- name: test b3sum --no-default-features
run: cargo test --no-default-features
working-directory: ./b3sum
# Test C code.
- name: cargo test C bindings
run: cargo test
working-directory: ./c/blake3_c_rust_bindings
cross_tests:
name: cross ${{ matrix.arch }}
@ -65,7 +69,7 @@ jobs:
# Currently only on x86.
c_tests:
name: C tests
name: C Makefile tests
runs-on: ubuntu-latest
steps:

View File

@ -1,5 +1,6 @@
#pragma once
#include <stddef.h>
#include <stdint.h>
#define BLAKE3_KEY_LEN 32

View File

@ -0,0 +1,21 @@
[package]
name = "blake3_c_rust_bindings"
version = "0.0.0"
description = "TESTING ONLY Rust bindings for the BLAKE3 C implementation"
edition = "2018"
[features]
# Activate NEON bindings. We don't currently do any CPU feature detection for
# this. If this Cargo feature is on, the NEON gets used.
neon = []
[dev-dependencies]
arrayref = "0.3.5"
arrayvec = { version = "0.5.1", default-features = false, features = ["array-sizes-33-128"] }
page_size = "0.4.1"
rand = "0.7.2"
rand_chacha = "0.2.1"
reference_impl = { path = "../../reference_impl" }
[build-dependencies]
cc = "1.0.48"

View File

@ -0,0 +1,334 @@
#![feature(test)]
extern crate test;
use arrayref::array_ref;
use arrayvec::ArrayVec;
use rand::prelude::*;
use test::Bencher;
const KIB: usize = 1024;
const MAX_SIMD_DEGREE: usize = 16;
const BLOCK_LEN: usize = 64;
const CHUNK_LEN: usize = 1024;
const OUT_LEN: usize = 32;
/// Benchmark input that varies two things between iterations:
/// 1. the bytes themselves (filled once from a thread-local RNG), and
/// 2. the offset into the backing buffer at which the input starts.
pub struct RandomInput {
    buf: Vec<u8>,
    len: usize,
    offsets: Vec<usize>,
    offset_index: usize,
}

impl RandomInput {
    /// Creates an input of `len` bytes and adds `len` to the bencher's byte
    /// counter. The backing buffer is one page longer than `len` so that every
    /// offset in `0..page_size` leaves room for a full-length slice.
    pub fn new(b: &mut Bencher, len: usize) -> Self {
        b.bytes += len as u64;
        let page = page_size::get();
        let mut rng = rand::thread_rng();
        let mut buf = vec![0u8; len + page];
        rng.fill_bytes(&mut buf);
        // Visit every offset within one page, in shuffled order.
        let mut offsets = (0..page).collect::<Vec<usize>>();
        offsets.shuffle(&mut rng);
        Self {
            buf,
            len,
            offsets,
            offset_index: 0,
        }
    }

    /// Returns the next `len`-byte window, advancing through the shuffled
    /// offsets and wrapping back to the first one after the last.
    pub fn get(&mut self) -> &[u8] {
        let start = self.offsets[self.offset_index];
        self.offset_index = (self.offset_index + 1) % self.offsets.len();
        &self.buf[start..start + self.len]
    }
}
// Function-pointer type for the C in-place compression functions that the
// single-compression benchmarks pass in.
type CompressInPlaceFn =
unsafe extern "C" fn(cv: *mut u32, block: *const u8, block_len: u8, counter: u64, flags: u8);
// Shared driver for the single-compression benchmarks: repeatedly calls `f` on
// one 64-byte block, with counter 0 and flags 0, updating `state` in place.
fn bench_single_compression_fn(b: &mut Bencher, f: CompressInPlaceFn) {
let mut state = [1u32; 8];
let mut r = RandomInput::new(b, 64);
// The input block is selected once, outside the timed closure, so every
// iteration compresses the same 64 bytes.
let input = array_ref!(r.get(), 0, 64);
b.iter(|| unsafe { f(state.as_mut_ptr(), input.as_ptr(), 64, 0, 0) });
}
// Single-compression benchmark for the portable C implementation.
#[bench]
fn bench_single_compression_portable(b: &mut Bencher) {
bench_single_compression_fn(
b,
blake3_c_rust_bindings::ffi::blake3_compress_in_place_portable,
);
}
// Single-compression benchmark for the SSE4.1 C implementation. Compiled only
// for x86 targets; returns without measuring when SSE4.1 is not detected.
#[bench]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn bench_single_compression_sse41(b: &mut Bencher) {
if !blake3_c_rust_bindings::sse41_detected() {
return;
}
bench_single_compression_fn(
b,
blake3_c_rust_bindings::ffi::x86::blake3_compress_in_place_sse41,
);
}
// Single-compression benchmark for the AVX-512 C implementation.
//
// The cfg gate is required: `avx512_detected` and the `ffi::x86` module are
// only compiled on x86/x86_64, so without it this file fails to build on any
// other architecture. At runtime the benchmark returns without measuring when
// AVX-512 (F + VL) is not detected.
#[bench]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn bench_single_compression_avx512(b: &mut Bencher) {
    if !blake3_c_rust_bindings::avx512_detected() {
        return;
    }
    bench_single_compression_fn(
        b,
        blake3_c_rust_bindings::ffi::x86::blake3_compress_in_place_avx512,
    );
}
// Function-pointer type for the C `hash_many` functions that the many-chunks
// and many-parents benchmarks pass in.
type HashManyFn = unsafe extern "C" fn(
inputs: *const *const u8,
num_inputs: usize,
blocks: usize,
key: *const u32,
counter: u64,
increment_counter: bool,
flags: u8,
flags_start: u8,
flags_end: u8,
out: *mut u8,
);
// Shared driver for the many-chunks benchmarks: hashes `degree` inputs of
// CHUNK_LEN bytes each with a single call to `f` per iteration.
fn bench_many_chunks_fn(b: &mut Bencher, f: HashManyFn, degree: usize) {
let mut inputs = Vec::new();
for _ in 0..degree {
inputs.push(RandomInput::new(b, CHUNK_LEN));
}
b.iter(|| {
// Each iteration takes a fresh window from every RandomInput, so the
// pointer array is rebuilt inside the timed closure.
let input_arrays: ArrayVec<[&[u8; CHUNK_LEN]; MAX_SIMD_DEGREE]> = inputs
.iter_mut()
.take(degree)
.map(|i| array_ref!(i.get(), 0, CHUNK_LEN))
.collect();
// Sized for the widest degree; only `degree * OUT_LEN` bytes are expected
// to be written (assumption based on the argument list, not checked here).
let mut out = [0; MAX_SIMD_DEGREE * OUT_LEN];
unsafe {
// Arguments in order: inputs, num_inputs, blocks per input, key (all
// zero), counter 0, increment_counter = true, flags, flags_start,
// flags_end (all 0), out.
f(
input_arrays.as_ptr() as _,
input_arrays.len(),
CHUNK_LEN / BLOCK_LEN,
[0u32; 8].as_ptr(),
0,
true,
0,
0,
0,
out.as_mut_ptr(),
)
}
});
}
// Many-chunks benchmark for SSE4.1 with 4 inputs per call. x86 only; returns
// without measuring when SSE4.1 is not detected.
#[bench]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn bench_many_chunks_sse41(b: &mut Bencher) {
if !blake3_c_rust_bindings::sse41_detected() {
return;
}
bench_many_chunks_fn(
b,
blake3_c_rust_bindings::ffi::x86::blake3_hash_many_sse41,
4,
);
}
// Many-chunks benchmark for AVX2 with 8 inputs per call. x86 only; returns
// without measuring when AVX2 is not detected.
#[bench]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn bench_many_chunks_avx2(b: &mut Bencher) {
if !blake3_c_rust_bindings::avx2_detected() {
return;
}
bench_many_chunks_fn(
b,
blake3_c_rust_bindings::ffi::x86::blake3_hash_many_avx2,
8,
);
}
// Many-chunks benchmark for AVX-512 with 16 inputs per call.
//
// The cfg gate is required: `avx512_detected` and the `ffi::x86` module are
// only compiled on x86/x86_64, so without it this file fails to build on any
// other architecture. Returns without measuring when AVX-512 is not detected.
#[bench]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn bench_many_chunks_avx512(b: &mut Bencher) {
    if !blake3_c_rust_bindings::avx512_detected() {
        return;
    }
    bench_many_chunks_fn(
        b,
        blake3_c_rust_bindings::ffi::x86::blake3_hash_many_avx512,
        16,
    );
}
// Many-chunks benchmark for NEON with 4 inputs per call. Compiled only when
// the "neon" Cargo feature is enabled; no runtime detection is performed.
#[bench]
#[cfg(feature = "neon")]
fn bench_many_chunks_neon(b: &mut Bencher) {
// When "neon" is on, NEON support is assumed.
bench_many_chunks_fn(
b,
blake3_c_rust_bindings::ffi::neon::blake3_hash_many_neon,
4,
);
}
// Shared driver for the many-parents benchmarks: hashes `degree` inputs of
// BLOCK_LEN bytes (one block each) with a single call to `f` per iteration.
// TODO: When we get const generics we can unify this with the chunks code.
fn bench_many_parents_fn(b: &mut Bencher, f: HashManyFn, degree: usize) {
let mut inputs = Vec::new();
for _ in 0..degree {
inputs.push(RandomInput::new(b, BLOCK_LEN));
}
b.iter(|| {
// Each iteration takes a fresh window from every RandomInput.
let input_arrays: ArrayVec<[&[u8; BLOCK_LEN]; MAX_SIMD_DEGREE]> = inputs
.iter_mut()
.take(degree)
.map(|i| array_ref!(i.get(), 0, BLOCK_LEN))
.collect();
let mut out = [0; MAX_SIMD_DEGREE * OUT_LEN];
unsafe {
// Arguments in order: inputs, num_inputs, 1 block per input, key (all
// zero), counter 0, increment_counter = false, flags, flags_start,
// flags_end (all 0), out.
f(
input_arrays.as_ptr() as _,
input_arrays.len(),
1,
[0u32; 8].as_ptr(),
0,
false,
0,
0,
0,
out.as_mut_ptr(),
)
}
});
}
// Many-parents benchmark for SSE4.1 with 4 inputs per call. x86 only; returns
// without measuring when SSE4.1 is not detected.
#[bench]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn bench_many_parents_sse41(b: &mut Bencher) {
if !blake3_c_rust_bindings::sse41_detected() {
return;
}
bench_many_parents_fn(
b,
blake3_c_rust_bindings::ffi::x86::blake3_hash_many_sse41,
4,
);
}
// Many-parents benchmark for AVX2 with 8 inputs per call. x86 only; returns
// without measuring when AVX2 is not detected.
#[bench]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn bench_many_parents_avx2(b: &mut Bencher) {
if !blake3_c_rust_bindings::avx2_detected() {
return;
}
bench_many_parents_fn(
b,
blake3_c_rust_bindings::ffi::x86::blake3_hash_many_avx2,
8,
);
}
// Many-parents benchmark for AVX-512 with 16 inputs per call.
//
// The cfg gate is required: `avx512_detected` and the `ffi::x86` module are
// only compiled on x86/x86_64, so without it this file fails to build on any
// other architecture. Returns without measuring when AVX-512 is not detected.
#[bench]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn bench_many_parents_avx512(b: &mut Bencher) {
    if !blake3_c_rust_bindings::avx512_detected() {
        return;
    }
    bench_many_parents_fn(
        b,
        blake3_c_rust_bindings::ffi::x86::blake3_hash_many_avx512,
        16,
    );
}
// Many-parents benchmark for NEON with 4 inputs per call. Compiled only when
// the "neon" Cargo feature is enabled; no runtime detection is performed.
#[bench]
#[cfg(feature = "neon")]
fn bench_many_parents_neon(b: &mut Bencher) {
// When "neon" is on, NEON support is assumed.
bench_many_parents_fn(
b,
blake3_c_rust_bindings::ffi::neon::blake3_hash_many_neon,
4,
);
}
// Shared driver for the incremental-hashing benchmarks: each iteration creates
// a new Hasher, feeds it one `len`-byte input in a single update, and
// finalizes into a 32-byte output.
fn bench_incremental(b: &mut Bencher, len: usize) {
let mut input = RandomInput::new(b, len);
b.iter(|| {
let mut hasher = blake3_c_rust_bindings::Hasher::new();
hasher.update(input.get());
let mut out = [0; 32];
hasher.finalize(&mut out);
// Returning the digest hands it to the bencher rather than discarding it.
out
});
}
// Incremental-hashing benchmarks at increasing input sizes: one block
// (BLOCK_LEN bytes), then powers of two from 1 KiB through 1024 KiB. The
// zero-padded numbers in the names make the results sort by size.
#[bench]
fn bench_incremental_0001_block(b: &mut Bencher) {
bench_incremental(b, BLOCK_LEN);
}
#[bench]
fn bench_incremental_0001_kib(b: &mut Bencher) {
bench_incremental(b, 1 * KIB);
}
#[bench]
fn bench_incremental_0002_kib(b: &mut Bencher) {
bench_incremental(b, 2 * KIB);
}
#[bench]
fn bench_incremental_0004_kib(b: &mut Bencher) {
bench_incremental(b, 4 * KIB);
}
#[bench]
fn bench_incremental_0008_kib(b: &mut Bencher) {
bench_incremental(b, 8 * KIB);
}
#[bench]
fn bench_incremental_0016_kib(b: &mut Bencher) {
bench_incremental(b, 16 * KIB);
}
#[bench]
fn bench_incremental_0032_kib(b: &mut Bencher) {
bench_incremental(b, 32 * KIB);
}
#[bench]
fn bench_incremental_0064_kib(b: &mut Bencher) {
bench_incremental(b, 64 * KIB);
}
#[bench]
fn bench_incremental_0128_kib(b: &mut Bencher) {
bench_incremental(b, 128 * KIB);
}
#[bench]
fn bench_incremental_0256_kib(b: &mut Bencher) {
bench_incremental(b, 256 * KIB);
}
#[bench]
fn bench_incremental_0512_kib(b: &mut Bencher) {
bench_incremental(b, 512 * KIB);
}
#[bench]
fn bench_incremental_1024_kib(b: &mut Bencher) {
bench_incremental(b, 1024 * KIB);
}

View File

@ -0,0 +1,119 @@
use std::env;
/// Reports whether the environment variable `var` is set, regardless of its
/// value (an empty or non-UTF-8 value still counts as set).
fn defined(var: &str) -> bool {
    match env::var_os(var) {
        Some(_) => true,
        None => false,
    }
}
/// Splits the `TARGET` environment variable on `-` and returns the pieces.
///
/// Panics if `TARGET` is unset or not valid Unicode.
fn target_components() -> Vec<String> {
    env::var("TARGET")
        .unwrap()
        .split('-')
        .map(String::from)
        .collect()
}
// This is the full current list of x86 targets supported by Rustc. The C
// dispatch code uses
// #if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
// so this needs to be somewhat broad to match. These bindings are mainly for
// testing, so it's not the end of the world if this misses some obscure *86
// platform somehow.
fn is_x86() -> bool {
target_components()[0] == "x86_64"
|| target_components()[0] == "i386"
|| target_components()[0] == "i586"
|| target_components()[0] == "i686"
}
// True when the first component of the target triple is exactly "armv7".
fn is_armv7() -> bool {
    let components = target_components();
    components[0] == "armv7"
}
// Windows targets may be using the MSVC toolchain or the GNU toolchain. The
// right compiler flags to use depend on the toolchain. (And we don't want to
// use flag_if_supported, because we don't want features to be silently
// disabled by old compilers.)
fn is_windows_msvc() -> bool {
// Some targets are only two components long, so check in steps.
target_components()[1] == "pc"
&& target_components()[2] == "windows"
&& target_components()[3] == "msvc"
}
// Creates a `cc::Build` with the flags common to every C compilation unit in
// this script: `-std=c11` on every toolchain except Windows MSVC, which gets
// no extra flag.
fn new_build() -> cc::Build {
    let mut build = cc::Build::new();
    if is_windows_msvc() {
        return build;
    }
    build.flag("-std=c11");
    build
}
// Build-script entry point. Compiles the C sources in the parent directory
// into static libraries: the base/portable code always, the SSE4.1, AVX2 and
// AVX-512 code on x86 targets, and the NEON code when the "neon" Cargo
// feature is enabled. Then emits rerun directives for Cargo.
fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut base_build = new_build();
base_build.file("../blake3.c");
base_build.file("../blake3_dispatch.c");
base_build.file("../blake3_portable.c");
base_build.compile("blake3_c_base");
if is_x86() {
// Each instruction-set variant is compiled as its own library so that its
// -m/arch flags apply to that file only.
let mut sse41_build = new_build();
sse41_build.file("../blake3_sse41.c");
if is_windows_msvc() {
// /arch:SSE2 is the default on x86 and undefined on x86_64:
// https://docs.microsoft.com/en-us/cpp/build/reference/arch-x86
// It also includes SSE4.1 intrinsics:
// https://stackoverflow.com/a/32183222/823869
} else {
sse41_build.flag("-msse4.1");
}
sse41_build.compile("blake3_c_sse41");
let mut avx2_build = new_build();
avx2_build.file("../blake3_avx2.c");
if is_windows_msvc() {
avx2_build.flag("/arch:AVX2");
} else {
avx2_build.flag("-mavx2");
}
avx2_build.compile("blake3_c_avx2");
let mut avx512_build = new_build();
avx512_build.file("../blake3_avx512.c");
if is_windows_msvc() {
avx512_build.flag("/arch:AVX512");
} else {
avx512_build.flag("-mavx512f");
avx512_build.flag("-mavx512vl");
}
avx512_build.compile("blake3_c_avx512");
}
// Build the NEON code only when the "neon" Cargo feature is enabled. No
// target check is made here beyond choosing flags below, so enabling the
// feature on a target without NEON support is the caller's responsibility.
if defined("CARGO_FEATURE_NEON") {
let mut neon_build = new_build();
neon_build.file("../blake3_neon.c");
// ARMv7 platforms that support NEON generally need the following
// flags. AArch64 supports NEON by default and does not support -mfpu.
if is_armv7() {
neon_build.flag("-mfpu=neon-vfpv4");
neon_build.flag("-mfloat-abi=hard");
}
neon_build.compile("blake3_c_neon");
}
// The `cc` crate does not automatically emit rerun-if directives for the
// environment variables it supports, in particular for $CC. We expect to
// do a lot of benchmarking across different compilers, so we explicitly
// add the variables that we're likely to need.
println!("cargo:rerun-if-env-changed=CC");
println!("cargo:rerun-if-env-changed=CFLAGS");
// Ditto for source files, though these shouldn't change as often. Every
// entry of the parent directory is listed; a non-UTF-8 path panics.
for file in std::fs::read_dir("..")? {
println!(
"cargo:rerun-if-changed={}",
file?.path().to_str().expect("utf-8")
);
}
Ok(())
}

View File

@ -0,0 +1,236 @@
//! These are Rust bindings for the C implementation of BLAKE3. As there is a
//! native Rust implementation of BLAKE3 provided in this same repo, these
//! bindings are not expected to be used in production. They're intended for
//! testing and benchmarking.
use std::ffi::{c_void, CString};
use std::mem::MaybeUninit;
#[cfg(test)]
mod test;
pub const BLOCK_LEN: usize = 64;
pub const CHUNK_LEN: usize = 1024;
pub const OUT_LEN: usize = 32;
// Feature detection functions for tests and benchmarks. Note that the C code
// does its own feature detection in blake3_dispatch.c.
//
// All three functions exist only on x86/x86_64 targets, so callers must be
// gated with the same cfg.

/// Returns true when the running CPU reports SSE4.1 support.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn sse41_detected() -> bool {
is_x86_feature_detected!("sse4.1")
}
/// Returns true when the running CPU reports AVX2 support.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn avx2_detected() -> bool {
is_x86_feature_detected!("avx2")
}
/// Returns true when the running CPU reports both AVX-512F and AVX-512VL,
/// the two extensions the AVX-512 C code is compiled with by the build script.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub fn avx512_detected() -> bool {
is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl")
}
/// Safe wrapper around the C `blake3_hasher` state. Cloning copies the whole
/// C struct by value.
#[derive(Clone)]
pub struct Hasher(ffi::blake3_hasher);
impl Hasher {
/// Creates a hasher via `blake3_hasher_init`.
pub fn new() -> Self {
let mut c_state = MaybeUninit::uninit();
// SAFETY (assumed, not checkable from this file): the C init function must
// write every field of the struct before `assume_init` is sound.
unsafe {
ffi::blake3_hasher_init(c_state.as_mut_ptr());
Self(c_state.assume_init())
}
}
/// Creates a hasher via `blake3_hasher_init_keyed` with a 32-byte key.
pub fn new_keyed(key: &[u8; 32]) -> Self {
let mut c_state = MaybeUninit::uninit();
// SAFETY (assumed): same initialization requirement as `new`; `key` points
// to exactly 32 readable bytes.
unsafe {
ffi::blake3_hasher_init_keyed(c_state.as_mut_ptr(), key.as_ptr());
Self(c_state.assume_init())
}
}
/// Creates a hasher via `blake3_hasher_init_derive_key`.
///
/// # Panics
///
/// Panics if `context` contains an interior NUL byte, because it is passed
/// to C as a NUL-terminated string.
pub fn new_derive_key(context: &str) -> Self {
let mut c_state = MaybeUninit::uninit();
let context_c_string = CString::new(context).expect("valid C string, no null bytes");
// SAFETY (assumed): same initialization requirement as `new`; the CString
// outlives the call.
unsafe {
ffi::blake3_hasher_init_derive_key(c_state.as_mut_ptr(), context_c_string.as_ptr());
Self(c_state.assume_init())
}
}
/// Feeds `input` to the hasher. May be called any number of times.
pub fn update(&mut self, input: &[u8]) {
// SAFETY: pointer and length come from the same live slice.
unsafe {
ffi::blake3_hasher_update(&mut self.0, input.as_ptr() as *const c_void, input.len());
}
}
/// Writes `output.len()` bytes of output into `output`. Takes `&self`, so
/// the hasher can be finalized repeatedly or updated further afterwards.
pub fn finalize(&self, output: &mut [u8]) {
// SAFETY: pointer and length come from the same live mutable slice.
unsafe {
ffi::blake3_hasher_finalize(&self.0, output.as_mut_ptr(), output.len());
}
}
}
/// Raw declarations for the C implementation. The struct layouts and function
/// signatures here must match the C headers exactly; nothing in this file
/// checks that they do.
pub mod ffi {
// Mirror of the C chunk-state struct (field order and sizes must match C).
#[repr(C)]
#[derive(Copy, Clone)]
pub struct blake3_chunk_state {
pub cv: [u32; 8usize],
pub chunk_counter: u64,
pub buf: [u8; 64usize],
pub buf_len: u8,
pub blocks_compressed: u8,
pub flags: u8,
}
// Mirror of the C hasher struct.
// NOTE(review): 1728 = 54 * 32; presumably the C header's maximum depth times
// the output length. Confirm against blake3.h whenever that header changes.
#[repr(C)]
#[derive(Copy, Clone)]
pub struct blake3_hasher {
pub key: [u32; 8usize],
pub chunk: blake3_chunk_state,
pub cv_stack_len: u8,
pub cv_stack: [u8; 1728usize],
}
extern "C" {
// public interface
pub fn blake3_hasher_init(self_: *mut blake3_hasher);
pub fn blake3_hasher_init_keyed(self_: *mut blake3_hasher, key: *const u8);
pub fn blake3_hasher_init_derive_key(
self_: *mut blake3_hasher,
context: *const ::std::os::raw::c_char,
);
pub fn blake3_hasher_update(
self_: *mut blake3_hasher,
input: *const ::std::os::raw::c_void,
input_len: usize,
);
pub fn blake3_hasher_finalize(self_: *const blake3_hasher, out: *mut u8, out_len: usize);
// portable low-level functions
pub fn blake3_compress_in_place_portable(
cv: *mut u32,
block: *const u8,
block_len: u8,
counter: u64,
flags: u8,
);
pub fn blake3_compress_xof_portable(
cv: *const u32,
block: *const u8,
block_len: u8,
counter: u64,
flags: u8,
out: *mut u8,
);
pub fn blake3_hash_many_portable(
inputs: *const *const u8,
num_inputs: usize,
blocks: usize,
key: *const u32,
counter: u64,
increment_counter: bool,
flags: u8,
flags_start: u8,
flags_end: u8,
out: *mut u8,
);
}
// x86-only implementations. The build script compiles the matching C files
// only for x86 targets, so this module is gated the same way.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
pub mod x86 {
extern "C" {
// SSE4.1 low level functions
pub fn blake3_compress_in_place_sse41(
cv: *mut u32,
block: *const u8,
block_len: u8,
counter: u64,
flags: u8,
);
pub fn blake3_compress_xof_sse41(
cv: *const u32,
block: *const u8,
block_len: u8,
counter: u64,
flags: u8,
out: *mut u8,
);
pub fn blake3_hash_many_sse41(
inputs: *const *const u8,
num_inputs: usize,
blocks: usize,
key: *const u32,
counter: u64,
increment_counter: bool,
flags: u8,
flags_start: u8,
flags_end: u8,
out: *mut u8,
);
// AVX2 low level functions (only hash_many is declared for AVX2)
pub fn blake3_hash_many_avx2(
inputs: *const *const u8,
num_inputs: usize,
blocks: usize,
key: *const u32,
counter: u64,
increment_counter: bool,
flags: u8,
flags_start: u8,
flags_end: u8,
out: *mut u8,
);
// AVX-512 low level functions
pub fn blake3_compress_xof_avx512(
cv: *const u32,
block: *const u8,
block_len: u8,
counter: u64,
flags: u8,
out: *mut u8,
);
pub fn blake3_compress_in_place_avx512(
cv: *mut u32,
block: *const u8,
block_len: u8,
counter: u64,
flags: u8,
);
pub fn blake3_hash_many_avx512(
inputs: *const *const u8,
num_inputs: usize,
blocks: usize,
key: *const u32,
counter: u64,
increment_counter: bool,
flags: u8,
flags_start: u8,
flags_end: u8,
out: *mut u8,
);
}
}
// NEON implementation, declared only when the "neon" Cargo feature is on
// (the build script compiles blake3_neon.c under the same condition).
#[cfg(feature = "neon")]
pub mod neon {
extern "C" {
// NEON low level functions
pub fn blake3_hash_many_neon(
inputs: *const *const u8,
num_inputs: usize,
blocks: usize,
key: *const u32,
counter: u64,
increment_counter: bool,
flags: u8,
flags_start: u8,
flags_end: u8,
out: *mut u8,
);
}
}
}

View File

@ -0,0 +1,452 @@
// Most of this code is duplicated from the root `blake3` crate. Perhaps we
// could share more of it in the future.
use crate::{BLOCK_LEN, CHUNK_LEN, OUT_LEN};
use arrayref::{array_mut_ref, array_ref};
use arrayvec::ArrayVec;
use core::usize;
use rand::prelude::*;
// Flag bits passed to the low-level C functions in the tests below.
const CHUNK_START: u8 = 1 << 0;
const CHUNK_END: u8 = 1 << 1;
const PARENT: u8 = 1 << 2;
const ROOT: u8 = 1 << 3;
const KEYED_HASH: u8 = 1 << 4;
// const DERIVE_KEY_CONTEXT: u8 = 1 << 5;
// const DERIVE_KEY_MATERIAL: u8 = 1 << 6;
// Interesting input lengths to run tests on.
pub const TEST_CASES: &[usize] = &[
0,
1,
CHUNK_LEN - 1,
CHUNK_LEN,
CHUNK_LEN + 1,
2 * CHUNK_LEN,
2 * CHUNK_LEN + 1,
3 * CHUNK_LEN,
3 * CHUNK_LEN + 1,
4 * CHUNK_LEN,
4 * CHUNK_LEN + 1,
5 * CHUNK_LEN,
5 * CHUNK_LEN + 1,
6 * CHUNK_LEN,
6 * CHUNK_LEN + 1,
7 * CHUNK_LEN,
7 * CHUNK_LEN + 1,
8 * CHUNK_LEN,
8 * CHUNK_LEN + 1,
16 * CHUNK_LEN, // AVX512's bandwidth
31 * CHUNK_LEN, // 16 + 8 + 4 + 2 + 1
];
// Must equal the largest entry of TEST_CASES; tests size their buffers with it.
pub const TEST_CASES_MAX: usize = 31 * CHUNK_LEN;
// TEST_KEY_WORDS is TEST_KEY read as eight little-endian u32 words (e.g. the
// first word 1952540791 == 0x74616877 == b"what").
// NOTE(review): unlike the root crate this was copied from, this file has no
// test asserting that the two stay equal; keep them in sync by hand.
pub const TEST_KEY: [u8; 32] = *b"whats the Elvish word for friend";
pub const TEST_KEY_WORDS: [u32; 8] = [
1952540791, 1752440947, 1816469605, 1752394102, 1919907616, 1868963940, 1919295602, 1684956521,
];
// Paint the input with a repeating byte pattern: 0, 1, ..., 250, 0, 1, ...
// The cycle length is 251 because that's the largest prime number less than
// 256. This makes it unlikely that swapping any two adjacent input blocks or
// chunks will give the same answer.
fn paint_test_input(buf: &mut [u8]) {
    let pattern = (0..251u8).cycle();
    for (slot, value) in buf.iter_mut().zip(pattern) {
        *slot = value;
    }
}
#[inline(always)]
fn le_bytes_from_words_32(words: &[u32; 8]) -> [u8; 32] {
let mut out = [0; 32];
*array_mut_ref!(out, 0 * 4, 4) = words[0].to_le_bytes();
*array_mut_ref!(out, 1 * 4, 4) = words[1].to_le_bytes();
*array_mut_ref!(out, 2 * 4, 4) = words[2].to_le_bytes();
*array_mut_ref!(out, 3 * 4, 4) = words[3].to_le_bytes();
*array_mut_ref!(out, 4 * 4, 4) = words[4].to_le_bytes();
*array_mut_ref!(out, 5 * 4, 4) = words[5].to_le_bytes();
*array_mut_ref!(out, 6 * 4, 4) = words[6].to_le_bytes();
*array_mut_ref!(out, 7 * 4, 4) = words[7].to_le_bytes();
out
}
// Function-pointer types for the two compression entry points under test.
type CompressInPlaceFn =
unsafe extern "C" fn(cv: *mut u32, block: *const u8, block_len: u8, counter: u64, flags: u8);
type CompressXofFn = unsafe extern "C" fn(
cv: *const u32,
block: *const u8,
block_len: u8,
counter: u64,
flags: u8,
out: *mut u8,
);
// A shared helper function for platform-specific tests. Runs both given
// functions on the same state/block/counter/flags and checks them against the
// 64-byte output of the portable XOF compression.
pub fn test_compress_fn(compress_in_place_fn: CompressInPlaceFn, compress_xof_fn: CompressXofFn) {
let initial_state = TEST_KEY_WORDS;
// A block length below BLOCK_LEN; the bytes past block_len stay zero.
let block_len: u8 = 61;
let mut block = [0; BLOCK_LEN];
paint_test_input(&mut block[..block_len as usize]);
// Use a counter with set bits in both 32-bit words.
let counter = (5u64 << 32) + 6;
let flags = CHUNK_END | ROOT | KEYED_HASH;
// Expected output, from the portable implementation.
let mut portable_out = [0; 64];
unsafe {
crate::ffi::blake3_compress_xof_portable(
initial_state.as_ptr(),
block.as_ptr(),
block_len,
counter,
flags,
portable_out.as_mut_ptr(),
);
}
// In-place variant: works on a copy of the state and overwrites it.
let mut test_state = initial_state;
unsafe {
compress_in_place_fn(
test_state.as_mut_ptr(),
block.as_ptr(),
block_len,
counter,
flags,
)
};
let test_state_bytes = le_bytes_from_words_32(&test_state);
// XOF variant: reads the state and writes 64 bytes of output.
let mut test_xof = [0; 64];
unsafe {
compress_xof_fn(
initial_state.as_ptr(),
block.as_ptr(),
block_len,
counter,
flags,
test_xof.as_mut_ptr(),
)
};
// The in-place result must equal the first 32 bytes of the XOF output.
assert_eq!(&portable_out[..32], &test_state_bytes[..]);
assert_eq!(&portable_out[..], &test_xof[..]);
}
// Testing the portable implementation against itself is circular, but why not.
#[test]
fn test_compress_portable() {
test_compress_fn(
crate::ffi::blake3_compress_in_place_portable,
crate::ffi::blake3_compress_xof_portable,
);
}
// SSE4.1 compression against portable. x86 only; passes trivially (returns
// early) when SSE4.1 is not detected at runtime.
#[test]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn test_compress_sse41() {
if !crate::sse41_detected() {
return;
}
test_compress_fn(
crate::ffi::x86::blake3_compress_in_place_sse41,
crate::ffi::x86::blake3_compress_xof_sse41,
);
}
// AVX-512 compression against portable. x86 only; passes trivially (returns
// early) when AVX-512 is not detected at runtime.
#[test]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn test_compress_avx512() {
if !crate::avx512_detected() {
return;
}
test_compress_fn(
crate::ffi::x86::blake3_compress_in_place_avx512,
crate::ffi::x86::blake3_compress_xof_avx512,
);
}
// Function-pointer type for the `hash_many` entry point under test.
type HashManyFn = unsafe extern "C" fn(
inputs: *const *const u8,
num_inputs: usize,
blocks: usize,
key: *const u32,
counter: u64,
increment_counter: bool,
flags: u8,
flags_start: u8,
flags_end: u8,
out: *mut u8,
);
// A shared helper function for platform-specific tests. Hashes 31 inputs first
// as whole chunks and then as single-block parents, once with the portable
// implementation and once with `hash_many_fn`, and compares each output.
pub fn test_hash_many_fn(hash_many_fn: HashManyFn) {
// 31 (16 + 8 + 4 + 2 + 1) inputs
const NUM_INPUTS: usize = 31;
let mut input_buf = [0; CHUNK_LEN * NUM_INPUTS];
crate::test::paint_test_input(&mut input_buf);
// A counter just prior to u32::MAX.
let counter = (1u64 << 32) - 1;
// First hash chunks.
let mut chunks = ArrayVec::<[&[u8; CHUNK_LEN]; NUM_INPUTS]>::new();
for i in 0..NUM_INPUTS {
chunks.push(array_ref!(input_buf, i * CHUNK_LEN, CHUNK_LEN));
}
let mut portable_chunks_out = [0; NUM_INPUTS * OUT_LEN];
unsafe {
crate::ffi::blake3_hash_many_portable(
chunks.as_ptr() as _,
chunks.len(),
CHUNK_LEN / BLOCK_LEN,
TEST_KEY_WORDS.as_ptr(),
counter,
true,
KEYED_HASH,
CHUNK_START,
CHUNK_END,
portable_chunks_out.as_mut_ptr(),
);
}
let mut test_chunks_out = [0; NUM_INPUTS * OUT_LEN];
unsafe {
hash_many_fn(
chunks.as_ptr() as _,
chunks.len(),
CHUNK_LEN / BLOCK_LEN,
TEST_KEY_WORDS.as_ptr(),
counter,
true,
KEYED_HASH,
CHUNK_START,
CHUNK_END,
test_chunks_out.as_mut_ptr(),
);
}
// Compare output by output so a failure identifies the input index.
for n in 0..NUM_INPUTS {
// NOTE(review): this crate's manifest appears to declare no "std" feature,
// so this dbg! is presumably never compiled in.
#[cfg(feature = "std")]
dbg!(n);
assert_eq!(
&portable_chunks_out[n * OUT_LEN..][..OUT_LEN],
&test_chunks_out[n * OUT_LEN..][..OUT_LEN]
);
}
// Then hash parents. Each parent input is 2 * OUT_LEN bytes (one block),
// taken from the start of the same painted buffer.
let mut parents = ArrayVec::<[&[u8; 2 * OUT_LEN]; NUM_INPUTS]>::new();
for i in 0..NUM_INPUTS {
parents.push(array_ref!(input_buf, i * 2 * OUT_LEN, 2 * OUT_LEN));
}
let mut portable_parents_out = [0; NUM_INPUTS * OUT_LEN];
unsafe {
crate::ffi::blake3_hash_many_portable(
parents.as_ptr() as _,
parents.len(),
1,
TEST_KEY_WORDS.as_ptr(),
counter,
false,
KEYED_HASH | PARENT,
0,
0,
portable_parents_out.as_mut_ptr(),
);
}
let mut test_parents_out = [0; NUM_INPUTS * OUT_LEN];
unsafe {
hash_many_fn(
parents.as_ptr() as _,
parents.len(),
1,
TEST_KEY_WORDS.as_ptr(),
counter,
false,
KEYED_HASH | PARENT,
0,
0,
test_parents_out.as_mut_ptr(),
);
}
for n in 0..NUM_INPUTS {
// NOTE(review): presumably never compiled in; see the note in the chunks loop.
#[cfg(feature = "std")]
dbg!(n);
assert_eq!(
&portable_parents_out[n * OUT_LEN..][..OUT_LEN],
&test_parents_out[n * OUT_LEN..][..OUT_LEN]
);
}
}
// Testing the portable implementation against itself is circular, but why not.
#[test]
fn test_hash_many_portable() {
test_hash_many_fn(crate::ffi::blake3_hash_many_portable);
}
// The x86 tests below are compiled only on x86 targets and pass trivially
// (return early) when the instruction set is not detected at runtime.
#[test]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn test_hash_many_sse41() {
if !crate::sse41_detected() {
return;
}
test_hash_many_fn(crate::ffi::x86::blake3_hash_many_sse41);
}
#[test]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn test_hash_many_avx2() {
if !crate::avx2_detected() {
return;
}
test_hash_many_fn(crate::ffi::x86::blake3_hash_many_avx2);
}
#[test]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
fn test_hash_many_avx512() {
if !crate::avx512_detected() {
return;
}
test_hash_many_fn(crate::ffi::x86::blake3_hash_many_avx512);
}
// Compiled only with the "neon" feature; no runtime detection is performed.
#[test]
#[cfg(feature = "neon")]
fn test_hash_many_neon() {
test_hash_many_fn(crate::ffi::neon::blake3_hash_many_neon);
}
// Compares the C-backed Hasher against the reference implementation for every
// length in TEST_CASES, in all three modes (regular, keyed, derive_key), using
// an extended 303-byte output.
#[test]
fn test_compare_reference_impl() {
const OUT: usize = 303; // more than 64, not a multiple of 4
let mut input_buf = [0; TEST_CASES_MAX];
paint_test_input(&mut input_buf);
for &case in TEST_CASES {
let input = &input_buf[..case];
// NOTE(review): this crate's manifest appears to declare no "std" feature,
// so this dbg! is presumably never compiled in.
#[cfg(feature = "std")]
dbg!(case);
// regular
{
let mut reference_hasher = reference_impl::Hasher::new();
reference_hasher.update(input);
let mut expected_out = [0; OUT];
reference_hasher.finalize(&mut expected_out);
let mut test_hasher = crate::Hasher::new();
test_hasher.update(input);
let mut test_out = [0; OUT];
test_hasher.finalize(&mut test_out);
assert_eq!(test_out[..], expected_out[..]);
}
// keyed
{
let mut reference_hasher = reference_impl::Hasher::new_keyed(&TEST_KEY);
reference_hasher.update(input);
let mut expected_out = [0; OUT];
reference_hasher.finalize(&mut expected_out);
let mut test_hasher = crate::Hasher::new_keyed(&TEST_KEY);
test_hasher.update(input);
let mut test_out = [0; OUT];
test_hasher.finalize(&mut test_out);
assert_eq!(test_out[..], expected_out[..]);
}
// derive_key
{
let context = "BLAKE3 2019-12-27 16:13:59 example context (not the test vector one)";
let mut reference_hasher = reference_impl::Hasher::new_derive_key(context);
reference_hasher.update(input);
let mut expected_out = [0; OUT];
reference_hasher.finalize(&mut expected_out);
let mut test_hasher = crate::Hasher::new_derive_key(context);
test_hasher.update(input);
let mut test_out = [0; OUT];
test_hasher.finalize(&mut test_out);
assert_eq!(test_out[..], expected_out[..]);
}
}
}
// Hashes `input` in regular (unkeyed) mode with the reference implementation
// and returns the default-length (OUT_LEN bytes) digest.
fn reference_hash(input: &[u8]) -> [u8; OUT_LEN] {
    let mut digest = [0; OUT_LEN];
    let mut reference = reference_impl::Hasher::new();
    reference.update(input);
    reference.finalize(&mut digest);
    digest
}
// Checks that feeding the input in two `update` calls gives the same digest as
// the reference implementation hashing the concatenation, for every pair of
// lengths drawn from the first ten TEST_CASES.
#[test]
fn test_compare_update_multiple() {
// Don't use all the long test cases here, since that's unnecessarily slow
// in debug mode.
let short_test_cases = &TEST_CASES[..10];
// Guards the slice bound above against edits to TEST_CASES.
assert_eq!(*short_test_cases.last().unwrap(), 4 * CHUNK_LEN);
let mut input_buf = [0; 2 * TEST_CASES_MAX];
paint_test_input(&mut input_buf);
for &first_update in short_test_cases {
// NOTE(review): this crate's manifest appears to declare no "std" feature,
// so the dbg! calls in this test are presumably never compiled in.
#[cfg(feature = "std")]
dbg!(first_update);
let first_input = &input_buf[..first_update];
let mut test_hasher = crate::Hasher::new();
test_hasher.update(first_input);
for &second_update in short_test_cases {
#[cfg(feature = "std")]
dbg!(second_update);
// The second input continues where the first one ended.
let second_input = &input_buf[first_update..][..second_update];
let total_input = &input_buf[..first_update + second_update];
// Clone the hasher with first_update bytes already written, so
// that the next iteration can reuse it.
let mut test_hasher = test_hasher.clone();
test_hasher.update(second_input);
let mut test_out = [0; OUT_LEN];
test_hasher.finalize(&mut test_out);
let expected = reference_hash(total_input);
assert_eq!(expected, test_out);
}
}
}
// Pseudo-random test: feeds three consecutive inputs of random length to the
// C-backed Hasher and compares the digest with the reference implementation
// hashing the same bytes in one go.
#[test]
fn test_fuzz_hasher() {
const INPUT_MAX: usize = 4 * CHUNK_LEN;
// Three inputs of at most INPUT_MAX bytes each always fit.
let mut input_buf = [0; 3 * INPUT_MAX];
paint_test_input(&mut input_buf);
// Don't do too many iterations in debug mode, to keep the tests under a
// second or so. CI should run tests in release mode also.
// NOTE(review): the iteration count depends only on debug_assertions; no
// environment variable for raising it is read here.
let num_tests = if cfg!(debug_assertions) { 100 } else { 10_000 };
// Use a fixed RNG seed for reproducibility.
let mut rng = rand_chacha::ChaCha8Rng::from_seed([1; 32]);
for _num_test in 0..num_tests {
// NOTE(review): this crate's manifest appears to declare no "std" feature,
// so the dbg! calls in this test are presumably never compiled in.
#[cfg(feature = "std")]
dbg!(_num_test);
let mut hasher = crate::Hasher::new();
let mut total_input = 0;
// For each test, write 3 inputs of random length.
for _ in 0..3 {
// Upper bound is exclusive, so lengths range over 0..=INPUT_MAX.
let input_len = rng.gen_range(0, INPUT_MAX + 1);
#[cfg(feature = "std")]
dbg!(input_len);
let input = &input_buf[total_input..][..input_len];
hasher.update(input);
total_input += input_len;
}
let expected = reference_hash(&input_buf[..total_input]);
let mut test_out = [0; 32];
hasher.finalize(&mut test_out);
assert_eq!(expected, test_out);
}
}

View File

@ -267,15 +267,18 @@ fn test_compare_reference_impl() {
let mut expected_out = [0; OUT];
reference_hasher.finalize(&mut expected_out);
// all at once
let test_out = crate::hash(input);
assert_eq!(&test_out, array_ref!(expected_out, 0, 32));
assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
// incremental
let mut hasher = crate::Hasher::new();
hasher.update(input);
assert_eq!(&hasher.finalize(), array_ref!(expected_out, 0, 32));
assert_eq!(&hasher.finalize(), &test_out);
assert_eq!(hasher.finalize(), *array_ref!(expected_out, 0, 32));
assert_eq!(hasher.finalize(), test_out);
// xof
let mut extended = [0; OUT];
hasher.finalize_xof().fill(&mut extended);
assert_eq!(&extended[..], &expected_out[..]);
assert_eq!(extended[..], expected_out[..]);
}
// keyed
@ -285,15 +288,18 @@ fn test_compare_reference_impl() {
let mut expected_out = [0; OUT];
reference_hasher.finalize(&mut expected_out);
// all at once
let test_out = crate::keyed_hash(&TEST_KEY, input);
assert_eq!(&test_out, array_ref!(expected_out, 0, 32));
assert_eq!(test_out, *array_ref!(expected_out, 0, 32));
// incremental
let mut hasher = crate::Hasher::new_keyed(&TEST_KEY);
hasher.update(input);
assert_eq!(&hasher.finalize(), array_ref!(expected_out, 0, 32));
assert_eq!(&hasher.finalize(), &test_out);
assert_eq!(hasher.finalize(), *array_ref!(expected_out, 0, 32));
assert_eq!(hasher.finalize(), test_out);
// xof
let mut extended = [0; OUT];
hasher.finalize_xof().fill(&mut extended);
assert_eq!(&extended[..], &expected_out[..]);
assert_eq!(extended[..], expected_out[..]);
}
// derive_key
@ -304,16 +310,19 @@ fn test_compare_reference_impl() {
let mut expected_out = [0; OUT];
reference_hasher.finalize(&mut expected_out);
// all at once
let mut test_out = [0; OUT];
crate::derive_key(context, input, &mut test_out);
assert_eq!(&test_out[..], &expected_out[..]);
assert_eq!(test_out[..], expected_out[..]);
// incremental
let mut hasher = crate::Hasher::new_derive_key(context);
hasher.update(input);
assert_eq!(&hasher.finalize(), array_ref!(expected_out, 0, 32));
assert_eq!(&hasher.finalize(), array_ref!(test_out, 0, 32));
assert_eq!(hasher.finalize(), *array_ref!(expected_out, 0, 32));
assert_eq!(hasher.finalize(), *array_ref!(test_out, 0, 32));
// xof
let mut extended = [0; OUT];
hasher.finalize_xof().fill(&mut extended);
assert_eq!(&extended[..], &expected_out[..]);
assert_eq!(extended[..], expected_out[..]);
}
}
}
@ -348,12 +357,13 @@ fn test_compare_update_multiple() {
dbg!(second_update);
let second_input = &input_buf[first_update..][..second_update];
let total_input = &input_buf[..first_update + second_update];
// Clone the hasher with first_update bytes already written, so
// that the next iteration can reuse it.
let mut test_hasher = test_hasher.clone();
test_hasher.update(second_input);
assert_eq!(reference_hash(total_input), test_hasher.finalize());
let expected = reference_hash(total_input);
assert_eq!(expected, test_hasher.finalize());
}
}
}