mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-05-28 13:46:02 +02:00
test_chunks_and_parents_vs_reference
This commit is contained in:
parent
7b02be6a10
commit
097225a43c
|
@ -4,7 +4,7 @@ use core::mem;
|
|||
use core::ptr;
|
||||
use core::sync::atomic::{AtomicPtr, Ordering::Relaxed};
|
||||
|
||||
mod portable;
|
||||
pub mod portable;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
@ -149,7 +149,7 @@ static DETECTED_IMPL: Implementation = Implementation::new(
|
|||
);
|
||||
|
||||
fn init_detected_impl() {
|
||||
let detected = Implementation::portable();
|
||||
let detected = portable::implementation();
|
||||
|
||||
DETECTED_IMPL
|
||||
.degree_ptr
|
||||
|
@ -205,18 +205,6 @@ impl Implementation {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn portable() -> Self {
|
||||
Self::new(
|
||||
|| portable::DEGREE,
|
||||
portable::compress,
|
||||
portable::hash_chunks,
|
||||
portable::hash_parents,
|
||||
portable::xof,
|
||||
portable::xof_xor,
|
||||
portable::universal_hash,
|
||||
)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn degree_fn(&self) -> DegreeFn {
|
||||
unsafe { mem::transmute(self.degree_ptr.load(Relaxed)) }
|
||||
|
@ -234,7 +222,7 @@ impl Implementation {
|
|||
&self,
|
||||
vectors: &'v mut TransposedVectors,
|
||||
) -> (TransposedSplit<'v>, TransposedSplit<'v>) {
|
||||
vectors.split(self.degree())
|
||||
unsafe { vectors.split(self.degree()) }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -758,6 +746,10 @@ const TRANSPOSED_STRIDE: usize = 2 * MAX_SIMD_DEGREE;
|
|||
pub struct TransposedVectors([[u32; 2 * MAX_SIMD_DEGREE]; 8]);
|
||||
|
||||
impl TransposedVectors {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
pub fn extract_cv(&self, cv_index: usize) -> CVBytes {
|
||||
let mut words = [0u32; 8];
|
||||
for word_index in 0..8 {
|
||||
|
@ -781,7 +773,10 @@ impl TransposedVectors {
|
|||
self.0[0].as_mut_ptr()
|
||||
}
|
||||
|
||||
fn split(&mut self, degree: usize) -> (TransposedSplit, TransposedSplit) {
|
||||
// SAFETY: This function is just pointer arithmetic, but callers assume that it's safe (not
|
||||
// necessarily correct) to write up to `degree` words to either side of the split, possibly
|
||||
// from different threads.
|
||||
unsafe fn split(&mut self, degree: usize) -> (TransposedSplit, TransposedSplit) {
|
||||
debug_assert!(degree > 0);
|
||||
debug_assert!(degree <= MAX_SIMD_DEGREE);
|
||||
debug_assert_eq!(degree.count_ones(), 1, "power of 2");
|
||||
|
@ -894,3 +889,35 @@ fn test_byte_word_round_trips() {
|
|||
le_bytes_from_words_64(&words_from_le_bytes_64(&block)),
|
||||
);
|
||||
}
|
||||
|
||||
/// The largest power of two less than or equal to `n`, used for `left_len()`
/// immediately below, and also directly in `Hasher::update()`.
///
/// Halving `n` and adding one always lands strictly above the previous power
/// of two and at or below the answer, so rounding that value up with
/// `next_power_of_two()` yields the result without overflowing, even for
/// `usize::MAX`. The `n == 0` case is nonsensical but returns 1.
pub fn largest_power_of_two_leq(n: usize) -> usize {
    ((n / 2) + 1).next_power_of_two()
}
|
||||
|
||||
// Given some input larger than one chunk, return the number of bytes that
|
||||
// should go in the left subtree. This is the largest power-of-2 number of
|
||||
// chunks that leaves at least 1 byte for the right subtree.
|
||||
pub fn left_len(content_len: usize) -> usize {
|
||||
debug_assert!(content_len > CHUNK_LEN);
|
||||
// Subtract 1 to reserve at least one byte for the right side.
|
||||
let full_chunks = (content_len - 1) / CHUNK_LEN;
|
||||
largest_power_of_two_leq(full_chunks) * CHUNK_LEN
|
||||
}
|
||||
|
||||
// Checks left_len() just above one chunk and on both sides of the 2-chunk and
// 4-chunk boundaries. Each pair is (content_len, expected left subtree length).
#[test]
fn test_left_len() {
    let input_output = &[
        (CHUNK_LEN + 1, CHUNK_LEN),
        (2 * CHUNK_LEN - 1, CHUNK_LEN),
        (2 * CHUNK_LEN, CHUNK_LEN),
        (2 * CHUNK_LEN + 1, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN - 1, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN + 1, 4 * CHUNK_LEN),
    ];
    for &(input, output) in input_output {
        // Include the input in the message so a failure identifies the case.
        assert_eq!(left_len(input), output, "content_len = {}", input);
    }
}
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
use crate::{
|
||||
le_bytes_from_words_32, le_bytes_from_words_64, words_from_le_bytes_32, words_from_le_bytes_64,
|
||||
BlockBytes, BlockWords, CVBytes, CVWords, IV, MAX_SIMD_DEGREE, MSG_SCHEDULE,
|
||||
BlockBytes, BlockWords, CVBytes, CVWords, Implementation, IV, MAX_SIMD_DEGREE, MSG_SCHEDULE,
|
||||
};
|
||||
|
||||
pub const DEGREE: usize = MAX_SIMD_DEGREE;
|
||||
const DEGREE: usize = MAX_SIMD_DEGREE;
|
||||
|
||||
#[inline(always)]
|
||||
fn g(state: &mut BlockWords, a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) {
|
||||
|
@ -67,7 +67,7 @@ fn compress_inner(
|
|||
state
|
||||
}
|
||||
|
||||
pub unsafe extern "C" fn compress(
|
||||
unsafe extern "C" fn compress(
|
||||
block: *const BlockBytes,
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
|
@ -84,7 +84,7 @@ pub unsafe extern "C" fn compress(
|
|||
*out = le_bytes_from_words_32(state[..8].try_into().unwrap());
|
||||
}
|
||||
|
||||
pub unsafe extern "C" fn compress_xof(
|
||||
unsafe extern "C" fn compress_xof(
|
||||
block: *const BlockBytes,
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
|
@ -102,7 +102,7 @@ pub unsafe extern "C" fn compress_xof(
|
|||
*out = le_bytes_from_words_64(&state);
|
||||
}
|
||||
|
||||
pub unsafe extern "C" fn hash_chunks(
|
||||
unsafe extern "C" fn hash_chunks(
|
||||
input: *const u8,
|
||||
input_len: usize,
|
||||
key: *const CVBytes,
|
||||
|
@ -121,7 +121,7 @@ pub unsafe extern "C" fn hash_chunks(
|
|||
)
|
||||
}
|
||||
|
||||
pub unsafe extern "C" fn hash_parents(
|
||||
unsafe extern "C" fn hash_parents(
|
||||
transposed_input: *const u32,
|
||||
num_parents: usize,
|
||||
key: *const CVBytes,
|
||||
|
@ -138,7 +138,7 @@ pub unsafe extern "C" fn hash_parents(
|
|||
)
|
||||
}
|
||||
|
||||
pub unsafe extern "C" fn xof(
|
||||
unsafe extern "C" fn xof(
|
||||
block: *const BlockBytes,
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
|
@ -159,7 +159,7 @@ pub unsafe extern "C" fn xof(
|
|||
)
|
||||
}
|
||||
|
||||
pub unsafe extern "C" fn xof_xor(
|
||||
unsafe extern "C" fn xof_xor(
|
||||
block: *const BlockBytes,
|
||||
block_len: u32,
|
||||
cv: *const CVBytes,
|
||||
|
@ -180,7 +180,7 @@ pub unsafe extern "C" fn xof_xor(
|
|||
)
|
||||
}
|
||||
|
||||
pub unsafe extern "C" fn universal_hash(
|
||||
unsafe extern "C" fn universal_hash(
|
||||
input: *const u8,
|
||||
input_len: usize,
|
||||
key: *const CVBytes,
|
||||
|
@ -190,6 +190,18 @@ pub unsafe extern "C" fn universal_hash(
|
|||
crate::universal_hash_using_compress(compress, input, input_len, key, counter, out)
|
||||
}
|
||||
|
||||
pub fn implementation() -> Implementation {
|
||||
Implementation::new(
|
||||
|| DEGREE,
|
||||
compress,
|
||||
hash_chunks,
|
||||
hash_parents,
|
||||
xof,
|
||||
xof_xor,
|
||||
universal_hash,
|
||||
)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
@ -197,23 +209,28 @@ mod test {
|
|||
// This is circular but do it anyway.
|
||||
#[test]
|
||||
fn test_compress_vs_portable() {
|
||||
crate::test::test_compress_vs_portable(compress);
|
||||
crate::test::test_compress_vs_portable(&implementation());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compress_vs_reference() {
|
||||
crate::test::test_compress_vs_reference(compress);
|
||||
crate::test::test_compress_vs_reference(&implementation());
|
||||
}
|
||||
|
||||
// This is circular but do it anyway.
|
||||
#[test]
|
||||
fn test_hash_chunks_vs_portable() {
|
||||
crate::test::test_hash_chunks_vs_portable(hash_chunks, DEGREE);
|
||||
crate::test::test_hash_chunks_vs_portable(&implementation());
|
||||
}
|
||||
|
||||
// This is circular but do it anyway.
|
||||
#[test]
|
||||
fn test_hash_parents_vs_portable() {
|
||||
crate::test::test_hash_parents_vs_portable(hash_parents, DEGREE);
|
||||
crate::test::test_hash_parents_vs_portable(&implementation());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_chunks_and_parents_vs_reference() {
|
||||
crate::test::test_chunks_and_parents_vs_reference(&implementation());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,14 +23,14 @@ pub fn paint_test_input(buf: &mut [u8]) {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn test_compress_vs_portable(compress_fn: CompressFn) {
|
||||
pub fn test_compress_vs_portable(test_impl: &Implementation) {
|
||||
for block_len in BLOCK_LENGTHS {
|
||||
dbg!(block_len);
|
||||
let mut block = [0; BLOCK_LEN];
|
||||
paint_test_input(&mut block[..block_len]);
|
||||
for counter in INITIAL_COUNTERS {
|
||||
dbg!(counter);
|
||||
let portable_cv = Implementation::portable().compress(
|
||||
let portable_cv = portable::implementation().compress(
|
||||
&block,
|
||||
block_len as u32,
|
||||
&TEST_KEY,
|
||||
|
@ -38,25 +38,15 @@ pub fn test_compress_vs_portable(compress_fn: CompressFn) {
|
|||
KEYED_HASH,
|
||||
);
|
||||
|
||||
let mut test_cv = TEST_KEY;
|
||||
unsafe {
|
||||
let test_cv_ptr: *mut CVBytes = &mut test_cv;
|
||||
compress_fn(
|
||||
&block,
|
||||
block_len as u32,
|
||||
test_cv_ptr,
|
||||
counter,
|
||||
KEYED_HASH,
|
||||
test_cv_ptr,
|
||||
);
|
||||
}
|
||||
let test_cv =
|
||||
test_impl.compress(&block, block_len as u32, &TEST_KEY, counter, KEYED_HASH);
|
||||
|
||||
assert_eq!(portable_cv, test_cv);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn test_compress_vs_reference(compress_fn: CompressFn) {
|
||||
pub fn test_compress_vs_reference(test_impl: &Implementation) {
|
||||
for block_len in BLOCK_LENGTHS {
|
||||
dbg!(block_len);
|
||||
let mut block = [0; BLOCK_LEN];
|
||||
|
@ -67,18 +57,13 @@ pub fn test_compress_vs_reference(compress_fn: CompressFn) {
|
|||
let mut ref_hash = [0u8; 32];
|
||||
ref_hasher.finalize(&mut ref_hash);
|
||||
|
||||
let mut test_cv = TEST_KEY;
|
||||
unsafe {
|
||||
let test_cv_ptr: *mut CVBytes = &mut test_cv;
|
||||
compress_fn(
|
||||
&block,
|
||||
block_len as u32,
|
||||
test_cv_ptr,
|
||||
0,
|
||||
CHUNK_START | CHUNK_END | ROOT | KEYED_HASH,
|
||||
test_cv_ptr,
|
||||
);
|
||||
}
|
||||
let test_cv = test_impl.compress(
|
||||
&block,
|
||||
block_len as u32,
|
||||
&TEST_KEY,
|
||||
0,
|
||||
CHUNK_START | CHUNK_END | ROOT | KEYED_HASH,
|
||||
);
|
||||
|
||||
assert_eq!(ref_hash, test_cv);
|
||||
}
|
||||
|
@ -104,14 +89,14 @@ fn check_transposed_eq(output_a: &TransposedVectors, output_b: &TransposedVector
|
|||
panic!("transposed outputs are not equal");
|
||||
}
|
||||
|
||||
pub fn test_hash_chunks_vs_portable(hash_chunks_fn: HashChunksFn, degree: usize) {
|
||||
assert!(degree <= MAX_SIMD_DEGREE);
|
||||
pub fn test_hash_chunks_vs_portable(test_impl: &Implementation) {
|
||||
assert!(test_impl.degree() <= MAX_SIMD_DEGREE);
|
||||
let mut input = [0u8; 2 * MAX_SIMD_DEGREE * CHUNK_LEN];
|
||||
paint_test_input(&mut input);
|
||||
dbg!(degree * CHUNK_LEN);
|
||||
dbg!(test_impl.degree() * CHUNK_LEN);
|
||||
// Try just below, equal to, and just above every whole number of chunks.
|
||||
let mut input_2_lengths = vec![1];
|
||||
let mut next_len = CHUNK_LEN;
|
||||
// Try just below, equal to, and just above every power-of-2 number of chunks.
|
||||
loop {
|
||||
input_2_lengths.push(next_len - 1);
|
||||
input_2_lengths.push(next_len);
|
||||
|
@ -119,24 +104,25 @@ pub fn test_hash_chunks_vs_portable(hash_chunks_fn: HashChunksFn, degree: usize)
|
|||
break;
|
||||
}
|
||||
input_2_lengths.push(next_len + 1);
|
||||
next_len *= 2;
|
||||
next_len += CHUNK_LEN;
|
||||
}
|
||||
for input_2_len in input_2_lengths {
|
||||
dbg!(input_2_len);
|
||||
let input1 = &input[..degree * CHUNK_LEN];
|
||||
let input2 = &input[degree * CHUNK_LEN..][..input_2_len];
|
||||
let input1 = &input[..test_impl.degree() * CHUNK_LEN];
|
||||
let input2 = &input[test_impl.degree() * CHUNK_LEN..][..input_2_len];
|
||||
for initial_counter in INITIAL_COUNTERS {
|
||||
// Make two calls, to test the output_column parameter.
|
||||
let mut portable_output = TransposedVectors::default();
|
||||
let (portable_left, portable_right) = portable_output.split(degree);
|
||||
Implementation::portable().hash_chunks(
|
||||
let mut portable_output = TransposedVectors::new();
|
||||
let (portable_left, portable_right) =
|
||||
test_impl.split_transposed_vectors(&mut portable_output);
|
||||
portable::implementation().hash_chunks(
|
||||
input1,
|
||||
&IV_BYTES,
|
||||
initial_counter,
|
||||
0,
|
||||
portable_left,
|
||||
);
|
||||
Implementation::portable().hash_chunks(
|
||||
portable::implementation().hash_chunks(
|
||||
input2,
|
||||
&TEST_KEY,
|
||||
initial_counter + degree as u64,
|
||||
|
@ -144,26 +130,16 @@ pub fn test_hash_chunks_vs_portable(hash_chunks_fn: HashChunksFn, degree: usize)
|
|||
portable_right,
|
||||
);
|
||||
|
||||
let mut test_output = TransposedVectors::default();
|
||||
let (test_left, test_right) = test_output.split(degree);
|
||||
unsafe {
|
||||
hash_chunks_fn(
|
||||
input1.as_ptr(),
|
||||
input1.len(),
|
||||
&IV_BYTES,
|
||||
initial_counter,
|
||||
0,
|
||||
test_left.ptr,
|
||||
);
|
||||
hash_chunks_fn(
|
||||
input2.as_ptr(),
|
||||
input2.len(),
|
||||
&TEST_KEY,
|
||||
initial_counter + degree as u64,
|
||||
KEYED_HASH,
|
||||
test_right.ptr,
|
||||
);
|
||||
}
|
||||
let mut test_output = TransposedVectors::new();
|
||||
let (test_left, test_right) = test_impl.split_transposed_vectors(&mut test_output);
|
||||
test_impl.hash_chunks(input1, &IV_BYTES, initial_counter, 0, test_left);
|
||||
test_impl.hash_chunks(
|
||||
input2,
|
||||
&TEST_KEY,
|
||||
initial_counter + degree as u64,
|
||||
KEYED_HASH,
|
||||
test_right,
|
||||
);
|
||||
|
||||
check_transposed_eq(&portable_output, &test_output);
|
||||
}
|
||||
|
@ -171,7 +147,7 @@ pub fn test_hash_chunks_vs_portable(hash_chunks_fn: HashChunksFn, degree: usize)
|
|||
}
|
||||
|
||||
fn painted_transposed_input() -> TransposedVectors {
|
||||
let mut vectors = TransposedVectors::default();
|
||||
let mut vectors = TransposedVectors::new();
|
||||
let mut val = 0;
|
||||
for col in 0..2 * MAX_SIMD_DEGREE {
|
||||
for row in 0..8 {
|
||||
|
@ -182,21 +158,22 @@ fn painted_transposed_input() -> TransposedVectors {
|
|||
vectors
|
||||
}
|
||||
|
||||
pub fn test_hash_parents_vs_portable(hash_parents_fn: HashParentsFn, degree: usize) {
|
||||
assert!(degree <= MAX_SIMD_DEGREE);
|
||||
pub fn test_hash_parents_vs_portable(test_impl: &Implementation) {
|
||||
assert!(test_impl.degree() <= MAX_SIMD_DEGREE);
|
||||
let input = painted_transposed_input();
|
||||
for num_parents in 2..=(degree / 2) {
|
||||
for num_parents in 2..=(test_impl.degree() / 2) {
|
||||
dbg!(num_parents);
|
||||
let mut portable_output = TransposedVectors(input.0);
|
||||
let (portable_left, portable_right) = portable_output.split(degree);
|
||||
Implementation::portable().hash_parents(
|
||||
let (portable_left, portable_right) =
|
||||
test_impl.split_transposed_vectors(&mut portable_output);
|
||||
portable::implementation().hash_parents(
|
||||
&input,
|
||||
2 * num_parents, // num_cvs
|
||||
&IV_BYTES,
|
||||
0,
|
||||
portable_left,
|
||||
);
|
||||
Implementation::portable().hash_parents(
|
||||
portable::implementation().hash_parents(
|
||||
&input,
|
||||
2 * num_parents, // num_cvs
|
||||
&TEST_KEY,
|
||||
|
@ -204,25 +181,101 @@ pub fn test_hash_parents_vs_portable(hash_parents_fn: HashParentsFn, degree: usi
|
|||
portable_right,
|
||||
);
|
||||
|
||||
let mut test_output = input.clone();
|
||||
let (test_left, test_right) = test_output.split(degree);
|
||||
unsafe {
|
||||
hash_parents_fn(
|
||||
input.as_ptr(),
|
||||
num_parents,
|
||||
&IV_BYTES,
|
||||
PARENT,
|
||||
test_left.ptr,
|
||||
);
|
||||
hash_parents_fn(
|
||||
input.as_ptr(),
|
||||
num_parents,
|
||||
&TEST_KEY,
|
||||
PARENT | KEYED_HASH,
|
||||
test_right.ptr,
|
||||
);
|
||||
}
|
||||
let mut test_output = TransposedVectors(input.0);
|
||||
let (test_left, test_right) = test_impl.split_transposed_vectors(&mut test_output);
|
||||
test_impl.hash_parents(
|
||||
&input,
|
||||
2 * num_parents, // num_cvs
|
||||
&IV_BYTES,
|
||||
0,
|
||||
test_left,
|
||||
);
|
||||
test_impl.hash_parents(
|
||||
&input,
|
||||
2 * num_parents, // num_cvs
|
||||
&TEST_KEY,
|
||||
KEYED_HASH,
|
||||
test_right,
|
||||
);
|
||||
|
||||
check_transposed_eq(&portable_output, &test_output);
|
||||
}
|
||||
}
|
||||
|
||||
fn hash_with_chunks_and_parents_recurse(
|
||||
test_impl: &Implementation,
|
||||
input: &[u8],
|
||||
counter: u64,
|
||||
output: TransposedSplit,
|
||||
) -> usize {
|
||||
assert!(input.len() > 0);
|
||||
if input.len() <= test_impl.degree() * CHUNK_LEN {
|
||||
return test_impl.hash_chunks(input, &IV_BYTES, counter, 0, output);
|
||||
}
|
||||
let (left_input, right_input) = input.split_at(left_len(input.len()));
|
||||
let mut child_output = TransposedVectors::new();
|
||||
let (left_output, right_output) = test_impl.split_transposed_vectors(&mut child_output);
|
||||
let mut children =
|
||||
hash_with_chunks_and_parents_recurse(test_impl, left_input, counter, left_output);
|
||||
assert_eq!(children, test_impl.degree());
|
||||
children += hash_with_chunks_and_parents_recurse(
|
||||
test_impl,
|
||||
right_input,
|
||||
counter + (left_input.len() / CHUNK_LEN) as u64,
|
||||
right_output,
|
||||
);
|
||||
test_impl.hash_parents(&child_output, children, &IV_BYTES, PARENT, output)
|
||||
}
|
||||
|
||||
// Note: This test implementation doesn't support the 1-chunk-or-less case.
|
||||
fn root_hash_with_chunks_and_parents(test_impl: &Implementation, input: &[u8]) -> CVBytes {
|
||||
// TODO: handle the 1-chunk case?
|
||||
assert!(input.len() > CHUNK_LEN);
|
||||
let mut cvs = TransposedVectors::new();
|
||||
// The right half of these vectors are never used.
|
||||
let (cvs_left, _) = test_impl.split_transposed_vectors(&mut cvs);
|
||||
let mut num_cvs = hash_with_chunks_and_parents_recurse(test_impl, input, 0, cvs_left);
|
||||
while num_cvs > 2 {
|
||||
num_cvs = test_impl.reduce_parents(&mut cvs, num_cvs, &IV_BYTES, 0);
|
||||
}
|
||||
test_impl.compress(
|
||||
&cvs.extract_parent_node(0),
|
||||
BLOCK_LEN as u32,
|
||||
&IV_BYTES,
|
||||
0,
|
||||
PARENT | ROOT,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn test_chunks_and_parents_vs_reference(test_impl: &Implementation) {
|
||||
assert_eq!(test_impl.degree().count_ones(), 1, "power of 2");
|
||||
const MAX_INPUT_LEN: usize = 2 * MAX_SIMD_DEGREE * CHUNK_LEN;
|
||||
let mut input_buf = [0u8; MAX_INPUT_LEN];
|
||||
paint_test_input(&mut input_buf);
|
||||
// Try just below, equal to, and just above every whole number of chunks, except that
|
||||
// root_hash_with_chunks_and_parents doesn't support the 1-chunk-or-less case.
|
||||
let mut test_lengths = vec![CHUNK_LEN + 1];
|
||||
let mut next_len = 2 * CHUNK_LEN;
|
||||
loop {
|
||||
test_lengths.push(next_len - 1);
|
||||
test_lengths.push(next_len);
|
||||
if next_len == MAX_INPUT_LEN {
|
||||
break;
|
||||
}
|
||||
test_lengths.push(next_len + 1);
|
||||
next_len += CHUNK_LEN;
|
||||
}
|
||||
for test_len in test_lengths {
|
||||
dbg!(test_len);
|
||||
let input = &input_buf[..test_len];
|
||||
|
||||
let mut ref_hasher = reference_impl::Hasher::new();
|
||||
ref_hasher.update(&input);
|
||||
let mut ref_hash = [0u8; 32];
|
||||
ref_hasher.finalize(&mut ref_hash);
|
||||
|
||||
let test_hash = root_hash_with_chunks_and_parents(test_impl, input);
|
||||
|
||||
assert_eq!(ref_hash, test_hash);
|
||||
}
|
||||
}
|
||||
|
|
26
src/lib.rs
26
src/lib.rs
|
@ -447,22 +447,6 @@ impl fmt::Debug for ChunkState {
|
|||
// use full-width SIMD vectors for parent hashing. Without parallel parent
|
||||
// hashing, we lose about 10% of overall throughput on AVX2 and AVX-512.
|
||||
|
||||
// The largest power of two less than or equal to `n`, used for left_len()
|
||||
// immediately below, and also directly in Hasher::update().
|
||||
fn largest_power_of_two_leq(n: usize) -> usize {
|
||||
((n / 2) + 1).next_power_of_two()
|
||||
}
|
||||
|
||||
// Given some input larger than one chunk, return the number of bytes that
|
||||
// should go in the left subtree. This is the largest power-of-2 number of
|
||||
// chunks that leaves at least 1 byte for the right subtree.
|
||||
fn left_len(content_len: usize) -> usize {
|
||||
debug_assert!(content_len > CHUNK_LEN);
|
||||
// Subtract 1 to reserve at least one byte for the right side.
|
||||
let full_chunks = (content_len - 1) / CHUNK_LEN;
|
||||
largest_power_of_two_leq(full_chunks) * CHUNK_LEN
|
||||
}
|
||||
|
||||
// The wide helper function returns (writes out) an array of chaining values
|
||||
// and returns the length of that array. The number of chaining values returned
|
||||
// is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer,
|
||||
|
@ -499,10 +483,10 @@ fn compress_subtree_wide<J: join::Join>(
|
|||
// as long as the SIMD degree is a power of 2. If we ever get a SIMD degree
|
||||
// of 3 or something, we'll need a more complicated strategy.)
|
||||
debug_assert_eq!(guts::degree().count_ones(), 1, "power of 2");
|
||||
let (left, right) = input.split_at(left_len(input.len()));
|
||||
let (left, right) = input.split_at(guts::left_len(input.len()));
|
||||
let right_chunk_counter = chunk_counter + (left.len() / CHUNK_LEN) as u64;
|
||||
|
||||
let mut transposed_cvs = guts::TransposedVectors::default();
|
||||
let mut transposed_cvs = guts::TransposedVectors::new();
|
||||
let (left_cvs, right_cvs) = guts::split_transposed_vectors(&mut transposed_cvs);
|
||||
|
||||
// Recurse! For update_rayon(), this is where we take advantage of RayonJoin and use multiple
|
||||
|
@ -535,7 +519,7 @@ fn compress_subtree_to_parent_node<J: join::Join>(
|
|||
flags: u32,
|
||||
) -> BlockBytes {
|
||||
debug_assert!(input.len() > CHUNK_LEN);
|
||||
let mut transposed_cvs = guts::TransposedVectors::default();
|
||||
let mut transposed_cvs = guts::TransposedVectors::new();
|
||||
let (left_cvs, _) = guts::split_transposed_vectors(&mut transposed_cvs);
|
||||
let mut num_cvs = compress_subtree_wide::<J>(input, &key, chunk_counter, flags, left_cvs);
|
||||
debug_assert!(num_cvs >= 2);
|
||||
|
@ -546,7 +530,7 @@ fn compress_subtree_to_parent_node<J: join::Join>(
|
|||
while num_cvs > 2 {
|
||||
num_cvs = guts::reduce_parents(&mut transposed_cvs, num_cvs, key, flags);
|
||||
}
|
||||
transposed_cvs.parent_node(0)
|
||||
transposed_cvs.extract_parent_node(0)
|
||||
}
|
||||
|
||||
// Hash a complete input all at once. Unlike compress_subtree_wide() and
|
||||
|
@ -906,7 +890,7 @@ impl Hasher {
|
|||
while input.len() > CHUNK_LEN {
|
||||
debug_assert_eq!(self.chunk_state.len(), 0, "no partial chunk data");
|
||||
debug_assert_eq!(CHUNK_LEN.count_ones(), 1, "power of 2 chunk len");
|
||||
let mut subtree_len = largest_power_of_two_leq(input.len());
|
||||
let mut subtree_len = guts::largest_power_of_two_leq(input.len());
|
||||
let count_so_far = self.chunk_state.chunk_counter * CHUNK_LEN as u64;
|
||||
// Shrink the subtree_len until it evenly divides the count so far.
|
||||
// We know that subtree_len itself is a power of 2, so we can use a
|
||||
|
|
243
src/test.rs
243
src/test.rs
|
@ -51,233 +51,6 @@ pub const TEST_CASES_MAX: usize = 100 * CHUNK_LEN;
|
|||
pub const TEST_KEY: &CVBytes = b"whats the Elvish word for friend";
|
||||
pub const TEST_KEY_WORDS: &CVWords = &guts::words_from_le_bytes_32(TEST_KEY);
|
||||
|
||||
fn paint_transposed_input(input: &mut TransposedVectors) {
|
||||
let mut val = 0;
|
||||
for row in 0..8 {
|
||||
for col in 0..2 * MAX_SIMD_DEGREE {
|
||||
input[row][col] = val;
|
||||
val += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// The signature shared by the hash_parents() implementations under test.
|
||||
type HashParentsFn = unsafe fn(
|
||||
transposed_input: *const u32,
|
||||
num_parents: usize,
|
||||
key: *const u32,
|
||||
flags: u32,
|
||||
transposed_output: *mut u32, // may overlap the input
|
||||
);
|
||||
|
||||
pub fn test_hash_parents_fn(target_fn: HashParentsFn, degree: usize) {
|
||||
assert!(degree <= MAX_SIMD_DEGREE);
|
||||
for test_degree in 1..=degree {
|
||||
// separate
|
||||
{
|
||||
let mut input = TransposedVectors::default();
|
||||
paint_transposed_input(&mut input);
|
||||
let mut test_output = input.clone();
|
||||
unsafe {
|
||||
target_fn(
|
||||
ParentInOut::Separate {
|
||||
input: &input,
|
||||
num_parents: test_degree,
|
||||
output: &mut test_output,
|
||||
output_column: 0,
|
||||
},
|
||||
TEST_KEY_WORDS,
|
||||
crate::KEYED_HASH | crate::PARENT,
|
||||
);
|
||||
}
|
||||
|
||||
let mut portable_output = TransposedVectors(input.0);
|
||||
crate::portable::hash_parents(
|
||||
ParentInOut::Separate {
|
||||
input: &input,
|
||||
num_parents: test_degree,
|
||||
output: &mut portable_output,
|
||||
output_column: 0,
|
||||
},
|
||||
TEST_KEY_WORDS,
|
||||
crate::KEYED_HASH | crate::PARENT,
|
||||
);
|
||||
|
||||
assert_eq!(portable_output.0, test_output.0);
|
||||
}
|
||||
|
||||
// in-place
|
||||
{
|
||||
let mut test_io = TransposedVectors::default();
|
||||
paint_transposed_input(&mut test_io);
|
||||
unsafe {
|
||||
target_fn(
|
||||
ParentInOut::InPlace {
|
||||
in_out: &mut test_io,
|
||||
num_parents: test_degree,
|
||||
},
|
||||
TEST_KEY_WORDS,
|
||||
crate::KEYED_HASH | crate::PARENT,
|
||||
);
|
||||
}
|
||||
|
||||
let mut portable_io = TransposedVectors::default();
|
||||
paint_transposed_input(&mut portable_io);
|
||||
crate::portable::hash_parents(
|
||||
ParentInOut::InPlace {
|
||||
in_out: &mut portable_io,
|
||||
num_parents: test_degree,
|
||||
},
|
||||
TEST_KEY_WORDS,
|
||||
crate::KEYED_HASH | crate::PARENT,
|
||||
);
|
||||
|
||||
assert_eq!(portable_io.0, test_io.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn hash_with_chunks_and_parents_recurse(
|
||||
chunks_fn: HashChunksFn,
|
||||
parents_fn: HashParentsFn,
|
||||
degree: usize,
|
||||
input: &[u8],
|
||||
counter: u64,
|
||||
output: &mut TransposedVectors,
|
||||
output_column: usize,
|
||||
) -> usize {
|
||||
// TODO: hash partial chunks?
|
||||
assert_eq!(input.len() % CHUNK_LEN, 0);
|
||||
assert_eq!(degree.count_ones(), 1, "power of 2");
|
||||
if input.len() <= degree * CHUNK_LEN {
|
||||
unsafe {
|
||||
chunks_fn(input, crate::IV, counter, 0, output, output_column);
|
||||
}
|
||||
input.len() / CHUNK_LEN
|
||||
} else {
|
||||
let mut child_output = TransposedVectors::default();
|
||||
let (left_input, right_input) = input.split_at(crate::left_len(input.len()));
|
||||
let mut children = hash_with_chunks_and_parents_recurse(
|
||||
chunks_fn,
|
||||
parents_fn,
|
||||
degree,
|
||||
left_input,
|
||||
counter,
|
||||
&mut child_output,
|
||||
0,
|
||||
);
|
||||
assert_eq!(children, degree);
|
||||
children += hash_with_chunks_and_parents_recurse(
|
||||
chunks_fn,
|
||||
parents_fn,
|
||||
degree,
|
||||
right_input,
|
||||
counter + (left_input.len() / CHUNK_LEN) as u64,
|
||||
&mut child_output,
|
||||
children,
|
||||
);
|
||||
unsafe {
|
||||
parents_fn(
|
||||
ParentInOut::Separate {
|
||||
input: &child_output,
|
||||
num_parents: children / 2,
|
||||
output,
|
||||
output_column,
|
||||
},
|
||||
crate::IV,
|
||||
crate::PARENT,
|
||||
);
|
||||
}
|
||||
// If there's an odd child left over, copy it to the output.
|
||||
if children % 2 == 0 {
|
||||
children / 2
|
||||
} else {
|
||||
for i in 0..8 {
|
||||
output[i][output_column + (children / 2)] = child_output[i][children - 1];
|
||||
}
|
||||
(children / 2) + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn root_hash_with_chunks_and_parents(
|
||||
chunks_fn: HashChunksFn,
|
||||
parents_fn: HashParentsFn,
|
||||
degree: usize,
|
||||
input: &[u8],
|
||||
) -> [u8; 32] {
|
||||
assert_eq!(degree.count_ones(), 1, "power of 2");
|
||||
// TODO: handle the 1-chunk case?
|
||||
assert!(input.len() >= 2 * CHUNK_LEN);
|
||||
// TODO: hash partial chunks?
|
||||
assert_eq!(input.len() % CHUNK_LEN, 0);
|
||||
let mut cvs = TransposedVectors::default();
|
||||
let mut num_cvs =
|
||||
hash_with_chunks_and_parents_recurse(chunks_fn, parents_fn, degree, input, 0, &mut cvs, 0);
|
||||
while num_cvs > 2 {
|
||||
unsafe {
|
||||
parents_fn(
|
||||
ParentInOut::InPlace {
|
||||
in_out: &mut cvs,
|
||||
num_parents: num_cvs / 2,
|
||||
},
|
||||
crate::IV,
|
||||
crate::PARENT,
|
||||
);
|
||||
}
|
||||
if num_cvs % 2 == 0 {
|
||||
num_cvs = num_cvs / 2;
|
||||
} else {
|
||||
for i in 0..8 {
|
||||
cvs[i][num_cvs / 2] = cvs[i][num_cvs - 1];
|
||||
}
|
||||
num_cvs = (num_cvs / 2) + 1;
|
||||
}
|
||||
}
|
||||
unsafe {
|
||||
parents_fn(
|
||||
ParentInOut::InPlace {
|
||||
in_out: &mut cvs,
|
||||
num_parents: 1,
|
||||
},
|
||||
crate::IV,
|
||||
crate::PARENT | crate::ROOT,
|
||||
);
|
||||
}
|
||||
let mut ret = [0u8; 32];
|
||||
for i in 0..8 {
|
||||
ret[4 * i..][..4].copy_from_slice(&cvs[i][0].to_le_bytes());
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
#[test]
|
||||
pub fn test_compare_reference_impl_chunks_and_hashes() {
|
||||
// 31 (16 + 8 + 4 + 2 + 1) chunks
|
||||
const MAX_CHUNKS: usize = 31;
|
||||
let mut input = [0u8; MAX_CHUNKS * CHUNK_LEN];
|
||||
paint_test_input(&mut input);
|
||||
for num_chunks in 2..=MAX_CHUNKS {
|
||||
#[cfg(feature = "std")]
|
||||
dbg!(num_chunks);
|
||||
|
||||
let mut reference_output = [0u8; 32];
|
||||
let mut reference_hasher = reference_impl::Hasher::new();
|
||||
reference_hasher.update(&input[..num_chunks * CHUNK_LEN]);
|
||||
reference_hasher.finalize(&mut reference_output);
|
||||
|
||||
for test_degree in [2, 4, 8, 16] {
|
||||
let test_output = root_hash_with_chunks_and_parents(
|
||||
crate::portable::hash_chunks,
|
||||
crate::portable::hash_parents,
|
||||
test_degree,
|
||||
&input[..num_chunks * CHUNK_LEN],
|
||||
);
|
||||
assert_eq!(reference_output, test_output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Both xof() and xof_xor() have this signature.
|
||||
type XofFn = unsafe fn(
|
||||
block: &[u8; BLOCK_LEN],
|
||||
|
@ -527,22 +300,6 @@ fn test_largest_power_of_two_leq() {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_left_len() {
|
||||
let input_output = &[
|
||||
(CHUNK_LEN + 1, CHUNK_LEN),
|
||||
(2 * CHUNK_LEN - 1, CHUNK_LEN),
|
||||
(2 * CHUNK_LEN, CHUNK_LEN),
|
||||
(2 * CHUNK_LEN + 1, 2 * CHUNK_LEN),
|
||||
(4 * CHUNK_LEN - 1, 2 * CHUNK_LEN),
|
||||
(4 * CHUNK_LEN, 2 * CHUNK_LEN),
|
||||
(4 * CHUNK_LEN + 1, 4 * CHUNK_LEN),
|
||||
];
|
||||
for &(input, output) in input_output {
|
||||
assert_eq!(crate::left_len(input), output);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_compare_reference_impl() {
|
||||
const OUT: usize = 303; // more than 64, not a multiple of 4
|
||||
|
|
Loading…
Reference in New Issue