1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-05-28 13:46:02 +02:00

test_chunks_and_parents_vs_reference

This commit is contained in:
Jack O'Connor 2023-07-09 19:30:09 -07:00
parent 7b02be6a10
commit 097225a43c
5 changed files with 214 additions and 376 deletions

View File

@ -4,7 +4,7 @@ use core::mem;
use core::ptr;
use core::sync::atomic::{AtomicPtr, Ordering::Relaxed};
mod portable;
pub mod portable;
#[cfg(test)]
mod test;
@ -149,7 +149,7 @@ static DETECTED_IMPL: Implementation = Implementation::new(
);
fn init_detected_impl() {
let detected = Implementation::portable();
let detected = portable::implementation();
DETECTED_IMPL
.degree_ptr
@ -205,18 +205,6 @@ impl Implementation {
}
}
pub fn portable() -> Self {
Self::new(
|| portable::DEGREE,
portable::compress,
portable::hash_chunks,
portable::hash_parents,
portable::xof,
portable::xof_xor,
portable::universal_hash,
)
}
#[inline]
fn degree_fn(&self) -> DegreeFn {
unsafe { mem::transmute(self.degree_ptr.load(Relaxed)) }
@ -234,7 +222,7 @@ impl Implementation {
&self,
vectors: &'v mut TransposedVectors,
) -> (TransposedSplit<'v>, TransposedSplit<'v>) {
vectors.split(self.degree())
unsafe { vectors.split(self.degree()) }
}
#[inline]
@ -758,6 +746,10 @@ const TRANSPOSED_STRIDE: usize = 2 * MAX_SIMD_DEGREE;
pub struct TransposedVectors([[u32; 2 * MAX_SIMD_DEGREE]; 8]);
impl TransposedVectors {
pub fn new() -> Self {
Self::default()
}
pub fn extract_cv(&self, cv_index: usize) -> CVBytes {
let mut words = [0u32; 8];
for word_index in 0..8 {
@ -781,7 +773,10 @@ impl TransposedVectors {
self.0[0].as_mut_ptr()
}
fn split(&mut self, degree: usize) -> (TransposedSplit, TransposedSplit) {
// SAFETY: This function is just pointer arithmetic, but callers assume that it's safe (not
// necessarily correct) to write up to `degree` words to either side of the split, possibly
// from different threads.
unsafe fn split(&mut self, degree: usize) -> (TransposedSplit, TransposedSplit) {
debug_assert!(degree > 0);
debug_assert!(degree <= MAX_SIMD_DEGREE);
debug_assert_eq!(degree.count_ones(), 1, "power of 2");
@ -894,3 +889,35 @@ fn test_byte_word_round_trips() {
le_bytes_from_words_64(&words_from_le_bytes_64(&block)),
);
}
// The largest power of two less than or equal to `n`, used for left_len()
// immediately below, and also directly in Hasher::update().
pub fn largest_power_of_two_leq(n: usize) -> usize {
    // Rounding (n/2 + 1) up to a power of two lands exactly on the largest
    // power of two that is <= n. (For n = 0 this returns 1.)
    let half_plus_one = n / 2 + 1;
    half_plus_one.next_power_of_two()
}
// Given some input larger than one chunk, return the number of bytes that
// should go in the left subtree. This is the largest power-of-2 number of
// chunks that leaves at least 1 byte for the right subtree.
pub fn left_len(content_len: usize) -> usize {
    debug_assert!(content_len > CHUNK_LEN);
    // Reserve at least one byte for the right side by subtracting 1 before
    // counting whole chunks.
    let full_chunks = (content_len - 1) / CHUNK_LEN;
    CHUNK_LEN * largest_power_of_two_leq(full_chunks)
}
#[test]
fn test_left_len() {
    // Each case pairs a total input length with the expected left-subtree
    // length, covering just-below/at/just-above chunk-count boundaries.
    let cases = [
        (CHUNK_LEN + 1, CHUNK_LEN),
        (2 * CHUNK_LEN - 1, CHUNK_LEN),
        (2 * CHUNK_LEN, CHUNK_LEN),
        (2 * CHUNK_LEN + 1, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN - 1, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN + 1, 4 * CHUNK_LEN),
    ];
    for (input_len, expected_left) in cases {
        assert_eq!(left_len(input_len), expected_left);
    }
}

View File

@ -1,9 +1,9 @@
use crate::{
le_bytes_from_words_32, le_bytes_from_words_64, words_from_le_bytes_32, words_from_le_bytes_64,
BlockBytes, BlockWords, CVBytes, CVWords, IV, MAX_SIMD_DEGREE, MSG_SCHEDULE,
BlockBytes, BlockWords, CVBytes, CVWords, Implementation, IV, MAX_SIMD_DEGREE, MSG_SCHEDULE,
};
pub const DEGREE: usize = MAX_SIMD_DEGREE;
const DEGREE: usize = MAX_SIMD_DEGREE;
#[inline(always)]
fn g(state: &mut BlockWords, a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) {
@ -67,7 +67,7 @@ fn compress_inner(
state
}
pub unsafe extern "C" fn compress(
unsafe extern "C" fn compress(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
@ -84,7 +84,7 @@ pub unsafe extern "C" fn compress(
*out = le_bytes_from_words_32(state[..8].try_into().unwrap());
}
pub unsafe extern "C" fn compress_xof(
unsafe extern "C" fn compress_xof(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
@ -102,7 +102,7 @@ pub unsafe extern "C" fn compress_xof(
*out = le_bytes_from_words_64(&state);
}
pub unsafe extern "C" fn hash_chunks(
unsafe extern "C" fn hash_chunks(
input: *const u8,
input_len: usize,
key: *const CVBytes,
@ -121,7 +121,7 @@ pub unsafe extern "C" fn hash_chunks(
)
}
pub unsafe extern "C" fn hash_parents(
unsafe extern "C" fn hash_parents(
transposed_input: *const u32,
num_parents: usize,
key: *const CVBytes,
@ -138,7 +138,7 @@ pub unsafe extern "C" fn hash_parents(
)
}
pub unsafe extern "C" fn xof(
unsafe extern "C" fn xof(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
@ -159,7 +159,7 @@ pub unsafe extern "C" fn xof(
)
}
pub unsafe extern "C" fn xof_xor(
unsafe extern "C" fn xof_xor(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
@ -180,7 +180,7 @@ pub unsafe extern "C" fn xof_xor(
)
}
pub unsafe extern "C" fn universal_hash(
unsafe extern "C" fn universal_hash(
input: *const u8,
input_len: usize,
key: *const CVBytes,
@ -190,6 +190,18 @@ pub unsafe extern "C" fn universal_hash(
crate::universal_hash_using_compress(compress, input, input_len, key, counter, out)
}
/// Construct the `Implementation` vtable for this portable (pure Rust,
/// non-SIMD) backend, wiring up all of this module's kernel functions.
pub fn implementation() -> Implementation {
    Implementation::new(
        // The degree slot takes a function, so the constant is wrapped in a
        // closure here.
        || DEGREE,
        compress,
        hash_chunks,
        hash_parents,
        xof,
        xof_xor,
        universal_hash,
    )
}
#[cfg(test)]
mod test {
use super::*;
@ -197,23 +209,28 @@ mod test {
// This is circular but do it anyway.
#[test]
fn test_compress_vs_portable() {
crate::test::test_compress_vs_portable(compress);
crate::test::test_compress_vs_portable(&implementation());
}
#[test]
fn test_compress_vs_reference() {
crate::test::test_compress_vs_reference(compress);
crate::test::test_compress_vs_reference(&implementation());
}
// This is circular but do it anyway.
#[test]
fn test_hash_chunks_vs_portable() {
crate::test::test_hash_chunks_vs_portable(hash_chunks, DEGREE);
crate::test::test_hash_chunks_vs_portable(&implementation());
}
// This is circular but do it anyway.
#[test]
fn test_hash_parents_vs_portable() {
crate::test::test_hash_parents_vs_portable(hash_parents, DEGREE);
crate::test::test_hash_parents_vs_portable(&implementation());
}
// Check this module's chunk and parent kernels against the reference
// implementation, via the shared helper in crate::test.
#[test]
fn test_chunks_and_parents_vs_reference() {
    crate::test::test_chunks_and_parents_vs_reference(&implementation());
}
}

View File

@ -23,14 +23,14 @@ pub fn paint_test_input(buf: &mut [u8]) {
}
}
pub fn test_compress_vs_portable(compress_fn: CompressFn) {
pub fn test_compress_vs_portable(test_impl: &Implementation) {
for block_len in BLOCK_LENGTHS {
dbg!(block_len);
let mut block = [0; BLOCK_LEN];
paint_test_input(&mut block[..block_len]);
for counter in INITIAL_COUNTERS {
dbg!(counter);
let portable_cv = Implementation::portable().compress(
let portable_cv = portable::implementation().compress(
&block,
block_len as u32,
&TEST_KEY,
@ -38,25 +38,15 @@ pub fn test_compress_vs_portable(compress_fn: CompressFn) {
KEYED_HASH,
);
let mut test_cv = TEST_KEY;
unsafe {
let test_cv_ptr: *mut CVBytes = &mut test_cv;
compress_fn(
&block,
block_len as u32,
test_cv_ptr,
counter,
KEYED_HASH,
test_cv_ptr,
);
}
let test_cv =
test_impl.compress(&block, block_len as u32, &TEST_KEY, counter, KEYED_HASH);
assert_eq!(portable_cv, test_cv);
}
}
}
pub fn test_compress_vs_reference(compress_fn: CompressFn) {
pub fn test_compress_vs_reference(test_impl: &Implementation) {
for block_len in BLOCK_LENGTHS {
dbg!(block_len);
let mut block = [0; BLOCK_LEN];
@ -67,18 +57,13 @@ pub fn test_compress_vs_reference(compress_fn: CompressFn) {
let mut ref_hash = [0u8; 32];
ref_hasher.finalize(&mut ref_hash);
let mut test_cv = TEST_KEY;
unsafe {
let test_cv_ptr: *mut CVBytes = &mut test_cv;
compress_fn(
&block,
block_len as u32,
test_cv_ptr,
0,
CHUNK_START | CHUNK_END | ROOT | KEYED_HASH,
test_cv_ptr,
);
}
let test_cv = test_impl.compress(
&block,
block_len as u32,
&TEST_KEY,
0,
CHUNK_START | CHUNK_END | ROOT | KEYED_HASH,
);
assert_eq!(ref_hash, test_cv);
}
@ -104,14 +89,14 @@ fn check_transposed_eq(output_a: &TransposedVectors, output_b: &TransposedVector
panic!("transposed outputs are not equal");
}
pub fn test_hash_chunks_vs_portable(hash_chunks_fn: HashChunksFn, degree: usize) {
assert!(degree <= MAX_SIMD_DEGREE);
pub fn test_hash_chunks_vs_portable(test_impl: &Implementation) {
assert!(test_impl.degree() <= MAX_SIMD_DEGREE);
let mut input = [0u8; 2 * MAX_SIMD_DEGREE * CHUNK_LEN];
paint_test_input(&mut input);
dbg!(degree * CHUNK_LEN);
dbg!(test_impl.degree() * CHUNK_LEN);
// Try just below, equal to, and just above every whole number of chunks.
let mut input_2_lengths = vec![1];
let mut next_len = CHUNK_LEN;
// Try just below, equal to, and just above every power-of-2 number of chunks.
loop {
input_2_lengths.push(next_len - 1);
input_2_lengths.push(next_len);
@ -119,24 +104,25 @@ pub fn test_hash_chunks_vs_portable(hash_chunks_fn: HashChunksFn, degree: usize)
break;
}
input_2_lengths.push(next_len + 1);
next_len *= 2;
next_len += CHUNK_LEN;
}
for input_2_len in input_2_lengths {
dbg!(input_2_len);
let input1 = &input[..degree * CHUNK_LEN];
let input2 = &input[degree * CHUNK_LEN..][..input_2_len];
let input1 = &input[..test_impl.degree() * CHUNK_LEN];
let input2 = &input[test_impl.degree() * CHUNK_LEN..][..input_2_len];
for initial_counter in INITIAL_COUNTERS {
// Make two calls, to test the output_column parameter.
let mut portable_output = TransposedVectors::default();
let (portable_left, portable_right) = portable_output.split(degree);
Implementation::portable().hash_chunks(
let mut portable_output = TransposedVectors::new();
let (portable_left, portable_right) =
test_impl.split_transposed_vectors(&mut portable_output);
portable::implementation().hash_chunks(
input1,
&IV_BYTES,
initial_counter,
0,
portable_left,
);
Implementation::portable().hash_chunks(
portable::implementation().hash_chunks(
input2,
&TEST_KEY,
initial_counter + degree as u64,
@ -144,26 +130,16 @@ pub fn test_hash_chunks_vs_portable(hash_chunks_fn: HashChunksFn, degree: usize)
portable_right,
);
let mut test_output = TransposedVectors::default();
let (test_left, test_right) = test_output.split(degree);
unsafe {
hash_chunks_fn(
input1.as_ptr(),
input1.len(),
&IV_BYTES,
initial_counter,
0,
test_left.ptr,
);
hash_chunks_fn(
input2.as_ptr(),
input2.len(),
&TEST_KEY,
initial_counter + degree as u64,
KEYED_HASH,
test_right.ptr,
);
}
let mut test_output = TransposedVectors::new();
let (test_left, test_right) = test_impl.split_transposed_vectors(&mut test_output);
test_impl.hash_chunks(input1, &IV_BYTES, initial_counter, 0, test_left);
test_impl.hash_chunks(
input2,
&TEST_KEY,
initial_counter + degree as u64,
KEYED_HASH,
test_right,
);
check_transposed_eq(&portable_output, &test_output);
}
@ -171,7 +147,7 @@ pub fn test_hash_chunks_vs_portable(hash_chunks_fn: HashChunksFn, degree: usize)
}
fn painted_transposed_input() -> TransposedVectors {
let mut vectors = TransposedVectors::default();
let mut vectors = TransposedVectors::new();
let mut val = 0;
for col in 0..2 * MAX_SIMD_DEGREE {
for row in 0..8 {
@ -182,21 +158,22 @@ fn painted_transposed_input() -> TransposedVectors {
vectors
}
pub fn test_hash_parents_vs_portable(hash_parents_fn: HashParentsFn, degree: usize) {
assert!(degree <= MAX_SIMD_DEGREE);
pub fn test_hash_parents_vs_portable(test_impl: &Implementation) {
assert!(test_impl.degree() <= MAX_SIMD_DEGREE);
let input = painted_transposed_input();
for num_parents in 2..=(degree / 2) {
for num_parents in 2..=(test_impl.degree() / 2) {
dbg!(num_parents);
let mut portable_output = TransposedVectors(input.0);
let (portable_left, portable_right) = portable_output.split(degree);
Implementation::portable().hash_parents(
let (portable_left, portable_right) =
test_impl.split_transposed_vectors(&mut portable_output);
portable::implementation().hash_parents(
&input,
2 * num_parents, // num_cvs
&IV_BYTES,
0,
portable_left,
);
Implementation::portable().hash_parents(
portable::implementation().hash_parents(
&input,
2 * num_parents, // num_cvs
&TEST_KEY,
@ -204,25 +181,101 @@ pub fn test_hash_parents_vs_portable(hash_parents_fn: HashParentsFn, degree: usi
portable_right,
);
let mut test_output = input.clone();
let (test_left, test_right) = test_output.split(degree);
unsafe {
hash_parents_fn(
input.as_ptr(),
num_parents,
&IV_BYTES,
PARENT,
test_left.ptr,
);
hash_parents_fn(
input.as_ptr(),
num_parents,
&TEST_KEY,
PARENT | KEYED_HASH,
test_right.ptr,
);
}
let mut test_output = TransposedVectors(input.0);
let (test_left, test_right) = test_impl.split_transposed_vectors(&mut test_output);
test_impl.hash_parents(
&input,
2 * num_parents, // num_cvs
&IV_BYTES,
0,
test_left,
);
test_impl.hash_parents(
&input,
2 * num_parents, // num_cvs
&TEST_KEY,
KEYED_HASH,
test_right,
);
check_transposed_eq(&portable_output, &test_output);
}
}
// Recursively hash `input` into chaining values written to `output` using
// `test_impl`'s chunk and parent kernels, returning the number of CVs
// produced. `counter` is the chunk counter of the first chunk in `input`.
fn hash_with_chunks_and_parents_recurse(
    test_impl: &Implementation,
    input: &[u8],
    counter: u64,
    output: TransposedSplit,
) -> usize {
    assert!(input.len() > 0);
    // Base case: the input fits in a single batch of chunks at this degree.
    if input.len() <= test_impl.degree() * CHUNK_LEN {
        return test_impl.hash_chunks(input, &IV_BYTES, counter, 0, output);
    }
    // Recursive case: split at the largest power-of-two number of chunks that
    // leaves at least one byte for the right side (see left_len).
    let (left_input, right_input) = input.split_at(left_len(input.len()));
    let mut child_output = TransposedVectors::new();
    let (left_output, right_output) = test_impl.split_transposed_vectors(&mut child_output);
    let mut children =
        hash_with_chunks_and_parents_recurse(test_impl, left_input, counter, left_output);
    // The left side is always a complete subtree, so it yields exactly
    // `degree` CVs.
    assert_eq!(children, test_impl.degree());
    children += hash_with_chunks_and_parents_recurse(
        test_impl,
        right_input,
        counter + (left_input.len() / CHUNK_LEN) as u64,
        right_output,
    );
    // Condense the children into parent CVs, written to the caller's output.
    test_impl.hash_parents(&child_output, children, &IV_BYTES, PARENT, output)
}
// Compute the root hash of `input` using only `test_impl`'s chunk/parent
// kernels, for comparison against the reference implementation.
// Note: This test implementation doesn't support the 1-chunk-or-less case.
fn root_hash_with_chunks_and_parents(test_impl: &Implementation, input: &[u8]) -> CVBytes {
    // TODO: handle the 1-chunk case?
    assert!(input.len() > CHUNK_LEN);
    let mut cvs = TransposedVectors::new();
    // The right half of these vectors are never used.
    let (cvs_left, _) = test_impl.split_transposed_vectors(&mut cvs);
    let mut num_cvs = hash_with_chunks_and_parents_recurse(test_impl, input, 0, cvs_left);
    // Condense pairs of CVs until only the root node's two children remain.
    while num_cvs > 2 {
        num_cvs = test_impl.reduce_parents(&mut cvs, num_cvs, &IV_BYTES, 0);
    }
    // The final compression is the root parent node, with the ROOT flag set.
    test_impl.compress(
        &cvs.extract_parent_node(0),
        BLOCK_LEN as u32,
        &IV_BYTES,
        0,
        PARENT | ROOT,
    )
}
// Compare `test_impl`'s chunk/parent hashing against the reference
// implementation across a range of input lengths.
pub fn test_chunks_and_parents_vs_reference(test_impl: &Implementation) {
    // The splitting logic assumes the SIMD degree is a power of two.
    assert_eq!(test_impl.degree().count_ones(), 1, "power of 2");
    const MAX_INPUT_LEN: usize = 2 * MAX_SIMD_DEGREE * CHUNK_LEN;
    let mut input_buf = [0u8; MAX_INPUT_LEN];
    paint_test_input(&mut input_buf);
    // Try just below, equal to, and just above every whole number of chunks, except that
    // root_hash_with_chunks_and_parents doesn't support the 1-chunk-or-less case.
    let mut test_lengths = vec![CHUNK_LEN + 1];
    let mut next_len = 2 * CHUNK_LEN;
    loop {
        test_lengths.push(next_len - 1);
        test_lengths.push(next_len);
        if next_len == MAX_INPUT_LEN {
            break;
        }
        test_lengths.push(next_len + 1);
        next_len += CHUNK_LEN;
    }
    for test_len in test_lengths {
        dbg!(test_len);
        let input = &input_buf[..test_len];
        // The reference implementation is the ground truth.
        let mut ref_hasher = reference_impl::Hasher::new();
        ref_hasher.update(&input);
        let mut ref_hash = [0u8; 32];
        ref_hasher.finalize(&mut ref_hash);
        let test_hash = root_hash_with_chunks_and_parents(test_impl, input);
        assert_eq!(ref_hash, test_hash);
    }
}

View File

@ -447,22 +447,6 @@ impl fmt::Debug for ChunkState {
// use full-width SIMD vectors for parent hashing. Without parallel parent
// hashing, we lose about 10% of overall throughput on AVX2 and AVX-512.
// The largest power of two less than or equal to `n`, used for left_len()
// immediately below, and also directly in Hasher::update().
fn largest_power_of_two_leq(n: usize) -> usize {
    // Rounding (n/2 + 1) up to a power of two lands exactly on the largest
    // power of two that is <= n.
    ((n / 2) + 1).next_power_of_two()
}
// Given some input larger than one chunk, return the number of bytes that
// should go in the left subtree. This is the largest power-of-2 number of
// chunks that leaves at least 1 byte for the right subtree.
fn left_len(content_len: usize) -> usize {
    // Callers must supply more than one chunk of input.
    debug_assert!(content_len > CHUNK_LEN);
    // Subtract 1 to reserve at least one byte for the right side.
    let full_chunks = (content_len - 1) / CHUNK_LEN;
    largest_power_of_two_leq(full_chunks) * CHUNK_LEN
}
// The wide helper function returns (writes out) an array of chaining values
// and returns the length of that array. The number of chaining values returned
// is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer,
@ -499,10 +483,10 @@ fn compress_subtree_wide<J: join::Join>(
// as long as the SIMD degree is a power of 2. If we ever get a SIMD degree
// of 3 or something, we'll need a more complicated strategy.)
debug_assert_eq!(guts::degree().count_ones(), 1, "power of 2");
let (left, right) = input.split_at(left_len(input.len()));
let (left, right) = input.split_at(guts::left_len(input.len()));
let right_chunk_counter = chunk_counter + (left.len() / CHUNK_LEN) as u64;
let mut transposed_cvs = guts::TransposedVectors::default();
let mut transposed_cvs = guts::TransposedVectors::new();
let (left_cvs, right_cvs) = guts::split_transposed_vectors(&mut transposed_cvs);
// Recurse! For update_rayon(), this is where we take advantage of RayonJoin and use multiple
@ -535,7 +519,7 @@ fn compress_subtree_to_parent_node<J: join::Join>(
flags: u32,
) -> BlockBytes {
debug_assert!(input.len() > CHUNK_LEN);
let mut transposed_cvs = guts::TransposedVectors::default();
let mut transposed_cvs = guts::TransposedVectors::new();
let (left_cvs, _) = guts::split_transposed_vectors(&mut transposed_cvs);
let mut num_cvs = compress_subtree_wide::<J>(input, &key, chunk_counter, flags, left_cvs);
debug_assert!(num_cvs >= 2);
@ -546,7 +530,7 @@ fn compress_subtree_to_parent_node<J: join::Join>(
while num_cvs > 2 {
num_cvs = guts::reduce_parents(&mut transposed_cvs, num_cvs, key, flags);
}
transposed_cvs.parent_node(0)
transposed_cvs.extract_parent_node(0)
}
// Hash a complete input all at once. Unlike compress_subtree_wide() and
@ -906,7 +890,7 @@ impl Hasher {
while input.len() > CHUNK_LEN {
debug_assert_eq!(self.chunk_state.len(), 0, "no partial chunk data");
debug_assert_eq!(CHUNK_LEN.count_ones(), 1, "power of 2 chunk len");
let mut subtree_len = largest_power_of_two_leq(input.len());
let mut subtree_len = guts::largest_power_of_two_leq(input.len());
let count_so_far = self.chunk_state.chunk_counter * CHUNK_LEN as u64;
// Shrink the subtree_len until it evenly divides the count so far.
// We know that subtree_len itself is a power of 2, so we can use a

View File

@ -51,233 +51,6 @@ pub const TEST_CASES_MAX: usize = 100 * CHUNK_LEN;
pub const TEST_KEY: &CVBytes = b"whats the Elvish word for friend";
pub const TEST_KEY_WORDS: &CVWords = &guts::words_from_le_bytes_32(TEST_KEY);
// Fill every word of the transposed vectors with a distinct counter value in
// row-major order, so tests can detect misplaced or clobbered words.
fn paint_transposed_input(input: &mut TransposedVectors) {
    let width = 2 * MAX_SIMD_DEGREE;
    for row in 0..8 {
        for col in 0..width {
            // The row-major position doubles as the painted value.
            input[row][col] = (row * width + col) as u32;
        }
    }
}
// The parent-hashing kernels have this signature. (The comment previously
// here referred to the xof functions; that was a copy-paste error.)
type HashParentsFn = unsafe fn(
    transposed_input: *const u32,
    num_parents: usize,
    key: *const u32,
    flags: u32,
    transposed_output: *mut u32, // may overlap the input
);
// Check a parent-hashing implementation against the portable implementation,
// at every degree from 1 up to `degree`, in both separate-output and in-place
// modes.
pub fn test_hash_parents_fn(target_fn: HashParentsFn, degree: usize) {
    assert!(degree <= MAX_SIMD_DEGREE);
    for test_degree in 1..=degree {
        // separate
        {
            let mut input = TransposedVectors::default();
            paint_transposed_input(&mut input);
            // Start the test output as a copy of the input so untouched
            // columns still compare equal below.
            let mut test_output = input.clone();
            unsafe {
                target_fn(
                    ParentInOut::Separate {
                        input: &input,
                        num_parents: test_degree,
                        output: &mut test_output,
                        output_column: 0,
                    },
                    TEST_KEY_WORDS,
                    crate::KEYED_HASH | crate::PARENT,
                );
            }
            let mut portable_output = TransposedVectors(input.0);
            crate::portable::hash_parents(
                ParentInOut::Separate {
                    input: &input,
                    num_parents: test_degree,
                    output: &mut portable_output,
                    output_column: 0,
                },
                TEST_KEY_WORDS,
                crate::KEYED_HASH | crate::PARENT,
            );
            assert_eq!(portable_output.0, test_output.0);
        }
        // in-place
        {
            let mut test_io = TransposedVectors::default();
            paint_transposed_input(&mut test_io);
            unsafe {
                target_fn(
                    ParentInOut::InPlace {
                        in_out: &mut test_io,
                        num_parents: test_degree,
                    },
                    TEST_KEY_WORDS,
                    crate::KEYED_HASH | crate::PARENT,
                );
            }
            // Paint a second, independent copy for the portable run.
            let mut portable_io = TransposedVectors::default();
            paint_transposed_input(&mut portable_io);
            crate::portable::hash_parents(
                ParentInOut::InPlace {
                    in_out: &mut portable_io,
                    num_parents: test_degree,
                },
                TEST_KEY_WORDS,
                crate::KEYED_HASH | crate::PARENT,
            );
            assert_eq!(portable_io.0, test_io.0);
        }
    }
}
// Recursively hash `input` with the given chunk and parent kernels, writing
// chaining values into `output` starting at `output_column`, and returning
// the number of CVs written.
fn hash_with_chunks_and_parents_recurse(
    chunks_fn: HashChunksFn,
    parents_fn: HashParentsFn,
    degree: usize,
    input: &[u8],
    counter: u64,
    output: &mut TransposedVectors,
    output_column: usize,
) -> usize {
    // TODO: hash partial chunks?
    assert_eq!(input.len() % CHUNK_LEN, 0);
    assert_eq!(degree.count_ones(), 1, "power of 2");
    // Base case: one batch of chunks at this degree.
    if input.len() <= degree * CHUNK_LEN {
        unsafe {
            chunks_fn(input, crate::IV, counter, 0, output, output_column);
        }
        input.len() / CHUNK_LEN
    } else {
        let mut child_output = TransposedVectors::default();
        // Split at the largest power-of-two number of chunks that leaves at
        // least one byte on the right.
        let (left_input, right_input) = input.split_at(crate::left_len(input.len()));
        let mut children = hash_with_chunks_and_parents_recurse(
            chunks_fn,
            parents_fn,
            degree,
            left_input,
            counter,
            &mut child_output,
            0,
        );
        // The left side is always a complete subtree of exactly `degree` CVs.
        assert_eq!(children, degree);
        children += hash_with_chunks_and_parents_recurse(
            chunks_fn,
            parents_fn,
            degree,
            right_input,
            counter + (left_input.len() / CHUNK_LEN) as u64,
            &mut child_output,
            children,
        );
        unsafe {
            parents_fn(
                ParentInOut::Separate {
                    input: &child_output,
                    num_parents: children / 2,
                    output,
                    output_column,
                },
                crate::IV,
                crate::PARENT,
            );
        }
        // If there's an odd child left over, copy it to the output.
        if children % 2 == 0 {
            children / 2
        } else {
            for i in 0..8 {
                output[i][output_column + (children / 2)] = child_output[i][children - 1];
            }
            (children / 2) + 1
        }
    }
}
// Compute the root hash of `input` using only the given chunk and parent
// kernels, for comparison against the reference implementation.
fn root_hash_with_chunks_and_parents(
    chunks_fn: HashChunksFn,
    parents_fn: HashParentsFn,
    degree: usize,
    input: &[u8],
) -> [u8; 32] {
    assert_eq!(degree.count_ones(), 1, "power of 2");
    // TODO: handle the 1-chunk case?
    assert!(input.len() >= 2 * CHUNK_LEN);
    // TODO: hash partial chunks?
    assert_eq!(input.len() % CHUNK_LEN, 0);
    let mut cvs = TransposedVectors::default();
    let mut num_cvs =
        hash_with_chunks_and_parents_recurse(chunks_fn, parents_fn, degree, input, 0, &mut cvs, 0);
    // Condense pairs of CVs in place until only the root's two children remain.
    while num_cvs > 2 {
        unsafe {
            parents_fn(
                ParentInOut::InPlace {
                    in_out: &mut cvs,
                    num_parents: num_cvs / 2,
                },
                crate::IV,
                crate::PARENT,
            );
        }
        if num_cvs % 2 == 0 {
            num_cvs = num_cvs / 2;
        } else {
            // Odd CV left over: move it down next to the condensed parents.
            for i in 0..8 {
                cvs[i][num_cvs / 2] = cvs[i][num_cvs - 1];
            }
            num_cvs = (num_cvs / 2) + 1;
        }
    }
    // The final parent compression carries the ROOT flag.
    unsafe {
        parents_fn(
            ParentInOut::InPlace {
                in_out: &mut cvs,
                num_parents: 1,
            },
            crate::IV,
            crate::PARENT | crate::ROOT,
        );
    }
    // Serialize the first CV column as 8 little-endian words.
    let mut ret = [0u8; 32];
    for i in 0..8 {
        ret[4 * i..][..4].copy_from_slice(&cvs[i][0].to_le_bytes());
    }
    ret
}
// Compare the chunk/parent test harness (using the portable kernels) against
// the reference implementation, across several chunk counts and degrees.
#[test]
pub fn test_compare_reference_impl_chunks_and_hashes() {
    // 31 (16 + 8 + 4 + 2 + 1) chunks
    const MAX_CHUNKS: usize = 31;
    let mut input = [0u8; MAX_CHUNKS * CHUNK_LEN];
    paint_test_input(&mut input);
    for num_chunks in 2..=MAX_CHUNKS {
        #[cfg(feature = "std")]
        dbg!(num_chunks);
        // The reference implementation is the ground truth.
        let mut reference_output = [0u8; 32];
        let mut reference_hasher = reference_impl::Hasher::new();
        reference_hasher.update(&input[..num_chunks * CHUNK_LEN]);
        reference_hasher.finalize(&mut reference_output);
        for test_degree in [2, 4, 8, 16] {
            let test_output = root_hash_with_chunks_and_parents(
                crate::portable::hash_chunks,
                crate::portable::hash_parents,
                test_degree,
                &input[..num_chunks * CHUNK_LEN],
            );
            assert_eq!(reference_output, test_output);
        }
    }
}
// Both xof() and xof_xof() have this signature.
type XofFn = unsafe fn(
block: &[u8; BLOCK_LEN],
@ -527,22 +300,6 @@ fn test_largest_power_of_two_leq() {
}
}
#[test]
fn test_left_len() {
    // (input length, expected left-subtree length) pairs, covering
    // just-below/at/just-above chunk-count boundaries.
    let input_output = &[
        (CHUNK_LEN + 1, CHUNK_LEN),
        (2 * CHUNK_LEN - 1, CHUNK_LEN),
        (2 * CHUNK_LEN, CHUNK_LEN),
        (2 * CHUNK_LEN + 1, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN - 1, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN, 2 * CHUNK_LEN),
        (4 * CHUNK_LEN + 1, 4 * CHUNK_LEN),
    ];
    for &(input, output) in input_output {
        assert_eq!(crate::left_len(input), output);
    }
}
#[test]
fn test_compare_reference_impl() {
const OUT: usize = 303; // more than 64, not a multiple of 4