mirror of https://github.com/BLAKE3-team/BLAKE3

compare portable impl for chunks and parents

Jack O'Connor 2023-06-18 12:39:22 -07:00
parent 2722065bb4
commit 01f5e7e6d7
3 changed files with 197 additions and 54 deletions

View File

@@ -273,14 +273,6 @@ impl Platform {
}
}
// Hashes N=input.len()/CHUNK_LEN chunks and writes N transposed chunk CVs to the output,
// starting at the column given by num_cvs (i.e. appending to the transposed CVs already
// present). After returning, the total number of transposed CVs in the output will be
// num_cvs+N. N and num_cvs must both be less than or equal to simd_degree. Any partial chunk
// bytes in the input after the last complete chunk are ignored and need to be hashed
// separately by the caller. The counter argument is the value of the chunk counter for the
// first chunk, and it's incremented by 1 for each chunk after the first. The CHUNK_START and
// CHUNK_END flags are set internally.
pub fn hash_chunks(
&self,
input: &[u8],
@@ -290,10 +282,11 @@ impl Platform {
cvs_out: &mut TransposedVectors,
num_cvs: usize,
) {
// TODO: Handle partial chunks?
debug_assert_eq!(input.len() % CHUNK_LEN, 0);
debug_assert!(input.len() / CHUNK_LEN <= self.simd_degree());
debug_assert!(num_cvs <= self.simd_degree());
portable::hash_chunks(input, key, counter, flags, cvs_out, num_cvs);
// XXX: should separate the thing that hashes the remainder from this interface
}
pub fn hash_parents(&self, in_out: ParentInOut, key: &[u32; 8], flags: u8) {

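As a sketch of the contract in the removed comment above (whole chunks go through the dispatcher; any partial-chunk remainder is hashed separately by the caller, per the TODO/XXX notes), the helper below shows one possible calling pattern. It is illustrative and not part of this commit; it assumes only the items visible in this diff (Platform, TransposedVectors, CHUNK_LEN, simd_degree, hash_chunks).

// Hypothetical caller of Platform::hash_chunks. Per the contract, num_cvs and
// the number of chunks hashed per call must each be <= simd_degree; partial
// chunk bytes at the end of `input` are left for the caller to hash itself.
fn push_whole_chunks(
    platform: &Platform,
    input: &[u8],
    key: &[u32; 8],
    counter: u64,
    cvs: &mut TransposedVectors,
    num_cvs: usize,
) -> usize {
    let n = (input.len() / CHUNK_LEN).min(platform.simd_degree());
    platform.hash_chunks(&input[..n * CHUNK_LEN], key, counter, 0, cvs, num_cvs);
    // The new transposed CVs occupy columns num_cvs..num_cvs+n.
    num_cvs + n
}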
View File

@@ -179,6 +179,11 @@ pub fn hash_many<const N: usize>(
}
}
// Using DEGREE=2 instead of DEGREE=1 here (and so guaranteeing that all vectorized implementations
// have DEGREE>=2) makes it easier to avoid screwing up the root parent node in a recursive hash.
#[cfg(test)]
pub const DEGREE: usize = 2;
/// General contract:
/// - `input` is N chunks, each exactly 1 KiB, 1 <= N <= DEGREE
/// - `output_column` is a multiple of DEGREE.
@@ -186,7 +191,8 @@ pub fn hash_many<const N: usize>(
/// from `output_column` to `output_column+N-1`. Columns prior to `output_column` must be
/// unmodified.
///
/// The DEGREE of this portable implementation is 1, so the input here is always exactly 1 KiB.
/// The DEGREE of this portable implementation is 2, so the input here is either exactly 1 KiB or
/// exactly 2 KiB.
pub fn hash_chunks(
input: &[u8],
key: &[u32; 8],
@@ -195,26 +201,29 @@ pub fn hash_chunks(
output: &mut TransposedVectors,
output_column: usize,
) {
debug_assert_eq!(CHUNK_LEN, input.len());
let mut cv = *key;
for block_index in 0..16 {
let block_flags = match block_index {
0 => flags | crate::CHUNK_START,
15 => flags | crate::CHUNK_END,
_ => flags,
};
compress_in_place(
&mut cv,
input[BLOCK_LEN * block_index..][..BLOCK_LEN]
.try_into()
.unwrap(),
BLOCK_LEN as u8,
counter as u64,
block_flags,
);
}
for word_index in 0..cv.len() {
output[word_index][output_column] = cv[word_index];
debug_assert!(input.len() == CHUNK_LEN || input.len() == 2 * CHUNK_LEN);
let num_chunks = input.len() / CHUNK_LEN;
for chunk_index in 0..num_chunks {
let mut cv = *key;
for block_index in 0..16 {
let block_flags = match block_index {
0 => flags | crate::CHUNK_START,
15 => flags | crate::CHUNK_END,
_ => flags,
};
compress_in_place(
&mut cv,
input[CHUNK_LEN * chunk_index + BLOCK_LEN * block_index..][..BLOCK_LEN]
.try_into()
.unwrap(),
BLOCK_LEN as u8,
counter + chunk_index as u64,
block_flags,
);
}
for word_index in 0..cv.len() {
output[word_index][output_column + chunk_index] = cv[word_index];
}
}
}
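A short usage sketch of the DEGREE=2 contract above, illustrative only (the zero-filled input, the IV key, and the zero flags are arbitrary choices, not part of this commit): two whole chunks in one call write transposed CVs to columns 0 and 1, and the second chunk is hashed with the counter incremented by one.

// Illustrative call exercising the two-chunk path of the portable impl.
fn example_two_chunk_call() {
    let input = [0u8; 2 * CHUNK_LEN]; // contents arbitrary for this sketch
    let mut out = TransposedVectors::default();
    // counter = 0 for the first chunk, 1 for the second; CHUNK_START and
    // CHUNK_END are set internally per block.
    hash_chunks(&input, crate::IV, 0, 0, &mut out, 0);
}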
@@ -225,21 +234,24 @@ pub fn hash_chunks(
/// `PARENT` in `flags`). Writes `num_parents` transposed parent CVs to the output, starting at
/// column 0.
///
/// The DEGREE of this portable implementation is 1, so the input here is always exactly 2 CVs
/// (num_parents == 1).
/// The DEGREE of this portable implementation is 2, so num_parents is either 1 or 2.
pub fn hash_parents(mut in_out: ParentInOut, key: &[u32; 8], flags: u8) {
let (input, num_parents) = in_out.input();
debug_assert_eq!(num_parents, 1);
let mut block = [0u8; BLOCK_LEN];
for i in 0..8 {
block[4 * i..][..4].copy_from_slice(&input[i][0].to_le_bytes());
block[4 * (i + 8)..][..4].copy_from_slice(&input[i][1].to_le_bytes());
}
let mut cv = *key;
compress_in_place(&mut cv, &block, BLOCK_LEN as u8, 0, flags);
let (output, output_column) = in_out.output();
for i in 0..8 {
output[i][output_column] = cv[i];
let (_, num_parents) = in_out.input();
debug_assert!(num_parents == 1 || num_parents == 2);
for parent_index in 0..num_parents {
let (input, _) = in_out.input();
let mut block = [0u8; BLOCK_LEN];
for i in 0..8 {
block[4 * i..][..4].copy_from_slice(&input[i][2 * parent_index].to_le_bytes());
block[4 * (i + 8)..][..4]
.copy_from_slice(&input[i][2 * parent_index + 1].to_le_bytes());
}
let mut cv = *key;
compress_in_place(&mut cv, &block, BLOCK_LEN as u8, 0, flags);
let (output, output_column) = in_out.output();
for i in 0..8 {
output[i][output_column + parent_index] = cv[i];
}
}
}
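For clarity, here is the byte layout built by the copy loop above, pulled out as a standalone sketch (the helper name is illustrative, not part of this commit): the left child CV fills bytes 0..32 of the parent block and the right child CV fills bytes 32..64, both as little-endian words, and the block is then compressed with counter 0 and the PARENT flag.

// Illustrative helper mirroring the loop in hash_parents above.
fn parent_block(left_cv: &[u32; 8], right_cv: &[u32; 8]) -> [u8; BLOCK_LEN] {
    let mut block = [0u8; BLOCK_LEN];
    for i in 0..8 {
        block[4 * i..][..4].copy_from_slice(&left_cv[i].to_le_bytes());
        block[4 * (i + 8)..][..4].copy_from_slice(&right_cv[i].to_le_bytes());
    }
    block
}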
@@ -313,8 +325,6 @@ pub fn universal_hash(
pub mod test {
use super::*;
const DEGREE: usize = 1;
// These are basically testing the portable implementation against itself, but we also check
// that compress_in_place and compress_xof are consistent. And there are tests against the
// reference implementation and against hardcoded test vectors elsewhere.

View File

@@ -224,37 +224,38 @@ type HashChunksFn = unsafe fn(
pub fn test_hash_chunks_fn(target_fn: HashChunksFn, degree: usize) {
assert!(degree <= MAX_SIMD_DEGREE);
let mut input = [0u8; MAX_SIMD_DEGREE * CHUNK_LEN];
let mut input = [0u8; 2 * MAX_SIMD_DEGREE * CHUNK_LEN];
paint_test_input(&mut input);
for test_degree in 1..=degree {
for &counter in INITIAL_COUNTERS {
for &initial_counter in INITIAL_COUNTERS {
// Make two calls, to test the output_column parameter.
let mut test_output = TransposedVectors::default();
unsafe {
target_fn(
&input[..test_degree * CHUNK_LEN],
TEST_KEY_WORDS,
counter,
initial_counter,
crate::KEYED_HASH,
&mut test_output,
0,
);
target_fn(
&input[..test_degree * CHUNK_LEN],
&input[test_degree * CHUNK_LEN..][..test_degree * CHUNK_LEN],
TEST_KEY_WORDS,
counter + test_degree as u64,
initial_counter + test_degree as u64,
crate::KEYED_HASH,
&mut test_output,
test_degree,
);
}
// Here we always hash one chunk at a time, even though portable::DEGREE is 2.
let mut portable_output = TransposedVectors::default();
for i in 0..2 * test_degree {
for i in 0..(2 * test_degree) {
crate::portable::hash_chunks(
&input[..test_degree * CHUNK_LEN],
&input[i * CHUNK_LEN..][..CHUNK_LEN],
TEST_KEY_WORDS,
counter + i as u64,
initial_counter + i as u64,
crate::KEYED_HASH,
&mut portable_output,
i,
@@ -362,6 +363,145 @@ pub fn test_hash_parents_fn(target_fn: HashParentsFn, degree: usize) {
}
}
fn hash_with_chunks_and_parents_recurse(
chunks_fn: HashChunksFn,
parents_fn: HashParentsFn,
degree: usize,
input: &[u8],
counter: u64,
output: &mut TransposedVectors,
output_column: usize,
) -> usize {
// TODO: hash partial chunks?
assert_eq!(input.len() % CHUNK_LEN, 0);
assert_eq!(degree.count_ones(), 1, "power of 2");
if input.len() <= degree * CHUNK_LEN {
unsafe {
chunks_fn(input, crate::IV, counter, 0, output, output_column);
}
input.len() / CHUNK_LEN
} else {
let mut child_output = TransposedVectors::default();
let (left_input, right_input) = input.split_at(crate::left_len(input.len()));
let mut children = hash_with_chunks_and_parents_recurse(
chunks_fn,
parents_fn,
degree,
left_input,
counter,
&mut child_output,
0,
);
assert_eq!(children, degree);
children += hash_with_chunks_and_parents_recurse(
chunks_fn,
parents_fn,
degree,
right_input,
counter + (left_input.len() / CHUNK_LEN) as u64,
&mut child_output,
children,
);
unsafe {
parents_fn(
ParentInOut::Separate {
input: &child_output,
num_parents: children / 2,
output,
output_column,
},
crate::IV,
crate::PARENT,
);
}
// If there's an odd child left over, copy it to the output.
if children % 2 == 0 {
children / 2
} else {
for i in 0..8 {
output[i][output_column + (children / 2)] = child_output[i][children - 1];
}
(children / 2) + 1
}
}
}
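The recursion above delegates the subtree split to crate::left_len, which is not shown in this diff. It is assumed here to follow the standard BLAKE3 tree rule: the left subtree takes the largest power-of-two number of whole chunks that still leaves at least one byte of input for the right subtree. A sketch of that rule (illustrative; the crate's own left_len is authoritative):

// Assumed behavior of crate::left_len (not shown in this diff).
fn left_len_sketch(content_len: usize) -> usize {
    // Subtract 1 so that an input of exactly 2^k chunks splits in half
    // instead of putting everything on the left.
    let full_chunks = (content_len - 1) / CHUNK_LEN;
    // Largest power of two <= full_chunks; full_chunks >= 1 whenever the
    // recursion takes this branch, because input.len() > degree * CHUNK_LEN.
    let biggest_pow2 = 1usize << (usize::BITS - 1 - full_chunks.leading_zeros());
    biggest_pow2 * CHUNK_LEN
}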
fn root_hash_with_chunks_and_parents(
chunks_fn: HashChunksFn,
parents_fn: HashParentsFn,
degree: usize,
input: &[u8],
) -> [u8; 32] {
// TODO: handle the 1-chunk case?
assert!(input.len() >= 2 * CHUNK_LEN);
// TODO: hash partial chunks?
assert_eq!(input.len() % CHUNK_LEN, 0);
let mut cvs = TransposedVectors::default();
let mut num_cvs =
hash_with_chunks_and_parents_recurse(chunks_fn, parents_fn, degree, input, 0, &mut cvs, 0);
while num_cvs > 2 {
unsafe {
parents_fn(
ParentInOut::InPlace {
in_out: &mut cvs,
num_parents: num_cvs / 2,
},
crate::IV,
crate::PARENT,
);
}
if num_cvs % 2 == 0 {
num_cvs = num_cvs / 2;
} else {
for i in 0..8 {
cvs[i][num_cvs / 2] = cvs[i][num_cvs - 1];
}
num_cvs = (num_cvs / 2) + 1;
}
}
unsafe {
parents_fn(
ParentInOut::InPlace {
in_out: &mut cvs,
num_parents: 1,
},
crate::IV,
crate::PARENT | crate::ROOT,
);
}
let mut ret = [0u8; 32];
for i in 0..8 {
ret[4 * i..][..4].copy_from_slice(&cvs[i][0].to_le_bytes());
}
ret
}
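The while loop above is a repeated ceiling division by two: each pass compresses pairs of CVs and copies an odd trailing CV up unchanged, so for example 7 CVs become 4, then 2, and the final compression with PARENT | ROOT produces the root hash. The one-liner below restates that step (illustrative only):

// One pass of the reduction loop: pairs become parents, an odd leftover CV
// is carried up unchanged.
fn cvs_after_one_pass(num_cvs: usize) -> usize {
    (num_cvs + 1) / 2 // == num_cvs / 2, plus 1 when a CV is carried up
}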
#[test]
pub fn test_compare_reference_impl_chunks_and_hashes() {
// 31 (16 + 8 + 4 + 2 + 1) chunks
const MAX_CHUNKS: usize = 31;
let mut input = [0u8; MAX_CHUNKS * CHUNK_LEN];
paint_test_input(&mut input);
for num_chunks in 2..=MAX_CHUNKS {
#[cfg(feature = "std")]
dbg!(num_chunks);
let test_output = root_hash_with_chunks_and_parents(
crate::portable::hash_chunks,
crate::portable::hash_parents,
crate::portable::DEGREE,
&input[..num_chunks * CHUNK_LEN],
);
let mut reference_output = [0u8; 32];
let mut reference_hasher = reference_impl::Hasher::new();
reference_hasher.update(&input[..num_chunks * CHUNK_LEN]);
reference_hasher.finalize(&mut reference_output);
assert_eq!(reference_output, test_output);
}
}
// Both xof() and xof_xof() have this signature.
type XofFn = unsafe fn(
block: &[u8; BLOCK_LEN],