mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-05-28 22:06:04 +02:00
compare portable impl for chunks and parents
This commit is contained in:
parent
2722065bb4
commit
01f5e7e6d7
|
@ -273,14 +273,6 @@ impl Platform {
|
|||
}
|
||||
}
|
||||
|
||||
// Hashes N=input.len()/CHUNK_LEN chunks and writes N transposed chunk CVs to the output,
|
||||
// starting at the column given by num_cvs (i.e. appending to the transposed CVs already
|
||||
// present). After returning, the total number of transposed CVs in the output will be
|
||||
// num_cvs+N. N and num_cvs must both be less than or equal to simd_degree. Any partial chunk
|
||||
// bytes in the input after the last complete chunk are ignored and need to be hashed
|
||||
// separately by the caller. The counter argument is the value of the chunk counter for the
|
||||
// first chunk, and it's incremented by 1 for each chunk after the first. The CHUNK_START and
|
||||
// CHUNK_END flags are set internally.
|
||||
pub fn hash_chunks(
|
||||
&self,
|
||||
input: &[u8],
|
||||
|
@ -290,10 +282,11 @@ impl Platform {
|
|||
cvs_out: &mut TransposedVectors,
|
||||
num_cvs: usize,
|
||||
) {
|
||||
// TODO: Handle partial chunks?
|
||||
debug_assert_eq!(input.len() % CHUNK_LEN, 0);
|
||||
debug_assert!(input.len() / CHUNK_LEN <= self.simd_degree());
|
||||
debug_assert!(num_cvs <= self.simd_degree());
|
||||
portable::hash_chunks(input, key, counter, flags, cvs_out, num_cvs);
|
||||
// XXX: should separate the thing that hashes the remainder from this interface
|
||||
}
|
||||
|
||||
pub fn hash_parents(&self, in_out: ParentInOut, key: &[u32; 8], flags: u8) {
|
||||
|
|
|
@ -179,6 +179,11 @@ pub fn hash_many<const N: usize>(
|
|||
}
|
||||
}
|
||||
|
||||
// Using DEGREE=2 instead of DEGREE=1 here (and so guaranteeing that all vectorized implementations
|
||||
// have DEGREE>=2) makes it easier to avoid screwing up the root parent node in a recursive hash.
|
||||
#[cfg(test)]
|
||||
pub const DEGREE: usize = 2;
|
||||
|
||||
/// General contract:
|
||||
/// - `input` is N chunks, each exactly 1 KiB, 1 <= N <= DEGREE
|
||||
/// - `output_column` is a multiple of DEGREE.
|
||||
|
@ -186,7 +191,8 @@ pub fn hash_many<const N: usize>(
|
|||
/// from `output_column` to `output_column+N-1`. Columns prior to `output_column` must be
|
||||
/// unmodified.
|
||||
///
|
||||
/// The DEGREE of this portable implementation is 1, so the input here is always exactly 1 KiB.
|
||||
/// The DEGREE of this portable implementation is 2, so the input here is either exactly 1 KiB or
|
||||
/// exactly 2 KiB.
|
||||
pub fn hash_chunks(
|
||||
input: &[u8],
|
||||
key: &[u32; 8],
|
||||
|
@ -195,26 +201,29 @@ pub fn hash_chunks(
|
|||
output: &mut TransposedVectors,
|
||||
output_column: usize,
|
||||
) {
|
||||
debug_assert_eq!(CHUNK_LEN, input.len());
|
||||
let mut cv = *key;
|
||||
for block_index in 0..16 {
|
||||
let block_flags = match block_index {
|
||||
0 => flags | crate::CHUNK_START,
|
||||
15 => flags | crate::CHUNK_END,
|
||||
_ => flags,
|
||||
};
|
||||
compress_in_place(
|
||||
&mut cv,
|
||||
input[BLOCK_LEN * block_index..][..BLOCK_LEN]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
BLOCK_LEN as u8,
|
||||
counter as u64,
|
||||
block_flags,
|
||||
);
|
||||
}
|
||||
for word_index in 0..cv.len() {
|
||||
output[word_index][output_column] = cv[word_index];
|
||||
debug_assert!(input.len() == CHUNK_LEN || input.len() == 2 * CHUNK_LEN);
|
||||
let num_chunks = input.len() / CHUNK_LEN;
|
||||
for chunk_index in 0..num_chunks {
|
||||
let mut cv = *key;
|
||||
for block_index in 0..16 {
|
||||
let block_flags = match block_index {
|
||||
0 => flags | crate::CHUNK_START,
|
||||
15 => flags | crate::CHUNK_END,
|
||||
_ => flags,
|
||||
};
|
||||
compress_in_place(
|
||||
&mut cv,
|
||||
input[CHUNK_LEN * chunk_index + BLOCK_LEN * block_index..][..BLOCK_LEN]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
BLOCK_LEN as u8,
|
||||
counter + chunk_index as u64,
|
||||
block_flags,
|
||||
);
|
||||
}
|
||||
for word_index in 0..cv.len() {
|
||||
output[word_index][output_column + chunk_index] = cv[word_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -225,21 +234,24 @@ pub fn hash_chunks(
|
|||
/// `PARENT` in `flags`). Writes `num_parents` transposed parent CVs to the output, starting at
|
||||
/// column 0.
|
||||
///
|
||||
/// The DEGREE of this portable implementation is 1, so the input here is always exactly 2 CVs
|
||||
/// (num_parents == 1).
|
||||
/// The DEGREE of this portable implementation is 2, so num_parents is either 1 or 2.
|
||||
pub fn hash_parents(mut in_out: ParentInOut, key: &[u32; 8], flags: u8) {
|
||||
let (input, num_parents) = in_out.input();
|
||||
debug_assert_eq!(num_parents, 1);
|
||||
let mut block = [0u8; BLOCK_LEN];
|
||||
for i in 0..8 {
|
||||
block[4 * i..][..4].copy_from_slice(&input[i][0].to_le_bytes());
|
||||
block[4 * (i + 8)..][..4].copy_from_slice(&input[i][1].to_le_bytes());
|
||||
}
|
||||
let mut cv = *key;
|
||||
compress_in_place(&mut cv, &block, BLOCK_LEN as u8, 0, flags);
|
||||
let (output, output_column) = in_out.output();
|
||||
for i in 0..8 {
|
||||
output[i][output_column] = cv[i];
|
||||
let (_, num_parents) = in_out.input();
|
||||
debug_assert!(num_parents == 1 || num_parents == 2);
|
||||
for parent_index in 0..num_parents {
|
||||
let (input, _) = in_out.input();
|
||||
let mut block = [0u8; BLOCK_LEN];
|
||||
for i in 0..8 {
|
||||
block[4 * i..][..4].copy_from_slice(&input[i][2 * parent_index].to_le_bytes());
|
||||
block[4 * (i + 8)..][..4]
|
||||
.copy_from_slice(&input[i][2 * parent_index + 1].to_le_bytes());
|
||||
}
|
||||
let mut cv = *key;
|
||||
compress_in_place(&mut cv, &block, BLOCK_LEN as u8, 0, flags);
|
||||
let (output, output_column) = in_out.output();
|
||||
for i in 0..8 {
|
||||
output[i][output_column + parent_index] = cv[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -313,8 +325,6 @@ pub fn universal_hash(
|
|||
pub mod test {
|
||||
use super::*;
|
||||
|
||||
const DEGREE: usize = 1;
|
||||
|
||||
// These are basically testing the portable implementation against itself, but we also check
|
||||
// that compress_in_place and compress_xof are consistent. And there are tests against the
|
||||
// reference implementation and against hardcoded test vectors elsewhere.
|
||||
|
|
156
src/test.rs
156
src/test.rs
|
@ -224,37 +224,38 @@ type HashChunksFn = unsafe fn(
|
|||
|
||||
pub fn test_hash_chunks_fn(target_fn: HashChunksFn, degree: usize) {
|
||||
assert!(degree <= MAX_SIMD_DEGREE);
|
||||
let mut input = [0u8; MAX_SIMD_DEGREE * CHUNK_LEN];
|
||||
let mut input = [0u8; 2 * MAX_SIMD_DEGREE * CHUNK_LEN];
|
||||
paint_test_input(&mut input);
|
||||
for test_degree in 1..=degree {
|
||||
for &counter in INITIAL_COUNTERS {
|
||||
for &initial_counter in INITIAL_COUNTERS {
|
||||
// Make two calls, to test the output_column parameter.
|
||||
let mut test_output = TransposedVectors::default();
|
||||
unsafe {
|
||||
target_fn(
|
||||
&input[..test_degree * CHUNK_LEN],
|
||||
TEST_KEY_WORDS,
|
||||
counter,
|
||||
initial_counter,
|
||||
crate::KEYED_HASH,
|
||||
&mut test_output,
|
||||
0,
|
||||
);
|
||||
target_fn(
|
||||
&input[..test_degree * CHUNK_LEN],
|
||||
&input[test_degree * CHUNK_LEN..][..test_degree * CHUNK_LEN],
|
||||
TEST_KEY_WORDS,
|
||||
counter + test_degree as u64,
|
||||
initial_counter + test_degree as u64,
|
||||
crate::KEYED_HASH,
|
||||
&mut test_output,
|
||||
test_degree,
|
||||
);
|
||||
}
|
||||
|
||||
// Here always hash one chunk at a time, even though portable::DEGREE is 2.
|
||||
let mut portable_output = TransposedVectors::default();
|
||||
for i in 0..2 * test_degree {
|
||||
for i in 0..(2 * test_degree) {
|
||||
crate::portable::hash_chunks(
|
||||
&input[..test_degree * CHUNK_LEN],
|
||||
&input[i * CHUNK_LEN..][..CHUNK_LEN],
|
||||
TEST_KEY_WORDS,
|
||||
counter + i as u64,
|
||||
initial_counter + i as u64,
|
||||
crate::KEYED_HASH,
|
||||
&mut portable_output,
|
||||
i,
|
||||
|
@ -362,6 +363,145 @@ pub fn test_hash_parents_fn(target_fn: HashParentsFn, degree: usize) {
|
|||
}
|
||||
}
|
||||
|
||||
fn hash_with_chunks_and_parents_recurse(
|
||||
chunks_fn: HashChunksFn,
|
||||
parents_fn: HashParentsFn,
|
||||
degree: usize,
|
||||
input: &[u8],
|
||||
counter: u64,
|
||||
output: &mut TransposedVectors,
|
||||
output_column: usize,
|
||||
) -> usize {
|
||||
// TODO: hash partial chunks?
|
||||
assert_eq!(input.len() % CHUNK_LEN, 0);
|
||||
assert_eq!(degree.count_ones(), 1, "power of 2");
|
||||
if input.len() <= degree * CHUNK_LEN {
|
||||
unsafe {
|
||||
chunks_fn(input, crate::IV, counter, 0, output, output_column);
|
||||
}
|
||||
input.len() / CHUNK_LEN
|
||||
} else {
|
||||
let mut child_output = TransposedVectors::default();
|
||||
let (left_input, right_input) = input.split_at(crate::left_len(input.len()));
|
||||
let mut children = hash_with_chunks_and_parents_recurse(
|
||||
chunks_fn,
|
||||
parents_fn,
|
||||
degree,
|
||||
left_input,
|
||||
counter,
|
||||
&mut child_output,
|
||||
0,
|
||||
);
|
||||
assert_eq!(children, degree);
|
||||
children += hash_with_chunks_and_parents_recurse(
|
||||
chunks_fn,
|
||||
parents_fn,
|
||||
degree,
|
||||
right_input,
|
||||
counter + (left_input.len() / CHUNK_LEN) as u64,
|
||||
&mut child_output,
|
||||
children,
|
||||
);
|
||||
unsafe {
|
||||
parents_fn(
|
||||
ParentInOut::Separate {
|
||||
input: &child_output,
|
||||
num_parents: children / 2,
|
||||
output,
|
||||
output_column,
|
||||
},
|
||||
crate::IV,
|
||||
crate::PARENT,
|
||||
);
|
||||
}
|
||||
// If there's an odd child left over, copy it to the output.
|
||||
if children % 2 == 0 {
|
||||
children / 2
|
||||
} else {
|
||||
for i in 0..8 {
|
||||
output[i][output_column + (children / 2)] = child_output[i][children - 1];
|
||||
}
|
||||
(children / 2) + 1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn root_hash_with_chunks_and_parents(
|
||||
chunks_fn: HashChunksFn,
|
||||
parents_fn: HashParentsFn,
|
||||
degree: usize,
|
||||
input: &[u8],
|
||||
) -> [u8; 32] {
|
||||
// TODO: handle the 1-chunk case?
|
||||
assert!(input.len() >= 2 * CHUNK_LEN);
|
||||
// TODO: hash partial chunks?
|
||||
assert_eq!(input.len() % CHUNK_LEN, 0);
|
||||
let mut cvs = TransposedVectors::default();
|
||||
let mut num_cvs =
|
||||
hash_with_chunks_and_parents_recurse(chunks_fn, parents_fn, degree, input, 0, &mut cvs, 0);
|
||||
while num_cvs > 2 {
|
||||
unsafe {
|
||||
parents_fn(
|
||||
ParentInOut::InPlace {
|
||||
in_out: &mut cvs,
|
||||
num_parents: num_cvs / 2,
|
||||
},
|
||||
crate::IV,
|
||||
crate::PARENT,
|
||||
);
|
||||
}
|
||||
if num_cvs % 2 == 0 {
|
||||
num_cvs = num_cvs / 2;
|
||||
} else {
|
||||
for i in 0..8 {
|
||||
cvs[i][num_cvs / 2] = cvs[i][num_cvs - 1];
|
||||
}
|
||||
num_cvs = (num_cvs / 2) + 1;
|
||||
}
|
||||
}
|
||||
unsafe {
|
||||
parents_fn(
|
||||
ParentInOut::InPlace {
|
||||
in_out: &mut cvs,
|
||||
num_parents: 1,
|
||||
},
|
||||
crate::IV,
|
||||
crate::PARENT | crate::ROOT,
|
||||
);
|
||||
}
|
||||
let mut ret = [0u8; 32];
|
||||
for i in 0..8 {
|
||||
ret[4 * i..][..4].copy_from_slice(&cvs[i][0].to_le_bytes());
|
||||
}
|
||||
ret
|
||||
}
|
||||
|
||||
/// Checks that a root hash built from `portable::hash_chunks` and `portable::hash_parents`
/// matches `reference_impl::Hasher` for every whole-chunk input length from 2 to 31 chunks.
#[test]
pub fn test_compare_reference_impl_chunks_and_hashes() {
    // 31 (16 + 8 + 4 + 2 + 1) chunks
    const MAX_CHUNKS: usize = 31;
    let mut input = [0u8; MAX_CHUNKS * CHUNK_LEN];
    paint_test_input(&mut input);

    for num_chunks in 2..=MAX_CHUNKS {
        #[cfg(feature = "std")]
        dbg!(num_chunks);

        // Both implementations hash the same prefix of the painted buffer.
        let message = &input[..num_chunks * CHUNK_LEN];

        let test_output = root_hash_with_chunks_and_parents(
            crate::portable::hash_chunks,
            crate::portable::hash_parents,
            crate::portable::DEGREE,
            message,
        );

        let mut reference_hasher = reference_impl::Hasher::new();
        reference_hasher.update(message);
        let mut reference_output = [0u8; 32];
        reference_hasher.finalize(&mut reference_output);

        assert_eq!(reference_output, test_output);
    }
}
|
||||
|
||||
// Both xof() and xof_xof() have this signature.
|
||||
type XofFn = unsafe fn(
|
||||
block: &[u8; BLOCK_LEN],
|
||||
|
|
Loading…
Reference in New Issue