mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-05-23 21:17:06 +02:00
Jack O'Connor 2023-05-28 13:40:38 -07:00
parent e302cdf36f
commit 589f2c3f48
3 changed files with 397 additions and 14 deletions

View File

@@ -1,4 +1,4 @@
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN};
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN, CHUNK_LEN};
use arrayref::{array_mut_ref, array_ref};
cfg_if::cfg_if! {
@@ -272,6 +272,73 @@ impl Platform {
}
}
// Hashes N=input.len()/CHUNK_LEN chunks and writes N transposed chunk CVs to the output,
// starting at the column given by num_cvs (i.e. appending to the transposed CVs already
// present). After returning, the total number of transposed CVs in the output will be
// num_cvs+N. N and num_cvs must both be less than or equal to simd_degree. Any partial chunk
// bytes in the input after the last complete chunk are ignored and need to be hashed
// separately by the caller. The counter argument is the value of the chunk counter for the
// first chunk, and it's incremented by 1 for each chunk after the first. The CHUNK_START and
// CHUNK_END flags are set internally.
pub fn hash_chunks(
&self,
input: &[u8],
key: &[u32; 8],
counter: u64,
flags: u8,
cvs_out: &mut TransposedVectors,
num_cvs: usize,
) {
debug_assert!(input.len() / CHUNK_LEN <= self.simd_degree());
debug_assert!(num_cvs <= self.simd_degree());
portable::hash_chunks(input, key, counter, flags, cvs_out, num_cvs);
// XXX: should separate the thing that hashes the remainder from this interface
}
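// Illustrative sketch only (not part of this commit): because num_cvs is the column where
// writing starts, successive calls can append CVs for successive batches of chunks. This
// hypothetical helper assumes `first` is a whole number of chunks and that each batch
// contains at most simd_degree complete chunks.
#[allow(dead_code)]
fn hash_two_chunk_batches_example(
    &self,
    first: &[u8],
    second: &[u8],
    key: &[u32; 8],
    flags: u8,
    cvs: &mut TransposedVectors,
) {
    let first_chunks = first.len() / CHUNK_LEN;
    // The chunk counter keeps counting across batches, so the second batch starts its
    // counter at first_chunks and writes its CVs starting at column first_chunks.
    self.hash_chunks(first, key, 0, flags, cvs, 0);
    self.hash_chunks(second, key, first_chunks as u64, flags, cvs, first_chunks);
}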
// Writes out N=num_cvs/2 transposed parent CVs in-place over the first N columns of the input
// CVs. Columns N and above are unmodified. num_cvs must be less than or equal to
// 2*simd_degree. If num_cvs is odd, the final input CV is ignored, and the caller should copy
// it from column 2N (the last input column) to column N after this function returns. The
// PARENT flag is added internally.
pub fn hash_parents(
&self,
cvs: &mut TransposedVectors,
num_cvs: usize,
key: &[u32; 8],
flags: u8,
) {
debug_assert!(num_cvs <= 2 * self.simd_degree());
portable::hash_parents(cvs, num_cvs, key, flags);
// XXX: should separate the thing that copies the last CV over from this interface
}
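// Illustrative sketch only (not part of this commit): one parent-reduction step as a caller
// might perform it, including the copy of a leftover odd child CV described above. The
// method name and return value are hypothetical.
#[allow(dead_code)]
fn reduce_parents_example(
    &self,
    cvs: &mut TransposedVectors,
    num_cvs: usize,
    key: &[u32; 8],
    flags: u8,
) -> usize {
    let num_parents = num_cvs / 2;
    self.hash_parents(cvs, num_cvs, key, flags);
    if num_cvs % 2 == 1 {
        // Move the unpaired child CV down so it directly follows the new parent CVs.
        for row in 0..8 {
            cvs.0[row][num_parents] = cvs.0[row][num_cvs - 1];
        }
        num_parents + 1
    } else {
        num_parents
    }
}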
pub fn xof(
&self,
block: &[u8; BLOCK_LEN],
block_len: u8,
cv: &[u32; 8],
counter: u64,
flags: u8,
out: &mut [u8],
) {
portable::xof(block, block_len, cv, counter, flags, out);
}
pub fn xof_xor(
&self,
block: &[u8; BLOCK_LEN],
block_len: u8,
cv: &[u32; 8],
counter: u64,
flags: u8,
out: &mut [u8],
) {
portable::xof_xor(block, block_len, cv, counter, flags, out);
}
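// Computes a 64-byte universal hash by XORing together one keyed compression per 64-byte
// block of input (an empty input still counts as one block), incrementing the counter for
// each block. See the portable implementation for the exact construction; the result is not
// a regular BLAKE3 hash.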
pub fn universal_hash(&self, input: &[u8], key: &[u32; 8], counter: u64) -> [u8; 64] {
portable::universal_hash(input, key, counter)
}
// Explicit platform constructors, for benchmarks.
pub fn portable() -> Self {
@@ -485,3 +552,8 @@ pub fn le_bytes_from_words_64(words: &[u32; 16]) -> [u8; 64] {
*array_mut_ref!(out, 15 * 4, 4) = words[15].to_le_bytes();
out
}
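// Chaining values are stored transposed: row w holds word w of every CV, so CV number i
// occupies column i across all 8 rows. There's room for 2 * MAX_SIMD_DEGREE CVs, matching the
// maximum number of child CVs that hash_parents can reduce in one call.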
#[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), repr(C, align(64)))]
pub struct TransposedVectors(pub [[u32; 2 * MAX_SIMD_DEGREE]; 8]);
pub struct StridedOutput(*mut u32);

View File

@@ -1,8 +1,9 @@
use crate::{
counter_high, counter_low, CVBytes, CVWords, IncrementCounter, BLOCK_LEN, IV, MSG_SCHEDULE,
OUT_LEN,
counter_high, counter_low, platform::TransposedVectors, CVBytes, CVWords, IncrementCounter,
BLOCK_LEN, CHUNK_LEN, IV, MSG_SCHEDULE, OUT_LEN,
};
use arrayref::{array_mut_ref, array_ref};
use core::cmp;
#[inline(always)]
fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) {
@@ -177,14 +178,118 @@ pub fn hash_many<const N: usize>(
}
}
pub fn hash_chunks(
input: &[u8],
key: &[u32; 8],
counter: u64,
flags: u8,
output: &mut TransposedVectors,
output_offset: usize,
) {
const LAST_BLOCK_INDEX: usize = (CHUNK_LEN / BLOCK_LEN) - 1;
// There might be a partial chunk at the end. If so, we ignore it here, and the caller will
// hash it separately.
let num_chunks = input.len() / CHUNK_LEN;
for chunk_index in 0..num_chunks {
let mut cv = *key;
for block_index in 0..CHUNK_LEN / BLOCK_LEN {
compress_in_place(
&mut cv,
input[CHUNK_LEN * chunk_index + BLOCK_LEN * block_index..][..BLOCK_LEN]
.try_into()
.unwrap(),
BLOCK_LEN as u8,
counter + chunk_index as u64,
match block_index {
0 => flags | crate::CHUNK_START,
LAST_BLOCK_INDEX => flags | crate::CHUNK_END,
_ => flags,
},
);
}
for word_index in 0..cv.len() {
output.0[word_index][output_offset + chunk_index] = cv[word_index];
}
}
}
pub fn hash_parents(cvs: &mut TransposedVectors, num_cvs: usize, key: &[u32; 8], flags: u8) {
// Note that there may be an odd number of children. If there's a leftover child, it gets
// appended to the outputs by the caller. We will not overwrite it.
let num_parents = num_cvs / 2;
todo!()
}
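// Illustrative sketch only (not part of this commit): one way the todo!() above might be
// filled in. Each parent block is the concatenation of the two child CVs in columns 2i and
// 2i+1, compressed with the PARENT flag and a counter of 0; the resulting parent CV
// overwrites column i. The function name is hypothetical.
#[allow(dead_code)]
fn hash_parents_sketch(cvs: &mut TransposedVectors, num_cvs: usize, key: &[u32; 8], flags: u8) {
    let num_parents = num_cvs / 2;
    for i in 0..num_parents {
        // Serialize the left child (column 2i) and the right child (column 2i+1) into a
        // 64-byte parent block, one little-endian word at a time.
        let mut block = [0u8; BLOCK_LEN];
        for w in 0..8 {
            block[4 * w..][..4].copy_from_slice(&cvs.0[w][2 * i].to_le_bytes());
            block[4 * (w + 8)..][..4].copy_from_slice(&cvs.0[w][2 * i + 1].to_le_bytes());
        }
        let mut cv = *key;
        compress_in_place(&mut cv, &block, BLOCK_LEN as u8, 0, flags | crate::PARENT);
        // Write the parent CV transposed into column i.
        for w in 0..8 {
            cvs.0[w][i] = cv[w];
        }
    }
}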
pub fn xof(
block: &[u8; BLOCK_LEN],
block_len: u8,
cv: &[u32; 8],
mut counter: u64,
flags: u8,
mut out: &mut [u8],
) {
while !out.is_empty() {
let block_output = compress_xof(cv, block, block_len, counter, flags);
let take = cmp::min(BLOCK_LEN, out.len());
out[..take].copy_from_slice(&block_output[..take]);
out = &mut out[take..];
counter += 1;
}
}
pub fn xof_xor(
block: &[u8; BLOCK_LEN],
block_len: u8,
cv: &[u32; 8],
mut counter: u64,
flags: u8,
mut out: &mut [u8],
) {
while !out.is_empty() {
let block_output = compress_xof(cv, block, block_len, counter, flags);
let take = cmp::min(BLOCK_LEN, out.len());
for i in 0..take {
out[i] ^= block_output[i];
}
out = &mut out[take..];
counter += 1;
}
}
pub fn universal_hash(mut input: &[u8], key: &[u32; 8], mut counter: u64) -> [u8; BLOCK_LEN] {
let flags = crate::KEYED_HASH | crate::CHUNK_START | crate::CHUNK_END | crate::ROOT;
let mut result = [0u8; BLOCK_LEN];
while input.len() > BLOCK_LEN {
let block_output = compress_xof(
key,
&input[..BLOCK_LEN].try_into().unwrap(),
BLOCK_LEN as u8,
counter,
flags,
);
for i in 0..BLOCK_LEN {
result[i] ^= block_output[i];
}
input = &input[BLOCK_LEN..];
counter += 1;
}
let mut final_block = [0u8; BLOCK_LEN];
final_block[..input.len()].copy_from_slice(input);
let final_output = compress_xof(key, &final_block, input.len() as u8, counter, flags);
for i in 0..BLOCK_LEN {
result[i] ^= final_output[i];
}
result
}
#[cfg(test)]
pub mod test {
use super::*;
// This is basically testing the portable implementation against itself,
// but it also checks that compress_in_place and compress_xof are
// consistent. And there are tests against the reference implementation and
// against hardcoded test vectors elsewhere.
// These are basically testing the portable implementation against itself, but we also check
// that compress_in_place and compress_xof are consistent. And there are tests against the
// reference implementation and against hardcoded test vectors elsewhere.
#[test]
fn test_compress() {
crate::test::test_compress_fn(compress_in_place, compress_xof);
@@ -195,4 +300,14 @@ pub mod test {
fn test_hash_many() {
crate::test::test_hash_many_fn(hash_many, hash_many);
}
#[test]
fn test_xof_and_xor() {
crate::test::test_xof_and_xor_fns(xof, xof_xor);
}
#[test]
fn test_universal_hash() {
crate::test::test_universal_hash_fn(universal_hash);
}
}

View File

@@ -1,6 +1,7 @@
use crate::{CVBytes, CVWords, IncrementCounter, BLOCK_LEN, CHUNK_LEN, OUT_LEN};
use arrayref::array_ref;
use arrayvec::ArrayVec;
use core::cmp;
use core::usize;
use rand::prelude::*;
@@ -51,6 +52,13 @@ pub const TEST_KEY_WORDS: CVWords = [
1952540791, 1752440947, 1816469605, 1752394102, 1919907616, 1868963940, 1919295602, 1684956521,
];
// Test a few different initial counter values.
// - 0: The base case.
// - i32::MAX: *No* overflow. But carry bugs in tricky SIMD code can screw this up, if you XOR
// when you're supposed to ANDNOT...
// - u32::MAX: The low word of the counter overflows for all inputs except the first.
const INITIAL_COUNTERS: &[u64] = &[0, i32::MAX as u64, u32::MAX as u64];
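// A concrete illustration of the u32::MAX case (a hypothetical example test, not part of
// this commit): incrementing the counter past u32::MAX wraps the low word to zero and
// carries into the high word, which SIMD implementations must handle explicitly.
#[test]
fn test_counter_words_carry_example() {
    assert_eq!(crate::counter_low(u32::MAX as u64), u32::MAX);
    assert_eq!(crate::counter_high(u32::MAX as u64), 0);
    assert_eq!(crate::counter_low(u32::MAX as u64 + 1), 0);
    assert_eq!(crate::counter_high(u32::MAX as u64 + 1), 1);
}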
// Paint the input with a repeating byte pattern. We use a cycle length of 251,
// because that's the largest prime number less than 256. This makes it
unlikely that swapping any two adjacent input blocks or chunks will give the
@@ -111,13 +119,7 @@ pub fn test_hash_many_fn(
hash_many_chunks_fn: HashManyFn<[u8; CHUNK_LEN]>,
hash_many_parents_fn: HashManyFn<[u8; 2 * OUT_LEN]>,
) {
// Test a few different initial counter values.
// - 0: The base case.
// - u32::MAX: The low word of the counter overflows for all inputs except the first.
// - i32::MAX: *No* overflow. But carry bugs in tricky SIMD code can screw this up, if you XOR
// when you're supposed to ANDNOT...
let initial_counters = [0, u32::MAX as u64, i32::MAX as u64];
for counter in initial_counters {
for &counter in INITIAL_COUNTERS {
#[cfg(feature = "std")]
dbg!(counter);
@@ -206,6 +208,200 @@ pub fn test_hash_many_fn(
}
}
// Both xof() and xof_xor() have this signature.
type XofFn = unsafe fn(
block: &[u8; BLOCK_LEN],
block_len: u8,
cv: &[u32; 8],
counter: u64,
flags: u8,
out: &mut [u8],
);
pub fn test_xof_and_xor_fns(target_xof: XofFn, target_xof_xor: XofFn) {
// 31 (16 + 8 + 4 + 2 + 1) outputs
const NUM_OUTPUTS: usize = 31;
let different_flags = [
crate::CHUNK_START | crate::CHUNK_END | crate::ROOT,
crate::PARENT | crate::ROOT | crate::KEYED_HASH,
];
for input_len in [0, 1, BLOCK_LEN] {
let mut input_block = [0u8; BLOCK_LEN];
crate::test::paint_test_input(&mut input_block[..input_len]);
for output_len in [0, 1, BLOCK_LEN, BLOCK_LEN + 1, BLOCK_LEN * NUM_OUTPUTS] {
let mut test_output_buf = [0xff; BLOCK_LEN * NUM_OUTPUTS];
for &counter in INITIAL_COUNTERS {
for flags in different_flags {
let mut expected_output_buf = [0xff; BLOCK_LEN * NUM_OUTPUTS];
crate::portable::xof(
&input_block,
input_len as u8,
&TEST_KEY_WORDS,
counter,
flags,
&mut expected_output_buf[..output_len],
);
unsafe {
target_xof(
&input_block,
input_len as u8,
&TEST_KEY_WORDS,
counter,
flags,
&mut test_output_buf[..output_len],
);
}
assert_eq!(
expected_output_buf[..output_len],
test_output_buf[..output_len],
);
// Make sure unsafe implementations don't write past the end of the output. This shouldn't
// be possible in the portable implementation, which is all safe code, but it could happen
// in others.
assert!(test_output_buf[output_len..].iter().all(|&b| b == 0xff));
// The first XOR cancels out the output.
unsafe {
target_xof_xor(
&input_block,
input_len as u8,
&TEST_KEY_WORDS,
counter,
flags,
&mut test_output_buf[..output_len],
);
}
assert!(test_output_buf[..output_len].iter().all(|&b| b == 0));
assert!(test_output_buf[output_len..].iter().all(|&b| b == 0xff));
// The second XOR restores the output.
unsafe {
target_xof_xor(
&input_block,
input_len as u8,
&TEST_KEY_WORDS,
counter,
flags,
&mut test_output_buf[..output_len],
);
}
assert_eq!(
expected_output_buf[..output_len],
test_output_buf[..output_len],
);
assert!(test_output_buf[output_len..].iter().all(|&b| b == 0xff));
}
}
}
}
}
#[test]
fn test_compare_reference_impl_xof() {
const NUM_OUTPUTS: usize = 31;
let input = b"hello world";
let mut input_block = [0; BLOCK_LEN];
input_block[..input.len()].copy_from_slice(input);
let mut reference_output_buf = [0; BLOCK_LEN * NUM_OUTPUTS];
let mut reference_hasher = reference_impl::Hasher::new_keyed(&TEST_KEY);
reference_hasher.update(input);
reference_hasher.finalize(&mut reference_output_buf);
for output_len in [0, 1, BLOCK_LEN, BLOCK_LEN + 1, BLOCK_LEN * NUM_OUTPUTS] {
let mut test_output_buf = [0; BLOCK_LEN * NUM_OUTPUTS];
crate::platform::Platform::detect().xof(
&input_block,
input.len() as u8,
&TEST_KEY_WORDS,
0,
crate::KEYED_HASH | crate::CHUNK_START | crate::CHUNK_END | crate::ROOT,
&mut test_output_buf[..output_len],
);
assert_eq!(
reference_output_buf[..output_len],
test_output_buf[..output_len],
);
// Make sure unsafe implementations don't write past the end of the output. This shouldn't
// be possible in the portable implementation, which is all safe code, but it could happen
// in others.
assert!(test_output_buf[output_len..].iter().all(|&b| b == 0));
// Do it again starting from block 1.
if output_len >= BLOCK_LEN {
crate::platform::Platform::detect().xof(
&input_block,
input.len() as u8,
&TEST_KEY_WORDS,
1,
crate::KEYED_HASH | crate::CHUNK_START | crate::CHUNK_END | crate::ROOT,
&mut test_output_buf[..output_len - BLOCK_LEN],
);
assert_eq!(
reference_output_buf[BLOCK_LEN..output_len],
test_output_buf[..output_len - BLOCK_LEN],
);
}
}
}
type UniversalHashFn = unsafe fn(input: &[u8], key: &[u32; 8], counter: u64) -> [u8; BLOCK_LEN];
pub fn test_universal_hash_fn(target_fn: UniversalHashFn) {
// 31 (16 + 8 + 4 + 2 + 1) inputs
const NUM_INPUTS: usize = 31;
let mut input_buf = [0; BLOCK_LEN * NUM_INPUTS];
crate::test::paint_test_input(&mut input_buf);
for len in [0, 1, BLOCK_LEN, BLOCK_LEN + 1, input_buf.len()] {
for &counter in INITIAL_COUNTERS {
let portable_output =
crate::portable::universal_hash(&input_buf[..len], &TEST_KEY_WORDS, counter);
let test_output = unsafe { target_fn(&input_buf[..len], &TEST_KEY_WORDS, counter) };
assert_eq!(portable_output, test_output);
}
}
}
fn reference_impl_universal_hash(input: &[u8], key: &[u8; crate::KEY_LEN]) -> [u8; BLOCK_LEN] {
// The reference_impl doesn't support XOF seeking, so we have to materialize an entire extended
// output to seek to a block.
const MAX_BLOCKS: usize = 31;
assert!(input.len() / BLOCK_LEN <= MAX_BLOCKS);
let mut output_buffer: [u8; BLOCK_LEN * MAX_BLOCKS] = [0u8; BLOCK_LEN * MAX_BLOCKS];
let mut result = [0u8; BLOCK_LEN];
let mut i = 0;
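// Loop at least once, so that an empty input still contributes one (zero-length) block.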
while i == 0 || i < input.len() {
let block_len = cmp::min(input.len() - i, BLOCK_LEN);
let mut reference_hasher = reference_impl::Hasher::new_keyed(key);
reference_hasher.update(&input[i..i + block_len]);
reference_hasher.finalize(&mut output_buffer);
for (result_byte, output_byte) in result
.iter_mut()
.zip(output_buffer[i..i + BLOCK_LEN].iter())
{
*result_byte ^= *output_byte;
}
i += BLOCK_LEN;
}
result
}
#[test]
fn test_compare_reference_impl_universal_hash() {
const NUM_INPUTS: usize = 31;
let mut input_buf = [0; BLOCK_LEN * NUM_INPUTS];
crate::test::paint_test_input(&mut input_buf);
for len in [0, 1, BLOCK_LEN, BLOCK_LEN + 1, input_buf.len()] {
let reference_output = reference_impl_universal_hash(&input_buf[..len], &TEST_KEY);
let test_output = crate::platform::Platform::detect().universal_hash(
&input_buf[..len],
&TEST_KEY_WORDS,
0,
);
assert_eq!(reference_output, test_output);
}
}
#[test]
fn test_key_bytes_equal_key_words() {
assert_eq!(