mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-05-23 21:17:06 +02:00
WIP
This commit is contained in:
parent
e302cdf36f
commit
589f2c3f48
|
@ -1,4 +1,4 @@
|
|||
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN};
|
||||
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN, CHUNK_LEN};
|
||||
use arrayref::{array_mut_ref, array_ref};
|
||||
|
||||
cfg_if::cfg_if! {
|
||||
|
@ -272,6 +272,73 @@ impl Platform {
|
|||
}
|
||||
}
|
||||
|
||||
// Hashes N=input.len()/CHUNK_LEN chunks and writes N transposed chunk CVs to the output,
|
||||
// starting at the column given by num_cvs (i.e. appending to the transposed CVs already
|
||||
// present). After returning, the total number of transposed CVs in the output will be
|
||||
// num_cvs+N. N and num_cvs must both be less than or equal to simd_degree. Any partial chunk
|
||||
// bytes in the input after the last complete chunk are ignored and need to be hashed
|
||||
// separately by the caller. The counter argument is the value of the chunk counter for the
|
||||
// first chunk, and it's incremented by 1 for each chunk after the first. The CHUNK_START and
|
||||
// CHUNK_END flags are set internally.
|
||||
pub fn hash_chunks(
|
||||
&self,
|
||||
input: &[u8],
|
||||
key: &[u32; 8],
|
||||
counter: u64,
|
||||
flags: u8,
|
||||
cvs_out: &mut TransposedVectors,
|
||||
num_cvs: usize,
|
||||
) {
|
||||
debug_assert!(input.len() / CHUNK_LEN <= self.simd_degree());
|
||||
debug_assert!(num_cvs <= self.simd_degree());
|
||||
portable::hash_chunks(input, key, counter, flags, cvs_out, num_cvs);
|
||||
// XXX: should separate the thing that hashes the remainder from this interface
|
||||
}
|
||||
|
||||
// Writes out N=num_cvs/2 transposed parent CVs in-place over the first N columns of the input
|
||||
// CVs. Columns N and above are unmodified. N must be less than or equal to 2*simd_degree. If
|
||||
// num_cvs is odd, the final input CV is ignored, and the caller should copy it from column
|
||||
// 2N+1 to column N after this function returns. The PARENT flag is added internally.
|
||||
pub fn hash_parents(
|
||||
&self,
|
||||
cvs: &mut TransposedVectors,
|
||||
num_cvs: usize,
|
||||
key: &[u32; 8],
|
||||
flags: u8,
|
||||
) {
|
||||
debug_assert!(num_cvs <= 2 * self.simd_degree());
|
||||
portable::hash_parents(cvs, num_cvs, key, flags);
|
||||
// XXX: should separate the thing that copies the last CV over from this interface
|
||||
}
|
||||
|
||||
pub fn xof(
|
||||
&self,
|
||||
block: &[u8; BLOCK_LEN],
|
||||
block_len: u8,
|
||||
cv: &[u32; 8],
|
||||
counter: u64,
|
||||
flags: u8,
|
||||
out: &mut [u8],
|
||||
) {
|
||||
portable::xof(block, block_len, cv, counter, flags, out);
|
||||
}
|
||||
|
||||
pub fn xof_xor(
|
||||
&self,
|
||||
block: &[u8; BLOCK_LEN],
|
||||
block_len: u8,
|
||||
cv: &[u32; 8],
|
||||
counter: u64,
|
||||
flags: u8,
|
||||
out: &mut [u8],
|
||||
) {
|
||||
portable::xof_xor(block, block_len, cv, counter, flags, out);
|
||||
}
|
||||
|
||||
pub fn universal_hash(&self, input: &[u8], key: &[u32; 8], counter: u64) -> [u8; 64] {
|
||||
portable::universal_hash(input, key, counter)
|
||||
}
|
||||
|
||||
// Explicit platform constructors, for benchmarks.
|
||||
|
||||
pub fn portable() -> Self {
|
||||
|
@ -485,3 +552,8 @@ pub fn le_bytes_from_words_64(words: &[u32; 16]) -> [u8; 64] {
|
|||
*array_mut_ref!(out, 15 * 4, 4) = words[15].to_le_bytes();
|
||||
out
|
||||
}
|
||||
|
||||
#[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), repr(C, align(64)))]
|
||||
/// A buffer of chaining values stored in transposed (word-major) order.
///
/// Row `w` holds word `w` of every CV and each column is one CV, so word `w` of the CV in
/// column `c` is `self.0[w][c]`. There are 8 rows and `2 * MAX_SIMD_DEGREE` columns. On x86
/// and x86_64 the attribute above makes the struct `repr(C)` with 64-byte alignment.
pub struct TransposedVectors(pub [[u32; 2 * MAX_SIMD_DEGREE]; 8]);
|
||||
|
||||
/// Wrapper around a raw `*mut u32`; the pointer field is private.
///
/// NOTE(review): nothing in the visible part of this change constructs or reads this type.
/// Presumably it is meant to address a column inside a `TransposedVectors` -- confirm the
/// intended invariants (validity, stride, aliasing) before relying on it.
pub struct StridedOutput(*mut u32);
|
||||
|
|
127
src/portable.rs
127
src/portable.rs
|
@ -1,8 +1,9 @@
|
|||
use crate::{
|
||||
counter_high, counter_low, CVBytes, CVWords, IncrementCounter, BLOCK_LEN, IV, MSG_SCHEDULE,
|
||||
OUT_LEN,
|
||||
counter_high, counter_low, platform::TransposedVectors, CVBytes, CVWords, IncrementCounter,
|
||||
BLOCK_LEN, CHUNK_LEN, IV, MSG_SCHEDULE, OUT_LEN,
|
||||
};
|
||||
use arrayref::{array_mut_ref, array_ref};
|
||||
use core::cmp;
|
||||
|
||||
#[inline(always)]
|
||||
fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) {
|
||||
|
@ -177,14 +178,118 @@ pub fn hash_many<const N: usize>(
|
|||
}
|
||||
}
|
||||
|
||||
pub fn hash_chunks(
|
||||
input: &[u8],
|
||||
key: &[u32; 8],
|
||||
counter: u64,
|
||||
flags: u8,
|
||||
output: &mut TransposedVectors,
|
||||
output_offset: usize,
|
||||
) {
|
||||
const LAST_BLOCK_INDEX: usize = (CHUNK_LEN / BLOCK_LEN) - 1;
|
||||
// There might be a partial chunk at the end. If so, we ignore it here, and the caller will
|
||||
// hash it separately.
|
||||
let num_chunks = input.len() / CHUNK_LEN;
|
||||
for chunk_index in 0..num_chunks {
|
||||
let mut cv = *key;
|
||||
for block_index in 0..CHUNK_LEN / BLOCK_LEN {
|
||||
compress_in_place(
|
||||
&mut cv,
|
||||
input[CHUNK_LEN * chunk_index + BLOCK_LEN * block_index..][..BLOCK_LEN]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
BLOCK_LEN as u8,
|
||||
counter + chunk_index as u64,
|
||||
match block_index {
|
||||
0 => flags | crate::CHUNK_START,
|
||||
LAST_BLOCK_INDEX => flags | crate::CHUNK_END,
|
||||
_ => flags,
|
||||
},
|
||||
);
|
||||
}
|
||||
for word_index in 0..cv.len() {
|
||||
output.0[word_index][output_offset + chunk_index] = cv[word_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn hash_parents(cvs: &mut TransposedVectors, num_cvs: usize, key: &[u32; 8], flags: u8) {
|
||||
// Note that there may be an odd number of children. If there's a leftover child, it gets
|
||||
// appended to the outputs by the caller. We will not overwrite it.
|
||||
let num_parents = num_cvs / 2;
|
||||
todo!()
|
||||
}
|
||||
|
||||
pub fn xof(
|
||||
block: &[u8; BLOCK_LEN],
|
||||
block_len: u8,
|
||||
cv: &[u32; 8],
|
||||
mut counter: u64,
|
||||
flags: u8,
|
||||
mut out: &mut [u8],
|
||||
) {
|
||||
while !out.is_empty() {
|
||||
let block_output = compress_xof(cv, block, block_len, counter, flags);
|
||||
let take = cmp::min(BLOCK_LEN, out.len());
|
||||
out[..take].copy_from_slice(&block_output[..take]);
|
||||
out = &mut out[take..];
|
||||
counter += 1;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn xof_xor(
|
||||
block: &[u8; BLOCK_LEN],
|
||||
block_len: u8,
|
||||
cv: &[u32; 8],
|
||||
mut counter: u64,
|
||||
flags: u8,
|
||||
mut out: &mut [u8],
|
||||
) {
|
||||
while !out.is_empty() {
|
||||
let block_output = compress_xof(cv, block, block_len, counter, flags);
|
||||
let take = cmp::min(BLOCK_LEN, out.len());
|
||||
for i in 0..take {
|
||||
out[i] ^= block_output[i];
|
||||
}
|
||||
out = &mut out[take..];
|
||||
counter += 1;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn universal_hash(mut input: &[u8], key: &[u32; 8], mut counter: u64) -> [u8; BLOCK_LEN] {
|
||||
let flags = crate::KEYED_HASH | crate::CHUNK_START | crate::CHUNK_END | crate::ROOT;
|
||||
let mut result = [0u8; BLOCK_LEN];
|
||||
while input.len() > BLOCK_LEN {
|
||||
let block_output = compress_xof(
|
||||
key,
|
||||
&input[..BLOCK_LEN].try_into().unwrap(),
|
||||
BLOCK_LEN as u8,
|
||||
counter,
|
||||
flags,
|
||||
);
|
||||
for i in 0..BLOCK_LEN {
|
||||
result[i] ^= block_output[i];
|
||||
}
|
||||
input = &input[BLOCK_LEN..];
|
||||
counter += 1;
|
||||
}
|
||||
let mut final_block = [0u8; BLOCK_LEN];
|
||||
final_block[..input.len()].copy_from_slice(input);
|
||||
let final_output = compress_xof(key, &final_block, input.len() as u8, counter, flags);
|
||||
for i in 0..BLOCK_LEN {
|
||||
result[i] ^= final_output[i];
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use super::*;
|
||||
|
||||
// This is basically testing the portable implementation against itself,
|
||||
// but it also checks that compress_in_place and compress_xof are
|
||||
// consistent. And there are tests against the reference implementation and
|
||||
// against hardcoded test vectors elsewhere.
|
||||
// These are basically testing the portable implementation against itself, but we also check
|
||||
// that compress_in_place and compress_xof are consistent. And there are tests against the
|
||||
// reference implementation and against hardcoded test vectors elsewhere.
|
||||
|
||||
#[test]
|
||||
fn test_compress() {
|
||||
crate::test::test_compress_fn(compress_in_place, compress_xof);
|
||||
|
@ -195,4 +300,14 @@ pub mod test {
|
|||
fn test_hash_many() {
|
||||
crate::test::test_hash_many_fn(hash_many, hash_many);
|
||||
}
|
||||
|
||||
// Runs the shared XOF test driver with the portable xof() and xof_xor() as the targets.
#[test]
fn test_xof_and_xor() {
    crate::test::test_xof_and_xor_fns(xof, xof_xor);
}
|
||||
|
||||
// Runs the shared universal hash test driver with the portable universal_hash() as the target.
#[test]
fn test_universal_hash() {
    crate::test::test_universal_hash_fn(universal_hash);
}
|
||||
}
|
||||
|
|
210
src/test.rs
210
src/test.rs
|
@ -1,6 +1,7 @@
|
|||
use crate::{CVBytes, CVWords, IncrementCounter, BLOCK_LEN, CHUNK_LEN, OUT_LEN};
|
||||
use arrayref::array_ref;
|
||||
use arrayvec::ArrayVec;
|
||||
use core::cmp;
|
||||
use core::usize;
|
||||
use rand::prelude::*;
|
||||
|
||||
|
@ -51,6 +52,13 @@ pub const TEST_KEY_WORDS: CVWords = [
|
|||
1952540791, 1752440947, 1816469605, 1752394102, 1919907616, 1868963940, 1919295602, 1684956521,
|
||||
];
|
||||
|
||||
// Test a few different initial counter values.
|
||||
// - 0: The base case.
|
||||
// - i32::MAX: *No* overflow. But carry bugs in tricky SIMD code can screw this up, if you XOR
|
||||
// when you're supposed to ANDNOT...
|
||||
// - u32::MAX: The low word of the counter overflows for all inputs except the first.
|
||||
const INITIAL_COUNTERS: &[u64] = &[0, i32::MAX as u64, u32::MAX as u64];
|
||||
|
||||
// Paint the input with a repeating byte pattern. We use a cycle length of 251,
|
||||
// because that's the largest prime number less than 256. This makes it
|
||||
// unlikely that swapping any two adjacent input blocks or chunks will give the
|
||||
|
@ -111,13 +119,7 @@ pub fn test_hash_many_fn(
|
|||
hash_many_chunks_fn: HashManyFn<[u8; CHUNK_LEN]>,
|
||||
hash_many_parents_fn: HashManyFn<[u8; 2 * OUT_LEN]>,
|
||||
) {
|
||||
// Test a few different initial counter values.
|
||||
// - 0: The base case.
|
||||
// - u32::MAX: The low word of the counter overflows for all inputs except the first.
|
||||
// - i32::MAX: *No* overflow. But carry bugs in tricky SIMD code can screw this up, if you XOR
|
||||
// when you're supposed to ANDNOT...
|
||||
let initial_counters = [0, u32::MAX as u64, i32::MAX as u64];
|
||||
for counter in initial_counters {
|
||||
for &counter in INITIAL_COUNTERS {
|
||||
#[cfg(feature = "std")]
|
||||
dbg!(counter);
|
||||
|
||||
|
@ -206,6 +208,200 @@ pub fn test_hash_many_fn(
|
|||
}
|
||||
}
|
||||
|
||||
// Both xof() and xof_xor() have this signature.
|
||||
type XofFn = unsafe fn(
|
||||
block: &[u8; BLOCK_LEN],
|
||||
block_len: u8,
|
||||
cv: &[u32; 8],
|
||||
counter: u64,
|
||||
flags: u8,
|
||||
out: &mut [u8],
|
||||
);
|
||||
|
||||
pub fn test_xof_and_xor_fns(target_xof: XofFn, target_xof_xor: XofFn) {
|
||||
// 31 (16 + 8 + 4 + 2 + 1) outputs
|
||||
const NUM_OUTPUTS: usize = 31;
|
||||
let different_flags = [
|
||||
crate::CHUNK_START | crate::CHUNK_END | crate::ROOT,
|
||||
crate::PARENT | crate::ROOT | crate::KEYED_HASH,
|
||||
];
|
||||
for input_len in [0, 1, BLOCK_LEN] {
|
||||
let mut input_block = [0u8; BLOCK_LEN];
|
||||
crate::test::paint_test_input(&mut input_block[..input_len]);
|
||||
for output_len in [0, 1, BLOCK_LEN, BLOCK_LEN + 1, BLOCK_LEN * NUM_OUTPUTS] {
|
||||
let mut test_output_buf = [0xff; BLOCK_LEN * NUM_OUTPUTS];
|
||||
for &counter in INITIAL_COUNTERS {
|
||||
for flags in different_flags {
|
||||
let mut expected_output_buf = [0xff; BLOCK_LEN * NUM_OUTPUTS];
|
||||
crate::portable::xof(
|
||||
&input_block,
|
||||
input_len as u8,
|
||||
&TEST_KEY_WORDS,
|
||||
counter,
|
||||
flags,
|
||||
&mut expected_output_buf[..output_len],
|
||||
);
|
||||
|
||||
unsafe {
|
||||
target_xof(
|
||||
&input_block,
|
||||
input_len as u8,
|
||||
&TEST_KEY_WORDS,
|
||||
counter,
|
||||
flags,
|
||||
&mut test_output_buf[..output_len],
|
||||
);
|
||||
}
|
||||
assert_eq!(
|
||||
expected_output_buf[..output_len],
|
||||
test_output_buf[..output_len],
|
||||
);
|
||||
// Make sure unsafe implementations don't overwrite. This shouldn't be possible in the
|
||||
// portable implementation, which is all safe code, but it could happen in others.
|
||||
assert!(test_output_buf[output_len..].iter().all(|&b| b == 0xff));
|
||||
|
||||
// The first XOR cancels out the output.
|
||||
unsafe {
|
||||
target_xof_xor(
|
||||
&input_block,
|
||||
input_len as u8,
|
||||
&TEST_KEY_WORDS,
|
||||
counter,
|
||||
flags,
|
||||
&mut test_output_buf[..output_len],
|
||||
);
|
||||
}
|
||||
assert!(test_output_buf[..output_len].iter().all(|&b| b == 0));
|
||||
assert!(test_output_buf[output_len..].iter().all(|&b| b == 0xff));
|
||||
|
||||
// The second XOR restores out the output.
|
||||
unsafe {
|
||||
target_xof_xor(
|
||||
&input_block,
|
||||
input_len as u8,
|
||||
&TEST_KEY_WORDS,
|
||||
counter,
|
||||
flags,
|
||||
&mut test_output_buf[..output_len],
|
||||
);
|
||||
}
|
||||
assert_eq!(
|
||||
expected_output_buf[..output_len],
|
||||
test_output_buf[..output_len],
|
||||
);
|
||||
assert!(test_output_buf[output_len..].iter().all(|&b| b == 0xff));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Compares Platform::xof() against the extended output of the reference implementation's
// keyed hash of a short input, both from the start of the output and from block 1.
#[test]
fn test_compare_reference_impl_xof() {
    const NUM_OUTPUTS: usize = 31;
    let input = b"hello world";
    let mut input_block = [0; BLOCK_LEN];
    input_block[..input.len()].copy_from_slice(input);

    let mut reference_output_buf = [0; BLOCK_LEN * NUM_OUTPUTS];
    let mut reference_hasher = reference_impl::Hasher::new_keyed(&TEST_KEY);
    reference_hasher.update(input);
    reference_hasher.finalize(&mut reference_output_buf);

    // The input fits in one block, so that block is the first and last block of the only
    // chunk, and also the root.
    let flags = crate::KEYED_HASH | crate::CHUNK_START | crate::CHUNK_END | crate::ROOT;
    let xof_from_block = |counter: u64, out: &mut [u8]| {
        crate::platform::Platform::detect().xof(
            &input_block,
            input.len() as u8,
            &TEST_KEY_WORDS,
            counter,
            flags,
            out,
        );
    };

    for output_len in [0, 1, BLOCK_LEN, BLOCK_LEN + 1, BLOCK_LEN * NUM_OUTPUTS] {
        let mut test_output_buf = [0; BLOCK_LEN * NUM_OUTPUTS];
        xof_from_block(0, &mut test_output_buf[..output_len]);
        assert_eq!(
            reference_output_buf[..output_len],
            test_output_buf[..output_len],
        );

        // Make sure unsafe implementations don't overwrite. This shouldn't be possible in the
        // portable implementation, which is all safe code, but it could happen in others.
        assert!(test_output_buf[output_len..].iter().all(|&b| b == 0));

        // Do it again starting from block 1. The result must match the reference output with
        // its first block skipped.
        if output_len >= BLOCK_LEN {
            xof_from_block(1, &mut test_output_buf[..output_len - BLOCK_LEN]);
            assert_eq!(
                reference_output_buf[BLOCK_LEN..output_len],
                test_output_buf[..output_len - BLOCK_LEN],
            );
        }
    }
}
|
||||
|
||||
type UniversalHashFn = unsafe fn(input: &[u8], key: &[u32; 8], counter: u64) -> [u8; BLOCK_LEN];
|
||||
|
||||
pub fn test_universal_hash_fn(target_fn: UniversalHashFn) {
|
||||
// 31 (16 + 8 + 4 + 2 + 1) inputs
|
||||
const NUM_INPUTS: usize = 31;
|
||||
let mut input_buf = [0; BLOCK_LEN * NUM_INPUTS];
|
||||
crate::test::paint_test_input(&mut input_buf);
|
||||
for len in [0, 1, BLOCK_LEN, BLOCK_LEN + 1, input_buf.len()] {
|
||||
for &counter in INITIAL_COUNTERS {
|
||||
let portable_output =
|
||||
crate::portable::universal_hash(&input_buf[..len], &TEST_KEY_WORDS, counter);
|
||||
let test_output = unsafe { target_fn(&input_buf[..len], &TEST_KEY_WORDS, counter) };
|
||||
assert_eq!(portable_output, test_output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn reference_impl_universal_hash(input: &[u8], key: &[u8; crate::KEY_LEN]) -> [u8; BLOCK_LEN] {
|
||||
// The reference_impl doesn't support XOF seeking, so we have to materialize an entire extended
|
||||
// output to seek to a block.
|
||||
const MAX_BLOCKS: usize = 31;
|
||||
assert!(input.len() / BLOCK_LEN <= MAX_BLOCKS);
|
||||
let mut output_buffer: [u8; BLOCK_LEN * MAX_BLOCKS] = [0u8; BLOCK_LEN * MAX_BLOCKS];
|
||||
let mut result = [0u8; BLOCK_LEN];
|
||||
let mut i = 0;
|
||||
while i == 0 || i < input.len() {
|
||||
let block_len = cmp::min(input.len() - i, BLOCK_LEN);
|
||||
let mut reference_hasher = reference_impl::Hasher::new_keyed(key);
|
||||
reference_hasher.update(&input[i..i + block_len]);
|
||||
reference_hasher.finalize(&mut output_buffer);
|
||||
for (result_byte, output_byte) in result
|
||||
.iter_mut()
|
||||
.zip(output_buffer[i..i + BLOCK_LEN].iter())
|
||||
{
|
||||
*result_byte ^= *output_byte;
|
||||
}
|
||||
i += BLOCK_LEN;
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
// Compares Platform::universal_hash() with an initial counter of 0 against the construction
// built from the reference implementation, for several input lengths.
#[test]
fn test_compare_reference_impl_universal_hash() {
    const NUM_INPUTS: usize = 31;
    let mut input_buf = [0; BLOCK_LEN * NUM_INPUTS];
    crate::test::paint_test_input(&mut input_buf);

    let input_lens = [0, 1, BLOCK_LEN, BLOCK_LEN + 1, input_buf.len()];
    for input in input_lens.iter().map(|&len| &input_buf[..len]) {
        let reference_output = reference_impl_universal_hash(input, &TEST_KEY);
        let test_output =
            crate::platform::Platform::detect().universal_hash(input, &TEST_KEY_WORDS, 0);
        assert_eq!(reference_output, test_output);
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_key_bytes_equal_key_words() {
|
||||
assert_eq!(
|
||||
|
|
Loading…
Reference in New Issue