Mirror of https://github.com/BLAKE3-team/BLAKE3

WIP: use bytes instead of words in more places

Jack O'Connor 2023-07-09 12:54:00 -07:00
parent 418f8f18e7
commit 5ee3d75afa
3 changed files with 317 additions and 246 deletions

File 1 of 3

@ -19,9 +19,10 @@ pub const KEYED_HASH: u32 = 1 << 4;
pub const DERIVE_KEY_CONTEXT: u32 = 1 << 5;
pub const DERIVE_KEY_MATERIAL: u32 = 1 << 6;
pub const IV: [u32; 8] = [
pub const IV: CVWords = [
0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19,
];
pub const IV_BYTES: CVBytes = le_bytes_from_words_32(&IV);
pub const MSG_SCHEDULE: [[usize; 16]; 7] = [
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
@ -44,6 +45,11 @@ cfg_if::cfg_if! {
}
}
pub type CVBytes = [u8; 32];
pub type CVWords = [u32; 8];
pub type BlockBytes = [u8; 64];
pub type BlockWords = [u32; 16];
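// Editorial sketch (not part of this commit): with the new aliases, a call
// site works entirely in little-endian bytes, with no word conversions.
// Assuming this crate is in scope as blake3_guts:
let block: blake3_guts::BlockBytes = [0x42; 64];
let cv: blake3_guts::CVBytes = blake3_guts::IV_BYTES;
let next_cv: blake3_guts::CVBytes = blake3_guts::compress(&block, 64, &cv, 0, 0);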
#[inline]
pub fn degree() -> usize {
DETECTED_IMPL.degree()
@ -58,30 +64,19 @@ pub fn split_transposed_vectors(
#[inline]
pub fn compress(
block: &[u8; 64],
block: &BlockBytes,
block_len: u32,
cv: &[u32; 8],
cv: &CVBytes,
counter: u64,
flags: u32,
) -> [u32; 8] {
) -> CVBytes {
DETECTED_IMPL.compress(block, block_len, cv, counter, flags)
}
#[inline]
pub fn compress_xof(
block: &[u8; 64],
block_len: u32,
cv: &[u32; 8],
counter: u64,
flags: u32,
) -> [u8; 64] {
DETECTED_IMPL.compress_xof(block, block_len, cv, counter, flags)
}
#[inline]
pub fn hash_chunks(
input: &[u8],
key: &[u32; 8],
key: &CVBytes,
counter: u64,
flags: u32,
transposed_output: TransposedSplit,
@ -93,7 +88,7 @@ pub fn hash_chunks(
pub fn hash_parents(
transposed_input: &TransposedVectors,
num_cvs: usize,
key: &[u32; 8],
key: &CVBytes,
flags: u32,
transposed_output: TransposedSplit,
) -> usize {
@ -104,7 +99,7 @@ pub fn hash_parents(
pub fn reduce_parents(
transposed_in_out: &mut TransposedVectors,
num_cvs: usize,
key: &[u32; 8],
key: &CVBytes,
flags: u32,
) -> usize {
DETECTED_IMPL.reduce_parents(transposed_in_out, num_cvs, key, flags)
@ -112,9 +107,9 @@ pub fn reduce_parents(
#[inline]
pub fn xof(
block: &[u8; 64],
block: &BlockBytes,
block_len: u32,
cv: &[u32; 8],
cv: &CVBytes,
counter: u64,
flags: u32,
out: &mut [u8],
@ -124,9 +119,9 @@ pub fn xof(
#[inline]
pub fn xof_xor(
block: &[u8; 64],
block: &BlockBytes,
block_len: u32,
cv: &[u32; 8],
cv: &CVBytes,
counter: u64,
flags: u32,
out: &mut [u8],
@ -135,7 +130,7 @@ pub fn xof_xor(
}
#[inline]
pub fn universal_hash(input: &[u8], key: &[u32; 8], counter: u64) -> [u8; 16] {
pub fn universal_hash(input: &[u8], key: &CVBytes, counter: u64) -> [u8; 16] {
DETECTED_IMPL.universal_hash(input, key, counter)
}
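// Editorial sketch (not part of this commit): universal_hash produces a
// 16-byte tag from the input, a 32-byte key, and a starting counter. The key
// value here is an assumed placeholder.
let key: blake3_guts::CVBytes = [0x07; 32];
let tag: [u8; 16] = blake3_guts::universal_hash(b"one block of input", &key, 0);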
@ -246,38 +241,17 @@ impl Implementation {
#[inline]
pub fn compress(
&self,
block: &[u8; 64],
block: &BlockBytes,
block_len: u32,
cv: &[u32; 8],
cv: &CVBytes,
counter: u64,
flags: u32,
) -> [u32; 8] {
let mut out = [0u32; 16];
) -> CVBytes {
let mut out = [0u8; 32];
unsafe {
self.compress_fn()(block, block_len, cv, counter, flags, &mut out);
}
out[..8].try_into().unwrap()
}
#[inline]
pub fn compress_xof(
&self,
block: &[u8; 64],
block_len: u32,
cv: &[u32; 8],
counter: u64,
flags: u32,
) -> [u8; 64] {
let mut out_words = [0u32; 16];
unsafe {
self.compress_fn()(block, block_len, cv, counter, flags, &mut out_words);
}
let mut out_bytes = [0u8; 64];
for word_index in 0..16 {
out_bytes[word_index * WORD_LEN..][..WORD_LEN]
.copy_from_slice(&out_words[word_index].to_le_bytes());
}
out_bytes
out
}
#[inline]
@ -289,7 +263,7 @@ impl Implementation {
pub fn hash_chunks(
&self,
input: &[u8],
key: &[u32; 8],
key: &CVBytes,
counter: u64,
flags: u32,
transposed_output: TransposedSplit,
@ -327,7 +301,7 @@ impl Implementation {
&self,
transposed_input: &TransposedVectors,
num_cvs: usize,
key: &[u32; 8],
key: &CVBytes,
flags: u32,
transposed_output: TransposedSplit,
) -> usize {
@ -359,7 +333,7 @@ impl Implementation {
&self,
transposed_in_out: &mut TransposedVectors,
num_cvs: usize,
key: &[u32; 8],
key: &CVBytes,
flags: u32,
) -> usize {
let num_parents = num_cvs / 2;
@ -385,9 +359,9 @@ impl Implementation {
#[inline]
pub fn xof(
&self,
block: &[u8; 64],
block: &BlockBytes,
block_len: u32,
cv: &[u32; 8],
cv: &CVBytes,
counter: u64,
flags: u32,
out: &mut [u8],
@ -413,9 +387,9 @@ impl Implementation {
#[inline]
pub fn xof_xor(
&self,
block: &[u8; 64],
block: &BlockBytes,
block_len: u32,
cv: &[u32; 8],
cv: &CVBytes,
counter: u64,
flags: u32,
out: &mut [u8],
@ -439,7 +413,7 @@ impl Implementation {
}
#[inline]
pub fn universal_hash(&self, input: &[u8], key: &[u32; 8], counter: u64) -> [u8; 16] {
pub fn universal_hash(&self, input: &[u8], key: &CVBytes, counter: u64) -> [u8; 16] {
let mut out = [0u8; 16];
unsafe {
self.universal_hash_fn()(input.as_ptr(), input.len(), key, counter, &mut out);
@ -471,30 +445,39 @@ fn degree_init() -> usize {
}
type CompressFn = unsafe extern "C" fn(
block: *const [u8; 64], // zero padded to 64 bytes
block: *const BlockBytes, // zero padded to 64 bytes
block_len: u32,
cv: *const [u32; 8],
cv: *const CVBytes,
counter: u64,
flags: u32,
out: *mut [u32; 16], // may overlap the input
out: *mut CVBytes, // may overlap the input
);
unsafe extern "C" fn compress_init(
block: *const [u8; 64],
block: *const BlockBytes,
block_len: u32,
cv: *const [u32; 8],
cv: *const CVBytes,
counter: u64,
flags: u32,
out: *mut [u32; 16],
out: *mut CVBytes,
) {
init_detected_impl();
DETECTED_IMPL.compress_fn()(block, block_len, cv, counter, flags, out);
}
type CompressXofFn = unsafe extern "C" fn(
block: *const BlockBytes, // zero padded to 64 bytes
block_len: u32,
cv: *const CVBytes,
counter: u64,
flags: u32,
out: *mut BlockBytes, // may overlap the input
);
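// Editorial note: CompressXofFn mirrors CompressFn but writes all 64 output
// bytes instead of a 32-byte CV. The *_init stubs in this file implement lazy
// dispatch: the function-pointer table initially points at the stubs, and on
// first use each stub calls init_detected_impl() and then forwards the same
// arguments through the freshly detected pointer.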
type HashChunksFn = unsafe extern "C" fn(
input: *const u8,
input_len: usize,
key: *const [u32; 8],
key: *const CVBytes,
counter: u64,
flags: u32,
transposed_output: *mut u32,
@ -503,7 +486,7 @@ type HashChunksFn = unsafe extern "C" fn(
unsafe extern "C" fn hash_chunks_init(
input: *const u8,
input_len: usize,
key: *const [u32; 8],
key: *const CVBytes,
counter: u64,
flags: u32,
transposed_output: *mut u32,
@ -515,7 +498,7 @@ unsafe extern "C" fn hash_chunks_init(
type HashParentsFn = unsafe extern "C" fn(
transposed_input: *const u32,
num_parents: usize,
key: *const [u32; 8],
key: *const CVBytes,
flags: u32,
transposed_output: *mut u32, // may overlap the input
);
@ -523,7 +506,7 @@ type HashParentsFn = unsafe extern "C" fn(
unsafe extern "C" fn hash_parents_init(
transposed_input: *const u32,
num_parents: usize,
key: *const [u32; 8],
key: *const CVBytes,
flags: u32,
transposed_output: *mut u32,
) {
@ -533,9 +516,9 @@ unsafe extern "C" fn hash_parents_init(
// This signature covers both xof() and xof_xor().
type XofFn = unsafe extern "C" fn(
block: *const [u8; 64], // zero padded to 64 bytes
block: *const BlockBytes, // zero padded to 64 bytes
block_len: u32,
cv: *const [u32; 8],
cv: *const CVBytes,
counter: u64,
flags: u32,
out: *mut u8,
@ -543,9 +526,9 @@ type XofFn = unsafe extern "C" fn(
);
unsafe extern "C" fn xof_init(
block: *const [u8; 64],
block: *const BlockBytes,
block_len: u32,
cv: *const [u32; 8],
cv: *const CVBytes,
counter: u64,
flags: u32,
out: *mut u8,
@ -556,9 +539,9 @@ unsafe extern "C" fn xof_init(
}
unsafe extern "C" fn xof_xor_init(
block: *const [u8; 64],
block: *const BlockBytes,
block_len: u32,
cv: *const [u32; 8],
cv: *const CVBytes,
counter: u64,
flags: u32,
out: *mut u8,
@ -571,7 +554,7 @@ unsafe extern "C" fn xof_xor_init(
type UniversalHashFn = unsafe extern "C" fn(
input: *const u8,
input_len: usize,
key: *const [u32; 8],
key: *const CVBytes,
counter: u64,
out: *mut [u8; 16],
);
@ -579,7 +562,7 @@ type UniversalHashFn = unsafe extern "C" fn(
unsafe extern "C" fn universal_hash_init(
input: *const u8,
input_len: usize,
key: *const [u32; 8],
key: *const CVBytes,
counter: u64,
out: *mut [u8; 16],
) {
@ -588,11 +571,12 @@ unsafe extern "C" fn universal_hash_init(
}
// The implicit degree of this implementation is MAX_SIMD_DEGREE.
#[inline(always)]
unsafe fn hash_chunks_using_compress(
compress: CompressFn,
mut input: *const u8,
mut input_len: usize,
key: *const [u32; 8],
key: *const CVBytes,
mut counter: u64,
flags: u32,
mut transposed_output: *mut u32,
@ -603,15 +587,14 @@ unsafe fn hash_chunks_using_compress(
let mut chunk_len = cmp::min(input_len, CHUNK_LEN);
input_len -= chunk_len;
// We only use 8 words of the CV, but compress returns 16.
let mut cv = [0u32; 16];
cv[..8].copy_from_slice(&*key);
let cv_ptr: *mut [u32; 16] = &mut cv;
let mut cv = *key;
let cv_ptr: *mut CVBytes = &mut cv;
let mut chunk_flags = flags | CHUNK_START;
while chunk_len > BLOCK_LEN {
compress(
input as *const [u8; 64],
input as *const BlockBytes,
BLOCK_LEN as u32,
cv_ptr as *const [u32; 8],
cv_ptr,
counter,
chunk_flags,
cv_ptr,
@ -626,15 +609,16 @@ unsafe fn hash_chunks_using_compress(
compress(
&last_block,
chunk_len as u32,
cv_ptr as *const [u32; 8],
cv_ptr,
counter,
chunk_flags | CHUNK_END,
cv_ptr,
);
let cv_words = words_from_le_bytes_32(&cv);
for word_index in 0..8 {
transposed_output
.add(word_index * TRANSPOSED_STRIDE)
.write(cv[word_index]);
.write(cv_words[word_index]);
}
transposed_output = transposed_output.add(1);
counter += 1;
@ -642,11 +626,12 @@ unsafe fn hash_chunks_using_compress(
}
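// Editorial note: the output above is transposed. Word w of chunk c's CV is
// written at transposed_output.add(w * TRANSPOSED_STRIDE + c), so each of the
// eight CV words forms a contiguous row across chunks, which is the layout
// the SIMD back ends consume.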
// The implicit degree of this implementation is MAX_SIMD_DEGREE.
#[inline(always)]
unsafe fn hash_parents_using_compress(
compress: CompressFn,
mut transposed_input: *const u32,
mut num_parents: usize,
key: *const [u32; 8],
key: *const CVBytes,
flags: u32,
mut transposed_output: *mut u32, // may overlap the input
) {
@ -664,12 +649,13 @@ unsafe fn hash_parents_using_compress(
block_bytes[WORD_LEN * (word_index + 8)..][..WORD_LEN]
.copy_from_slice(&right_child_word.to_le_bytes());
}
let mut cv = [0u32; 16];
let mut cv = [0u8; 32];
compress(&block_bytes, BLOCK_LEN as u32, key, 0, flags, &mut cv);
let cv_words = words_from_le_bytes_32(&cv);
for word_index in 0..8 {
transposed_output
.add(word_index * TRANSPOSED_STRIDE)
.write(cv[word_index]);
.write(cv_words[word_index]);
}
transposed_input = transposed_input.add(2);
transposed_output = transposed_output.add(1);
@ -677,70 +663,68 @@ unsafe fn hash_parents_using_compress(
}
}
unsafe fn xof_using_compress(
compress: CompressFn,
block: *const [u8; 64],
#[inline(always)]
unsafe fn xof_using_compress_xof(
compress_xof: CompressXofFn,
block: *const BlockBytes,
block_len: u32,
cv: *const [u32; 8],
cv: *const CVBytes,
mut counter: u64,
flags: u32,
mut out: *mut u8,
mut out_len: usize,
) {
while out_len > 0 {
let mut block_output = [0u32; 16];
compress(block, block_len, cv, counter, flags, &mut block_output);
for output_word in block_output {
let bytes = output_word.to_le_bytes();
let take = cmp::min(bytes.len(), out_len);
ptr::copy_nonoverlapping(bytes.as_ptr(), out, take);
out = out.add(take);
out_len -= take;
}
let mut block_output = [0u8; 64];
compress_xof(block, block_len, cv, counter, flags, &mut block_output);
let take = cmp::min(out_len, BLOCK_LEN);
ptr::copy_nonoverlapping(block_output.as_ptr(), out, take);
out = out.add(take);
out_len -= take;
counter += 1;
}
}
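// Editorial note: each counter value yields exactly one 64-byte block, so
// byte i of the XOF stream is byte i % 64 of the block computed with counter
// `counter + i / 64`; a short final read just takes a prefix of its block.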
unsafe fn xof_xor_using_compress(
compress: CompressFn,
block: *const [u8; 64],
#[inline(always)]
unsafe fn xof_xor_using_compress_xof(
compress_xof: CompressXofFn,
block: *const BlockBytes,
block_len: u32,
cv: *const [u32; 8],
cv: *const CVBytes,
mut counter: u64,
flags: u32,
mut out: *mut u8,
mut out_len: usize,
) {
while out_len > 0 {
let mut block_output = [0u32; 16];
compress(block, block_len, cv, counter, flags, &mut block_output);
for output_word in block_output {
let bytes = output_word.to_le_bytes();
for i in 0..cmp::min(bytes.len(), out_len) {
*out = *out ^ bytes[i];
out = out.add(1);
out_len -= 1;
}
let mut block_output = [0u8; 64];
compress_xof(block, block_len, cv, counter, flags, &mut block_output);
let take = cmp::min(out_len, BLOCK_LEN);
for i in 0..take {
*out.add(i) ^= block_output[i];
}
out = out.add(take);
out_len -= take;
counter += 1;
}
}
#[inline(always)]
unsafe fn universal_hash_using_compress(
compress: CompressFn,
mut input: *const u8,
mut input_len: usize,
key: *const [u32; 8],
key: *const CVBytes,
mut counter: u64,
out: *mut [u8; 16],
) {
let flags = KEYED_HASH | CHUNK_START | CHUNK_END | ROOT;
let mut result = [0u32; 4];
let mut result = [0u8; 16];
while input_len > 0 {
let block_len = cmp::min(input_len, BLOCK_LEN);
let mut block = [0u8; BLOCK_LEN];
ptr::copy_nonoverlapping(input, block.as_mut_ptr(), block_len);
let mut block_output = [0u32; 16];
let mut block_output = [0u8; 32];
compress(
&block,
BLOCK_LEN as u32,
@ -749,7 +733,7 @@ unsafe fn universal_hash_using_compress(
flags,
&mut block_output,
);
for i in 0..4 {
for i in 0..16 {
result[i] ^= block_output[i];
}
input = input.add(block_len);
@ -769,7 +753,7 @@ const TRANSPOSED_STRIDE: usize = 2 * MAX_SIMD_DEGREE;
pub struct TransposedVectors([[u32; 2 * MAX_SIMD_DEGREE]; 8]);
impl TransposedVectors {
pub fn parent_node(&self, parent_index: usize) -> [u8; 64] {
pub fn parent_node(&self, parent_index: usize) -> BlockBytes {
let mut bytes = [0u8; 64];
for word_index in 0..8 {
bytes[word_index * WORD_LEN..][..WORD_LEN]
@ -820,3 +804,84 @@ unsafe fn copy_one_transposed_cv(transposed_src: *const u32, transposed_dest: *m
transposed_dest.add(offset_words).write(word);
}
}
#[inline(always)]
pub const fn le_bytes_from_words_32(words: &CVWords) -> CVBytes {
let mut bytes = [0u8; 32];
// This loop is super verbose because currently that's what it takes to be const.
let mut word_index = 0;
while word_index < bytes.len() / WORD_LEN {
let word_bytes = words[word_index].to_le_bytes();
let mut byte_index = 0;
while byte_index < WORD_LEN {
bytes[word_index * WORD_LEN + byte_index] = word_bytes[byte_index];
byte_index += 1;
}
word_index += 1;
}
bytes
}
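// Editorial note: the manual `while` loops in these helpers exist because
// `for` loops aren't allowed in const fns on stable Rust (they desugar to
// Iterator trait calls, and trait methods can't yet be called in const
// contexts).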
#[inline(always)]
pub const fn le_bytes_from_words_64(words: &BlockWords) -> BlockBytes {
let mut bytes = [0u8; 64];
// This loop is super verbose because currently that's what it takes to be const.
let mut word_index = 0;
while word_index < bytes.len() / WORD_LEN {
let word_bytes = words[word_index].to_le_bytes();
let mut byte_index = 0;
while byte_index < WORD_LEN {
bytes[word_index * WORD_LEN + byte_index] = word_bytes[byte_index];
byte_index += 1;
}
word_index += 1;
}
bytes
}
#[inline(always)]
pub const fn words_from_le_bytes_32(bytes: &CVBytes) -> CVWords {
let mut words = [0u32; 8];
// This loop is super verbose because currently that's what it takes to be const.
let mut word_index = 0;
while word_index < words.len() {
let mut word_bytes = [0u8; WORD_LEN];
let mut byte_index = 0;
while byte_index < WORD_LEN {
word_bytes[byte_index] = bytes[word_index * WORD_LEN + byte_index];
byte_index += 1;
}
words[word_index] = u32::from_le_bytes(word_bytes);
word_index += 1;
}
words
}
#[inline(always)]
pub const fn words_from_le_bytes_64(bytes: &BlockBytes) -> BlockWords {
let mut words = [0u32; 16];
// This loop is super verbose because currently that's what it takes to be const.
let mut word_index = 0;
while word_index < words.len() {
let mut word_bytes = [0u8; WORD_LEN];
let mut byte_index = 0;
while byte_index < WORD_LEN {
word_bytes[byte_index] = bytes[word_index * WORD_LEN + byte_index];
byte_index += 1;
}
words[word_index] = u32::from_le_bytes(word_bytes);
word_index += 1;
}
words
}
#[test]
fn test_byte_word_round_trips() {
let cv = *b"This is 32 LE bytes/eight words.";
assert_eq!(cv, le_bytes_from_words_32(&words_from_le_bytes_32(&cv)));
let block = *b"This is sixty-four little-endian bytes, or sixteen 32-bit words.";
assert_eq!(
block,
le_bytes_from_words_64(&words_from_le_bytes_64(&block)),
);
}
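// Editorial sketch (not part of this commit): the same round-trip property
// ties the two IV constants together, since IV_BYTES is defined above as
// le_bytes_from_words_32(&IV):
assert_eq!(IV_BYTES, le_bytes_from_words_32(&IV));
assert_eq!(IV, words_from_le_bytes_32(&IV_BYTES));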

File 2 of 3

@ -1,9 +1,12 @@
use crate::{IV, MAX_SIMD_DEGREE, MSG_SCHEDULE, WORD_LEN};
use crate::{
le_bytes_from_words_32, le_bytes_from_words_64, words_from_le_bytes_32, words_from_le_bytes_64,
BlockBytes, BlockWords, CVBytes, CVWords, IV, MAX_SIMD_DEGREE, MSG_SCHEDULE,
};
pub const DEGREE: usize = MAX_SIMD_DEGREE;
#[inline(always)]
fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) {
fn g(state: &mut BlockWords, a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) {
state[a] = state[a].wrapping_add(state[b]).wrapping_add(x);
state[d] = (state[d] ^ state[a]).rotate_right(16);
state[c] = state[c].wrapping_add(state[d]);
@ -15,7 +18,7 @@ fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, x: u32, y: u
}
#[inline(always)]
fn round(state: &mut [u32; 16], msg: &[u32; 16], round: usize) {
fn round(state: &mut [u32; 16], msg: &BlockWords, round: usize) {
// Select the message schedule based on the round.
let schedule = MSG_SCHEDULE[round];
@ -33,30 +36,22 @@ fn round(state: &mut [u32; 16], msg: &[u32; 16], round: usize) {
}
#[inline(always)]
fn compress_safe(
block: &[u8; 64],
fn compress_inner(
block_words: &BlockWords,
block_len: u32,
cv: &[u32; 8],
cv_words: &CVWords,
counter: u64,
flags: u32,
) -> [u32; 16] {
let mut block_words = [0u32; 16];
for word_index in 0..16 {
block_words[word_index] = u32::from_le_bytes(
block[WORD_LEN * word_index..][..WORD_LEN]
.try_into()
.unwrap(),
);
}
let mut state = [
cv[0],
cv[1],
cv[2],
cv[3],
cv[4],
cv[5],
cv[6],
cv[7],
cv_words[0],
cv_words[1],
cv_words[2],
cv_words[3],
cv_words[4],
cv_words[5],
cv_words[6],
cv_words[7],
IV[0],
IV[1],
IV[2],
@ -69,28 +64,48 @@ fn compress_safe(
for round_number in 0..7 {
round(&mut state, &block_words, round_number);
}
for i in 0..8 {
state[i] ^= state[i + 8];
state[i + 8] ^= (*cv)[i];
}
state
}
pub unsafe extern "C" fn compress(
block: *const [u8; 64],
block: *const BlockBytes,
block_len: u32,
cv: *const [u32; 8],
cv: *const CVBytes,
counter: u64,
flags: u32,
out: *mut [u32; 16],
out: *mut CVBytes,
) {
*out = compress_safe(&*block, block_len, &*cv, counter, flags);
let block_words = words_from_le_bytes_64(&*block);
let cv_words = words_from_le_bytes_32(&*cv);
let mut state = compress_inner(&block_words, block_len, &cv_words, counter, flags);
for word_index in 0..8 {
state[word_index] ^= state[word_index + 8];
}
*out = le_bytes_from_words_32(state[..8].try_into().unwrap());
}
pub unsafe extern "C" fn compress_xof(
block: *const BlockBytes,
block_len: u32,
cv: *const CVBytes,
counter: u64,
flags: u32,
out: *mut BlockBytes,
) {
let block_words = words_from_le_bytes_64(&*block);
let cv_words = words_from_le_bytes_32(&*cv);
let mut state = compress_inner(&block_words, block_len, &cv_words, counter, flags);
for word_index in 0..8 {
state[word_index] ^= state[word_index + 8];
state[word_index + 8] ^= cv_words[word_index];
}
*out = le_bytes_from_words_64(&state);
}
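// Editorial sketch (not part of this commit): compress() and compress_xof()
// apply the same feed-forward to the lower half of the state, so the first 32
// bytes of an XOF block always equal the compressed CV for the same inputs:
let block = [0u8; 64];
let cv = crate::IV_BYTES;
let (mut cv_out, mut xof_out) = ([0u8; 32], [0u8; 64]);
unsafe {
    compress(&block, 64, &cv, 0, 0, &mut cv_out);
    compress_xof(&block, 64, &cv, 0, 0, &mut xof_out);
}
assert_eq!(&cv_out[..], &xof_out[..32]);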
pub unsafe extern "C" fn hash_chunks(
input: *const u8,
input_len: usize,
key: *const [u32; 8],
key: *const CVBytes,
counter: u64,
flags: u32,
transposed_output: *mut u32,
@ -109,7 +124,7 @@ pub unsafe extern "C" fn hash_chunks(
pub unsafe extern "C" fn hash_parents(
transposed_input: *const u32,
num_parents: usize,
key: *const [u32; 8],
key: *const CVBytes,
flags: u32,
transposed_output: *mut u32, // may overlap the input
) {
@ -124,33 +139,51 @@ pub unsafe extern "C" fn hash_parents(
}
pub unsafe extern "C" fn xof(
block: *const [u8; 64],
block: *const BlockBytes,
block_len: u32,
cv: *const [u32; 8],
cv: *const CVBytes,
counter: u64,
flags: u32,
out: *mut u8,
out_len: usize,
) {
crate::xof_using_compress(compress, block, block_len, cv, counter, flags, out, out_len)
crate::xof_using_compress_xof(
compress_xof,
block,
block_len,
cv,
counter,
flags,
out,
out_len,
)
}
pub unsafe extern "C" fn xof_xor(
block: *const [u8; 64],
block: *const BlockBytes,
block_len: u32,
cv: *const [u32; 8],
cv: *const CVBytes,
counter: u64,
flags: u32,
out: *mut u8,
out_len: usize,
) {
crate::xof_xor_using_compress(compress, block, block_len, cv, counter, flags, out, out_len)
crate::xof_xor_using_compress_xof(
compress_xof,
block,
block_len,
cv,
counter,
flags,
out,
out_len,
)
}
pub unsafe extern "C" fn universal_hash(
input: *const u8,
input_len: usize,
key: *const [u32; 8],
key: *const CVBytes,
counter: u64,
out: *mut [u8; 16],
) {

File 3 of 3

@ -84,8 +84,8 @@ use core::fmt;
use blake3_guts as guts;
use guts::{
BLOCK_LEN, CHUNK_END, CHUNK_LEN, CHUNK_START, DERIVE_KEY_CONTEXT, DERIVE_KEY_MATERIAL, IV,
KEYED_HASH, PARENT, ROOT, WORD_LEN,
BlockBytes, CVBytes, BLOCK_LEN, CHUNK_END, CHUNK_LEN, CHUNK_START, DERIVE_KEY_CONTEXT,
DERIVE_KEY_MATERIAL, IV_BYTES, KEYED_HASH, PARENT, ROOT,
};
/// The number of bytes in a [`Hash`](struct.Hash.html), 32
@ -122,19 +122,19 @@ const MAX_DEPTH: usize = 54; // 2^54 * CHUNK_LEN = 2^64
/// [`FromStr`]: https://doc.rust-lang.org/std/str/trait.FromStr.html
#[cfg_attr(feature = "zeroize", derive(zeroize::Zeroize))]
#[derive(Clone, Copy, Hash)]
pub struct Hash([u8; OUT_LEN]);
pub struct Hash(CVBytes);
impl Hash {
/// The raw bytes of the `Hash`. Note that byte arrays don't provide
/// constant-time equality checking, so if you need to compare hashes,
/// prefer the `Hash` type.
#[inline]
pub const fn as_bytes(&self) -> &[u8; OUT_LEN] {
pub const fn as_bytes(&self) -> &CVBytes {
&self.0
}
/// Create a `Hash` from its raw bytes representation.
pub const fn from_bytes(bytes: [u8; OUT_LEN]) -> Self {
pub const fn from_bytes(bytes: CVBytes) -> Self {
Self(bytes)
}
@ -178,7 +178,7 @@ impl Hash {
if hex_bytes.len() != OUT_LEN * 2 {
return Err(HexError(HexErrorInner::InvalidLen(hex_bytes.len())));
}
let mut hash_bytes: [u8; OUT_LEN] = [0; OUT_LEN];
let mut hash_bytes: CVBytes = [0; OUT_LEN];
for i in 0..OUT_LEN {
hash_bytes[i] = 16 * hex_val(hex_bytes[2 * i])? + hex_val(hex_bytes[2 * i + 1])?;
}
@ -186,14 +186,14 @@ impl Hash {
}
}
impl From<[u8; OUT_LEN]> for Hash {
impl From<CVBytes> for Hash {
#[inline]
fn from(bytes: [u8; OUT_LEN]) -> Self {
fn from(bytes: CVBytes) -> Self {
Self::from_bytes(bytes)
}
}
impl From<Hash> for [u8; OUT_LEN] {
impl From<Hash> for CVBytes {
#[inline]
fn from(hash: Hash) -> Self {
hash.0
@ -217,9 +217,9 @@ impl PartialEq for Hash {
}
/// This implementation is constant-time.
impl PartialEq<[u8; OUT_LEN]> for Hash {
impl PartialEq<CVBytes> for Hash {
#[inline]
fn eq(&self, other: &[u8; OUT_LEN]) -> bool {
fn eq(&self, other: &CVBytes) -> bool {
constant_time_eq::constant_time_eq_32(&self.0, other)
}
}
@ -298,66 +298,49 @@ impl std::error::Error for HexError {}
#[cfg_attr(feature = "zeroize", derive(zeroize::Zeroize))]
#[derive(Clone)]
struct Output {
input_chaining_value: [u32; 8],
block: [u8; 64],
input_chaining_value: CVBytes,
block: BlockBytes,
block_len: u8,
counter: u64,
flags: u8,
}
impl Output {
fn chaining_value(&self) -> [u8; 32] {
let words = guts::compress(
fn chaining_value(&self) -> CVBytes {
guts::compress(
&self.block,
self.block_len as u32,
&self.input_chaining_value,
self.counter,
self.flags as u32,
);
let mut bytes = [0u8; 32];
for word_index in 0..8 {
bytes[word_index * WORD_LEN..][..WORD_LEN]
.copy_from_slice(&words[word_index].to_le_bytes());
}
bytes
)
}
fn root_hash(&self) -> Hash {
debug_assert_eq!(self.counter, 0);
let out_bytes = guts::compress_xof(
Hash(guts::compress(
&self.block,
self.block_len as u32,
&self.input_chaining_value,
0,
self.flags as u32 | ROOT,
);
Hash(out_bytes[..OUT_LEN].try_into().unwrap())
}
fn root_output_block(&self) -> [u8; 2 * OUT_LEN] {
guts::compress_xof(
&self.block,
self.block_len as u32,
&self.input_chaining_value,
self.counter,
self.flags as u32 | ROOT,
)
))
}
}
#[derive(Clone)]
#[cfg_attr(feature = "zeroize", derive(zeroize::Zeroize))]
struct ChunkState {
cv: [u32; 8],
cv: CVBytes,
chunk_counter: u64,
buf: [u8; BLOCK_LEN],
buf: BlockBytes,
buf_len: u8,
blocks_compressed: u8,
flags: u8,
}
impl ChunkState {
fn new(key: &[u32; 8], chunk_counter: u64, flags: u32) -> Self {
fn new(key: &CVBytes, chunk_counter: u64, flags: u32) -> Self {
Self {
cv: *key,
chunk_counter,
@ -499,7 +482,7 @@ fn left_len(content_len: usize) -> usize {
// multithreading parallelism for that update().
fn compress_subtree_wide<J: join::Join>(
input: &[u8],
key: &[u32; 8],
key: &CVBytes,
chunk_counter: u64,
flags: u32,
out: guts::TransposedSplit,
@ -547,10 +530,10 @@ fn compress_subtree_wide<J: join::Join>(
// chunk or less. That's a different codepath.
fn compress_subtree_to_parent_node<J: join::Join>(
input: &[u8],
key: &[u32; 8],
key: &CVBytes,
chunk_counter: u64,
flags: u32,
) -> [u8; 64] {
) -> BlockBytes {
debug_assert!(input.len() > CHUNK_LEN);
let mut transposed_cvs = guts::TransposedVectors::default();
let (left_cvs, _) = guts::split_transposed_vectors(&mut transposed_cvs);
@ -568,7 +551,7 @@ fn compress_subtree_to_parent_node<J: join::Join>(
// Hash a complete input all at once. Unlike compress_subtree_wide() and
// compress_subtree_to_parent_node(), this function handles the 1 chunk case.
fn hash_all_at_once<J: join::Join>(input: &[u8], key: &[u32; 8], flags: u32) -> Output {
fn hash_all_at_once<J: join::Join>(input: &[u8], key: &CVBytes, flags: u32) -> Output {
// If the whole subtree is one chunk, hash it directly with a ChunkState.
if input.len() <= CHUNK_LEN {
return ChunkState::new(key, 0, flags).update(input).output();
@ -596,20 +579,7 @@ fn hash_all_at_once<J: join::Join>(input: &[u8], key: &[u32; 8], flags: u32) ->
/// This function is always single-threaded. For multithreading support, see
/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon).
pub fn hash(input: &[u8]) -> Hash {
hash_all_at_once::<join::SerialJoin>(input, &IV, 0).root_hash()
}
#[inline(always)]
pub fn words_from_le_bytes_32(bytes: &[u8; 32]) -> [u32; 8] {
let mut out = [0; 8];
for word_index in 0..8 {
out[word_index] = u32::from_le_bytes(
bytes[word_index * WORD_LEN..][..WORD_LEN]
.try_into()
.unwrap(),
);
}
out
hash_all_at_once::<join::SerialJoin>(input, &IV_BYTES, 0).root_hash()
}
/// The keyed hash function.
@ -626,9 +596,8 @@ pub fn words_from_le_bytes_32(bytes: &[u8; 32]) -> [u32; 8] {
/// This function is always single-threaded. For multithreading support, see
/// [`Hasher::new_keyed`] and
/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon).
pub fn keyed_hash(key: &[u8; KEY_LEN], input: &[u8]) -> Hash {
let key_words = words_from_le_bytes_32(key);
hash_all_at_once::<join::SerialJoin>(input, &key_words, KEYED_HASH).root_hash()
pub fn keyed_hash(key: &CVBytes, input: &[u8]) -> Hash {
hash_all_at_once::<join::SerialJoin>(input, key, KEYED_HASH).root_hash()
}
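// Editorial sketch (not part of this commit): the key is now passed as plain
// bytes, with an assumed placeholder value here.
let key = [0x42u8; 32];
let tag = keyed_hash(&key, b"some message");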
/// The key derivation function.
@ -666,20 +635,19 @@ pub fn keyed_hash(key: &[u8; KEY_LEN], input: &[u8]) -> Hash {
/// [`Hasher::update_rayon`](struct.Hasher.html#method.update_rayon).
///
/// [Argon2]: https://en.wikipedia.org/wiki/Argon2
pub fn derive_key(context: &str, key_material: &[u8]) -> [u8; OUT_LEN] {
pub fn derive_key(context: &str, key_material: &[u8]) -> CVBytes {
let context_key =
hash_all_at_once::<join::SerialJoin>(context.as_bytes(), &IV, DERIVE_KEY_CONTEXT)
hash_all_at_once::<join::SerialJoin>(context.as_bytes(), &IV_BYTES, DERIVE_KEY_CONTEXT)
.root_hash();
let context_key_words = words_from_le_bytes_32(context_key.as_bytes());
hash_all_at_once::<join::SerialJoin>(key_material, &context_key_words, DERIVE_KEY_MATERIAL)
hash_all_at_once::<join::SerialJoin>(key_material, context_key.as_bytes(), DERIVE_KEY_MATERIAL)
.root_hash()
.0
}
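// Editorial sketch (not part of this commit), with a hypothetical context
// string; contexts should be hardcoded, application-specific, and unique.
let key: [u8; 32] = derive_key("example.com 2023-07-09 session key", b"input key material");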
fn parent_node_output(
left_child: &[u8; 32],
right_child: &[u8; 32],
key: &[u32; 8],
left_child: &CVBytes,
right_child: &CVBytes,
key: &CVBytes,
flags: u32,
) -> Output {
let mut block = [0; BLOCK_LEN];
@ -737,18 +705,18 @@ fn parent_node_output(
#[derive(Clone)]
#[cfg_attr(feature = "zeroize", derive(zeroize::Zeroize))]
pub struct Hasher {
key: [u32; 8],
key: CVBytes,
chunk_state: ChunkState,
// The stack size is MAX_DEPTH + 1 because we do lazy merging. For example,
// with 7 chunks, we have 3 entries in the stack. Adding an 8th chunk
// requires a 4th entry, rather than merging everything down to 1, because
// we don't know whether more input is coming. This is different from how
// the reference implementation does things.
cv_stack: ArrayVec<[u8; 32], { MAX_DEPTH + 1 }>,
cv_stack: ArrayVec<CVBytes, { MAX_DEPTH + 1 }>,
}
impl Hasher {
fn new_internal(key: &[u32; 8], flags: u32) -> Self {
fn new_internal(key: &CVBytes, flags: u32) -> Self {
Self {
key: *key,
chunk_state: ChunkState::new(key, 0, flags),
@ -758,16 +726,15 @@ impl Hasher {
/// Construct a new `Hasher` for the regular hash function.
pub fn new() -> Self {
Self::new_internal(&IV, 0)
Self::new_internal(&IV_BYTES, 0)
}
/// Construct a new `Hasher` for the keyed hash function. See
/// [`keyed_hash`].
///
/// [`keyed_hash`]: fn.keyed_hash.html
pub fn new_keyed(key: &[u8; KEY_LEN]) -> Self {
let key_words = words_from_le_bytes_32(key);
Self::new_internal(&key_words, KEYED_HASH)
pub fn new_keyed(key: &CVBytes) -> Self {
Self::new_internal(key, KEYED_HASH)
}
/// Construct a new `Hasher` for the key derivation function. See
@ -777,10 +744,9 @@ impl Hasher {
/// [`derive_key`]: fn.derive_key.html
pub fn new_derive_key(context: &str) -> Self {
let context_key =
hash_all_at_once::<join::SerialJoin>(context.as_bytes(), &IV, DERIVE_KEY_CONTEXT)
hash_all_at_once::<join::SerialJoin>(context.as_bytes(), &IV_BYTES, DERIVE_KEY_CONTEXT)
.root_hash();
let context_key_words = words_from_le_bytes_32(context_key.as_bytes());
Self::new_internal(&context_key_words, DERIVE_KEY_MATERIAL)
Self::new_internal(context_key.as_bytes(), DERIVE_KEY_MATERIAL)
}
/// Reset the `Hasher` to its initial state.
@ -852,7 +818,7 @@ impl Hasher {
// merging with each of them separately, so that the second CV will always
// remain unmerged. (That also helps us support extendable output when
// we're hashing an input all-at-once.)
fn push_cv(&mut self, new_cv: &[u8; 32], chunk_counter: u64) {
fn push_cv(&mut self, new_cv: &CVBytes, chunk_counter: u64) {
self.merge_cv_stack(chunk_counter);
self.cv_stack.push(*new_cv);
}
@ -1178,19 +1144,26 @@ impl OutputReader {
/// reading further, the behavior is unspecified.
///
/// [`Read::read`]: #method.read
pub fn fill(&mut self, mut buf: &mut [u8]) {
while !buf.is_empty() {
let block: [u8; BLOCK_LEN] = self.inner.root_output_block();
let output_bytes = &block[self.position_within_block as usize..];
let take = cmp::min(buf.len(), output_bytes.len());
buf[..take].copy_from_slice(&output_bytes[..take]);
buf = &mut buf[take..];
self.position_within_block += take as u8;
if self.position_within_block == BLOCK_LEN as u8 {
self.inner.counter += 1;
self.position_within_block = 0;
}
}
pub fn fill(&mut self, buf: &mut [u8]) {
guts::xof(
&self.inner.block,
self.inner.block_len as u32,
&self.inner.input_chaining_value,
self.inner.counter,
self.inner.flags as u32,
buf,
);
}
pub fn fill_xor(&mut self, buf: &mut [u8]) {
guts::xof_xor(
&self.inner.block,
self.inner.block_len as u32,
&self.inner.input_chaining_value,
self.inner.counter,
self.inner.flags as u32,
buf,
);
}
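// Editorial note: fill() now streams straight from guts::xof(), and the new
// fill_xor() XORs the stream into buf instead of overwriting it. As of this
// WIP commit, neither method advances counter or position_within_block, so
// consecutive reads would repeat bytes. A single-call sketch:
let mut reader = Hasher::new_keyed(&[0x11; 32]).update(b"nonce").finalize_xof();
let mut buf = *b"sixteen byte msg";
reader.fill_xor(&mut buf); // buf now holds message XOR keystream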
/// Return the current read position in the output stream. This is