mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-05-28 01:16:02 +02:00

get rid of the DETECTED_IMPL wrapper functions

Jack O'Connor 2023-07-10 01:23:21 -07:00
parent c66bff9369
commit 84c9a7a305
2 changed files with 17 additions and 100 deletions
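The change itself is mechanical: the guts crate previously exposed free wrapper functions (guts::compress, guts::hash_chunks, and so on) that only forwarded to a private DETECTED_IMPL static, and this commit makes that static public so call sites invoke its methods directly. Below is a minimal, self-contained sketch of the before/after shape, with made-up stand-in types rather than the crate's real signatures:

// Hypothetical stand-in for the guts API; the real methods take blocks,
// chaining values, counters, and flags rather than a single byte slice.
mod guts {
    pub struct Implementation;

    impl Implementation {
        pub const fn new() -> Self {
            Implementation
        }
        pub fn compress(&self, input: &[u8]) -> u64 {
            // Placeholder "compression": just sum the bytes.
            input.iter().map(|&b| b as u64).sum()
        }
    }

    // After this commit the detected implementation itself is public...
    pub static DETECTED_IMPL: Implementation = Implementation::new();

    // ...and thin wrappers like this one are gone:
    //
    //     #[inline]
    //     pub fn compress(input: &[u8]) -> u64 {
    //         DETECTED_IMPL.compress(input)
    //     }
}

fn main() {
    // Call sites now name the implementation explicitly.
    println!("{}", guts::DETECTED_IMPL.compress(b"hello"));
}

The second changed file below is exactly this call-site rewrite, applied throughout.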

View File

@@ -54,91 +54,7 @@ pub type CVWords = [u32; 8];
 pub type BlockBytes = [u8; 64];
 pub type BlockWords = [u32; 16];
-#[inline]
-pub fn degree() -> usize {
-    DETECTED_IMPL.degree()
-}
-#[inline]
-pub fn split_transposed_vectors(
-    vectors: &mut TransposedVectors,
-) -> (TransposedSplit, TransposedSplit) {
-    DETECTED_IMPL.split_transposed_vectors(vectors)
-}
-#[inline]
-pub fn compress(
-    block: &BlockBytes,
-    block_len: u32,
-    cv: &CVBytes,
-    counter: u64,
-    flags: u32,
-) -> CVBytes {
-    DETECTED_IMPL.compress(block, block_len, cv, counter, flags)
-}
-#[inline]
-pub fn hash_chunks(
-    input: &[u8],
-    key: &CVBytes,
-    counter: u64,
-    flags: u32,
-    transposed_output: TransposedSplit,
-) -> usize {
-    DETECTED_IMPL.hash_chunks(input, key, counter, flags, transposed_output)
-}
-#[inline]
-pub fn hash_parents(
-    transposed_input: &TransposedVectors,
-    num_cvs: usize,
-    key: &CVBytes,
-    flags: u32,
-    transposed_output: TransposedSplit,
-) -> usize {
-    DETECTED_IMPL.hash_parents(transposed_input, num_cvs, key, flags, transposed_output)
-}
-#[inline]
-pub fn reduce_parents(
-    transposed_in_out: &mut TransposedVectors,
-    num_cvs: usize,
-    key: &CVBytes,
-    flags: u32,
-) -> usize {
-    DETECTED_IMPL.reduce_parents(transposed_in_out, num_cvs, key, flags)
-}
-#[inline]
-pub fn xof(
-    block: &BlockBytes,
-    block_len: u32,
-    cv: &CVBytes,
-    counter: u64,
-    flags: u32,
-    out: &mut [u8],
-) {
-    DETECTED_IMPL.xof(block, block_len, cv, counter, flags, out);
-}
-#[inline]
-pub fn xof_xor(
-    block: &BlockBytes,
-    block_len: u32,
-    cv: &CVBytes,
-    counter: u64,
-    flags: u32,
-    out: &mut [u8],
-) {
-    DETECTED_IMPL.xof_xor(block, block_len, cv, counter, flags, out);
-}
-#[inline]
-pub fn universal_hash(input: &[u8], key: &CVBytes, counter: u64) -> [u8; 16] {
-    DETECTED_IMPL.universal_hash(input, key, counter)
-}
-static DETECTED_IMPL: Implementation = Implementation::new(
+pub static DETECTED_IMPL: Implementation = Implementation::new(
     degree_init,
     compress_init,
     hash_chunks_init,
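The remaining context lines show that DETECTED_IMPL is constructed from *_init entry points (degree_init, compress_init, hash_chunks_init, ...), which suggests a dispatch table that settles on the best available backend lazily. The crate's actual mechanism is not visible in this diff; the following is only a rough, hypothetical sketch of that kind of detect-once dispatch, using std::sync::OnceLock and invented backends:

use std::sync::OnceLock;

// Illustrative only: two fake backends and a one-time "detection" step.
#[derive(Clone, Copy)]
struct Implementation {
    degree: fn() -> usize,
    // The real struct also carries compress, hash_chunks, hash_parents,
    // xof, xof_xor, universal_hash, ...
}

fn portable() -> Implementation {
    Implementation { degree: || 1usize }
}

fn wide_simd() -> Implementation {
    Implementation { degree: || 8usize }
}

fn detect() -> Implementation {
    // Stand-in for runtime CPU feature detection.
    if cfg!(target_arch = "x86_64") {
        wide_simd()
    } else {
        portable()
    }
}

static DETECTED: OnceLock<Implementation> = OnceLock::new();

fn detected_impl() -> Implementation {
    // The first call runs detect(); later calls reuse the cached choice.
    *DETECTED.get_or_init(detect)
}

fn main() {
    println!("SIMD degree: {}", (detected_impl().degree)());
}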

View File

@@ -307,7 +307,7 @@ struct Output {
 impl Output {
     fn chaining_value(&self) -> CVBytes {
-        guts::compress(
+        guts::DETECTED_IMPL.compress(
             &self.block,
             self.block_len as u32,
             &self.input_chaining_value,
@@ -318,7 +318,7 @@ impl Output {
     fn root_hash(&self) -> Hash {
         debug_assert_eq!(self.counter, 0);
-        Hash(guts::compress(
+        Hash(guts::DETECTED_IMPL.compress(
             &self.block,
             self.block_len as u32,
             &self.input_chaining_value,
@@ -378,7 +378,7 @@ impl ChunkState {
         self.fill_buf(&mut input);
         if !input.is_empty() {
             debug_assert_eq!(self.buf_len as usize, BLOCK_LEN);
-            self.cv = guts::compress(
+            self.cv = guts::DETECTED_IMPL.compress(
                 &self.buf,
                 BLOCK_LEN as u32,
                 &self.cv,
@@ -393,7 +393,7 @@ impl ChunkState {
         while input.len() > BLOCK_LEN {
             debug_assert_eq!(self.buf_len, 0);
-            self.cv = guts::compress(
+            self.cv = guts::DETECTED_IMPL.compress(
                 input[..BLOCK_LEN].try_into().unwrap(),
                 BLOCK_LEN as u32,
                 &self.cv,
@@ -474,20 +474,21 @@ fn compress_subtree_wide<J: join::Join>(
     // Note that the single chunk case does *not* bump the SIMD degree up to 2
     // when it is 1. This allows Rayon the option of multithreading even the
     // 2-chunk case, which can help performance on smaller platforms.
-    if input.len() <= guts::degree() * CHUNK_LEN {
-        return guts::hash_chunks(input, key, chunk_counter, flags, out);
+    let degree = guts::DETECTED_IMPL.degree();
+    if input.len() <= degree * CHUNK_LEN {
+        return guts::DETECTED_IMPL.hash_chunks(input, key, chunk_counter, flags, out);
     }
     // With more than simd_degree chunks, we need to recurse. Start by dividing
     // the input into left and right subtrees. (Note that this is only optimal
     // as long as the SIMD degree is a power of 2. If we ever get a SIMD degree
     // of 3 or something, we'll need a more complicated strategy.)
-    debug_assert_eq!(guts::degree().count_ones(), 1, "power of 2");
+    debug_assert_eq!(degree.count_ones(), 1, "power of 2");
     let (left, right) = input.split_at(guts::left_len(input.len()));
     let right_chunk_counter = chunk_counter + (left.len() / CHUNK_LEN) as u64;
     let mut transposed_cvs = guts::TransposedVectors::new();
-    let (left_cvs, right_cvs) = guts::split_transposed_vectors(&mut transposed_cvs);
+    let (left_cvs, right_cvs) = guts::DETECTED_IMPL.split_transposed_vectors(&mut transposed_cvs);
     // Recurse! For update_rayon(), this is where we take advantage of RayonJoin and use multiple
     // threads.
@@ -499,7 +500,7 @@ fn compress_subtree_wide<J: join::Join>(
     // Do one layer of parent node compression. The SIMD degree is always at least 2, so we're
     // guaranteed that this isn't the root compression.
     let num_cvs = left_n + right_n;
-    guts::hash_parents(&mut transposed_cvs, num_cvs, key, flags, out)
+    guts::DETECTED_IMPL.hash_parents(&mut transposed_cvs, num_cvs, key, flags, out)
 }
 // Hash a subtree with compress_subtree_wide(), and then condense the resulting
@@ -520,7 +521,7 @@ fn compress_subtree_to_parent_node<J: join::Join>(
 ) -> BlockBytes {
     debug_assert!(input.len() > CHUNK_LEN);
     let mut transposed_cvs = guts::TransposedVectors::new();
-    let (left_cvs, _) = guts::split_transposed_vectors(&mut transposed_cvs);
+    let (left_cvs, _) = guts::DETECTED_IMPL.split_transposed_vectors(&mut transposed_cvs);
     let mut num_cvs = compress_subtree_wide::<J>(input, &key, chunk_counter, flags, left_cvs);
     debug_assert!(num_cvs >= 2);
@@ -528,7 +529,7 @@ fn compress_subtree_to_parent_node<J: join::Join>(
     // compress_subtree_wide() returns more than 2 chaining values. Condense
     // them into 2 by forming parent nodes repeatedly.
     while num_cvs > 2 {
-        num_cvs = guts::reduce_parents(&mut transposed_cvs, num_cvs, key, flags);
+        num_cvs = guts::DETECTED_IMPL.reduce_parents(&mut transposed_cvs, num_cvs, key, flags);
     }
     transposed_cvs.extract_parent_node(0)
 }
@@ -1132,7 +1133,7 @@ impl OutputReader {
         debug_assert!(self.position_within_block < BLOCK_LEN as u8);
         if self.position_within_block != 0 {
             let mut partial_block = [0u8; 64];
-            guts::xof(
+            guts::DETECTED_IMPL.xof(
                 &self.inner.block,
                 self.inner.block_len as u32,
                 &self.inner.input_chaining_value,
@@ -1153,7 +1154,7 @@ impl OutputReader {
                 return;
             }
         }
-        guts::xof(
+        guts::DETECTED_IMPL.xof(
             &self.inner.block,
             self.inner.block_len as u32,
             &self.inner.input_chaining_value,
@@ -1168,7 +1169,7 @@ impl OutputReader {
         debug_assert!(self.position_within_block < BLOCK_LEN as u8);
         if self.position_within_block != 0 {
             let mut partial_block = [0u8; 64];
-            guts::xof(
+            guts::DETECTED_IMPL.xof(
                 &self.inner.block,
                 self.inner.block_len as u32,
                 &self.inner.input_chaining_value,
@@ -1191,7 +1192,7 @@ impl OutputReader {
                 return;
             }
         }
-        guts::xof_xor(
+        guts::DETECTED_IMPL.xof_xor(
             &self.inner.block,
             self.inner.block_len as u32,
             &self.inner.input_chaining_value,
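The comments inside compress_subtree_wide() above rely on two properties: the SIMD degree is a power of two, and guts::left_len() assigns a power-of-two number of full chunks to the left subtree. left_len() itself is not part of this commit; assuming it follows the BLAKE3 reference rule (the largest power-of-two chunk count that still leaves at least one byte for the right side), an illustrative version looks like this:

const CHUNK_LEN: usize = 1024;

// Illustrative left_len(), assuming the standard BLAKE3 left-subtree rule;
// the real guts::left_len() is not shown in this diff.
fn left_len(content_len: usize) -> usize {
    debug_assert!(content_len > CHUNK_LEN);
    // Full chunks that could go left while reserving at least one byte on the right.
    let full_chunks = (content_len - 1) / CHUNK_LEN;
    // Largest power of two <= full_chunks.
    let po2 = 1usize << (usize::BITS - 1 - full_chunks.leading_zeros());
    po2 * CHUNK_LEN
}

fn main() {
    // With 5 chunks of input, 4 chunks go left and 1 chunk goes right.
    let total = 5 * CHUNK_LEN;
    let left = left_len(total);
    assert_eq!((left, total - left), (4 * CHUNK_LEN, CHUNK_LEN));
    println!("left = {left} bytes, right = {} bytes", total - left);
}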