mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-05-28 01:16:02 +02:00
WIPPPPPPPPPPPPPPPPPPPPPPPPP
This commit is contained in:
parent
4761fd1bb6
commit
57b090f3b6
|
@ -86,6 +86,7 @@ features = ["rayon"]
|
|||
[dependencies]
|
||||
arrayref = "0.3.5"
|
||||
arrayvec = { version = "0.7.4", default-features = false }
|
||||
atomic = { version = "0.5.3", default-features = false }
|
||||
constant_time_eq = "0.3.0"
|
||||
rayon = { version = "1.2.1", optional = true }
|
||||
cfg-if = "1.0.0"
|
||||
|
|
372
src/platform.rs
372
src/platform.rs
|
@ -1,6 +1,257 @@
|
|||
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN, CHUNK_LEN, UNIVERSAL_HASH_LEN};
|
||||
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN, CHUNK_LEN};
|
||||
use arrayref::{array_mut_ref, array_ref};
|
||||
use atomic::Atomic;
|
||||
use core::cmp;
|
||||
use core::ops::{Deref, DerefMut};
|
||||
use core::ptr;
|
||||
|
||||
// Bit flags that are OR-ed together into the `flags` word handed to a compression function.

/// Set on the first block of a chunk.
const CHUNK_START: u32 = 1 << 0;
/// Set on the last block of a chunk.
const CHUNK_END: u32 = 1 << 1;
/// Flag bit 2.
const PARENT: u32 = 1 << 2;
/// Flag bit 3; combined with the chunk flags for universal-hash blocks.
const ROOT: u32 = 1 << 3;
/// Flag bit 4; combined with the chunk flags for universal-hash blocks.
const KEYED_HASH: u32 = 1 << 4;
/// Flag bit 5.
const DERIVE_KEY_CONTEXT: u32 = 1 << 5;
/// Flag bit 6.
const DERIVE_KEY_MATERIAL: u32 = 1 << 6;
|
||||
|
||||
struct Implementation {
|
||||
compress: Atomic<CompressFn>,
|
||||
hash_chunks: Atomic<HashChunksFn>,
|
||||
hash_parents: Atomic<HashParentsFn>,
|
||||
xof: Atomic<XofFn>,
|
||||
xof_xor: Atomic<XofFn>,
|
||||
universal_hash: Atomic<UniversalHashFn>,
|
||||
}
|
||||
|
||||
impl Implementation {
|
||||
fn portable() -> Self {
|
||||
Self {
|
||||
compress: Atomic::new(portable::compress),
|
||||
hash_chunks: Atomic::new(portable::hash_chunks),
|
||||
hash_parents: Atomic::new(portable::hash_parents),
|
||||
xof: Atomic::new(portable::xof),
|
||||
xof_xor: Atomic::new(portable::xof_xor),
|
||||
universal_hash: Atomic::new(portable::universal_hash),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Signature of a single-block compression function.
///
/// - `block`: the message block, zero padded to 64 bytes.
/// - `block_len`: the block length passed alongside the padded block.
/// - `cv`: the eight-word input chaining value.
/// - `counter`, `flags`: the counter and flag word for this block.
/// - `out`: receives sixteen output words; it may overlap the input.
type CompressFn = unsafe extern "C" fn(
    block: *const [u8; 64],
    block_len: u32,
    cv: *const [u32; 8],
    counter: u64,
    flags: u32,
    out: *mut [u32; 16],
);
|
||||
|
||||
/// Signature of a function that hashes `input_len` bytes of chunk input starting at `input`
/// and writes the resulting chaining values, in transposed layout, through
/// `transposed_output`.
///
/// `key` is the starting chaining value of every chunk, `counter` applies to the first
/// chunk, and `flags` is the base flag word.
type HashChunksFn = unsafe extern "C" fn(
    input: *const u8,
    input_len: usize,
    key: *const [u32; 8],
    counter: u64,
    flags: u32,
    transposed_output: *mut u32,
);
|
||||
|
||||
/// Signature of a function that hashes `num_parents` pairs of transposed child chaining
/// values read from `transposed_input` and writes one transposed parent chaining value per
/// pair through `transposed_output`.
///
/// The output may overlap the input.
type HashParentsFn = unsafe extern "C" fn(
    transposed_input: *const u32,
    num_parents: usize,
    key: *const [u32; 8],
    flags: u32,
    transposed_output: *mut u32,
);
|
||||
|
||||
/// Shared signature of `xof()` and `xof_xor()`.
///
/// - `block`: the message block, zero padded to 64 bytes.
/// - `block_len`, `cv`, `counter`, `flags`: forwarded to the compression function; `counter`
///   is the counter of the first output block.
/// - `out`, `out_len`: the destination buffer and the number of bytes to produce.
type XofFn = unsafe extern "C" fn(
    block: *const [u8; 64],
    block_len: u32,
    cv: *const [u32; 8],
    counter: u64,
    flags: u32,
    out: *mut u8,
    out_len: usize,
);
|
||||
|
||||
/// Signature of a universal-hash function: consumes `input_len` bytes at `input` under `key`,
/// starting from block counter `counter`, and writes a 16-byte result to `out`.
type UniversalHashFn = unsafe extern "C" fn(
    input: *const u8,
    input_len: usize,
    key: *const [u32; 8],
    counter: u64,
    out: *mut [u8; 16],
);
|
||||
|
||||
// The implicit degree of this implementation is MAX_SIMD_DEGREE.
|
||||
pub(crate) unsafe fn hash_chunks_using_compress(
|
||||
compress: CompressFn,
|
||||
mut input: *const u8,
|
||||
mut input_len: usize,
|
||||
key: *const [u32; 8],
|
||||
mut counter: u64,
|
||||
flags: u32,
|
||||
mut transposed_output: *mut u32,
|
||||
) {
|
||||
debug_assert!(input_len > 0);
|
||||
debug_assert!(input_len <= MAX_SIMD_DEGREE * CHUNK_LEN);
|
||||
while input_len > 0 {
|
||||
let mut chunk_len = cmp::min(input_len, CHUNK_LEN);
|
||||
input_len -= chunk_len;
|
||||
// We only use 8 words of the CV, but compress returns 16.
|
||||
let mut cv = [0u32; 16];
|
||||
cv[..8].copy_from_slice(&*key);
|
||||
let cv_ptr: *mut [u32; 16] = &mut cv;
|
||||
let mut chunk_flags = flags | CHUNK_START;
|
||||
while chunk_len > BLOCK_LEN {
|
||||
compress(
|
||||
input as *const [u8; 64],
|
||||
BLOCK_LEN as u32,
|
||||
cv_ptr as *const [u32; 8],
|
||||
counter,
|
||||
chunk_flags,
|
||||
cv_ptr,
|
||||
);
|
||||
input = input.add(BLOCK_LEN);
|
||||
chunk_len -= BLOCK_LEN;
|
||||
chunk_flags &= !CHUNK_START;
|
||||
}
|
||||
let mut last_block = [0u8; BLOCK_LEN];
|
||||
ptr::copy_nonoverlapping(input, last_block.as_mut_ptr(), chunk_len);
|
||||
input = input.add(chunk_len);
|
||||
compress(
|
||||
&last_block,
|
||||
chunk_len as u32,
|
||||
cv_ptr as *const [u32; 8],
|
||||
counter,
|
||||
chunk_flags | CHUNK_END,
|
||||
cv_ptr,
|
||||
);
|
||||
for word_index in 0..8 {
|
||||
transposed_output
|
||||
.add(word_index * TRANSPOSED_STRIDE)
|
||||
.write(cv[word_index]);
|
||||
}
|
||||
transposed_output = transposed_output.add(1);
|
||||
counter += 1;
|
||||
}
|
||||
}
|
||||
|
||||
// The implicit degree of this implementation is MAX_SIMD_DEGREE.
|
||||
pub(crate) unsafe fn hash_parents_using_compress(
|
||||
compress: CompressFn,
|
||||
mut transposed_input: *const u32,
|
||||
mut num_parents: usize,
|
||||
key: *const [u32; 8],
|
||||
flags: u32,
|
||||
mut transposed_output: *mut u32, // may overlap the input
|
||||
) {
|
||||
debug_assert!(num_parents > 0);
|
||||
debug_assert!(num_parents <= MAX_SIMD_DEGREE);
|
||||
while num_parents > 0 {
|
||||
let mut block_bytes = [0u8; 64];
|
||||
for word_index in 0..8 {
|
||||
let left_child_word = transposed_input.add(word_index * TRANSPOSED_STRIDE).read();
|
||||
block_bytes[4 * word_index..][..4].copy_from_slice(&left_child_word.to_le_bytes());
|
||||
let right_child_word = transposed_input
|
||||
.add(word_index * TRANSPOSED_STRIDE + 1)
|
||||
.read();
|
||||
block_bytes[4 * (word_index + 8)..][..4]
|
||||
.copy_from_slice(&right_child_word.to_le_bytes());
|
||||
}
|
||||
let mut cv = [0u32; 16];
|
||||
compress(&block_bytes, BLOCK_LEN as u32, key, 0, flags, &mut cv);
|
||||
for word_index in 0..8 {
|
||||
transposed_output
|
||||
.add(word_index * TRANSPOSED_STRIDE)
|
||||
.write(cv[word_index]);
|
||||
}
|
||||
transposed_input = transposed_input.add(2);
|
||||
transposed_output = transposed_output.add(1);
|
||||
num_parents -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn xof_using_compress(
|
||||
compress: CompressFn,
|
||||
block: *const [u8; 64],
|
||||
block_len: u32,
|
||||
cv: *const [u32; 8],
|
||||
mut counter: u64,
|
||||
flags: u32,
|
||||
mut out: *mut u8,
|
||||
mut out_len: usize,
|
||||
) {
|
||||
while out_len > 0 {
|
||||
let mut block_output = [0u32; 16];
|
||||
compress(block, block_len, cv, counter, flags, &mut block_output);
|
||||
for output_word in block_output {
|
||||
let bytes = output_word.to_le_bytes();
|
||||
let take = cmp::min(bytes.len(), out_len);
|
||||
ptr::copy_nonoverlapping(bytes.as_ptr(), out, take);
|
||||
out = out.add(take);
|
||||
out_len -= take;
|
||||
}
|
||||
counter += 1;
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn xof_xor_using_compress(
|
||||
compress: CompressFn,
|
||||
block: *const [u8; 64],
|
||||
block_len: u32,
|
||||
cv: *const [u32; 8],
|
||||
mut counter: u64,
|
||||
flags: u32,
|
||||
mut out: *mut u8,
|
||||
mut out_len: usize,
|
||||
) {
|
||||
while out_len > 0 {
|
||||
let mut block_output = [0u32; 16];
|
||||
compress(block, block_len, cv, counter, flags, &mut block_output);
|
||||
for output_word in block_output {
|
||||
let bytes = output_word.to_le_bytes();
|
||||
for i in 0..cmp::min(bytes.len(), out_len) {
|
||||
*out = *out ^ bytes[i];
|
||||
out = out.add(1);
|
||||
out_len -= 1;
|
||||
}
|
||||
}
|
||||
counter += 1;
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) unsafe fn universal_hash_using_compress(
|
||||
compress: CompressFn,
|
||||
mut input: *const u8,
|
||||
mut input_len: usize,
|
||||
key: *const [u32; 8],
|
||||
mut counter: u64,
|
||||
out: *mut [u8; 16],
|
||||
) {
|
||||
let flags = KEYED_HASH | CHUNK_START | CHUNK_END | ROOT;
|
||||
let mut result = [0u32; 4];
|
||||
while input_len > 0 {
|
||||
let block_len = cmp::min(input_len, BLOCK_LEN);
|
||||
let mut block = [0u8; BLOCK_LEN];
|
||||
ptr::copy_nonoverlapping(input, block.as_mut_ptr(), block_len);
|
||||
let mut block_output = [0u32; 16];
|
||||
compress(
|
||||
&block,
|
||||
BLOCK_LEN as u32,
|
||||
key,
|
||||
counter,
|
||||
flags,
|
||||
&mut block_output,
|
||||
);
|
||||
for i in 0..4 {
|
||||
result[i] ^= block_output[i];
|
||||
}
|
||||
input = input.add(block_len);
|
||||
input_len -= block_len;
|
||||
counter += 1;
|
||||
}
|
||||
for i in 0..4 {
|
||||
(*out)[4 * i..][..4].copy_from_slice(&result[i].to_le_bytes());
|
||||
}
|
||||
}
|
||||
|
||||
cfg_if::cfg_if! {
|
||||
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
|
||||
|
@ -273,62 +524,6 @@ impl Platform {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn hash_chunks(
|
||||
&self,
|
||||
input: &[u8],
|
||||
key: &[u32; 8],
|
||||
counter: u64,
|
||||
flags: u8,
|
||||
cvs_out: &mut TransposedVectors,
|
||||
num_cvs: usize,
|
||||
) {
|
||||
// TODO: Handle partial chunks?
|
||||
debug_assert_eq!(input.len() % CHUNK_LEN, 0);
|
||||
debug_assert!(input.len() / CHUNK_LEN <= self.simd_degree());
|
||||
debug_assert!(num_cvs <= self.simd_degree());
|
||||
portable::hash_chunks(input, key, counter, flags, cvs_out, num_cvs);
|
||||
}
|
||||
|
||||
pub fn hash_parents(&self, in_out: ParentInOut, key: &[u32; 8], flags: u8) {
|
||||
let (_, num_parents) = in_out.input();
|
||||
debug_assert!(num_parents <= self.simd_degree());
|
||||
portable::hash_parents(in_out, key, flags);
|
||||
// XXX: should separate the thing that copies the last CV over from this interface
|
||||
}
|
||||
|
||||
pub fn xof(
|
||||
&self,
|
||||
block: &[u8; BLOCK_LEN],
|
||||
block_len: u8,
|
||||
cv: &[u32; 8],
|
||||
counter: u64,
|
||||
flags: u8,
|
||||
out: &mut [u8],
|
||||
) {
|
||||
portable::xof(block, block_len, cv, counter, flags, out);
|
||||
}
|
||||
|
||||
pub fn xof_xor(
|
||||
&self,
|
||||
block: &[u8; BLOCK_LEN],
|
||||
block_len: u8,
|
||||
cv: &[u32; 8],
|
||||
counter: u64,
|
||||
flags: u8,
|
||||
out: &mut [u8],
|
||||
) {
|
||||
portable::xof_xor(block, block_len, cv, counter, flags, out);
|
||||
}
|
||||
|
||||
pub fn universal_hash(
|
||||
&self,
|
||||
input: &[u8],
|
||||
key: &[u32; 8],
|
||||
counter: u64,
|
||||
) -> [u8; UNIVERSAL_HASH_LEN] {
|
||||
portable::universal_hash(input, key, counter)
|
||||
}
|
||||
|
||||
// Explicit platform constructors, for benchmarks.
|
||||
|
||||
pub fn portable() -> Self {
|
||||
|
@ -543,59 +738,66 @@ pub fn le_bytes_from_words_64(words: &[u32; 16]) -> [u8; 64] {
|
|||
out
|
||||
}
|
||||
|
||||
// this is in units of *words*, for pointer operations on *const/mut u32
|
||||
pub const TRANSPOSED_STRIDE: usize = 2 * MAX_SIMD_DEGREE;
|
||||
|
||||
#[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), repr(C, align(64)))]
|
||||
#[derive(Default)]
|
||||
pub struct TransposedVectors(pub [[u32; 2 * MAX_SIMD_DEGREE]; 8]);
|
||||
#[derive(Clone, Default, Debug, PartialEq, Eq)]
|
||||
pub struct TransposedVectors {
|
||||
pub(crate) vectors: [[u32; 2 * MAX_SIMD_DEGREE]; 8],
|
||||
// the number of CVs populated in each vector
|
||||
pub(crate) len: usize,
|
||||
}
|
||||
|
||||
impl Deref for TransposedVectors {
|
||||
type Target = [[u32; 2 * MAX_SIMD_DEGREE]; 8];
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
&self.vectors
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for TransposedVectors {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
&mut self.vectors
|
||||
}
|
||||
}
|
||||
|
||||
pub enum ParentInOut<'a> {
|
||||
InPlace {
|
||||
in_out: &'a mut TransposedVectors,
|
||||
num_parents: usize,
|
||||
},
|
||||
Separate {
|
||||
input: &'a TransposedVectors,
|
||||
num_parents: usize,
|
||||
output: &'a mut TransposedVectors,
|
||||
output_column: usize,
|
||||
},
|
||||
}
|
||||
|
||||
impl<'a> ParentInOut<'a> {
|
||||
// (vectors, num_parents)
|
||||
pub fn input(&self) -> (&TransposedVectors, usize) {
|
||||
pub(crate) fn promote_odd_child_and_update_len(&mut self) {
|
||||
match self {
|
||||
ParentInOut::InPlace {
|
||||
in_out,
|
||||
num_parents,
|
||||
} => (in_out, *num_parents),
|
||||
ParentInOut::Separate {
|
||||
input, num_parents, ..
|
||||
} => (input, *num_parents),
|
||||
}
|
||||
}
|
||||
|
||||
// (vectors, output_column)
|
||||
pub fn output(&mut self) -> (&mut TransposedVectors, usize) {
|
||||
match self {
|
||||
ParentInOut::InPlace { in_out, .. } => (in_out, 0),
|
||||
ParentInOut::Separate {
|
||||
output,
|
||||
output_column,
|
||||
..
|
||||
} => (output, *output_column),
|
||||
ParentInOut::InPlace { in_out } => {
|
||||
// After an in-place parent hashing step (i.e. reduction near the root), the number
|
||||
// of CVs needs to be halved, with a possible adjustment for an odd child.
|
||||
if in_out.len % 2 == 1 {
|
||||
for i in 0..8 {
|
||||
in_out.vectors[i][in_out.len / 2] = in_out.vectors[i][in_out.len - 1];
|
||||
}
|
||||
in_out.len = (in_out.len / 2) + 1;
|
||||
} else {
|
||||
in_out.len /= 2;
|
||||
}
|
||||
}
|
||||
ParentInOut::Separate { input, output } => {
|
||||
// After an out-of-place parent hashing step (i.e. wide hashing near the leaves),
|
||||
// the output length is already correct, and all that's needed is the possible
|
||||
// adjustment for an odd child.
|
||||
if input.len % 2 == 1 {
|
||||
for i in 0..8 {
|
||||
output.vectors[i][output.len] = input.vectors[i][input.len - 1];
|
||||
}
|
||||
output.len += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
232
src/portable.rs
232
src/portable.rs
|
@ -1,10 +1,10 @@
|
|||
use crate::platform::{ParentInOut, TransposedVectors, MAX_SIMD_DEGREE};
|
||||
use crate::{
|
||||
counter_high, counter_low, CVBytes, CVWords, IncrementCounter, BLOCK_LEN, CHUNK_LEN, IV,
|
||||
MSG_SCHEDULE, OUT_LEN, UNIVERSAL_HASH_LEN,
|
||||
counter_high, counter_low, CVBytes, CVWords, IncrementCounter, BLOCK_LEN, IV, MSG_SCHEDULE,
|
||||
OUT_LEN,
|
||||
};
|
||||
use arrayref::{array_mut_ref, array_ref};
|
||||
use core::cmp;
|
||||
|
||||
const WORD_SIZE: usize = 4;
|
||||
|
||||
#[inline(always)]
|
||||
fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) {
|
||||
|
@ -179,143 +179,115 @@ pub fn hash_many<const N: usize>(
|
|||
}
|
||||
}
|
||||
|
||||
/// General contract:
|
||||
/// - `input` is N chunks, each exactly 1 KiB, 1 <= N <= DEGREE
|
||||
/// - `output_column` is a multiple of DEGREE.
|
||||
/// The CHUNK_START and CHUNK_END flags are set internally. Writes N transposed CVs to the output,
|
||||
/// from `output_column` to `output_column+N-1`. Columns prior to `output_column` must be
|
||||
/// unmodified.
|
||||
///
|
||||
/// This portable implementation has no particular DEGREE. It will accept any number of chunks up
|
||||
/// to MAX_SIMD_DEGREE.
|
||||
pub fn hash_chunks(
|
||||
input: &[u8],
|
||||
key: &[u32; 8],
|
||||
pub unsafe extern "C" fn compress(
|
||||
block: *const [u8; 64],
|
||||
block_len: u32,
|
||||
cv: *const [u32; 8],
|
||||
counter: u64,
|
||||
flags: u8,
|
||||
output: &mut TransposedVectors,
|
||||
output_column: usize,
|
||||
flags: u32,
|
||||
out: *mut [u32; 16],
|
||||
) {
|
||||
debug_assert_eq!(input.len() % CHUNK_LEN, 0);
|
||||
let num_chunks = input.len() / CHUNK_LEN;
|
||||
debug_assert!(num_chunks <= MAX_SIMD_DEGREE);
|
||||
for chunk_index in 0..num_chunks {
|
||||
let mut cv = *key;
|
||||
for block_index in 0..16 {
|
||||
let block_flags = match block_index {
|
||||
0 => flags | crate::CHUNK_START,
|
||||
15 => flags | crate::CHUNK_END,
|
||||
_ => flags,
|
||||
};
|
||||
compress_in_place(
|
||||
&mut cv,
|
||||
input[CHUNK_LEN * chunk_index + BLOCK_LEN * block_index..][..BLOCK_LEN]
|
||||
.try_into()
|
||||
.unwrap(),
|
||||
BLOCK_LEN as u8,
|
||||
counter + chunk_index as u64,
|
||||
block_flags,
|
||||
);
|
||||
}
|
||||
for word_index in 0..cv.len() {
|
||||
output[word_index][output_column + chunk_index] = cv[word_index];
|
||||
}
|
||||
let block_words = crate::platform::words_from_le_bytes_64(&*block);
|
||||
let mut state = [
|
||||
(*cv)[0],
|
||||
(*cv)[1],
|
||||
(*cv)[2],
|
||||
(*cv)[3],
|
||||
(*cv)[4],
|
||||
(*cv)[5],
|
||||
(*cv)[6],
|
||||
(*cv)[7],
|
||||
IV[0],
|
||||
IV[1],
|
||||
IV[2],
|
||||
IV[3],
|
||||
counter_low(counter),
|
||||
counter_high(counter),
|
||||
block_len as u32,
|
||||
flags as u32,
|
||||
];
|
||||
for round_number in 0..7 {
|
||||
round(&mut state, &block_words, round_number);
|
||||
}
|
||||
for i in 0..8 {
|
||||
state[i] ^= state[i + 8];
|
||||
state[i + 8] ^= (*cv)[i];
|
||||
}
|
||||
*out = state;
|
||||
}
|
||||
|
||||
/// General contract:
|
||||
/// - `cvs` contains `2*num_parents` transposed CVs, 1 <= num_parents <= DEGREE, starting at column 0
|
||||
/// There may be additional CVs present beyond the `2*num_parents` CVs indicated, but this function
|
||||
/// isn't aware of them and must not modify them. (The caller will take care of an odd remaining
|
||||
/// CV, if any.) No flags are set internally. (The caller must set `PARENT` in `flags`). Writes
|
||||
/// `num_parents` transposed parent CVs to the output, starting at column 0.
|
||||
///
|
||||
/// This portable implementation has no particular DEGREE. It will accept any number of parents up
|
||||
/// to MAX_SIMD_DEGREE.
|
||||
pub fn hash_parents(mut in_out: ParentInOut, key: &[u32; 8], flags: u8) {
|
||||
let (_, num_parents) = in_out.input();
|
||||
debug_assert!(num_parents <= MAX_SIMD_DEGREE);
|
||||
for parent_index in 0..num_parents {
|
||||
let (input, _) = in_out.input();
|
||||
let mut block = [0u8; BLOCK_LEN];
|
||||
for i in 0..8 {
|
||||
block[4 * i..][..4].copy_from_slice(&input[i][2 * parent_index].to_le_bytes());
|
||||
block[4 * (i + 8)..][..4]
|
||||
.copy_from_slice(&input[i][2 * parent_index + 1].to_le_bytes());
|
||||
}
|
||||
let mut cv = *key;
|
||||
compress_in_place(&mut cv, &block, BLOCK_LEN as u8, 0, flags);
|
||||
let (output, output_column) = in_out.output();
|
||||
for i in 0..8 {
|
||||
output[i][output_column + parent_index] = cv[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn xof(
|
||||
block: &[u8; BLOCK_LEN],
|
||||
block_len: u8,
|
||||
cv: &[u32; 8],
|
||||
mut counter: u64,
|
||||
flags: u8,
|
||||
mut out: &mut [u8],
|
||||
pub unsafe extern "C" fn hash_chunks(
|
||||
input: *const u8,
|
||||
input_len: usize,
|
||||
key: *const [u32; 8],
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
transposed_output: *mut u32,
|
||||
) {
|
||||
while !out.is_empty() {
|
||||
let block_output = compress_xof(cv, block, block_len, counter, flags);
|
||||
let take = cmp::min(BLOCK_LEN, out.len());
|
||||
out[..take].copy_from_slice(&block_output[..take]);
|
||||
out = &mut out[take..];
|
||||
counter += 1;
|
||||
}
|
||||
crate::platform::hash_chunks_using_compress(
|
||||
compress,
|
||||
input,
|
||||
input_len,
|
||||
key,
|
||||
counter,
|
||||
flags,
|
||||
transposed_output,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn xof_xor(
|
||||
block: &[u8; BLOCK_LEN],
|
||||
block_len: u8,
|
||||
cv: &[u32; 8],
|
||||
mut counter: u64,
|
||||
flags: u8,
|
||||
mut out: &mut [u8],
|
||||
pub unsafe extern "C" fn hash_parents(
|
||||
transposed_input: *const u32,
|
||||
num_parents: usize,
|
||||
key: *const [u32; 8],
|
||||
flags: u32,
|
||||
transposed_output: *mut u32, // may overlap the input
|
||||
) {
|
||||
while !out.is_empty() {
|
||||
let block_output = compress_xof(cv, block, block_len, counter, flags);
|
||||
let take = cmp::min(BLOCK_LEN, out.len());
|
||||
for i in 0..take {
|
||||
out[i] ^= block_output[i];
|
||||
}
|
||||
out = &mut out[take..];
|
||||
counter += 1;
|
||||
}
|
||||
crate::platform::hash_parents_using_compress(
|
||||
compress,
|
||||
transposed_input,
|
||||
num_parents,
|
||||
key,
|
||||
flags,
|
||||
transposed_output,
|
||||
)
|
||||
}
|
||||
|
||||
pub fn universal_hash(
|
||||
mut input: &[u8],
|
||||
key: &[u32; 8],
|
||||
mut counter: u64,
|
||||
) -> [u8; UNIVERSAL_HASH_LEN] {
|
||||
let flags = crate::KEYED_HASH | crate::CHUNK_START | crate::CHUNK_END | crate::ROOT;
|
||||
let mut result = [0u8; UNIVERSAL_HASH_LEN];
|
||||
while input.len() > BLOCK_LEN {
|
||||
let block_output = compress_xof(
|
||||
key,
|
||||
&input[..BLOCK_LEN].try_into().unwrap(),
|
||||
BLOCK_LEN as u8,
|
||||
counter,
|
||||
flags,
|
||||
);
|
||||
for i in 0..UNIVERSAL_HASH_LEN {
|
||||
result[i] ^= block_output[i];
|
||||
}
|
||||
input = &input[BLOCK_LEN..];
|
||||
counter += 1;
|
||||
}
|
||||
let mut final_block = [0u8; BLOCK_LEN];
|
||||
final_block[..input.len()].copy_from_slice(input);
|
||||
let final_output = compress_xof(key, &final_block, input.len() as u8, counter, flags);
|
||||
for i in 0..UNIVERSAL_HASH_LEN {
|
||||
result[i] ^= final_output[i];
|
||||
}
|
||||
result
|
||||
pub unsafe extern "C" fn xof(
|
||||
block: *const [u8; 64],
|
||||
block_len: u32,
|
||||
cv: *const [u32; 8],
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
out: *mut u8,
|
||||
out_len: usize,
|
||||
) {
|
||||
crate::platform::xof_using_compress(
|
||||
compress, block, block_len, cv, counter, flags, out, out_len,
|
||||
)
|
||||
}
|
||||
|
||||
pub unsafe extern "C" fn xof_xor(
|
||||
block: *const [u8; 64],
|
||||
block_len: u32,
|
||||
cv: *const [u32; 8],
|
||||
counter: u64,
|
||||
flags: u32,
|
||||
out: *mut u8,
|
||||
out_len: usize,
|
||||
) {
|
||||
crate::platform::xof_xor_using_compress(
|
||||
compress, block, block_len, cv, counter, flags, out, out_len,
|
||||
)
|
||||
}
|
||||
|
||||
pub unsafe extern "C" fn universal_hash(
|
||||
input: *const u8,
|
||||
input_len: usize,
|
||||
key: *const [u32; 8],
|
||||
counter: u64,
|
||||
out: *mut [u8; 16],
|
||||
) {
|
||||
crate::platform::universal_hash_using_compress(compress, input, input_len, key, counter, out)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
83
src/test.rs
83
src/test.rs
|
@ -214,12 +214,13 @@ pub fn test_hash_many_fn(
|
|||
|
||||
// Signature of the hash_chunks() implementations under test.
|
||||
type HashChunksFn = unsafe fn(
|
||||
input: &[u8],
|
||||
key: &[u32; 8],
|
||||
counter: u64,
|
||||
flags: u8,
|
||||
output: &mut TransposedVectors,
|
||||
output_column: usize,
|
||||
input: *const u8,
|
||||
input_len: usize,
|
||||
key: *const u32,
|
||||
initial_counter: u64,
|
||||
counter_group: u64,
|
||||
flags: u32,
|
||||
transposed_output: *mut u32,
|
||||
);
|
||||
|
||||
pub fn test_hash_chunks_fn(target_fn: HashChunksFn, degree: usize) {
|
||||
|
@ -227,47 +228,55 @@ pub fn test_hash_chunks_fn(target_fn: HashChunksFn, degree: usize) {
|
|||
let mut input = [0u8; 2 * MAX_SIMD_DEGREE * CHUNK_LEN];
|
||||
paint_test_input(&mut input);
|
||||
for test_degree in 1..=degree {
|
||||
let input1 = &input[..test_degree * CHUNK_LEN];
|
||||
let input2 = &input[test_degree * CHUNK_LEN..][..test_degree * CHUNK_LEN];
|
||||
for &initial_counter in INITIAL_COUNTERS {
|
||||
// Make two calls, to test the output_column parameter.
|
||||
let mut test_output = TransposedVectors::default();
|
||||
unsafe {
|
||||
target_fn(
|
||||
&input[..test_degree * CHUNK_LEN],
|
||||
TEST_KEY_WORDS,
|
||||
input1.as_ptr(),
|
||||
input1.len(),
|
||||
TEST_KEY_WORDS.as_ptr(),
|
||||
initial_counter,
|
||||
crate::KEYED_HASH,
|
||||
&mut test_output,
|
||||
0,
|
||||
crate::KEYED_HASH as u32,
|
||||
test_output[0].as_mut_ptr(),
|
||||
);
|
||||
target_fn(
|
||||
&input[test_degree * CHUNK_LEN..][..test_degree * CHUNK_LEN],
|
||||
TEST_KEY_WORDS,
|
||||
input2.as_ptr(),
|
||||
input2.len(),
|
||||
TEST_KEY_WORDS.as_ptr(),
|
||||
initial_counter + test_degree as u64,
|
||||
crate::KEYED_HASH,
|
||||
&mut test_output,
|
||||
test_degree,
|
||||
0,
|
||||
crate::KEYED_HASH as u32,
|
||||
test_output[0].as_mut_ptr().add(test_degree),
|
||||
);
|
||||
}
|
||||
|
||||
let mut portable_output = TransposedVectors::default();
|
||||
crate::portable::hash_chunks(
|
||||
&input[..test_degree * CHUNK_LEN],
|
||||
TEST_KEY_WORDS,
|
||||
initial_counter,
|
||||
crate::KEYED_HASH,
|
||||
&mut portable_output,
|
||||
0,
|
||||
);
|
||||
crate::portable::hash_chunks(
|
||||
&input[test_degree * CHUNK_LEN..][..test_degree * CHUNK_LEN],
|
||||
TEST_KEY_WORDS,
|
||||
initial_counter + test_degree as u64,
|
||||
crate::KEYED_HASH,
|
||||
&mut portable_output,
|
||||
test_degree,
|
||||
);
|
||||
unsafe {
|
||||
crate::portable::hash_chunks(
|
||||
input1.as_ptr(),
|
||||
input1.len(),
|
||||
TEST_KEY_WORDS.as_ptr(),
|
||||
initial_counter,
|
||||
0,
|
||||
crate::KEYED_HASH as u32,
|
||||
test_output[0].as_mut_ptr(),
|
||||
);
|
||||
crate::portable::hash_chunks(
|
||||
input2.as_ptr(),
|
||||
input2.len(),
|
||||
TEST_KEY_WORDS.as_ptr(),
|
||||
initial_counter + test_degree as u64,
|
||||
0,
|
||||
crate::KEYED_HASH as u32,
|
||||
test_output[0].as_mut_ptr().add(test_degree),
|
||||
);
|
||||
}
|
||||
|
||||
assert_eq!(portable_output.0, test_output.0);
|
||||
assert_eq!(portable_output, test_output);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -283,7 +292,13 @@ fn paint_transposed_input(input: &mut TransposedVectors) {
|
|||
}
|
||||
|
||||
// Signature of the hash_parents() implementations under test.
|
||||
type HashParentsFn = unsafe fn(in_out: ParentInOut, key: &[u32; 8], flags: u8);
|
||||
type HashParentsFn = unsafe fn(
|
||||
transposed_input: *const u32,
|
||||
num_parents: usize,
|
||||
key: *const u32,
|
||||
flags: u32,
|
||||
transposed_output: *mut u32, // may overlap the input
|
||||
);
|
||||
|
||||
pub fn test_hash_parents_fn(target_fn: HashParentsFn, degree: usize) {
|
||||
assert!(degree <= MAX_SIMD_DEGREE);
|
||||
|
@ -292,7 +307,7 @@ pub fn test_hash_parents_fn(target_fn: HashParentsFn, degree: usize) {
|
|||
{
|
||||
let mut input = TransposedVectors::default();
|
||||
paint_transposed_input(&mut input);
|
||||
let mut test_output = TransposedVectors(input.0);
|
||||
let mut test_output = input.clone();
|
||||
unsafe {
|
||||
target_fn(
|
||||
ParentInOut::Separate {
|
||||
|
|
Loading…
Reference in New Issue