mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-05-23 21:17:06 +02:00
Jack O'Connor 2023-05-28 13:40:38 -07:00
parent e302cdf36f
commit 589f2c3f48
3 changed files with 397 additions and 14 deletions

View File

@@ -1,4 +1,4 @@
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN};
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN, CHUNK_LEN};
use arrayref::{array_mut_ref, array_ref};
cfg_if::cfg_if! {
@@ -272,6 +272,73 @@ impl Platform {
}
}
// Hashes N=input.len()/CHUNK_LEN chunks and writes N transposed chunk CVs to the output,
// starting at the column given by num_cvs (i.e. appending to the transposed CVs already
// present). After returning, the total number of transposed CVs in the output will be
// num_cvs+N. N and num_cvs must both be less than or equal to simd_degree. Any partial chunk
// bytes in the input after the last complete chunk are ignored and need to be hashed
// separately by the caller. The counter argument is the value of the chunk counter for the
// first chunk, and it's incremented by 1 for each chunk after the first. The CHUNK_START and
// CHUNK_END flags are set internally.
pub fn hash_chunks(
&self,
input: &[u8],
key: &[u32; 8],
counter: u64,
flags: u8,
cvs_out: &mut TransposedVectors,
num_cvs: usize,
) {
debug_assert!(input.len() / CHUNK_LEN <= self.simd_degree());
debug_assert!(num_cvs <= self.simd_degree());
portable::hash_chunks(input, key, counter, flags, cvs_out, num_cvs);
// XXX: should separate the thing that hashes the remainder from this interface
}
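// Illustrative sketch only (not part of this commit): because num_cvs is the column where
// writing starts, successive calls can append CVs for successive batches of chunks. This
// hypothetical helper assumes `first` is a whole number of chunks and that each batch
// contains at most simd_degree complete chunks.
#[allow(dead_code)]
fn hash_two_chunk_batches_example(
    &self,
    first: &[u8],
    second: &[u8],
    key: &[u32; 8],
    flags: u8,
    cvs: &mut TransposedVectors,
) {
    let first_chunks = first.len() / CHUNK_LEN;
    // The chunk counter keeps counting across batches, so the second batch starts its
    // counter at first_chunks and writes its CVs starting at column first_chunks.
    self.hash_chunks(first, key, 0, flags, cvs, 0);
    self.hash_chunks(second, key, first_chunks as u64, flags, cvs, first_chunks);
}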
// Writes out N=num_cvs/2 transposed parent CVs in-place over the first N columns of the input
// CVs. Columns N and above are unmodified. num_cvs must be less than or equal to
// 2*simd_degree. If num_cvs is odd, the final input CV is ignored, and the caller should copy
// it from column 2N (the last input column) to column N after this function returns. The
// PARENT flag is added internally.
pub fn hash_parents(
&self,
cvs: &mut TransposedVectors,
num_cvs: usize,
key: &[u32; 8],
flags: u8,
) {
debug_assert!(num_cvs <= 2 * self.simd_degree());
portable::hash_parents(cvs, num_cvs, key, flags);
// XXX: should separate the thing that copies the last CV over from this interface
}
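// Illustrative sketch only (not part of this commit): one parent-reduction step as a caller
// might perform it, including the copy of a leftover odd child CV described above. The
// method name and return value are hypothetical.
#[allow(dead_code)]
fn reduce_parents_example(
    &self,
    cvs: &mut TransposedVectors,
    num_cvs: usize,
    key: &[u32; 8],
    flags: u8,
) -> usize {
    let num_parents = num_cvs / 2;
    self.hash_parents(cvs, num_cvs, key, flags);
    if num_cvs % 2 == 1 {
        // Move the unpaired child CV down so it directly follows the new parent CVs.
        for row in 0..8 {
            cvs.0[row][num_parents] = cvs.0[row][num_cvs - 1];
        }
        num_parents + 1
    } else {
        num_parents
    }
}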
pub fn xof(
&self,
block: &[u8; BLOCK_LEN],
block_len: u8,
cv: &[u32; 8],
counter: u64,
flags: u8,
out: &mut [u8],
) {
portable::xof(block, block_len, cv, counter, flags, out);
}
pub fn xof_xor(
&self,
block: &[u8; BLOCK_LEN],
block_len: u8,
cv: &[u32; 8],
counter: u64,
flags: u8,
out: &mut [u8],
) {
portable::xof_xor(block, block_len, cv, counter, flags, out);
}
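// Computes a 64-byte universal hash by XORing together one keyed compression per 64-byte
// block of input (an empty input still counts as one block), incrementing the counter for
// each block. See the portable implementation for the exact construction; the result is not
// a regular BLAKE3 hash.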
pub fn universal_hash(&self, input: &[u8], key: &[u32; 8], counter: u64) -> [u8; 64] {
portable::universal_hash(input, key, counter)
}
// Explicit platform constructors, for benchmarks.
pub fn portable() -> Self {
@@ -485,3 +552,8 @@ pub fn le_bytes_from_words_64(words: &[u32; 16]) -> [u8; 64] {
*array_mut_ref!(out, 15 * 4, 4) = words[15].to_le_bytes();
out
}
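// Chaining values are stored transposed: row w holds word w of every CV, so CV number i
// occupies column i across all 8 rows. There's room for 2 * MAX_SIMD_DEGREE CVs, matching the
// maximum number of child CVs that hash_parents can reduce in one call.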
#[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), repr(C, align(64)))]
pub struct TransposedVectors(pub [[u32; 2 * MAX_SIMD_DEGREE]; 8]);
pub struct StridedOutput(*mut u32);

View File

@@ -1,8 +1,9 @@
use crate::{
counter_high, counter_low, CVBytes, CVWords, IncrementCounter, BLOCK_LEN, IV, MSG_SCHEDULE,
OUT_LEN,
counter_high, counter_low, platform::TransposedVectors, CVBytes, CVWords, IncrementCounter,
BLOCK_LEN, CHUNK_LEN, IV, MSG_SCHEDULE, OUT_LEN,
};
use arrayref::{array_mut_ref, array_ref};
use core::cmp;
#[inline(always)]
fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, x: u32, y: u32) {
@@ -177,14 +178,118 @@ pub fn hash_many<const N: usize>(
}
}
pub fn hash_chunks(
input: &[u8],
key: &[u32; 8],
counter: u64,
flags: u8,
output: &mut TransposedVectors,
output_offset: usize,
) {
const LAST_BLOCK_INDEX: usize = (CHUNK_LEN / BLOCK_LEN) - 1;
// There might be a partial chunk at the end. If so, we ignore it here, and the caller will
// hash it separately.
let num_chunks = input.len() / CHUNK_LEN;
for chunk_index in 0..num_chunks {
let mut cv = *key;
for block_index in 0..CHUNK_LEN / BLOCK_LEN {
compress_in_place(
&mut cv,
input[CHUNK_LEN * chunk_index + BLOCK_LEN * block_index..][..BLOCK_LEN]
.try_into()
.unwrap(),
BLOCK_LEN as u8,
counter + chunk_index as u64,
match block_index {
0 => flags | crate::CHUNK_START,
LAST_BLOCK_INDEX => flags | crate::CHUNK_END,
_ => flags,
},
);
}
for word_index in 0..cv.len() {
output.0[word_index][output_offset + chunk_index] = cv[word_index];
}
}
}
pub fn hash_parents(cvs: &mut TransposedVectors, num_cvs: usize, key: &[u32; 8], flags: u8) {
// Note that there may be an odd number of children. If there's a leftover child, it gets
// appended to the outputs by the caller. We will not overwrite it.
let num_parents = num_cvs / 2;
todo!()
}
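// Illustrative sketch only (not part of this commit): one way the todo!() above might be
// filled in. Each parent block is the concatenation of the two child CVs in columns 2i and
// 2i+1, compressed with the PARENT flag and a counter of 0; the resulting parent CV
// overwrites column i. The function name is hypothetical.
#[allow(dead_code)]
fn hash_parents_sketch(cvs: &mut TransposedVectors, num_cvs: usize, key: &[u32; 8], flags: u8) {
    let num_parents = num_cvs / 2;
    for i in 0..num_parents {
        // Serialize the left child (column 2i) and the right child (column 2i+1) into a
        // 64-byte parent block, one little-endian word at a time.
        let mut block = [0u8; BLOCK_LEN];
        for w in 0..8 {
            block[4 * w..][..4].copy_from_slice(&cvs.0[w][2 * i].to_le_bytes());
            block[4 * (w + 8)..][..4].copy_from_slice(&cvs.0[w][2 * i + 1].to_le_bytes());
        }
        let mut cv = *key;
        compress_in_place(&mut cv, &block, BLOCK_LEN as u8, 0, flags | crate::PARENT);
        // Write the parent CV transposed into column i.
        for w in 0..8 {
            cvs.0[w][i] = cv[w];
        }
    }
}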
pub fn xof(
block: &[u8; BLOCK_LEN],
block_len: u8,
cv: &[u32; 8],
mut counter: u64,
flags: u8,
mut out: &mut [u8],
) {
while !out.is_empty() {
let block_output = compress_xof(cv, block, block_len, counter, flags);
let take = cmp::min(BLOCK_LEN, out.len());
out[..take].copy_from_slice(&block_output[..take]);
out = &mut out[take..];
counter += 1;
}
}
pub fn xof_xor(
block: &[u8; BLOCK_LEN],
block_len: u8,
cv: &[u32; 8],
mut counter: u64,
flags: u8,
mut out: &mut [u8],
) {
while !out.is_empty() {
let block_output = compress_xof(cv, block, block_len, counter, flags);
let take = cmp::min(BLOCK_LEN, out.len());
for i in 0..take {
out[i] ^= block_output[i];
}
out = &mut out[take..];
counter += 1;
}
}
pub fn universal_hash(mut input: &[u8], key: &[u32; 8], mut counter: u64) -> [u8; BLOCK_LEN] {
let flags = crate::KEYED_HASH | crate::CHUNK_START | crate::CHUNK_END | crate::ROOT;
let mut result = [0u8; BLOCK_LEN];
while input.len() > BLOCK_LEN {
let block_output = compress_xof(
key,
&input[..BLOCK_LEN].try_into().unwrap(),
BLOCK_LEN as u8,
counter,
flags,
);
for i in 0..BLOCK_LEN {
result[i] ^= block_output[i];
}
input = &input[BLOCK_LEN..];
counter += 1;
}
let mut final_block = [0u8; BLOCK_LEN];
final_block[..input.len()].copy_from_slice(input);
let final_output = compress_xof(key, &final_block, input.len() as u8, counter, flags);
for i in 0..BLOCK_LEN {
result[i] ^= final_output[i];
}
result
}
#[cfg(test)]
pub mod test {
use super::*;
// This is basically testing the portable implementation against itself,
// but it also checks that compress_in_place and compress_xof are
// consistent. And there are tests against the reference implementation and
// against hardcoded test vectors elsewhere.
// These are basically testing the portable implementation against itself, but we also check
// that compress_in_place and compress_xof are consistent. And there are tests against the
// reference implementation and against hardcoded test vectors elsewhere.
#[test]
fn test_compress() {
crate::test::test_compress_fn(compress_in_place, compress_xof);
@@ -195,4 +300,14 @@ pub mod test {
fn test_hash_many() {
crate::test::test_hash_many_fn(hash_many, hash_many);
}
#[test]
fn test_xof_and_xor() {
crate::test::test_xof_and_xor_fns(xof, xof_xor);
}
#[test]
fn test_universal_hash() {
crate::test::test_universal_hash_fn(universal_hash);
}
}

View File

@@ -1,6 +1,7 @@
use crate::{CVBytes, CVWords, IncrementCounter, BLOCK_LEN, CHUNK_LEN, OUT_LEN};
use arrayref::array_ref;
use arrayvec::ArrayVec;
use core::cmp;
use core::usize;
use rand::prelude::*;
@@ -51,6 +52,13 @@ pub const TEST_KEY_WORDS: CVWords = [
1952540791, 1752440947, 1816469605, 1752394102, 1919907616, 1868963940, 1919295602, 1684956521,
];
// Test a few different initial counter values.
// - 0: The base case.
// - i32::MAX: *No* overflow. But carry bugs in tricky SIMD code can screw this up, if you XOR
// when you're supposed to ANDNOT...
// - u32::MAX: The low word of the counter overflows for all inputs except the first.
const INITIAL_COUNTERS: &[u64] = &[0, i32::MAX as u64, u32::MAX as u64];
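// A concrete illustration of the u32::MAX case (a hypothetical example test, not part of
// this commit): incrementing the counter past u32::MAX wraps the low word to zero and
// carries into the high word, which SIMD implementations must handle explicitly.
#[test]
fn test_counter_words_carry_example() {
    assert_eq!(crate::counter_low(u32::MAX as u64), u32::MAX);
    assert_eq!(crate::counter_high(u32::MAX as u64), 0);
    assert_eq!(crate::counter_low(u32::MAX as u64 + 1), 0);
    assert_eq!(crate::counter_high(u32::MAX as u64 + 1), 1);
}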
// Paint the input with a repeating byte pattern. We use a cycle length of 251,
// because that's the largest prime number less than 256. This makes it
unlikely that swapping any two adjacent input blocks or chunks will give the
@@ -111,13 +119,7 @@ pub fn test_hash_many_fn(
hash_many_chunks_fn: HashManyFn<[u8; CHUNK_LEN]>,
hash_many_parents_fn: HashManyFn<[u8; 2 * OUT_LEN]>,
) {
// Test a few different initial counter values.
// - 0: The base case.
// - u32::MAX: The low word of the counter overflows for all inputs except the first.
// - i32::MAX: *No* overflow. But carry bugs in tricky SIMD code can screw this up, if you XOR
// when you're supposed to ANDNOT...
let initial_counters = [0, u32::MAX as u64, i32::MAX as u64];
for counter in initial_counters {
for &counter in INITIAL_COUNTERS {
#[cfg(feature = "std")]
dbg!(counter);
@@ -206,6 +208,200 @@ pub fn test_hash_many_fn(
}
}
// Both xof() and xof_xor() have this signature.
type XofFn = unsafe fn(
block: &[u8; BLOCK_LEN],
block_len: u8,
cv: &[u32; 8],
counter: u64,
flags: u8,
out: &mut [u8],
);
pub fn test_xof_and_xor_fns(target_xof: XofFn, target_xof_xor: XofFn) {
// 31 (16 + 8 + 4 + 2 + 1) outputs
const NUM_OUTPUTS: usize = 31;
let different_flags = [
crate::CHUNK_START | crate::CHUNK_END | crate::ROOT,
crate::PARENT | crate::ROOT | crate::KEYED_HASH,
];
for input_len in [0, 1, BLOCK_LEN] {
let mut input_block = [0u8; BLOCK_LEN];
crate::test::paint_test_input(&mut input_block[..input_len]);
for output_len in [0, 1, BLOCK_LEN, BLOCK_LEN + 1, BLOCK_LEN * NUM_OUTPUTS] {
let mut test_output_buf = [0xff; BLOCK_LEN * NUM_OUTPUTS];
for &counter in INITIAL_COUNTERS {
for flags in different_flags {
let mut expected_output_buf = [0xff; BLOCK_LEN * NUM_OUTPUTS];
crate::portable::xof(
&input_block,
input_len as u8,
&TEST_KEY_WORDS,
counter,
flags,
&mut expected_output_buf[..output_len],
);
unsafe {
target_xof(
&input_block,
input_len as u8,
&TEST_KEY_WORDS,
counter,
flags,
&mut test_output_buf[..output_len],
);
}
assert_eq!(
expected_output_buf[..output_len],
test_output_buf[..output_len],
);
// Make sure unsafe implementations don't write past the end of the output. This shouldn't
// be possible in the portable implementation, which is all safe code, but it could happen
// in others.
assert!(test_output_buf[output_len..].iter().all(|&b| b == 0xff));
// The first XOR cancels out the output.
unsafe {
target_xof_xor(
&input_block,
input_len as u8,
&TEST_KEY_WORDS,
counter,
flags,
&mut test_output_buf[..output_len],
);
}
assert!(test_output_buf[..output_len].iter().all(|&b| b == 0));
assert!(test_output_buf[output_len..].iter().all(|&b| b == 0xff));
// The second XOR restores the output.
unsafe {
target_xof_xor(
&input_block,
input_len as u8,
&TEST_KEY_WORDS,
counter,
flags,
&mut test_output_buf[..output_len],
);
}
assert_eq!(
expected_output_buf[..output_len],
test_output_buf[..output_len],
);
assert!(test_output_buf[output_len..].iter().all(|&b| b == 0xff));
}
}
}
}
}
#[test]
fn test_compare_reference_impl_xof() {
const NUM_OUTPUTS: usize = 31;
let input = b"hello world";
let mut input_block = [0; BLOCK_LEN];
input_block[..input.len()].copy_from_slice(input);
let mut reference_output_buf = [0; BLOCK_LEN * NUM_OUTPUTS];
let mut reference_hasher = reference_impl::Hasher::new_keyed(&TEST_KEY);
reference_hasher.update(input);
reference_hasher.finalize(&mut reference_output_buf);
for output_len in [0, 1, BLOCK_LEN, BLOCK_LEN + 1, BLOCK_LEN * NUM_OUTPUTS] {
let mut test_output_buf = [0; BLOCK_LEN * NUM_OUTPUTS];
crate::platform::Platform::detect().xof(
&input_block,
input.len() as u8,
&TEST_KEY_WORDS,
0,
crate::KEYED_HASH | crate::CHUNK_START | crate::CHUNK_END | crate::ROOT,
&mut test_output_buf[..output_len],
);
assert_eq!(
reference_output_buf[..output_len],
test_output_buf[..output_len],
);
// Make sure unsafe implementations don't write past the end of the output. This shouldn't
// be possible in the portable implementation, which is all safe code, but it could happen
// in others.
assert!(test_output_buf[output_len..].iter().all(|&b| b == 0));
// Do it again starting from block 1.
if output_len >= BLOCK_LEN {
crate::platform::Platform::detect().xof(
&input_block,
input.len() as u8,
&TEST_KEY_WORDS,
1,
crate::KEYED_HASH | crate::CHUNK_START | crate::CHUNK_END | crate::ROOT,
&mut test_output_buf[..output_len - BLOCK_LEN],
);
assert_eq!(
reference_output_buf[BLOCK_LEN..output_len],
test_output_buf[..output_len - BLOCK_LEN],
);
}
}
}
type UniversalHashFn = unsafe fn(input: &[u8], key: &[u32; 8], counter: u64) -> [u8; BLOCK_LEN];
pub fn test_universal_hash_fn(target_fn: UniversalHashFn) {
// 31 (16 + 8 + 4 + 2 + 1) inputs
const NUM_INPUTS: usize = 31;
let mut input_buf = [0; BLOCK_LEN * NUM_INPUTS];
crate::test::paint_test_input(&mut input_buf);
for len in [0, 1, BLOCK_LEN, BLOCK_LEN + 1, input_buf.len()] {
for &counter in INITIAL_COUNTERS {
let portable_output =
crate::portable::universal_hash(&input_buf[..len], &TEST_KEY_WORDS, counter);
let test_output = unsafe { target_fn(&input_buf[..len], &TEST_KEY_WORDS, counter) };
assert_eq!(portable_output, test_output);
}
}
}
fn reference_impl_universal_hash(input: &[u8], key: &[u8; crate::KEY_LEN]) -> [u8; BLOCK_LEN] {
// The reference_impl doesn't support XOF seeking, so we have to materialize an entire extended
// output to seek to a block.
const MAX_BLOCKS: usize = 31;
assert!(input.len() / BLOCK_LEN <= MAX_BLOCKS);
let mut output_buffer: [u8; BLOCK_LEN * MAX_BLOCKS] = [0u8; BLOCK_LEN * MAX_BLOCKS];
let mut result = [0u8; BLOCK_LEN];
let mut i = 0;
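// Loop at least once, so that an empty input still contributes one (zero-length) block.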
while i == 0 || i < input.len() {
let block_len = cmp::min(input.len() - i, BLOCK_LEN);
let mut reference_hasher = reference_impl::Hasher::new_keyed(key);
reference_hasher.update(&input[i..i + block_len]);
reference_hasher.finalize(&mut output_buffer);
for (result_byte, output_byte) in result
.iter_mut()
.zip(output_buffer[i..i + BLOCK_LEN].iter())
{
*result_byte ^= *output_byte;
}
i += BLOCK_LEN;
}
result
}
#[test]
fn test_compare_reference_impl_universal_hash() {
const NUM_INPUTS: usize = 31;
let mut input_buf = [0; BLOCK_LEN * NUM_INPUTS];
crate::test::paint_test_input(&mut input_buf);
for len in [0, 1, BLOCK_LEN, BLOCK_LEN + 1, input_buf.len()] {
let reference_output = reference_impl_universal_hash(&input_buf[..len], &TEST_KEY);
let test_output = crate::platform::Platform::detect().universal_hash(
&input_buf[..len],
&TEST_KEY_WORDS,
0,
);
assert_eq!(reference_output, test_output);
}
}
#[test]
fn test_key_bytes_equal_key_words() {
assert_eq!(