mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-05-25 23:16:03 +02:00
d91f20dd29
Wire up basic functions and features for SSE2 support using the SSE4.1 version as a basis without implementing the SSE2 instructions yet. * Cargo.toml: add no_sse2 feature * benches/bench.rs: wire SSE2 benchmarks * build.rs: add SSE2 rust intrinsics and assembly builds * c/Makefile.testing: add SSE2 C and assembly targets * c/README.md: add SSE2 to C build instructions * c/blake3_c_rust_bindings/build.rs: add SSE2 C rust binding builds * c/blake3_c_rust_bindings/src/lib.rs: add SSE2 C rust bindings * c/blake3_dispatch.c: add SSE2 C dispatch * c/blake3_impl.h: add SSE2 C function prototypes * c/blake3_sse2.c: add SSE2 C intrinsic file starting with SSE4.1 version * c/blake3_sse2_x86-64_{unix.S,windows_gnu.S,windows_msvc.asm}: add SSE2 assembly files starting with SSE4.1 version * src/ffi_sse2.rs: add rust implementation using SSE2 C rust bindings * src/lib.rs: add SSE2 rust intrinsics and SSE2 C rust binding rust SSE2 module configurations * src/platform.rs: add SSE2 rust platform detection and dispatch * src/rust_sse2.rs: add SSE2 rust intrinsic file starting with SSE4.1 version * tools/instruction_set_support/src/main.rs: add SSE2 feature detection
115 lines
2.8 KiB
Rust
115 lines
2.8 KiB
Rust
use crate::{CVWords, IncrementCounter, BLOCK_LEN, OUT_LEN};
|
|
|
|
// Unsafe because this may only be called on platforms supporting SSE2.
|
|
pub unsafe fn compress_in_place(
|
|
cv: &mut CVWords,
|
|
block: &[u8; BLOCK_LEN],
|
|
block_len: u8,
|
|
counter: u64,
|
|
flags: u8,
|
|
) {
|
|
ffi::blake3_compress_in_place_sse2(cv.as_mut_ptr(), block.as_ptr(), block_len, counter, flags)
|
|
}
|
|
|
|
// Unsafe because this may only be called on platforms supporting SSE2.
|
|
pub unsafe fn compress_xof(
|
|
cv: &CVWords,
|
|
block: &[u8; BLOCK_LEN],
|
|
block_len: u8,
|
|
counter: u64,
|
|
flags: u8,
|
|
) -> [u8; 64] {
|
|
let mut out = [0u8; 64];
|
|
ffi::blake3_compress_xof_sse2(
|
|
cv.as_ptr(),
|
|
block.as_ptr(),
|
|
block_len,
|
|
counter,
|
|
flags,
|
|
out.as_mut_ptr(),
|
|
);
|
|
out
|
|
}
|
|
|
|
// Unsafe because this may only be called on platforms supporting SSE2.
|
|
pub unsafe fn hash_many<A: arrayvec::Array<Item = u8>>(
|
|
inputs: &[&A],
|
|
key: &CVWords,
|
|
counter: u64,
|
|
increment_counter: IncrementCounter,
|
|
flags: u8,
|
|
flags_start: u8,
|
|
flags_end: u8,
|
|
out: &mut [u8],
|
|
) {
|
|
// The Rust hash_many implementations do bounds checking on the `out`
|
|
// array, but the C implementations don't. Even though this is an unsafe
|
|
// function, assert the bounds here.
|
|
assert!(out.len() >= inputs.len() * OUT_LEN);
|
|
ffi::blake3_hash_many_sse2(
|
|
inputs.as_ptr() as *const *const u8,
|
|
inputs.len(),
|
|
A::CAPACITY / BLOCK_LEN,
|
|
key.as_ptr(),
|
|
counter,
|
|
increment_counter.yes(),
|
|
flags,
|
|
flags_start,
|
|
flags_end,
|
|
out.as_mut_ptr(),
|
|
)
|
|
}
|
|
|
|
/// Raw declarations of the C-ABI SSE2 entry points used by the wrappers in
/// this file.
///
/// Only the signatures live here; the definitions must be supplied at link
/// time (presumably by the C/assembly SSE2 sources the build script compiles
/// -- confirm against `build.rs`).
pub mod ffi {
    extern "C" {
        /// Foreign in-place compression routine; receives `cv` as a mutable
        /// pointer and `block` as a const pointer.
        pub fn blake3_compress_in_place_sse2(
            cv: *mut u32,
            block: *const u8,
            block_len: u8,
            counter: u64,
            flags: u8,
        );

        /// Foreign XOF compression routine; `cv` and `block` are const
        /// pointers and `out` is the mutable destination pointer.
        pub fn blake3_compress_xof_sse2(
            cv: *const u32,
            block: *const u8,
            block_len: u8,
            counter: u64,
            flags: u8,
            out: *mut u8,
        );

        /// Foreign multi-input hashing routine; `inputs` is an array of
        /// `num_inputs` byte pointers and `out` is the mutable destination
        /// pointer.
        pub fn blake3_hash_many_sse2(
            inputs: *const *const u8,
            num_inputs: usize,
            blocks: usize,
            key: *const u32,
            counter: u64,
            increment_counter: bool,
            flags: u8,
            flags_start: u8,
            flags_end: u8,
            out: *mut u8,
        );
    }
}
|
|
|
|
#[cfg(test)]
mod test {
    use super::*;

    /// Exercises the two compression wrappers through the crate's shared
    /// test helper, but only when SSE2 is detected at runtime.
    #[test]
    fn test_compress() {
        if crate::platform::sse2_detected() {
            crate::test::test_compress_fn(compress_in_place, compress_xof);
        }
    }

    /// Exercises `hash_many` through the crate's shared test helper, but only
    /// when SSE2 is detected at runtime. The same function is passed for both
    /// helper arguments.
    #[test]
    fn test_hash_many() {
        if crate::platform::sse2_detected() {
            crate::test::test_hash_many_fn(hash_many, hash_many);
        }
    }
}
|