2019-12-13 00:21:17 +01:00
|
|
|
use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN};
|
2019-12-10 20:20:09 +01:00
|
|
|
use arrayref::{array_mut_ref, array_ref};
|
2019-12-03 19:27:28 +01:00
|
|
|
|
2019-12-08 05:43:45 +01:00
|
|
|
// Upper bound on the SIMD degree of any backend that can be compiled in for
// the current target. `Platform::simd_degree()` debug-asserts that it never
// exceeds this value.
cfg_if::cfg_if! {
    if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
        cfg_if::cfg_if! {
            if #[cfg(blake3_avx512_ffi)] {
                pub const MAX_SIMD_DEGREE: usize = 16;
            } else {
                pub const MAX_SIMD_DEGREE: usize = 8;
            }
        }
    } else if #[cfg(blake3_neon)] {
        pub const MAX_SIMD_DEGREE: usize = 4;
    } else if #[cfg(blake3_wasm32_simd)] {
        // WebAssembly SIMD vectors are 128 bits wide, i.e. four u32 lanes.
        // This agrees with the degree reported by `Platform::simd_degree()`
        // for WASM32_SIMD and with MAX_SIMD_DEGREE_OR_2 on this target.
        pub const MAX_SIMD_DEGREE: usize = 4;
    } else {
        pub const MAX_SIMD_DEGREE: usize = 1;
    }
}
|
2019-12-03 19:27:28 +01:00
|
|
|
|
2019-12-04 00:54:51 +01:00
|
|
|
// There are some places where we want a static size that's equal to the
|
|
|
|
// MAX_SIMD_DEGREE, but also at least 2. Constant contexts aren't currently
|
|
|
|
// allowed to use cmp::max, so we have to hardcode this additional constant
|
|
|
|
// value. Get rid of this once cmp::max is a const fn.
|
2019-12-08 05:43:45 +01:00
|
|
|
cfg_if::cfg_if! {
|
2019-12-12 20:40:56 +01:00
|
|
|
if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
|
|
|
|
cfg_if::cfg_if! {
|
2020-03-31 18:36:41 +02:00
|
|
|
if #[cfg(blake3_avx512_ffi)] {
|
2020-03-28 22:27:31 +01:00
|
|
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
|
2020-03-31 18:36:41 +02:00
|
|
|
} else {
|
|
|
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
|
2019-12-12 20:40:56 +01:00
|
|
|
}
|
|
|
|
}
|
2021-10-07 13:23:36 +02:00
|
|
|
} else if #[cfg(blake3_neon)] {
|
2019-12-08 05:43:45 +01:00
|
|
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
|
2023-09-09 21:45:31 +02:00
|
|
|
} else if #[cfg(blake3_wasm32_simd)] {
|
|
|
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
|
2019-12-08 05:43:45 +01:00
|
|
|
} else {
|
|
|
|
pub const MAX_SIMD_DEGREE_OR_2: usize = 2;
|
|
|
|
}
|
|
|
|
}
|
2019-12-04 00:54:51 +01:00
|
|
|
|
2019-12-03 19:27:28 +01:00
|
|
|
/// The set of hashing backends that can be selected for the current build.
///
/// `Portable` always exists; every other variant is compiled in only for the
/// target architecture / build configuration named in its `cfg` attribute.
#[derive(Clone, Copy, Debug)]
pub enum Platform {
    /// Backend implemented by the `portable` module; available on every target.
    Portable,
    /// x86 / x86_64 SSE2 backend.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    SSE2,
    /// x86 / x86_64 SSE4.1 backend.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    SSE41,
    /// x86 / x86_64 AVX2 backend.
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    AVX2,
    /// x86 / x86_64 AVX-512 backend; requires the `blake3_avx512_ffi` cfg.
    #[cfg(all(
        blake3_avx512_ffi,
        any(target_arch = "x86", target_arch = "x86_64")
    ))]
    AVX512,
    /// NEON backend; requires the `blake3_neon` cfg.
    #[cfg(blake3_neon)]
    NEON,
    /// WebAssembly SIMD backend; requires the `blake3_wasm32_simd` cfg.
    #[cfg(blake3_wasm32_simd)]
    #[allow(non_camel_case_types)]
    WASM32_SIMD,
}
|
|
|
|
|
|
|
|
impl Platform {
|
2019-12-09 03:58:32 +01:00
|
|
|
#[allow(unreachable_code)]
|
2019-12-03 19:27:28 +01:00
|
|
|
pub fn detect() -> Self {
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
|
|
{
|
2020-03-31 18:36:41 +02:00
|
|
|
#[cfg(blake3_avx512_ffi)]
|
2019-12-08 05:43:45 +01:00
|
|
|
{
|
|
|
|
if avx512_detected() {
|
|
|
|
return Platform::AVX512;
|
|
|
|
}
|
|
|
|
}
|
2019-12-03 19:44:30 +01:00
|
|
|
if avx2_detected() {
|
2019-12-03 19:27:28 +01:00
|
|
|
return Platform::AVX2;
|
|
|
|
}
|
2019-12-03 19:44:30 +01:00
|
|
|
if sse41_detected() {
|
2019-12-03 19:27:28 +01:00
|
|
|
return Platform::SSE41;
|
|
|
|
}
|
2020-08-15 00:02:06 +02:00
|
|
|
if sse2_detected() {
|
|
|
|
return Platform::SSE2;
|
|
|
|
}
|
2019-12-03 19:27:28 +01:00
|
|
|
}
|
2020-03-28 22:27:31 +01:00
|
|
|
// We don't use dynamic feature detection for NEON. If the "neon"
|
2019-12-08 05:43:45 +01:00
|
|
|
// feature is on, NEON is assumed to be supported.
|
2021-10-07 13:23:36 +02:00
|
|
|
#[cfg(blake3_neon)]
|
2019-12-08 05:43:45 +01:00
|
|
|
{
|
|
|
|
return Platform::NEON;
|
|
|
|
}
|
2023-09-09 21:45:31 +02:00
|
|
|
#[cfg(blake3_wasm32_simd)]
|
|
|
|
{
|
|
|
|
return Platform::WASM32_SIMD;
|
|
|
|
}
|
2019-12-03 19:27:28 +01:00
|
|
|
Platform::Portable
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn simd_degree(&self) -> usize {
|
|
|
|
let degree = match self {
|
|
|
|
Platform::Portable => 1,
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2020-08-15 00:02:06 +02:00
|
|
|
Platform::SSE2 => 4,
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2019-12-03 19:27:28 +01:00
|
|
|
Platform::SSE41 => 4,
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
|
|
Platform::AVX2 => 8,
|
2020-03-31 18:36:41 +02:00
|
|
|
#[cfg(blake3_avx512_ffi)]
|
2019-12-12 20:40:56 +01:00
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2019-12-08 05:43:45 +01:00
|
|
|
Platform::AVX512 => 16,
|
2021-10-07 13:23:36 +02:00
|
|
|
#[cfg(blake3_neon)]
|
2019-12-08 05:43:45 +01:00
|
|
|
Platform::NEON => 4,
|
2023-09-09 21:45:31 +02:00
|
|
|
#[cfg(blake3_wasm32_simd)]
|
|
|
|
// TODO is it 8 or 4??? SSE4 has 4...
|
|
|
|
Platform::WASM32_SIMD => 4,
|
2019-12-03 19:27:28 +01:00
|
|
|
};
|
|
|
|
debug_assert!(degree <= MAX_SIMD_DEGREE);
|
|
|
|
degree
|
|
|
|
}
|
|
|
|
|
2020-02-11 20:13:30 +01:00
|
|
|
pub fn compress_in_place(
|
2019-12-03 19:27:28 +01:00
|
|
|
&self,
|
2019-12-10 20:20:09 +01:00
|
|
|
cv: &mut CVWords,
|
|
|
|
block: &[u8; BLOCK_LEN],
|
|
|
|
block_len: u8,
|
2019-12-13 00:21:17 +01:00
|
|
|
counter: u64,
|
2019-12-10 20:20:09 +01:00
|
|
|
flags: u8,
|
|
|
|
) {
|
|
|
|
match self {
|
2019-12-13 00:21:17 +01:00
|
|
|
Platform::Portable => portable::compress_in_place(cv, block, block_len, counter, flags),
|
2019-12-10 20:20:09 +01:00
|
|
|
// Safe because detect() checked for platform support.
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2020-08-15 00:02:06 +02:00
|
|
|
Platform::SSE2 => unsafe {
|
|
|
|
crate::sse2::compress_in_place(cv, block, block_len, counter, flags)
|
|
|
|
},
|
|
|
|
// Safe because detect() checked for platform support.
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2019-12-10 20:20:09 +01:00
|
|
|
Platform::SSE41 | Platform::AVX2 => unsafe {
|
2020-02-11 20:13:30 +01:00
|
|
|
crate::sse41::compress_in_place(cv, block, block_len, counter, flags)
|
2019-12-10 20:20:09 +01:00
|
|
|
},
|
|
|
|
// Safe because detect() checked for platform support.
|
2020-03-31 18:36:41 +02:00
|
|
|
#[cfg(blake3_avx512_ffi)]
|
2019-12-12 20:40:56 +01:00
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2019-12-10 20:20:09 +01:00
|
|
|
Platform::AVX512 => unsafe {
|
2020-02-11 20:13:30 +01:00
|
|
|
crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
|
2019-12-10 20:20:09 +01:00
|
|
|
},
|
|
|
|
// No NEON compress_in_place() implementation yet.
|
2021-10-07 13:23:36 +02:00
|
|
|
#[cfg(blake3_neon)]
|
2019-12-13 00:21:17 +01:00
|
|
|
Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags),
|
2023-09-09 21:45:31 +02:00
|
|
|
// Safe because is compiled for wasm32
|
|
|
|
#[cfg(blake3_wasm32_simd)]
|
|
|
|
Platform::WASM32_SIMD => unsafe {
|
|
|
|
crate::wasm32_simd::compress_in_place(cv, block, block_len, counter, flags)
|
|
|
|
},
|
2019-12-10 20:20:09 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-11 20:13:30 +01:00
|
|
|
pub fn compress_xof(
|
2019-12-10 20:20:09 +01:00
|
|
|
&self,
|
|
|
|
cv: &CVWords,
|
2019-12-03 19:27:28 +01:00
|
|
|
block: &[u8; BLOCK_LEN],
|
|
|
|
block_len: u8,
|
2019-12-13 00:21:17 +01:00
|
|
|
counter: u64,
|
2019-12-06 21:32:20 +01:00
|
|
|
flags: u8,
|
2019-12-03 21:18:08 +01:00
|
|
|
) -> [u8; 64] {
|
2019-12-03 19:27:28 +01:00
|
|
|
match self {
|
2019-12-13 00:21:17 +01:00
|
|
|
Platform::Portable => portable::compress_xof(cv, block, block_len, counter, flags),
|
2019-12-03 19:27:28 +01:00
|
|
|
// Safe because detect() checked for platform support.
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2020-08-15 00:02:06 +02:00
|
|
|
Platform::SSE2 => unsafe {
|
|
|
|
crate::sse2::compress_xof(cv, block, block_len, counter, flags)
|
|
|
|
},
|
|
|
|
// Safe because detect() checked for platform support.
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2019-12-03 19:27:28 +01:00
|
|
|
Platform::SSE41 | Platform::AVX2 => unsafe {
|
2020-02-11 20:13:30 +01:00
|
|
|
crate::sse41::compress_xof(cv, block, block_len, counter, flags)
|
2019-12-03 19:27:28 +01:00
|
|
|
},
|
2019-12-08 05:43:45 +01:00
|
|
|
// Safe because detect() checked for platform support.
|
2020-03-31 18:36:41 +02:00
|
|
|
#[cfg(blake3_avx512_ffi)]
|
2019-12-12 20:40:56 +01:00
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2019-12-10 20:20:09 +01:00
|
|
|
Platform::AVX512 => unsafe {
|
2020-02-11 20:13:30 +01:00
|
|
|
crate::avx512::compress_xof(cv, block, block_len, counter, flags)
|
2019-12-10 20:20:09 +01:00
|
|
|
},
|
|
|
|
// No NEON compress_xof() implementation yet.
|
2021-10-07 13:23:36 +02:00
|
|
|
#[cfg(blake3_neon)]
|
2019-12-13 00:21:17 +01:00
|
|
|
Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags),
|
2023-09-09 21:45:31 +02:00
|
|
|
#[cfg(blake3_wasm32_simd)]
|
|
|
|
// TODO Safe because compiled for wasm32
|
|
|
|
Platform::WASM32_SIMD => unsafe {
|
|
|
|
crate::wasm32_simd::compress_xof(cv, block, block_len, counter, flags)
|
|
|
|
},
|
2019-12-03 19:27:28 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-12-03 22:23:19 +01:00
|
|
|
// IMPLEMENTATION NOTE
|
|
|
|
// ===================
|
|
|
|
// hash_many() applies two optimizations. The critically important
|
|
|
|
// optimization is the high-performance parallel SIMD hashing mode,
|
|
|
|
// described in detail in the spec. This more than doubles throughput per
|
|
|
|
// thread. Another optimization is keeping the state vectors transposed
|
|
|
|
// from block to block within a chunk. When state vectors are transposed
|
|
|
|
// after every block, there's a small but measurable performance loss.
|
|
|
|
// Compressing chunks with a dedicated loop avoids this.
|
|
|
|
|
2021-05-18 18:28:29 +02:00
|
|
|
pub fn hash_many<const N: usize>(
|
2019-12-03 19:27:28 +01:00
|
|
|
&self,
|
2021-05-18 18:28:29 +02:00
|
|
|
inputs: &[&[u8; N]],
|
2019-12-10 20:20:09 +01:00
|
|
|
key: &CVWords,
|
2019-12-13 00:21:17 +01:00
|
|
|
counter: u64,
|
|
|
|
increment_counter: IncrementCounter,
|
2019-12-06 21:32:20 +01:00
|
|
|
flags: u8,
|
|
|
|
flags_start: u8,
|
|
|
|
flags_end: u8,
|
2019-12-03 19:27:28 +01:00
|
|
|
out: &mut [u8],
|
|
|
|
) {
|
|
|
|
match self {
|
|
|
|
Platform::Portable => portable::hash_many(
|
|
|
|
inputs,
|
|
|
|
key,
|
2019-12-13 00:21:17 +01:00
|
|
|
counter,
|
|
|
|
increment_counter,
|
2019-12-06 21:32:20 +01:00
|
|
|
flags,
|
|
|
|
flags_start,
|
|
|
|
flags_end,
|
2019-12-03 19:27:28 +01:00
|
|
|
out,
|
|
|
|
),
|
|
|
|
// Safe because detect() checked for platform support.
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2020-08-15 00:02:06 +02:00
|
|
|
Platform::SSE2 => unsafe {
|
|
|
|
crate::sse2::hash_many(
|
|
|
|
inputs,
|
|
|
|
key,
|
|
|
|
counter,
|
|
|
|
increment_counter,
|
|
|
|
flags,
|
|
|
|
flags_start,
|
|
|
|
flags_end,
|
|
|
|
out,
|
|
|
|
)
|
|
|
|
},
|
|
|
|
// Safe because detect() checked for platform support.
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2019-12-03 19:27:28 +01:00
|
|
|
Platform::SSE41 => unsafe {
|
2020-02-11 20:13:30 +01:00
|
|
|
crate::sse41::hash_many(
|
2019-12-03 19:27:28 +01:00
|
|
|
inputs,
|
|
|
|
key,
|
2019-12-13 00:21:17 +01:00
|
|
|
counter,
|
|
|
|
increment_counter,
|
2019-12-06 21:32:20 +01:00
|
|
|
flags,
|
|
|
|
flags_start,
|
|
|
|
flags_end,
|
2019-12-03 19:27:28 +01:00
|
|
|
out,
|
|
|
|
)
|
|
|
|
},
|
|
|
|
// Safe because detect() checked for platform support.
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
|
|
Platform::AVX2 => unsafe {
|
2020-02-11 20:13:30 +01:00
|
|
|
crate::avx2::hash_many(
|
2019-12-03 19:27:28 +01:00
|
|
|
inputs,
|
|
|
|
key,
|
2019-12-13 00:21:17 +01:00
|
|
|
counter,
|
|
|
|
increment_counter,
|
2019-12-06 21:32:20 +01:00
|
|
|
flags,
|
|
|
|
flags_start,
|
|
|
|
flags_end,
|
2019-12-03 19:27:28 +01:00
|
|
|
out,
|
|
|
|
)
|
|
|
|
},
|
2019-12-08 05:43:45 +01:00
|
|
|
// Safe because detect() checked for platform support.
|
2020-03-31 18:36:41 +02:00
|
|
|
#[cfg(blake3_avx512_ffi)]
|
2019-12-12 20:40:56 +01:00
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
2019-12-08 05:43:45 +01:00
|
|
|
Platform::AVX512 => unsafe {
|
2020-02-11 20:13:30 +01:00
|
|
|
crate::avx512::hash_many(
|
2019-12-08 05:43:45 +01:00
|
|
|
inputs,
|
|
|
|
key,
|
2019-12-13 00:21:17 +01:00
|
|
|
counter,
|
|
|
|
increment_counter,
|
2019-12-08 05:43:45 +01:00
|
|
|
flags,
|
|
|
|
flags_start,
|
|
|
|
flags_end,
|
|
|
|
out,
|
|
|
|
)
|
|
|
|
},
|
2020-03-28 22:27:31 +01:00
|
|
|
// Assumed to be safe if the "neon" feature is on.
|
2021-10-07 13:23:36 +02:00
|
|
|
#[cfg(blake3_neon)]
|
2019-12-08 05:43:45 +01:00
|
|
|
Platform::NEON => unsafe {
|
2020-02-11 20:13:30 +01:00
|
|
|
crate::neon::hash_many(
|
2019-12-08 05:43:45 +01:00
|
|
|
inputs,
|
|
|
|
key,
|
2019-12-13 00:21:17 +01:00
|
|
|
counter,
|
|
|
|
increment_counter,
|
2019-12-08 05:43:45 +01:00
|
|
|
flags,
|
|
|
|
flags_start,
|
|
|
|
flags_end,
|
|
|
|
out,
|
|
|
|
)
|
|
|
|
},
|
2023-09-09 21:45:31 +02:00
|
|
|
// Assumed to be safe if the "wasm32_simd" feature is on.
|
|
|
|
#[cfg(blake3_wasm32_simd)]
|
|
|
|
Platform::WASM32_SIMD => unsafe {
|
|
|
|
crate::wasm32_simd::hash_many(
|
|
|
|
inputs,
|
|
|
|
key,
|
|
|
|
counter,
|
|
|
|
increment_counter,
|
|
|
|
flags,
|
|
|
|
flags_start,
|
|
|
|
flags_end,
|
|
|
|
out,
|
|
|
|
)
|
|
|
|
},
|
2019-12-03 19:27:28 +01:00
|
|
|
}
|
|
|
|
}
|
2020-02-11 20:13:30 +01:00
|
|
|
|
|
|
|
// Explicit platform constructors, for benchmarks.
|
|
|
|
|
|
|
|
pub fn portable() -> Self {
|
|
|
|
Self::Portable
|
|
|
|
}
|
|
|
|
|
2020-08-15 00:02:06 +02:00
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
|
|
pub fn sse2() -> Option<Self> {
|
|
|
|
if sse2_detected() {
|
|
|
|
Some(Self::SSE2)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-02-11 20:13:30 +01:00
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
|
|
pub fn sse41() -> Option<Self> {
|
|
|
|
if sse41_detected() {
|
|
|
|
Some(Self::SSE41)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
|
|
|
pub fn avx2() -> Option<Self> {
|
|
|
|
if avx2_detected() {
|
|
|
|
Some(Self::AVX2)
|
|
|
|
} else {
|
|
|
|
None
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-03-31 18:36:41 +02:00
|
|
|
/// Explicitly selects the AVX-512 backend, or returns `None` when
/// `avx512_detected()` reports that it is unavailable.
///
/// Only present when the AVX-512 backend is compiled in.
#[cfg(all(
    blake3_avx512_ffi,
    any(target_arch = "x86", target_arch = "x86_64")
))]
pub fn avx512() -> Option<Self> {
    avx512_detected().then(|| Platform::AVX512)
}
|
|
|
|
|
2021-10-07 13:23:36 +02:00
|
|
|
/// Explicitly selects the NEON backend.
///
/// Always returns `Some`: no runtime check is performed, and NEON is assumed
/// to be supported whenever the backend is compiled in.
#[cfg(blake3_neon)]
pub fn neon() -> Option<Self> {
    // Assumed to be safe if the "neon" feature is on.
    Some(Platform::NEON)
}
|
2023-09-09 21:45:31 +02:00
|
|
|
|
|
|
|
/// Explicitly selects the WebAssembly SIMD backend.
///
/// Always returns `Some`: no runtime check is performed, and SIMD support is
/// assumed whenever the backend is compiled in.
#[cfg(blake3_wasm32_simd)]
pub fn wasm32_simd() -> Option<Self> {
    // Assumed to be safe if the "wasm32_simd" feature is on.
    Some(Platform::WASM32_SIMD)
}
|
2019-12-03 19:27:28 +01:00
|
|
|
}
|
2019-12-03 19:44:30 +01:00
|
|
|
|
2019-12-08 05:43:45 +01:00
|
|
|
/// Reports whether the AVX-512 backend may be used.
///
/// Note that AVX-512 is divided into multiple featuresets, and we use two of
/// them, F and VL; both must be present.
///
/// Checks, in order: the `no_avx512` testing feature (forces `false`), the
/// compile-time target features, and finally (only with the `std` feature)
/// runtime CPU detection.
#[cfg(blake3_avx512_ffi)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
pub fn avx512_detected() -> bool {
    // A testing-only short-circuit.
    if cfg!(feature = "no_avx512") {
        return false;
    }
    // Static check, e.g. for building with target-cpu=native. `cfg!` is used
    // instead of a `#[cfg]` block containing a bare `return`, so the code
    // below stays reachable and no `unreachable_code` warning is produced
    // when the features are statically enabled.
    if cfg!(all(target_feature = "avx512f", target_feature = "avx512vl")) {
        return true;
    }
    // Dynamic check, if std is enabled.
    #[cfg(feature = "std")]
    {
        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
            return true;
        }
    }
    false
}
|
|
|
|
|
2019-12-03 19:44:30 +01:00
|
|
|
/// Reports whether the AVX2 backend may be used.
///
/// Checks, in order: the `no_avx2` testing feature (forces `false`), the
/// compile-time target feature, and finally (only with the `std` feature)
/// runtime CPU detection.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
pub fn avx2_detected() -> bool {
    // A testing-only short-circuit.
    if cfg!(feature = "no_avx2") {
        return false;
    }
    // Static check, e.g. for building with target-cpu=native. `cfg!` is used
    // instead of a `#[cfg]` block containing a bare `return`, so the code
    // below stays reachable and no `unreachable_code` warning is produced
    // when the feature is statically enabled.
    if cfg!(target_feature = "avx2") {
        return true;
    }
    // Dynamic check, if std is enabled.
    #[cfg(feature = "std")]
    {
        if is_x86_feature_detected!("avx2") {
            return true;
        }
    }
    false
}
|
|
|
|
|
|
|
|
/// Reports whether the SSE4.1 backend may be used.
///
/// Checks, in order: the `no_sse41` testing feature (forces `false`), the
/// compile-time target feature, and finally (only with the `std` feature)
/// runtime CPU detection.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
pub fn sse41_detected() -> bool {
    // A testing-only short-circuit.
    if cfg!(feature = "no_sse41") {
        return false;
    }
    // Static check, e.g. for building with target-cpu=native. `cfg!` is used
    // instead of a `#[cfg]` block containing a bare `return`, so the code
    // below stays reachable and no `unreachable_code` warning is produced
    // when the feature is statically enabled.
    if cfg!(target_feature = "sse4.1") {
        return true;
    }
    // Dynamic check, if std is enabled.
    #[cfg(feature = "std")]
    {
        if is_x86_feature_detected!("sse4.1") {
            return true;
        }
    }
    false
}
|
2019-12-10 20:20:09 +01:00
|
|
|
|
2020-08-15 00:02:06 +02:00
|
|
|
/// Reports whether the SSE2 backend may be used.
///
/// Checks, in order: the `no_sse2` testing feature (forces `false`), the
/// compile-time target feature, and finally (only with the `std` feature)
/// runtime CPU detection.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
#[allow(unreachable_code)]
pub fn sse2_detected() -> bool {
    // Testing-only escape hatch that pretends SSE2 is missing.
    let forced_off = cfg!(feature = "no_sse2");
    if forced_off {
        return false;
    }
    // Compile-time knowledge, e.g. when building with target-cpu=native.
    let statically_enabled = cfg!(target_feature = "sse2");
    if statically_enabled {
        return true;
    }
    // Runtime detection is only possible when std is enabled.
    #[cfg(feature = "std")]
    {
        if is_x86_feature_detected!("sse2") {
            return true;
        }
    }
    false
}
|
|
|
|
|
2019-12-10 20:20:09 +01:00
|
|
|
/// Decodes 32 bytes into eight `u32` words, reading each consecutive group
/// of four bytes as one little-endian word.
#[inline(always)]
pub fn words_from_le_bytes_32(bytes: &[u8; 32]) -> [u32; 8] {
    let mut out = [0u32; 8];
    // 32 bytes split into exactly eight 4-byte chunks, one per output word.
    for (word, chunk) in out.iter_mut().zip(bytes.chunks_exact(4)) {
        *word = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
    }
    out
}
|
|
|
|
|
|
|
|
/// Decodes 64 bytes into sixteen `u32` words, reading each consecutive group
/// of four bytes as one little-endian word.
#[inline(always)]
pub fn words_from_le_bytes_64(bytes: &[u8; 64]) -> [u32; 16] {
    let mut out = [0u32; 16];
    // 64 bytes split into exactly sixteen 4-byte chunks, one per output word.
    for (word, chunk) in out.iter_mut().zip(bytes.chunks_exact(4)) {
        *word = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
    }
    out
}
|
|
|
|
|
|
|
|
/// Encodes eight `u32` words as 32 bytes, writing each word as four
/// little-endian bytes in order.
#[inline(always)]
pub fn le_bytes_from_words_32(words: &[u32; 8]) -> [u8; 32] {
    let mut out = [0u8; 32];
    // One 4-byte output chunk per input word; the lengths match exactly.
    for (chunk, word) in out.chunks_exact_mut(4).zip(words.iter()) {
        chunk.copy_from_slice(&word.to_le_bytes());
    }
    out
}
|
|
|
|
|
|
|
|
/// Encodes sixteen `u32` words as 64 bytes, writing each word as four
/// little-endian bytes in order.
#[inline(always)]
pub fn le_bytes_from_words_64(words: &[u32; 16]) -> [u8; 64] {
    let mut out = [0u8; 64];
    // One 4-byte output chunk per input word; the lengths match exactly.
    for (chunk, word) in out.chunks_exact_mut(4).zip(words.iter()) {
        chunk.copy_from_slice(&word.to_le_bytes());
    }
    out
}
|