1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-05-13 11:36:09 +02:00

Compare commits

...

3 Commits

Author SHA1 Message Date
David CARLIER c273deae35
Merge 3289c75996 into 4ec3be8bfa 2024-04-06 12:03:35 +08:00
Jack O'Connor 4ec3be8bfa format the state matrix better in reference_impl.rs 2024-03-20 15:44:05 -07:00
David Carlier 3289c75996 c api, detect NEON support on Linux via auxiliary vector. 2022-03-19 22:38:01 +00:00
2 changed files with 35 additions and 20 deletions

View File

@ -13,6 +13,12 @@
#else
#undef IS_X86 /* Unimplemented! */
#endif
#else
#if defined(_MSC_VER)
//
#elif defined(__linux__)
#include <sys/auxv.h>
#endif
#endif
#if !defined(BLAKE3_ATOMICS)
@ -88,7 +94,17 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
#endif
#if defined(IS_AARCH64)
/*
 * Reports whether NEON (Advanced SIMD) can be used at runtime.
 *
 * Returns 1 when the feature is available and 0 otherwise.
 *
 * On Linux the answer is read from the AT_HWCAP entry of the auxiliary
 * vector. On non-Linux targets this function unconditionally reports 1.
 */
static int has_neon(void) {
#if defined(__linux__)
  /* Bit 1 of AT_HWCAP. NOTE(review): this should correspond to HWCAP_ASIMD
   * on AArch64 Linux -- confirm against <asm/hwcap.h>. */
  const unsigned long asimd_bit = 1UL << 1;
  /* Normalize to a strict 0/1 result instead of leaking the raw mask value. */
  return (getauxval(AT_HWCAP) & asimd_bit) != 0;
#else
  return 1;
#endif
}
#endif
enum cpu_feature {
#if defined(IS_X86)
SSE2 = 1 << 0,
SSSE3 = 1 << 1,
SSE41 = 1 << 2,
@ -96,6 +112,9 @@ enum cpu_feature {
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
#else
NEON = 1 << 0,
#endif
/* ... */
UNDEFINED = 1 << 30
};
@ -156,8 +175,10 @@ static
ATOMIC_STORE(g_cpu_features, features);
return features;
#else
/* How to detect NEON? */
return 0;
enum cpu_feature features = 0;
if (has_neon())
features |= NEON;
return features;
#endif
}
}
@ -262,9 +283,12 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
#endif
#if BLAKE3_USE_NEON == 1
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
const enum cpu_feature features = get_cpu_features();
if (features & NEON) {
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
return;
return;
}
#endif
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,

View File

@ -78,23 +78,14 @@ fn compress(
block_len: u32,
flags: u32,
) -> [u32; 16] {
let counter_low = counter as u32;
let counter_high = (counter >> 32) as u32;
#[rustfmt::skip]
let mut state = [
chaining_value[0],
chaining_value[1],
chaining_value[2],
chaining_value[3],
chaining_value[4],
chaining_value[5],
chaining_value[6],
chaining_value[7],
IV[0],
IV[1],
IV[2],
IV[3],
counter as u32,
(counter >> 32) as u32,
block_len,
flags,
chaining_value[0], chaining_value[1], chaining_value[2], chaining_value[3],
chaining_value[4], chaining_value[5], chaining_value[6], chaining_value[7],
IV[0], IV[1], IV[2], IV[3],
counter_low, counter_high, block_len, flags,
];
let mut block = *block_words;