mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-04-28 05:25:13 +02:00
Compare commits
7 Commits
37a16d9955
...
73e3e540bd
Author | SHA1 | Date | |
---|---|---|---|
divinity76 | 73e3e540bd | ||
Jack O'Connor | 4ec3be8bfa | ||
divinity76 | 059ad2d922 | ||
divinity76 | f9b332c61c | ||
divinity76 | 21459753ca | ||
divinity76 | 349f8300f9 | ||
hanshenrik | ba665ca9f1 |
|
@ -96,6 +96,7 @@ enum cpu_feature {
|
|||
AVX2 = 1 << 4,
|
||||
AVX512F = 1 << 5,
|
||||
AVX512VL = 1 << 6,
|
||||
ARM_NEON = 1 << 7,
|
||||
/* ... */
|
||||
UNDEFINED = 1 << 30
|
||||
};
|
||||
|
@ -155,8 +156,19 @@ static
|
|||
}
|
||||
ATOMIC_STORE(g_cpu_features, features);
|
||||
return features;
|
||||
#elif defined(__aarch64__)
|
||||
uint64_t id_aa64pfr0_el1;
|
||||
__asm__ ("mrs %0, ID_AA64PFR0_EL1" : "=r" (id_aa64pfr0_el1));
|
||||
if(((id_aa64pfr0_el1 >> 20) & (1<<0 | 1<<1 | 1<<2 | 1 << 3)) != 15) {
|
||||
// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/ID-AA64PFR0-EL1--AArch64-Processor-Feature-Register-0?lang=en
|
||||
// AdvSIMD field (bits [23:20]): 15 (0b1111) means Advanced SIMD (NEON) is not implemented; 0 means NEON is present without half-precision (float16) arithmetic; 1 means NEON with float16 arithmetic is present.
|
||||
features = ARM_NEON;
|
||||
} else {
|
||||
features = 0;
|
||||
}
|
||||
ATOMIC_STORE(g_cpu_features, features);
|
||||
return features;
|
||||
#else
|
||||
/* How to detect NEON? */
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
@ -260,11 +272,19 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
|||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if BLAKE3_USE_NEON == 1
|
||||
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
return;
|
||||
#elif __aarch64__
|
||||
{
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
if(features & ARM_NEON) {
|
||||
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
|
||||
|
@ -300,6 +320,11 @@ size_t blake3_simd_degree(void) {
|
|||
#endif
|
||||
#if BLAKE3_USE_NEON == 1
|
||||
return 4;
|
||||
#elif defined(__aarch64__)
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
if(features & ARM_NEON) {
|
||||
return 4;
|
||||
}
|
||||
#endif
|
||||
return 1;
|
||||
}
|
||||
|
|
1
c/main.c
1
c/main.c
|
@ -63,6 +63,7 @@ enum cpu_feature {
|
|||
AVX2 = 1 << 4,
|
||||
AVX512F = 1 << 5,
|
||||
AVX512VL = 1 << 6,
|
||||
ARM_NEON = 1 << 7,
|
||||
/* ... */
|
||||
UNDEFINED = 1 << 30
|
||||
};
|
||||
|
|
|
@ -78,23 +78,14 @@ fn compress(
|
|||
block_len: u32,
|
||||
flags: u32,
|
||||
) -> [u32; 16] {
|
||||
let counter_low = counter as u32;
|
||||
let counter_high = (counter >> 32) as u32;
|
||||
#[rustfmt::skip]
|
||||
let mut state = [
|
||||
chaining_value[0],
|
||||
chaining_value[1],
|
||||
chaining_value[2],
|
||||
chaining_value[3],
|
||||
chaining_value[4],
|
||||
chaining_value[5],
|
||||
chaining_value[6],
|
||||
chaining_value[7],
|
||||
IV[0],
|
||||
IV[1],
|
||||
IV[2],
|
||||
IV[3],
|
||||
counter as u32,
|
||||
(counter >> 32) as u32,
|
||||
block_len,
|
||||
flags,
|
||||
chaining_value[0], chaining_value[1], chaining_value[2], chaining_value[3],
|
||||
chaining_value[4], chaining_value[5], chaining_value[6], chaining_value[7],
|
||||
IV[0], IV[1], IV[2], IV[3],
|
||||
counter_low, counter_high, block_len, flags,
|
||||
];
|
||||
let mut block = *block_words;
|
||||
|
||||
|
|
Loading…
Reference in New Issue