1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-04-28 05:25:13 +02:00

Compare commits

...

7 Commits

Author SHA1 Message Date
divinity76 73e3e540bd
Merge 059ad2d922 into 4ec3be8bfa 2024-03-22 02:31:09 +05:30
Jack O'Connor 4ec3be8bfa format the state matrix better in reference_impl.rs 2024-03-20 15:44:05 -07:00
divinity76 059ad2d922
forgot blake3_simd_degree() 2024-02-06 11:52:38 +01:00
divinity76 f9b332c61c
!= 15
https://github.com/BLAKE3-team/BLAKE3/pull/383#issuecomment-1927210402
2024-02-05 16:40:09 +01:00
divinity76 21459753ca
4 first bits ¯\_(ツ)_/¯ 2024-02-05 16:06:10 +01:00
divinity76 349f8300f9
forgot about UNDEFINED 2024-02-05 15:59:54 +01:00
hanshenrik ba665ca9f1 runtime neon detection
tested on Oracle Cloud's cheapest ARM VPS VM.Standard.A1.Flex
2024-02-05 15:15:53 +01:00
3 changed files with 35 additions and 18 deletions

View File

@ -96,6 +96,7 @@ enum cpu_feature {
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
ARM_NEON = 1 << 7,
/* ... */
UNDEFINED = 1 << 30
};
@ -155,8 +156,19 @@ static
}
ATOMIC_STORE(g_cpu_features, features);
return features;
#elif defined(__aarch64__)
uint64_t id_aa64pfr0_el1;
__asm__ ("mrs %0, ID_AA64PFR0_EL1" : "=r" (id_aa64pfr0_el1));
if(((id_aa64pfr0_el1 >> 20) & (1<<0 | 1<<1 | 1<<2 | 1 << 3)) != 15) {
// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/ID-AA64PFR0-EL1--AArch64-Processor-Feature-Register-0?lang=en
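// The 4-bit field at bits [23:20] is AdvSIMD: 0b1111 (15) means Advanced SIMD (NEON) is not implemented,
// 0 means NEON is present without half-precision (float16) arithmetic, and 1 means NEON with float16 arithmetic is present.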
features = ARM_NEON;
} else {
features = 0;
}
ATOMIC_STORE(g_cpu_features, features);
return features;
#else
/* How to detect NEON? */
return 0;
#endif
}
@ -260,11 +272,19 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
}
#endif
#endif
#if BLAKE3_USE_NEON == 1
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
return;
#elif __aarch64__
{
const enum cpu_feature features = get_cpu_features();
if(features & ARM_NEON) {
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
return;
}
}
#endif
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
@ -300,6 +320,11 @@ size_t blake3_simd_degree(void) {
#endif
#if BLAKE3_USE_NEON == 1
return 4;
#elif defined(__aarch64__)
const enum cpu_feature features = get_cpu_features();
if(features & ARM_NEON) {
return 4;
}
#endif
return 1;
}

View File

@ -63,6 +63,7 @@ enum cpu_feature {
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
ARM_NEON = 1 << 7,
/* ... */
UNDEFINED = 1 << 30
};

View File

@ -78,23 +78,14 @@ fn compress(
block_len: u32,
flags: u32,
) -> [u32; 16] {
let counter_low = counter as u32;
let counter_high = (counter >> 32) as u32;
#[rustfmt::skip]
let mut state = [
chaining_value[0],
chaining_value[1],
chaining_value[2],
chaining_value[3],
chaining_value[4],
chaining_value[5],
chaining_value[6],
chaining_value[7],
IV[0],
IV[1],
IV[2],
IV[3],
counter as u32,
(counter >> 32) as u32,
block_len,
flags,
chaining_value[0], chaining_value[1], chaining_value[2], chaining_value[3],
chaining_value[4], chaining_value[5], chaining_value[6], chaining_value[7],
IV[0], IV[1], IV[2], IV[3],
counter_low, counter_high, block_len, flags,
];
let mut block = *block_words;