1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-05-11 22:06:06 +02:00

Runtime NEON detection

Tested on Oracle Cloud's cheapest ARM VPS (VM.Standard.A1.Flex).
This commit is contained in:
hanshenrik 2024-02-05 15:15:53 +01:00
parent 8fc36186b8
commit ba665ca9f1
2 changed files with 21 additions and 2 deletions

View File

@ -96,6 +96,7 @@ enum cpu_feature {
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
ARM_NEON = 1 << 7,
/* ... */
UNDEFINED = 1 << 30
};
@ -155,8 +156,17 @@ static
}
ATOMIC_STORE(g_cpu_features, features);
return features;
#elif defined(__aarch64__)
uint64_t id_aa64pfr0_el1;
__asm__ ("mrs %0, ID_AA64PFR0_EL1" : "=r" (id_aa64pfr0_el1));
if((id_aa64pfr0_el1 >> 20) & 0xF) {
features |= ARM_NEON;
} else {
features = 0;
}
ATOMIC_STORE(g_cpu_features, features);
return features;
#else
/* How to detect NEON? */
return 0;
#endif
}
@ -260,11 +270,19 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
}
#endif
#endif
#if BLAKE3_USE_NEON == 1
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
return;
#elif __aarch64__
{
const enum cpu_feature features = get_cpu_features();
if(features & ARM_NEON) {
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
return;
}
}
#endif
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,

View File

@ -63,6 +63,7 @@ enum cpu_feature {
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
ARM_NEON = 1 << 7,
/* ... */
UNDEFINED = 1 << 30
};