1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-04-27 16:55:04 +02:00
This commit is contained in:
divinity76 2024-03-22 02:31:09 +05:30 committed by GitHub
commit 73e3e540bd
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 28 additions and 2 deletions

View File

@ -96,6 +96,7 @@ enum cpu_feature {
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
ARM_NEON = 1 << 7,
/* ... */
UNDEFINED = 1 << 30
};
@ -155,8 +156,19 @@ static
}
ATOMIC_STORE(g_cpu_features, features);
return features;
#elif defined(__aarch64__)
uint64_t id_aa64pfr0_el1;
__asm__ ("mrs %0, ID_AA64PFR0_EL1" : "=r" (id_aa64pfr0_el1));
if(((id_aa64pfr0_el1 >> 20) & (1<<0 | 1<<1 | 1<<2 | 1 << 3)) != 15) {
// https://developer.arm.com/documentation/ddi0595/2021-12/AArch64-Registers/ID-AA64PFR0-EL1--AArch64-Processor-Feature-Register-0?lang=en
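// AdvSIMD field, bits [23:20]: 0b1111 (15) means Advanced SIMD (NEON) is not implemented;
// 0b0000 means NEON is implemented without half-precision (FP16) arithmetic;
// 0b0001 means NEON is implemented with FP16 arithmetic.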
features = ARM_NEON;
} else {
features = 0;
}
ATOMIC_STORE(g_cpu_features, features);
return features;
#else
/* TODO: how to detect NEON on this target? Until then, report no features. */
return 0;
#endif
}
@ -260,11 +272,19 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
}
#endif
#endif
#if BLAKE3_USE_NEON == 1
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
return;
#elif __aarch64__
{
const enum cpu_feature features = get_cpu_features();
if(features & ARM_NEON) {
blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
return;
}
}
#endif
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
@ -300,6 +320,11 @@ size_t blake3_simd_degree(void) {
#endif
#if BLAKE3_USE_NEON == 1
return 4;
#elif defined(__aarch64__)
const enum cpu_feature features = get_cpu_features();
if(features & ARM_NEON) {
return 4;
}
#endif
return 1;
}

View File

@ -63,6 +63,7 @@ enum cpu_feature {
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
ARM_NEON = 1 << 7,
/* ... */
UNDEFINED = 1 << 30
};