diff --git a/c/blake3_dispatch.c b/c/blake3_dispatch.c index b498058..b51bd45 100644 --- a/c/blake3_dispatch.c +++ b/c/blake3_dispatch.c @@ -12,6 +12,12 @@ #else #error "Unimplemented!" #endif +#else +#if defined(_MSC_VER) +// +#elif defined(__linux__) +#include +#endif #endif #define MAYBE_UNUSED(x) (void)((x)) @@ -61,7 +67,17 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) { #endif +#if defined(IS_AARCH64) +static int has_neon(void) { +#if defined(__linux__) + return getauxval(AT_HWCAP) & (1 << 1); +#endif + return 1; +} +#endif + enum cpu_feature { +#if defined(IS_X86) SSE2 = 1 << 0, SSSE3 = 1 << 1, SSE41 = 1 << 2, @@ -69,6 +85,9 @@ enum cpu_feature { AVX2 = 1 << 4, AVX512F = 1 << 5, AVX512VL = 1 << 6, +#else + NEON = 1 << 0, +#endif /* ... */ UNDEFINED = 1 << 30 }; @@ -127,8 +146,10 @@ static g_cpu_features = features; return features; #else - /* How to detect NEON? */ - return 0; + enum cpu_feature features = 0; + if (has_neon()) + features |= NEON; + return features; #endif } } @@ -233,9 +254,12 @@ void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs, #endif #if BLAKE3_USE_NEON == 1 - blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter, + const enum cpu_feature features = get_cpu_features(); + if (features & NEON) { + blake3_hash_many_neon(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out); - return; + return; + } #endif blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,