mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-05-10 08:36:18 +02:00
clang-format
This commit is contained in:
parent
92d421dea1
commit
087d72e08f
25
c/blake3.c
25
c/blake3.c
|
@ -82,8 +82,8 @@ INLINE output_t make_output(const uint32_t input_cv[8],
|
|||
INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {
|
||||
uint32_t cv_words[8];
|
||||
memcpy(cv_words, self->input_cv, 32);
|
||||
blake3_compress_in_place(cv_words, self->block, self->block_len, self->counter,
|
||||
self->flags);
|
||||
blake3_compress_in_place(cv_words, self->block, self->block_len,
|
||||
self->counter, self->flags);
|
||||
memcpy(cv, cv_words, 32);
|
||||
}
|
||||
|
||||
|
@ -93,7 +93,7 @@ INLINE void output_root_bytes(const output_t *self, uint8_t *out,
|
|||
uint8_t wide_buf[64];
|
||||
while (out_len > 0) {
|
||||
blake3_compress_xof(self->input_cv, self->block, self->block_len,
|
||||
output_block_counter, self->flags | ROOT, wide_buf);
|
||||
output_block_counter, self->flags | ROOT, wide_buf);
|
||||
size_t memcpy_len;
|
||||
if (out_len > 64) {
|
||||
memcpy_len = 64;
|
||||
|
@ -114,9 +114,9 @@ INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
|
|||
input += take;
|
||||
input_len -= take;
|
||||
if (input_len > 0) {
|
||||
blake3_compress_in_place(self->cv, self->buf, BLAKE3_BLOCK_LEN,
|
||||
self->chunk_counter,
|
||||
self->flags | chunk_state_maybe_start_flag(self));
|
||||
blake3_compress_in_place(
|
||||
self->cv, self->buf, BLAKE3_BLOCK_LEN, self->chunk_counter,
|
||||
self->flags | chunk_state_maybe_start_flag(self));
|
||||
self->blocks_compressed += 1;
|
||||
self->buf_len = 0;
|
||||
memset(self->buf, 0, BLAKE3_BLOCK_LEN);
|
||||
|
@ -124,8 +124,9 @@ INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
|
|||
}
|
||||
|
||||
while (input_len > BLAKE3_BLOCK_LEN) {
|
||||
blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN, self->chunk_counter,
|
||||
self->flags | chunk_state_maybe_start_flag(self));
|
||||
blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN,
|
||||
self->chunk_counter,
|
||||
self->flags | chunk_state_maybe_start_flag(self));
|
||||
self->blocks_compressed += 1;
|
||||
input += BLAKE3_BLOCK_LEN;
|
||||
input_len -= BLAKE3_BLOCK_LEN;
|
||||
|
@ -208,7 +209,7 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
|
|||
// std::vector<uint8_t> v;
|
||||
// blake3_hasher_update(&hasher, v.data(), v.size());
|
||||
if (input_len == 0) {
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
||||
const uint8_t *input_bytes = (const uint8_t *)input;
|
||||
|
@ -252,8 +253,8 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
|
|||
num_chunks += 1;
|
||||
}
|
||||
blake3_hash_many(chunks, num_chunks, BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN,
|
||||
self->key, self->chunk.chunk_counter, true, self->chunk.flags,
|
||||
CHUNK_START, CHUNK_END, out);
|
||||
self->key, self->chunk.chunk_counter, true,
|
||||
self->chunk.flags, CHUNK_START, CHUNK_END, out);
|
||||
for (size_t chunk_index = 0; chunk_index < num_chunks; chunk_index++) {
|
||||
// The chunk state is empty here, but it stores the counter of the next
|
||||
// chunk hash we need to push. Use that counter, and then move it forward.
|
||||
|
@ -285,7 +286,7 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
|
|||
// std::vector<uint8_t> v;
|
||||
// blake3_hasher_finalize(&hasher, v.data(), v.size());
|
||||
if (out_len == 0) {
|
||||
return;
|
||||
return;
|
||||
}
|
||||
|
||||
// If the subtree stack is empty, then the current chunk is the root.
|
||||
|
|
|
@ -213,7 +213,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
|
|||
out[13] = loadu(&inputs[5][block_offset + 1 * sizeof(__m256i)]);
|
||||
out[14] = loadu(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
|
||||
out[15] = loadu(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
|
||||
for(size_t i = 0; i < 8; ++i) {
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
|
||||
}
|
||||
transpose_vecs(&out[0]);
|
||||
|
@ -301,10 +301,10 @@ void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
|
|||
uint8_t flags_end, uint8_t *out);
|
||||
#else
|
||||
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
|
||||
size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, bool increment_counter,
|
||||
uint8_t flags, uint8_t flags_start,
|
||||
uint8_t flags_end, uint8_t *out);
|
||||
size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, bool increment_counter,
|
||||
uint8_t flags, uint8_t flags_start,
|
||||
uint8_t flags_end, uint8_t *out);
|
||||
#endif
|
||||
|
||||
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
|
||||
|
@ -327,6 +327,7 @@ void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
|
|||
increment_counter, flags, flags_start, flags_end, out);
|
||||
#else
|
||||
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end, out);
|
||||
increment_counter, flags, flags_start, flags_end,
|
||||
out);
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -467,7 +467,7 @@ INLINE void transpose_msg_vecs4(const uint8_t *const *inputs,
|
|||
out[13] = loadu_128(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
|
||||
out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
|
||||
out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
|
||||
for(size_t i = 0; i < 4; ++i) {
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
|
||||
}
|
||||
transpose_vecs_128(&out[0]);
|
||||
|
@ -723,7 +723,7 @@ INLINE void transpose_msg_vecs8(const uint8_t *const *inputs,
|
|||
out[13] = loadu_256(&inputs[5][block_offset + 1 * sizeof(__m256i)]);
|
||||
out[14] = loadu_256(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
|
||||
out[15] = loadu_256(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
|
||||
for(size_t i = 0; i < 8; ++i) {
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
|
||||
}
|
||||
transpose_vecs_256(&out[0]);
|
||||
|
@ -1036,7 +1036,7 @@ INLINE void transpose_msg_vecs16(const uint8_t *const *inputs,
|
|||
out[13] = loadu_512(&inputs[13][block_offset]);
|
||||
out[14] = loadu_512(&inputs[14][block_offset]);
|
||||
out[15] = loadu_512(&inputs[15][block_offset]);
|
||||
for(size_t i = 0; i < 16; ++i) {
|
||||
for (size_t i = 0; i < 16; ++i) {
|
||||
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
|
||||
}
|
||||
transpose_vecs_512(out);
|
||||
|
|
|
@ -1,11 +1,12 @@
|
|||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "blake3.h"
|
||||
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
|
||||
#define IS_X86
|
||||
#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || \
|
||||
defined(_M_X64)
|
||||
#define IS_X86
|
||||
#endif
|
||||
|
||||
#if defined(__arm__)
|
||||
|
@ -22,7 +23,6 @@
|
|||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
// Declarations for implementation-specific functions.
|
||||
void blake3_compress_in_place_portable(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
|
@ -40,7 +40,6 @@ void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
|
|||
uint8_t flags, uint8_t flags_start,
|
||||
uint8_t flags_end, uint8_t *out);
|
||||
|
||||
|
||||
#if defined(IS_X86)
|
||||
#if !defined(BLAKE3_NO_SSE41)
|
||||
void blake3_compress_in_place_sse41(uint32_t cv[8],
|
||||
|
@ -56,7 +55,7 @@ void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
|
|||
uint64_t counter, bool increment_counter,
|
||||
uint8_t flags, uint8_t flags_start,
|
||||
uint8_t flags_end, uint8_t *out);
|
||||
#endif
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_AVX2)
|
||||
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
|
||||
size_t blocks, const uint32_t key[8],
|
||||
|
@ -70,7 +69,6 @@ void blake3_compress_in_place_avx512(uint32_t cv[8],
|
|||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags);
|
||||
|
||||
|
||||
void blake3_compress_xof_avx512(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
|
@ -93,39 +91,44 @@ void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
|
|||
#endif
|
||||
|
||||
#if defined(IS_X86)
|
||||
static uint64_t xgetbv()
|
||||
{
|
||||
static uint64_t xgetbv() {
|
||||
#if defined(_MSC_VER)
|
||||
return _xgetbv(0);
|
||||
return _xgetbv(0);
|
||||
#else
|
||||
uint32_t eax=0, edx=0;
|
||||
__asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
return ((uint64_t)edx << 32) | eax;
|
||||
uint32_t eax = 0, edx = 0;
|
||||
__asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
|
||||
return ((uint64_t)edx << 32) | eax;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void cpuid(uint32_t out[4], uint32_t id)
|
||||
{
|
||||
static void cpuid(uint32_t out[4], uint32_t id) {
|
||||
#if defined(_MSC_VER)
|
||||
__cpuid((int*)out, id);
|
||||
__cpuid((int *)out, id);
|
||||
#else
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
__asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id));
|
||||
__asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx"
|
||||
: "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3])
|
||||
: "a"(id));
|
||||
#else
|
||||
__asm__ __volatile__("cpuid\n" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id));
|
||||
__asm__ __volatile__("cpuid\n"
|
||||
: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
|
||||
: "a"(id));
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid)
|
||||
{
|
||||
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
|
||||
#if defined(_MSC_VER)
|
||||
__cpuidex((int*)out, id, sid);
|
||||
__cpuidex((int *)out, id, sid);
|
||||
#else
|
||||
#if defined(__i386__) || defined(_M_IX86)
|
||||
__asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id), "c"(sid));
|
||||
__asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx"
|
||||
: "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3])
|
||||
: "a"(id), "c"(sid));
|
||||
#else
|
||||
__asm__ __volatile__("cpuid\n" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id), "c"(sid));
|
||||
__asm__ __volatile__("cpuid\n"
|
||||
: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
|
||||
: "a"(id), "c"(sid));
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
@ -133,152 +136,155 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid)
|
|||
#endif
|
||||
|
||||
enum cpu_feature {
|
||||
SSE2 = 1 << 0,
|
||||
SSSE3 = 1 << 1,
|
||||
SSE41 = 1 << 2,
|
||||
AVX = 1 << 3,
|
||||
AVX2 = 1 << 4,
|
||||
AVX512F = 1 << 5,
|
||||
AVX512VL = 1 << 6,
|
||||
/* ... */
|
||||
UNDEFINED = 1 << 30
|
||||
SSE2 = 1 << 0,
|
||||
SSSE3 = 1 << 1,
|
||||
SSE41 = 1 << 2,
|
||||
AVX = 1 << 3,
|
||||
AVX2 = 1 << 4,
|
||||
AVX512F = 1 << 5,
|
||||
AVX512VL = 1 << 6,
|
||||
/* ... */
|
||||
UNDEFINED = 1 << 30
|
||||
};
|
||||
|
||||
#if !defined(BLAKE3_TESTING)
|
||||
static /* Allow the variable to be controlled manually for testing */
|
||||
#endif
|
||||
enum cpu_feature g_cpu_features = UNDEFINED;
|
||||
enum cpu_feature g_cpu_features = UNDEFINED;
|
||||
|
||||
#if !defined(BLAKE3_TESTING)
|
||||
static
|
||||
static
|
||||
#endif
|
||||
enum cpu_feature get_cpu_features()
|
||||
{
|
||||
|
||||
if( g_cpu_features != UNDEFINED ) {
|
||||
return g_cpu_features;
|
||||
} else {
|
||||
#if defined(IS_X86)
|
||||
uint32_t regs[4] = {0};
|
||||
uint32_t * eax = ®s[0], * ebx = ®s[1], * ecx = ®s[2], * edx = ®s[3];
|
||||
(void)edx;
|
||||
enum cpu_feature features = 0;
|
||||
cpuid(regs, 0);
|
||||
const int max_id = *eax;
|
||||
cpuid(regs, 1);
|
||||
#if defined(__amd64__) || defined(_M_X64)
|
||||
features |= SSE2;
|
||||
#else
|
||||
if(*edx & (1UL << 26))
|
||||
features |= SSE2;
|
||||
#endif
|
||||
if(*ecx & (1UL << 0))
|
||||
features |= SSSE3;
|
||||
if(*ecx & (1UL << 19))
|
||||
features |= SSE41;
|
||||
enum cpu_feature
|
||||
get_cpu_features() {
|
||||
|
||||
if( *ecx & (1UL << 27) ) { // OSXSAVE
|
||||
const uint64_t mask = xgetbv();
|
||||
if( (mask & 6) == 6 ) { // SSE and AVX states
|
||||
if(*ecx & (1UL << 28))
|
||||
features |= AVX;
|
||||
if(max_id >= 7) {
|
||||
cpuidex(regs, 7, 0);
|
||||
if( *ebx & (1UL << 5) )
|
||||
features |= AVX2;
|
||||
if( (mask & 224) == 224 ) { // Opmask, ZMM_Hi256, Hi16_Zmm
|
||||
if( *ebx & (1UL << 31) )
|
||||
features |= AVX512VL;
|
||||
if(*ebx & (1UL << 16))
|
||||
features |= AVX512F;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
g_cpu_features = features;
|
||||
return features;
|
||||
#elif defined(IS_ARM)
|
||||
/* How to detect NEON? */
|
||||
return 0;
|
||||
if (g_cpu_features != UNDEFINED) {
|
||||
return g_cpu_features;
|
||||
} else {
|
||||
#if defined(IS_X86)
|
||||
uint32_t regs[4] = {0};
|
||||
uint32_t *eax = ®s[0], *ebx = ®s[1], *ecx = ®s[2], *edx = ®s[3];
|
||||
(void)edx;
|
||||
enum cpu_feature features = 0;
|
||||
cpuid(regs, 0);
|
||||
const int max_id = *eax;
|
||||
cpuid(regs, 1);
|
||||
#if defined(__amd64__) || defined(_M_X64)
|
||||
features |= SSE2;
|
||||
#else
|
||||
return 0;
|
||||
if (*edx & (1UL << 26))
|
||||
features |= SSE2;
|
||||
#endif
|
||||
if (*ecx & (1UL << 0))
|
||||
features |= SSSE3;
|
||||
if (*ecx & (1UL << 19))
|
||||
features |= SSE41;
|
||||
|
||||
if (*ecx & (1UL << 27)) { // OSXSAVE
|
||||
const uint64_t mask = xgetbv();
|
||||
if ((mask & 6) == 6) { // SSE and AVX states
|
||||
if (*ecx & (1UL << 28))
|
||||
features |= AVX;
|
||||
if (max_id >= 7) {
|
||||
cpuidex(regs, 7, 0);
|
||||
if (*ebx & (1UL << 5))
|
||||
features |= AVX2;
|
||||
if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
|
||||
if (*ebx & (1UL << 31))
|
||||
features |= AVX512VL;
|
||||
if (*ebx & (1UL << 16))
|
||||
features |= AVX512F;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
g_cpu_features = features;
|
||||
return features;
|
||||
#elif defined(IS_ARM)
|
||||
/* How to detect NEON? */
|
||||
return 0;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
void blake3_compress_in_place(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags)
|
||||
{
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
void blake3_compress_in_place(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags) {
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
#if defined(IS_X86)
|
||||
#if !defined(BLAKE3_NO_AVX512)
|
||||
if(features & AVX512VL) {
|
||||
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
|
||||
return;
|
||||
}
|
||||
if (features & AVX512VL) {
|
||||
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE41)
|
||||
if(features & SSE41) {
|
||||
blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
|
||||
return;
|
||||
}
|
||||
if (features & SSE41) {
|
||||
blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
|
||||
blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
|
||||
}
|
||||
|
||||
void blake3_compress_xof(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags, uint8_t out[64])
|
||||
{
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter, uint8_t flags,
|
||||
uint8_t out[64]) {
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
#if defined(IS_X86)
|
||||
#if !defined(BLAKE3_NO_AVX512)
|
||||
if(features & AVX512VL) {
|
||||
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
|
||||
return;
|
||||
}
|
||||
if (features & AVX512VL) {
|
||||
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE41)
|
||||
if(features & SSE41) {
|
||||
blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
|
||||
return;
|
||||
}
|
||||
if (features & SSE41) {
|
||||
blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
|
||||
blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
|
||||
}
|
||||
|
||||
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
|
||||
size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, bool increment_counter,
|
||||
uint8_t flags, uint8_t flags_start,
|
||||
uint8_t flags_end, uint8_t *out)
|
||||
{
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
size_t blocks, const uint32_t key[8], uint64_t counter,
|
||||
bool increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
|
||||
const enum cpu_feature features = get_cpu_features();
|
||||
#if defined(IS_X86)
|
||||
#if !defined(BLAKE3_NO_AVX512)
|
||||
if(features & AVX512F) {
|
||||
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out);
|
||||
return;
|
||||
}
|
||||
if (features & AVX512F) {
|
||||
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end,
|
||||
out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_AVX2)
|
||||
if(features & AVX2) {
|
||||
blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out);
|
||||
return;
|
||||
}
|
||||
if (features & AVX2) {
|
||||
blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end,
|
||||
out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#if !defined(BLAKE3_NO_SSE41)
|
||||
if(features & SSE41) {
|
||||
blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out);
|
||||
return;
|
||||
}
|
||||
if (features & SSE41) {
|
||||
blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end,
|
||||
out);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out);
|
||||
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
|
||||
increment_counter, flags, flags_start, flags_end,
|
||||
out);
|
||||
}
|
||||
|
||||
|
|
|
@ -428,7 +428,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
|
|||
out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
|
||||
out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
|
||||
out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
|
||||
for(size_t i = 0; i < 4; ++i) {
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
|
||||
}
|
||||
transpose_vecs(&out[0]);
|
||||
|
|
37
c/main.c
37
c/main.c
|
@ -51,15 +51,15 @@ int parse_key(char *hex_key, uint8_t out[BLAKE3_KEY_LEN]) {
|
|||
|
||||
/* A little repetition here */
|
||||
enum cpu_feature {
|
||||
SSE2 = 1 << 0,
|
||||
SSSE3 = 1 << 1,
|
||||
SSE41 = 1 << 2,
|
||||
AVX = 1 << 3,
|
||||
AVX2 = 1 << 4,
|
||||
AVX512F = 1 << 5,
|
||||
AVX512VL = 1 << 6,
|
||||
/* ... */
|
||||
UNDEFINED = 1 << 30
|
||||
SSE2 = 1 << 0,
|
||||
SSSE3 = 1 << 1,
|
||||
SSE41 = 1 << 2,
|
||||
AVX = 1 << 3,
|
||||
AVX2 = 1 << 4,
|
||||
AVX512F = 1 << 5,
|
||||
AVX512VL = 1 << 6,
|
||||
/* ... */
|
||||
UNDEFINED = 1 << 30
|
||||
};
|
||||
|
||||
extern enum cpu_feature g_cpu_features;
|
||||
|
@ -80,7 +80,8 @@ int main(int argc, char **argv) {
|
|||
unsigned long long out_len_ll = strtoull(argv[2], &endptr, 10);
|
||||
// TODO: There are so many possible error conditions for parsing a
|
||||
// non-negative size_t...I probably missed something.
|
||||
if (errno != 0 || out_len > SIZE_MAX || endptr == argv[2] || *endptr != 0) {
|
||||
if (errno != 0 || out_len > SIZE_MAX || endptr == argv[2] ||
|
||||
*endptr != 0) {
|
||||
fprintf(stderr, "Bad length argument.\n");
|
||||
return 1;
|
||||
}
|
||||
|
@ -111,12 +112,12 @@ int main(int argc, char **argv) {
|
|||
assert(buf != NULL);
|
||||
size_t buf_len = 0;
|
||||
while (1) {
|
||||
size_t n = fread(&buf[buf_len], 1, buf_capacity - buf_len, stdin);
|
||||
if (n == 0) {
|
||||
break;
|
||||
}
|
||||
buf_len += n;
|
||||
assert(buf_len < buf_capacity);
|
||||
size_t n = fread(&buf[buf_len], 1, buf_capacity - buf_len, stdin);
|
||||
if (n == 0) {
|
||||
break;
|
||||
}
|
||||
buf_len += n;
|
||||
assert(buf_len < buf_capacity);
|
||||
}
|
||||
|
||||
const int mask = get_cpu_features();
|
||||
|
@ -125,7 +126,7 @@ int main(int argc, char **argv) {
|
|||
fprintf(stderr, "Testing 0x%08X\n", feature);
|
||||
g_cpu_features = feature;
|
||||
blake3_hasher hasher;
|
||||
switch(mode) {
|
||||
switch (mode) {
|
||||
case HASH_MODE:
|
||||
blake3_hasher_init(&hasher);
|
||||
break;
|
||||
|
@ -155,6 +156,6 @@ int main(int argc, char **argv) {
|
|||
printf("\n");
|
||||
free(out);
|
||||
feature = (feature - mask) & mask;
|
||||
} while(feature != 0);
|
||||
} while (feature != 0);
|
||||
return 0;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue