1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-05-10 08:36:18 +02:00

clang-format

This commit is contained in:
Jack O'Connor 2020-01-21 16:03:27 -05:00
parent 92d421dea1
commit 087d72e08f
6 changed files with 180 additions and 171 deletions

View File

@ -82,8 +82,8 @@ INLINE output_t make_output(const uint32_t input_cv[8],
INLINE void output_chaining_value(const output_t *self, uint8_t cv[32]) {
uint32_t cv_words[8];
memcpy(cv_words, self->input_cv, 32);
blake3_compress_in_place(cv_words, self->block, self->block_len, self->counter,
self->flags);
blake3_compress_in_place(cv_words, self->block, self->block_len,
self->counter, self->flags);
memcpy(cv, cv_words, 32);
}
@ -93,7 +93,7 @@ INLINE void output_root_bytes(const output_t *self, uint8_t *out,
uint8_t wide_buf[64];
while (out_len > 0) {
blake3_compress_xof(self->input_cv, self->block, self->block_len,
output_block_counter, self->flags | ROOT, wide_buf);
output_block_counter, self->flags | ROOT, wide_buf);
size_t memcpy_len;
if (out_len > 64) {
memcpy_len = 64;
@ -114,9 +114,9 @@ INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
input += take;
input_len -= take;
if (input_len > 0) {
blake3_compress_in_place(self->cv, self->buf, BLAKE3_BLOCK_LEN,
self->chunk_counter,
self->flags | chunk_state_maybe_start_flag(self));
blake3_compress_in_place(
self->cv, self->buf, BLAKE3_BLOCK_LEN, self->chunk_counter,
self->flags | chunk_state_maybe_start_flag(self));
self->blocks_compressed += 1;
self->buf_len = 0;
memset(self->buf, 0, BLAKE3_BLOCK_LEN);
@ -124,8 +124,9 @@ INLINE void chunk_state_update(blake3_chunk_state *self, const uint8_t *input,
}
while (input_len > BLAKE3_BLOCK_LEN) {
blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN, self->chunk_counter,
self->flags | chunk_state_maybe_start_flag(self));
blake3_compress_in_place(self->cv, input, BLAKE3_BLOCK_LEN,
self->chunk_counter,
self->flags | chunk_state_maybe_start_flag(self));
self->blocks_compressed += 1;
input += BLAKE3_BLOCK_LEN;
input_len -= BLAKE3_BLOCK_LEN;
@ -208,7 +209,7 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
// std::vector<uint8_t> v;
// blake3_hasher_update(&hasher, v.data(), v.size());
if (input_len == 0) {
return;
return;
}
const uint8_t *input_bytes = (const uint8_t *)input;
@ -252,8 +253,8 @@ void blake3_hasher_update(blake3_hasher *self, const void *input,
num_chunks += 1;
}
blake3_hash_many(chunks, num_chunks, BLAKE3_CHUNK_LEN / BLAKE3_BLOCK_LEN,
self->key, self->chunk.chunk_counter, true, self->chunk.flags,
CHUNK_START, CHUNK_END, out);
self->key, self->chunk.chunk_counter, true,
self->chunk.flags, CHUNK_START, CHUNK_END, out);
for (size_t chunk_index = 0; chunk_index < num_chunks; chunk_index++) {
// The chunk state is empty here, but it stores the counter of the next
// chunk hash we need to push. Use that counter, and then move it forward.
@ -285,7 +286,7 @@ void blake3_hasher_finalize(const blake3_hasher *self, uint8_t *out,
// std::vector<uint8_t> v;
// blake3_hasher_finalize(&hasher, v.data(), v.size());
if (out_len == 0) {
return;
return;
}
// If the subtree stack is empty, then the current chunk is the root.

View File

@ -213,7 +213,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
out[13] = loadu(&inputs[5][block_offset + 1 * sizeof(__m256i)]);
out[14] = loadu(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
out[15] = loadu(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
for(size_t i = 0; i < 8; ++i) {
for (size_t i = 0; i < 8; ++i) {
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs(&out[0]);
@ -301,10 +301,10 @@ void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
uint8_t flags_end, uint8_t *out);
#else
void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out);
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out);
#endif
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
@ -327,6 +327,7 @@ void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
increment_counter, flags, flags_start, flags_end, out);
#else
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
increment_counter, flags, flags_start, flags_end,
out);
#endif
}

View File

@ -467,7 +467,7 @@ INLINE void transpose_msg_vecs4(const uint8_t *const *inputs,
out[13] = loadu_128(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
for(size_t i = 0; i < 4; ++i) {
for (size_t i = 0; i < 4; ++i) {
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs_128(&out[0]);
@ -723,7 +723,7 @@ INLINE void transpose_msg_vecs8(const uint8_t *const *inputs,
out[13] = loadu_256(&inputs[5][block_offset + 1 * sizeof(__m256i)]);
out[14] = loadu_256(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
out[15] = loadu_256(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
for(size_t i = 0; i < 8; ++i) {
for (size_t i = 0; i < 8; ++i) {
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs_256(&out[0]);
@ -1036,7 +1036,7 @@ INLINE void transpose_msg_vecs16(const uint8_t *const *inputs,
out[13] = loadu_512(&inputs[13][block_offset]);
out[14] = loadu_512(&inputs[14][block_offset]);
out[15] = loadu_512(&inputs[15][block_offset]);
for(size_t i = 0; i < 16; ++i) {
for (size_t i = 0; i < 16; ++i) {
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs_512(out);

View File

@ -1,11 +1,12 @@
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include "blake3.h"
#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || defined(_M_X64)
#define IS_X86
#if defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || \
defined(_M_X64)
#define IS_X86
#endif
#if defined(__arm__)
@ -22,7 +23,6 @@
#endif
#endif
// Declarations for implementation-specific functions.
void blake3_compress_in_place_portable(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
@ -40,7 +40,6 @@ void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out);
#if defined(IS_X86)
#if !defined(BLAKE3_NO_SSE41)
void blake3_compress_in_place_sse41(uint32_t cv[8],
@ -56,7 +55,7 @@ void blake3_hash_many_sse41(const uint8_t *const *inputs, size_t num_inputs,
uint64_t counter, bool increment_counter,
uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out);
#endif
#endif
#if !defined(BLAKE3_NO_AVX2)
void blake3_hash_many_avx2(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
@ -70,7 +69,6 @@ void blake3_compress_in_place_avx512(uint32_t cv[8],
uint8_t block_len, uint64_t counter,
uint8_t flags);
void blake3_compress_xof_avx512(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter,
@ -93,39 +91,44 @@ void blake3_hash_many_neon(const uint8_t *const *inputs, size_t num_inputs,
#endif
#if defined(IS_X86)
static uint64_t xgetbv()
{
static uint64_t xgetbv() {
#if defined(_MSC_VER)
return _xgetbv(0);
return _xgetbv(0);
#else
uint32_t eax=0, edx=0;
__asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
return ((uint64_t)edx << 32) | eax;
uint32_t eax = 0, edx = 0;
__asm__ __volatile__("xgetbv\n" : "=a"(eax), "=d"(edx) : "c"(0));
return ((uint64_t)edx << 32) | eax;
#endif
}
static void cpuid(uint32_t out[4], uint32_t id)
{
static void cpuid(uint32_t out[4], uint32_t id) {
#if defined(_MSC_VER)
__cpuid((int*)out, id);
__cpuid((int *)out, id);
#else
#if defined(__i386__) || defined(_M_IX86)
__asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id));
__asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx"
: "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3])
: "a"(id));
#else
__asm__ __volatile__("cpuid\n" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id));
__asm__ __volatile__("cpuid\n"
: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
: "a"(id));
#endif
#endif
}
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid)
{
static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid) {
#if defined(_MSC_VER)
__cpuidex((int*)out, id, sid);
__cpuidex((int *)out, id, sid);
#else
#if defined(__i386__) || defined(_M_IX86)
__asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx" : "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id), "c"(sid));
__asm__ __volatile__("pushl %%ebx\ncpuid\nmovl %%ebp, %%esi\npopl %%ebx"
: "=a"(out[0]), "=S"(out[1]), "=c"(out[2]), "=d"(out[3])
: "a"(id), "c"(sid));
#else
__asm__ __volatile__("cpuid\n" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(id), "c"(sid));
__asm__ __volatile__("cpuid\n"
: "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3])
: "a"(id), "c"(sid));
#endif
#endif
}
@ -133,152 +136,155 @@ static void cpuidex(uint32_t out[4], uint32_t id, uint32_t sid)
#endif
enum cpu_feature {
SSE2 = 1 << 0,
SSSE3 = 1 << 1,
SSE41 = 1 << 2,
AVX = 1 << 3,
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
/* ... */
UNDEFINED = 1 << 30
SSE2 = 1 << 0,
SSSE3 = 1 << 1,
SSE41 = 1 << 2,
AVX = 1 << 3,
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
/* ... */
UNDEFINED = 1 << 30
};
#if !defined(BLAKE3_TESTING)
static /* Allow the variable to be controlled manually for testing */
#endif
enum cpu_feature g_cpu_features = UNDEFINED;
enum cpu_feature g_cpu_features = UNDEFINED;
#if !defined(BLAKE3_TESTING)
static
static
#endif
enum cpu_feature get_cpu_features()
{
if( g_cpu_features != UNDEFINED ) {
return g_cpu_features;
} else {
#if defined(IS_X86)
uint32_t regs[4] = {0};
uint32_t * eax = &regs[0], * ebx = &regs[1], * ecx = &regs[2], * edx = &regs[3];
(void)edx;
enum cpu_feature features = 0;
cpuid(regs, 0);
const int max_id = *eax;
cpuid(regs, 1);
#if defined(__amd64__) || defined(_M_X64)
features |= SSE2;
#else
if(*edx & (1UL << 26))
features |= SSE2;
#endif
if(*ecx & (1UL << 0))
features |= SSSE3;
if(*ecx & (1UL << 19))
features |= SSE41;
enum cpu_feature
get_cpu_features() {
if( *ecx & (1UL << 27) ) { // OSXSAVE
const uint64_t mask = xgetbv();
if( (mask & 6) == 6 ) { // SSE and AVX states
if(*ecx & (1UL << 28))
features |= AVX;
if(max_id >= 7) {
cpuidex(regs, 7, 0);
if( *ebx & (1UL << 5) )
features |= AVX2;
if( (mask & 224) == 224 ) { // Opmask, ZMM_Hi256, Hi16_Zmm
if( *ebx & (1UL << 31) )
features |= AVX512VL;
if(*ebx & (1UL << 16))
features |= AVX512F;
}
}
}
}
g_cpu_features = features;
return features;
#elif defined(IS_ARM)
/* How to detect NEON? */
return 0;
if (g_cpu_features != UNDEFINED) {
return g_cpu_features;
} else {
#if defined(IS_X86)
uint32_t regs[4] = {0};
uint32_t *eax = &regs[0], *ebx = &regs[1], *ecx = &regs[2], *edx = &regs[3];
(void)edx;
enum cpu_feature features = 0;
cpuid(regs, 0);
const int max_id = *eax;
cpuid(regs, 1);
#if defined(__amd64__) || defined(_M_X64)
features |= SSE2;
#else
return 0;
if (*edx & (1UL << 26))
features |= SSE2;
#endif
if (*ecx & (1UL << 0))
features |= SSSE3;
if (*ecx & (1UL << 19))
features |= SSE41;
if (*ecx & (1UL << 27)) { // OSXSAVE
const uint64_t mask = xgetbv();
if ((mask & 6) == 6) { // SSE and AVX states
if (*ecx & (1UL << 28))
features |= AVX;
if (max_id >= 7) {
cpuidex(regs, 7, 0);
if (*ebx & (1UL << 5))
features |= AVX2;
if ((mask & 224) == 224) { // Opmask, ZMM_Hi256, Hi16_Zmm
if (*ebx & (1UL << 31))
features |= AVX512VL;
if (*ebx & (1UL << 16))
features |= AVX512F;
}
}
}
}
g_cpu_features = features;
return features;
#elif defined(IS_ARM)
/* How to detect NEON? */
return 0;
#else
return 0;
#endif
}
}
void blake3_compress_in_place(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter,
uint8_t flags)
{
const enum cpu_feature features = get_cpu_features();
void blake3_compress_in_place(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter,
uint8_t flags) {
const enum cpu_feature features = get_cpu_features();
#if defined(IS_X86)
#if !defined(BLAKE3_NO_AVX512)
if(features & AVX512VL) {
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
return;
}
if (features & AVX512VL) {
blake3_compress_in_place_avx512(cv, block, block_len, counter, flags);
return;
}
#endif
#if !defined(BLAKE3_NO_SSE41)
if(features & SSE41) {
blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
return;
}
if (features & SSE41) {
blake3_compress_in_place_sse41(cv, block, block_len, counter, flags);
return;
}
#endif
#endif
blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
blake3_compress_in_place_portable(cv, block, block_len, counter, flags);
}
void blake3_compress_xof(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter,
uint8_t flags, uint8_t out[64])
{
const enum cpu_feature features = get_cpu_features();
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter, uint8_t flags,
uint8_t out[64]) {
const enum cpu_feature features = get_cpu_features();
#if defined(IS_X86)
#if !defined(BLAKE3_NO_AVX512)
if(features & AVX512VL) {
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
return;
}
if (features & AVX512VL) {
blake3_compress_xof_avx512(cv, block, block_len, counter, flags, out);
return;
}
#endif
#if !defined(BLAKE3_NO_SSE41)
if(features & SSE41) {
blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
return;
}
if (features & SSE41) {
blake3_compress_xof_sse41(cv, block, block_len, counter, flags, out);
return;
}
#endif
#endif
blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
blake3_compress_xof_portable(cv, block, block_len, counter, flags, out);
}
void blake3_hash_many(const uint8_t *const *inputs, size_t num_inputs,
size_t blocks, const uint32_t key[8],
uint64_t counter, bool increment_counter,
uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out)
{
const enum cpu_feature features = get_cpu_features();
size_t blocks, const uint32_t key[8], uint64_t counter,
bool increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
const enum cpu_feature features = get_cpu_features();
#if defined(IS_X86)
#if !defined(BLAKE3_NO_AVX512)
if(features & AVX512F) {
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out);
return;
}
if (features & AVX512F) {
blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
return;
}
#endif
#if !defined(BLAKE3_NO_AVX2)
if(features & AVX2) {
blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out);
return;
}
if (features & AVX2) {
blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
return;
}
#endif
#if !defined(BLAKE3_NO_SSE41)
if(features & SSE41) {
blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out);
return;
}
if (features & SSE41) {
blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
return;
}
#endif
#endif
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter, increment_counter, flags, flags_start, flags_end, out);
blake3_hash_many_portable(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end,
out);
}

View File

@ -428,7 +428,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
out[13] = loadu(&inputs[1][block_offset + 3 * sizeof(__m128i)]);
out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
for(size_t i = 0; i < 4; ++i) {
for (size_t i = 0; i < 4; ++i) {
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs(&out[0]);

View File

@ -51,15 +51,15 @@ int parse_key(char *hex_key, uint8_t out[BLAKE3_KEY_LEN]) {
/* A little repetition here */
enum cpu_feature {
SSE2 = 1 << 0,
SSSE3 = 1 << 1,
SSE41 = 1 << 2,
AVX = 1 << 3,
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
/* ... */
UNDEFINED = 1 << 30
SSE2 = 1 << 0,
SSSE3 = 1 << 1,
SSE41 = 1 << 2,
AVX = 1 << 3,
AVX2 = 1 << 4,
AVX512F = 1 << 5,
AVX512VL = 1 << 6,
/* ... */
UNDEFINED = 1 << 30
};
extern enum cpu_feature g_cpu_features;
@ -80,7 +80,8 @@ int main(int argc, char **argv) {
unsigned long long out_len_ll = strtoull(argv[2], &endptr, 10);
// TODO: There are so many possible error conditions for parsing a
// non-negative size_t...I probably missed something.
if (errno != 0 || out_len > SIZE_MAX || endptr == argv[2] || *endptr != 0) {
if (errno != 0 || out_len > SIZE_MAX || endptr == argv[2] ||
*endptr != 0) {
fprintf(stderr, "Bad length argument.\n");
return 1;
}
@ -111,12 +112,12 @@ int main(int argc, char **argv) {
assert(buf != NULL);
size_t buf_len = 0;
while (1) {
size_t n = fread(&buf[buf_len], 1, buf_capacity - buf_len, stdin);
if (n == 0) {
break;
}
buf_len += n;
assert(buf_len < buf_capacity);
size_t n = fread(&buf[buf_len], 1, buf_capacity - buf_len, stdin);
if (n == 0) {
break;
}
buf_len += n;
assert(buf_len < buf_capacity);
}
const int mask = get_cpu_features();
@ -125,7 +126,7 @@ int main(int argc, char **argv) {
fprintf(stderr, "Testing 0x%08X\n", feature);
g_cpu_features = feature;
blake3_hasher hasher;
switch(mode) {
switch (mode) {
case HASH_MODE:
blake3_hasher_init(&hasher);
break;
@ -155,6 +156,6 @@ int main(int argc, char **argv) {
printf("\n");
free(out);
feature = (feature - mask) & mask;
} while(feature != 0);
} while (feature != 0);
return 0;
}