1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-04-18 23:33:50 +02:00

fix some compiler warnings

This commit is contained in:
Samuel Neves 2022-01-08 20:12:14 +00:00 committed by Jack O'Connor
parent 9643f9563a
commit a4ce789f28
5 changed files with 22 additions and 16 deletions

View File

@ -208,7 +208,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
out[14] = loadu(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
out[15] = loadu(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
for (size_t i = 0; i < 8; ++i) {
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
_mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs(&out[0]);
transpose_vecs(&out[8]);
@ -219,14 +219,15 @@ INLINE void load_counters(uint64_t counter, bool increment_counter,
const __m256i mask = _mm256_set1_epi32(-(int32_t)increment_counter);
const __m256i add0 = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
const __m256i add1 = _mm256_and_si256(mask, add0);
__m256i l = _mm256_add_epi32(_mm256_set1_epi32(counter), add1);
__m256i l = _mm256_add_epi32(_mm256_set1_epi32((int32_t)counter), add1);
__m256i carry = _mm256_cmpgt_epi32(_mm256_xor_si256(add1, _mm256_set1_epi32(0x80000000)),
_mm256_xor_si256( l, _mm256_set1_epi32(0x80000000)));
__m256i h = _mm256_sub_epi32(_mm256_set1_epi32(counter >> 32), carry);
__m256i h = _mm256_sub_epi32(_mm256_set1_epi32((int32_t)(counter >> 32)), carry);
*out_lo = l;
*out_hi = h;
}
static
void blake3_hash8_avx2(const uint8_t *const *inputs, size_t blocks,
const uint32_t key[8], uint64_t counter,
bool increment_counter, uint8_t flags,

View File

@ -468,7 +468,7 @@ INLINE void transpose_msg_vecs4(const uint8_t *const *inputs,
out[14] = loadu_128(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
out[15] = loadu_128(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
for (size_t i = 0; i < 4; ++i) {
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
_mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs_128(&out[0]);
transpose_vecs_128(&out[4]);
@ -488,6 +488,7 @@ INLINE void load_counters4(uint64_t counter, bool increment_counter,
*out_hi = _mm256_cvtepi64_epi32(_mm256_srli_epi64(counters, 32));
}
static
void blake3_hash4_avx512(const uint8_t *const *inputs, size_t blocks,
const uint32_t key[8], uint64_t counter,
bool increment_counter, uint8_t flags,
@ -724,7 +725,7 @@ INLINE void transpose_msg_vecs8(const uint8_t *const *inputs,
out[14] = loadu_256(&inputs[6][block_offset + 1 * sizeof(__m256i)]);
out[15] = loadu_256(&inputs[7][block_offset + 1 * sizeof(__m256i)]);
for (size_t i = 0; i < 8; ++i) {
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
_mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs_256(&out[0]);
transpose_vecs_256(&out[8]);
@ -742,6 +743,7 @@ INLINE void load_counters8(uint64_t counter, bool increment_counter,
*out_hi = _mm512_cvtepi64_epi32(_mm512_srli_epi64(counters, 32));
}
static
void blake3_hash8_avx512(const uint8_t *const *inputs, size_t blocks,
const uint32_t key[8], uint64_t counter,
bool increment_counter, uint8_t flags,
@ -1037,7 +1039,7 @@ INLINE void transpose_msg_vecs16(const uint8_t *const *inputs,
out[14] = loadu_512(&inputs[14][block_offset]);
out[15] = loadu_512(&inputs[15][block_offset]);
for (size_t i = 0; i < 16; ++i) {
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
_mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs_512(out);
}
@ -1047,13 +1049,14 @@ INLINE void load_counters16(uint64_t counter, bool increment_counter,
const __m512i mask = _mm512_set1_epi32(-(int32_t)increment_counter);
const __m512i add0 = _mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
const __m512i add1 = _mm512_and_si512(mask, add0);
__m512i l = _mm512_add_epi32(_mm512_set1_epi32(counter), add1);
__m512i l = _mm512_add_epi32(_mm512_set1_epi32((int32_t)counter), add1);
__mmask16 carry = _mm512_cmp_epu32_mask(l, add1, _MM_CMPINT_LT);
__m512i h = _mm512_mask_add_epi32(_mm512_set1_epi32(counter >> 32), carry, _mm512_set1_epi32(counter >> 32), _mm512_set1_epi32(1));
__m512i h = _mm512_mask_add_epi32(_mm512_set1_epi32((int32_t)(counter >> 32)), carry, _mm512_set1_epi32((int32_t)(counter >> 32)), _mm512_set1_epi32(1));
*out_lo = l;
*out_hi = h;
}
static
void blake3_hash16_avx512(const uint8_t *const *inputs, size_t blocks,
const uint32_t key[8], uint64_t counter,
bool increment_counter, uint8_t flags,

View File

@ -78,7 +78,7 @@ INLINE void undiagonalize(__m128i *row0, __m128i *row2, __m128i *row3) {
*row2 = _mm_shuffle_epi32(*row2, _MM_SHUFFLE(2, 1, 0, 3));
}
INLINE __m128i blend_epi16(__m128i a, __m128i b, const int imm8) {
INLINE __m128i blend_epi16(__m128i a, __m128i b, const int16_t imm8) {
const __m128i bits = _mm_set_epi16(0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01);
__m128i mask = _mm_set1_epi16(imm8);
mask = _mm_and_si128(mask, bits);
@ -435,7 +435,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
for (size_t i = 0; i < 4; ++i) {
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
_mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs(&out[0]);
transpose_vecs(&out[4]);
@ -448,14 +448,15 @@ INLINE void load_counters(uint64_t counter, bool increment_counter,
const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
const __m128i add1 = _mm_and_si128(mask, add0);
__m128i l = _mm_add_epi32(_mm_set1_epi32(counter), add1);
__m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
__m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)),
_mm_xor_si128( l, _mm_set1_epi32(0x80000000)));
__m128i h = _mm_sub_epi32(_mm_set1_epi32(counter >> 32), carry);
__m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
*out_lo = l;
*out_hi = h;
}
static
void blake3_hash4_sse2(const uint8_t *const *inputs, size_t blocks,
const uint32_t key[8], uint64_t counter,
bool increment_counter, uint8_t flags,

View File

@ -429,7 +429,7 @@ INLINE void transpose_msg_vecs(const uint8_t *const *inputs,
out[14] = loadu(&inputs[2][block_offset + 3 * sizeof(__m128i)]);
out[15] = loadu(&inputs[3][block_offset + 3 * sizeof(__m128i)]);
for (size_t i = 0; i < 4; ++i) {
_mm_prefetch(&inputs[i][block_offset + 256], _MM_HINT_T0);
_mm_prefetch((const void *)&inputs[i][block_offset + 256], _MM_HINT_T0);
}
transpose_vecs(&out[0]);
transpose_vecs(&out[4]);
@ -442,14 +442,15 @@ INLINE void load_counters(uint64_t counter, bool increment_counter,
const __m128i mask = _mm_set1_epi32(-(int32_t)increment_counter);
const __m128i add0 = _mm_set_epi32(3, 2, 1, 0);
const __m128i add1 = _mm_and_si128(mask, add0);
__m128i l = _mm_add_epi32(_mm_set1_epi32(counter), add1);
__m128i l = _mm_add_epi32(_mm_set1_epi32((int32_t)counter), add1);
__m128i carry = _mm_cmpgt_epi32(_mm_xor_si128(add1, _mm_set1_epi32(0x80000000)),
_mm_xor_si128( l, _mm_set1_epi32(0x80000000)));
__m128i h = _mm_sub_epi32(_mm_set1_epi32(counter >> 32), carry);
__m128i h = _mm_sub_epi32(_mm_set1_epi32((int32_t)(counter >> 32)), carry);
*out_lo = l;
*out_hi = h;
}
static
void blake3_hash4_sse41(const uint8_t *const *inputs, size_t blocks,
const uint32_t key[8], uint64_t counter,
bool increment_counter, uint8_t flags,

View File

@ -68,7 +68,7 @@ enum cpu_feature {
};
extern enum cpu_feature g_cpu_features;
enum cpu_feature get_cpu_features();
enum cpu_feature get_cpu_features(void);
int main(int argc, char **argv) {
size_t out_len = BLAKE3_OUT_LEN;