1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-05-30 07:26:02 +02:00
Commit Graph

32 Commits

Author SHA1 Message Date
Jack O'Connor 37e153cc60 add NEON support to blake3_dispatch.c
Currently this requires setting the BLAKE3_USE_NEON preprocessor flag.
In the future we may enable this automatically on AArch32/64 or include
some kind of dynamic feature detection. (Though ARM makes this harder
than x86.)

As part of this, get rid of the IS_ARM flag. It wasn't being set
properly when I tried it on a Raspberry Pi.

Closes #30.
2020-01-28 15:59:16 -05:00
Jack O'Connor d7a37fa54d clear errno before strtoull
I ran into a bug on ARM where we were getting non-zero here, from
something else that stuck around in errno.
2020-01-28 14:11:26 -05:00
Jack O'Connor 4304cd1085 one more warning 2020-01-28 13:26:37 -05:00
Jack O'Connor d980514c44 fix unused variable warning 2020-01-28 13:25:22 -05:00
Jack O'Connor 6742722898 add a note about testing in main.c 2020-01-27 16:21:34 -05:00
TheVice 8ce1cddedc [memset] removed the call to 'memset', because the buffer's
contents are overwritten inside the blake3_hasher_finalize function.
2020-01-27 16:17:09 -05:00
TheVice 4730ab237e [memset] moved the call so that it happens after the check
of the memory to which it is applied.
2020-01-27 16:17:09 -05:00
Jack O'Connor dec0c49576 add a note about AVX-512 flags 2020-01-27 13:10:25 -05:00
Jack O'Connor 444a338b45 remove an obsolete remark about performance 2020-01-27 13:04:36 -05:00
Jack O'Connor 71e605fd5d
typo 2020-01-26 16:12:10 -05:00
Jack O'Connor 1db856a3e5 expand the C README for public consumption 2020-01-26 16:07:51 -05:00
Erik Johansson 182aea4871 Add extern "C" to blake3.h
So that the header can be included in C++ programs without getting linker
errors.
2020-01-23 20:42:34 +01:00
Samuel Neves a830ab2661 streamline load_counters
avx2 before:

        mov     eax, esi
        neg     rax
        vmovq   xmm0, rax
        vpbroadcastq    ymm0, xmm0
        vpand   ymm0, ymm0, ymmword ptr [rip + .LCPI1_0]
        vmovq   xmm2, rdi
        vpbroadcastq    ymm1, xmm2
        vpaddq  ymm1, ymm0, ymm1
        vmovdqa ymm0, ymmword ptr [rip + .LCPI1_1] # ymm0 = [0,2,4,6,4,6,6,7]
        vpermd  ymm3, ymm0, ymm1
        mov     r8d, eax
        and     r8d, 5
        add     r8, rdi
        mov     esi, eax
        and     esi, 6
        add     rsi, rdi
        and     eax, 7
        vpshufd xmm4, xmm3, 231         # xmm4 = xmm3[3,1,2,3]
        vpinsrd xmm4, xmm4, r8d, 1
        add     rax, rdi
        vpinsrd xmm4, xmm4, esi, 2
        vpinsrd xmm4, xmm4, eax, 3
        vpshufd xmm3, xmm3, 144         # xmm3 = xmm3[0,0,1,2]
        vpinsrd xmm3, xmm3, edi, 0
        vmovdqa xmmword ptr [rdx], xmm3
        vmovdqa xmmword ptr [rdx + 16], xmm4
        vpermq  ymm3, ymm1, 144         # ymm3 = ymm1[0,0,1,2]
        vpblendd        ymm2, ymm3, ymm2, 3 # ymm2 = ymm2[0,1],ymm3[2,3,4,5,6,7]
        vpsrlq  ymm2, ymm2, 32
        vpermd  ymm2, ymm0, ymm2
        vextracti128    xmm1, ymm1, 1
        vmovq   xmm3, rax
        vmovq   xmm4, rsi
        vpunpcklqdq     xmm3, xmm4, xmm3 # xmm3 = xmm4[0],xmm3[0]
        vmovq   xmm4, r8
        vpalignr        xmm1, xmm4, xmm1, 8 # xmm1 = xmm1[8,9,10,11,12,13,14,15],xmm4[0,1,2,3,4,5,6,7]
        vinserti128     ymm1, ymm1, xmm3, 1
        vpsrlq  ymm1, ymm1, 32
        vpermd  ymm0, ymm0, ymm1

avx2 after:

        neg     esi
        vmovd   xmm0, esi
        vpbroadcastd    ymm0, xmm0
        vmovd   xmm1, edi
        vpbroadcastd    ymm1, xmm1
        vpand   ymm0, ymm0, ymmword ptr [rip + .LCPI0_0]
        vpaddd  ymm1, ymm1, ymm0
        vpbroadcastd    ymm2, dword ptr [rip + .LCPI0_1] # ymm2 = [2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648,2147483648]
        vpor    ymm0, ymm0, ymm2
        vpxor   ymm2, ymm1, ymm2
        vpcmpgtd        ymm0, ymm0, ymm2
        shr     rdi, 32
        vmovd   xmm2, edi
        vpbroadcastd    ymm2, xmm2
        vpsubd  ymm0, ymm2, ymm0
2020-01-23 12:17:43 +00:00
Samuel Neves de1458c565 name collision 2020-01-23 11:51:46 +00:00
Samuel Neves 37ea737c16 more robust bit-trickery functions 2020-01-23 10:58:45 +00:00
Jack O'Connor 163f52245d port compress_subtree_to_parent_node from Rust to C
This recursive function performs parallel parent node hashing, which is
an important optimization.
2020-01-22 21:32:39 -05:00
Jack O'Connor de1cf0038e add the round_down_to_power_of_2 algorithm
This could probably be sped up by detecting LZCNT support, but it's
unlikely to be a bottleneck.
2020-01-22 21:32:39 -05:00
Jack O'Connor 087d72e08f clang-format 2020-01-22 21:32:35 -05:00
Jack O'Connor 92d421dea1 add a larger test case
One thing I like to test is that, if I hack simd_degree to be higher
than MAX_SIMD_DEGREE, assertions fire. This requires a test case long
enough to exceed that number of chunks.
2020-01-22 21:19:47 -05:00
Jack O'Connor d0c8fc16b3 use a better popcnt fallback algorithm
This one loops once for every set bit, rather than once for each bit
position to the right of the highest set bit.

https://en.wikipedia.org/wiki/Hamming_weight#Efficient_implementation
2020-01-21 10:47:00 -05:00
Jack O'Connor 40f4bdc22a switch from BLAKE3_USE_* to BLAKE3_NO_*
This means that compiling C sources includes all implementations by
default, which is what most callers are going to want.
2020-01-20 15:24:03 -05:00
Samuel Neves 66da5afb0c make things more modular 2020-01-20 12:03:31 -05:00
Samuel Neves b8c33e11ef manually prefetch message blocks 2020-01-19 18:45:37 +00:00
Jack O'Connor 28701d1585 add a README.md in c/blake3_c_rust_bindings 2020-01-16 18:29:20 -05:00
Jack O'Connor 84c26670bf add blake3_c_rust_bindings for testing and benchmarking 2020-01-16 16:09:42 -05:00
Jack O'Connor d7d71b2c5f move 0-length checks to the top-level C API functions 2020-01-10 10:49:33 -05:00
Guido Vranken 253e830c26 C impl: Prevent memcpy undefined behavior 2020-01-10 10:41:35 -05:00
Jack O'Connor af9b44c881 replace the C code README with a "not yet ready" remark 2020-01-09 09:48:52 -05:00
Jack O'Connor 88dcee7005 remove the C code's separate LICENSE file
The LICENSE file at the top level of this repo is identical.
2020-01-09 09:48:52 -05:00
Jack O'Connor a0d7b4f3f8 merge the C code's ci.yml into the top level one
CI is not currently working due to quota issues, so I'll need to check
that this is actually working later.
2020-01-09 09:48:52 -05:00
Jack O'Connor 10c13c8d1c remove the C code's duplicated copy of the test vectors 2020-01-09 09:48:52 -05:00
Jack O'Connor a7579d30ad merge BLAKE3-c into this repo
This is commit 4476d9da0e370993823e7ad17592b84e905afd76 of
https://github.com/veorq/BLAKE3-c.
2020-01-09 09:48:52 -05:00