1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-04-25 10:45:06 +02:00

Work around clang bug 36144 by renumbering the numeric local labels: every `1:` / `1b` / `1f` becomes `9:` / `9b` / `9f`.

https://bugs.llvm.org/show_bug.cgi?id=36144

Fixes #60.
This commit is contained in:
Samuel Neves 2020-02-13 13:46:32 +00:00
parent fcc14c8c1b
commit fa6f14cafa
6 changed files with 108 additions and 108 deletions

View File

@ -63,7 +63,7 @@ blake3_hash_many_avx2:
or eax, ebx
xor edx, edx
.p2align 5
1:
9:
movzx ebx, byte ptr [rbp+0x48]
or ebx, eax
add rdx, 64
@ -1231,7 +1231,7 @@ blake3_hash_many_avx2:
vpxor ymm6, ymm6, ymm14
vpxor ymm7, ymm7, ymm15
movzx eax, byte ptr [rbp+0x38]
jne 1b
jne 9b
mov rbx, qword ptr [rbp+0x50]
vunpcklps ymm8, ymm0, ymm1
vunpcklps ymm9, ymm2, ymm3
@ -1374,7 +1374,7 @@ blake3_hash_many_avx2:
vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
vmovdqa ymm10, ymm2
mov al, 7
1:
9:
vpaddd ymm0, ymm0, ymm4
vpaddd ymm8, ymm8, ymm12
vmovdqa ymmword ptr [rsp+0x40], ymm4
@ -1470,7 +1470,7 @@ blake3_hash_many_avx2:
vpshufd ymm2, ymm2, 0x93
vpshufd ymm10, ymm10, 0x93
dec al
je 1f
je 9f
vmovdqa ymm4, ymmword ptr [rsp+0x40]
vmovdqa ymm5, ymmword ptr [rsp+0x80]
vshufps ymm12, ymm4, ymm5, 214
@ -1503,8 +1503,8 @@ blake3_hash_many_avx2:
vmovdqa ymm14, ymm5
vmovdqa ymm5, ymmword ptr [rsp+0x40]
vmovdqa ymm6, ymmword ptr [rsp+0x80]
jmp 1b
1:
jmp 9b
9:
vpxor ymm0, ymm0, ymm2
vpxor ymm1, ymm1, ymm3
vpxor ymm8, ymm8, ymm10
@ -1577,7 +1577,7 @@ blake3_hash_many_avx2:
vpshufd ymm6, ymm6, 0x93
vpshufd ymm7, ymm7, 0x93
mov al, 7
1:
9:
vpaddd ymm0, ymm0, ymm4
vpaddd ymm0, ymm0, ymm1
vpxor ymm3, ymm3, ymm0
@ -1621,7 +1621,7 @@ blake3_hash_many_avx2:
vpshufd ymm3, ymm3, 0x4E
vpshufd ymm2, ymm2, 0x93
dec al
jz 1f
jz 9f
vshufps ymm8, ymm4, ymm5, 214
vpshufd ymm9, ymm4, 0x0F
vpshufd ymm4, ymm8, 0x39
@ -1635,8 +1635,8 @@ blake3_hash_many_avx2:
vpshufd ymm7, ymm6, 0x1E
vmovdqa ymm5, ymm9
vmovdqa ymm6, ymm8
jmp 1b
1:
jmp 9b
9:
vpxor ymm0, ymm0, ymm2
vpxor ymm1, ymm1, ymm3
mov eax, r13d
@ -1693,7 +1693,7 @@ blake3_hash_many_avx2:
vpshufd xmm6, xmm6, 0x93
vpshufd xmm7, xmm7, 0x93
mov al, 7
1:
9:
vpaddd xmm0, xmm0, xmm4
vpaddd xmm0, xmm0, xmm1
vpxor xmm3, xmm3, xmm0
@ -1737,7 +1737,7 @@ blake3_hash_many_avx2:
vpshufd xmm3, xmm3, 0x4E
vpshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
vshufps xmm8, xmm4, xmm5, 214
vpshufd xmm9, xmm4, 0x0F
vpshufd xmm4, xmm8, 0x39
@ -1751,8 +1751,8 @@ blake3_hash_many_avx2:
vpshufd xmm7, xmm6, 0x1E
vmovdqa xmm5, xmm9
vmovdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
vpxor xmm0, xmm0, xmm2
vpxor xmm1, xmm1, xmm3
mov eax, r13d

View File

@ -77,7 +77,7 @@ blake3_hash_many_avx2:
or eax, ebx
xor edx, edx
.p2align 5
1:
9:
movzx ebx, byte ptr [rbp+0x88]
or ebx, eax
add rdx, 64
@ -1245,7 +1245,7 @@ blake3_hash_many_avx2:
vpxor ymm6, ymm6, ymm14
vpxor ymm7, ymm7, ymm15
movzx eax, byte ptr [rbp+0x78]
jne 1b
jne 9b
mov rbx, qword ptr [rbp+0x90]
vunpcklps ymm8, ymm0, ymm1
vunpcklps ymm9, ymm2, ymm3
@ -1396,7 +1396,7 @@ blake3_hash_many_avx2:
vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
vmovdqa ymm10, ymm2
mov al, 7
1:
9:
vpaddd ymm0, ymm0, ymm4
vpaddd ymm8, ymm8, ymm12
vmovdqa ymmword ptr [rsp+0x40], ymm4
@ -1492,7 +1492,7 @@ blake3_hash_many_avx2:
vpshufd ymm2, ymm2, 0x93
vpshufd ymm10, ymm10, 0x93
dec al
je 1f
je 9f
vmovdqa ymm4, ymmword ptr [rsp+0x40]
vmovdqa ymm5, ymmword ptr [rsp+0x80]
vshufps ymm12, ymm4, ymm5, 214
@ -1525,8 +1525,8 @@ blake3_hash_many_avx2:
vmovdqa ymm14, ymm5
vmovdqa ymm5, ymmword ptr [rsp+0x40]
vmovdqa ymm6, ymmword ptr [rsp+0x80]
jmp 1b
1:
jmp 9b
9:
vpxor ymm0, ymm0, ymm2
vpxor ymm1, ymm1, ymm3
vpxor ymm8, ymm8, ymm10
@ -1599,7 +1599,7 @@ blake3_hash_many_avx2:
vpshufd ymm6, ymm6, 0x93
vpshufd ymm7, ymm7, 0x93
mov al, 7
1:
9:
vpaddd ymm0, ymm0, ymm4
vpaddd ymm0, ymm0, ymm1
vpxor ymm3, ymm3, ymm0
@ -1643,7 +1643,7 @@ blake3_hash_many_avx2:
vpshufd ymm3, ymm3, 0x4E
vpshufd ymm2, ymm2, 0x93
dec al
jz 1f
jz 9f
vshufps ymm8, ymm4, ymm5, 214
vpshufd ymm9, ymm4, 0x0F
vpshufd ymm4, ymm8, 0x39
@ -1657,8 +1657,8 @@ blake3_hash_many_avx2:
vpshufd ymm7, ymm6, 0x1E
vmovdqa ymm5, ymm9
vmovdqa ymm6, ymm8
jmp 1b
1:
jmp 9b
9:
vpxor ymm0, ymm0, ymm2
vpxor ymm1, ymm1, ymm3
mov eax, r13d
@ -1715,7 +1715,7 @@ blake3_hash_many_avx2:
vpshufd xmm6, xmm6, 0x93
vpshufd xmm7, xmm7, 0x93
mov al, 7
1:
9:
vpaddd xmm0, xmm0, xmm4
vpaddd xmm0, xmm0, xmm1
vpxor xmm3, xmm3, xmm0
@ -1759,7 +1759,7 @@ blake3_hash_many_avx2:
vpshufd xmm3, xmm3, 0x4E
vpshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
vshufps xmm8, xmm4, xmm5, 214
vpshufd xmm9, xmm4, 0x0F
vpshufd xmm4, xmm8, 0x39
@ -1773,8 +1773,8 @@ blake3_hash_many_avx2:
vpshufd xmm7, xmm6, 0x1E
vmovdqa xmm5, xmm9
vmovdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
vpxor xmm0, xmm0, xmm2
vpxor xmm1, xmm1, xmm3
mov eax, r13d

View File

@ -66,7 +66,7 @@ blake3_hash_many_avx512:
or eax, ebx
xor edx, edx
.p2align 5
1:
9:
movzx ebx, byte ptr [rbp+0x48]
or ebx, eax
add rdx, 64
@ -1011,7 +1011,7 @@ blake3_hash_many_avx512:
vpxord zmm6, zmm6, zmm14
vpxord zmm7, zmm7, zmm15
movzx eax, byte ptr [rbp+0x38]
jne 1b
jne 9b
mov rbx, qword ptr [rbp+0x50]
vpunpckldq zmm16, zmm0, zmm1
vpunpckhdq zmm17, zmm0, zmm1
@ -2089,7 +2089,7 @@ blake3_hash_many_avx512:
vpshufd zmm6, zmm6, 0x93
vpshufd zmm7, zmm7, 0x93
mov al, 7
1:
9:
vpaddd zmm0, zmm0, zmm4
vpaddd zmm0, zmm0, zmm1
vpxord zmm3, zmm3, zmm0
@ -2125,7 +2125,7 @@ blake3_hash_many_avx512:
vpshufd zmm3, zmm3, 0x4E
vpshufd zmm2, zmm2, 0x93
dec al
jz 1f
jz 9f
vshufps zmm8, zmm4, zmm5, 214
vpshufd zmm9, zmm4, 0x0F
vpshufd zmm4, zmm8, 0x39
@ -2139,8 +2139,8 @@ blake3_hash_many_avx512:
vpshufd zmm7, zmm6, 0x1E
vmovdqa32 zmm5, zmm9
vmovdqa32 zmm6, zmm8
jmp 1b
1:
jmp 9b
9:
vpxord zmm0, zmm0, zmm2
vpxord zmm1, zmm1, zmm3
mov eax, r13d
@ -2206,7 +2206,7 @@ blake3_hash_many_avx512:
vpshufd ymm6, ymm6, 0x93
vpshufd ymm7, ymm7, 0x93
mov al, 7
1:
9:
vpaddd ymm0, ymm0, ymm4
vpaddd ymm0, ymm0, ymm1
vpxord ymm3, ymm3, ymm0
@ -2242,7 +2242,7 @@ blake3_hash_many_avx512:
vpshufd ymm3, ymm3, 0x4E
vpshufd ymm2, ymm2, 0x93
dec al
jz 1f
jz 9f
vshufps ymm8, ymm4, ymm5, 214
vpshufd ymm9, ymm4, 0x0F
vpshufd ymm4, ymm8, 0x39
@ -2256,8 +2256,8 @@ blake3_hash_many_avx512:
vpshufd ymm7, ymm6, 0x1E
vmovdqa ymm5, ymm9
vmovdqa ymm6, ymm8
jmp 1b
1:
jmp 9b
9:
vpxor ymm0, ymm0, ymm2
vpxor ymm1, ymm1, ymm3
mov eax, r13d
@ -2309,7 +2309,7 @@ blake3_hash_many_avx512:
vpshufd xmm6, xmm6, 0x93
vpshufd xmm7, xmm7, 0x93
mov al, 7
1:
9:
vpaddd xmm0, xmm0, xmm4
vpaddd xmm0, xmm0, xmm1
vpxord xmm3, xmm3, xmm0
@ -2345,7 +2345,7 @@ blake3_hash_many_avx512:
vpshufd xmm3, xmm3, 0x4E
vpshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
vshufps xmm8, xmm4, xmm5, 214
vpshufd xmm9, xmm4, 0x0F
vpshufd xmm4, xmm8, 0x39
@ -2359,8 +2359,8 @@ blake3_hash_many_avx512:
vpshufd xmm7, xmm6, 0x1E
vmovdqa xmm5, xmm9
vmovdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
vpxor xmm0, xmm0, xmm2
vpxor xmm1, xmm1, xmm3
mov eax, r13d
@ -2393,7 +2393,7 @@ blake3_compress_in_place_avx512:
vpshufd xmm6, xmm6, 0x93
vpshufd xmm7, xmm7, 0x93
mov al, 7
1:
9:
vpaddd xmm0, xmm0, xmm4
vpaddd xmm0, xmm0, xmm1
vpxord xmm3, xmm3, xmm0
@ -2429,7 +2429,7 @@ blake3_compress_in_place_avx512:
vpshufd xmm3, xmm3, 0x4E
vpshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
vshufps xmm8, xmm4, xmm5, 214
vpshufd xmm9, xmm4, 0x0F
vpshufd xmm4, xmm8, 0x39
@ -2443,8 +2443,8 @@ blake3_compress_in_place_avx512:
vpshufd xmm7, xmm6, 0x1E
vmovdqa xmm5, xmm9
vmovdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
vpxor xmm0, xmm0, xmm2
vpxor xmm1, xmm1, xmm3
vmovdqu xmmword ptr [rdi], xmm0
@ -2475,7 +2475,7 @@ blake3_compress_xof_avx512:
vpshufd xmm6, xmm6, 0x93
vpshufd xmm7, xmm7, 0x93
mov al, 7
1:
9:
vpaddd xmm0, xmm0, xmm4
vpaddd xmm0, xmm0, xmm1
vpxord xmm3, xmm3, xmm0
@ -2511,7 +2511,7 @@ blake3_compress_xof_avx512:
vpshufd xmm3, xmm3, 0x4E
vpshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
vshufps xmm8, xmm4, xmm5, 214
vpshufd xmm9, xmm4, 0x0F
vpshufd xmm4, xmm8, 0x39
@ -2525,8 +2525,8 @@ blake3_compress_xof_avx512:
vpshufd xmm7, xmm6, 0x1E
vmovdqa xmm5, xmm9
vmovdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
vpxor xmm0, xmm0, xmm2
vpxor xmm1, xmm1, xmm3
vpxor xmm2, xmm2, [rdi]

View File

@ -80,7 +80,7 @@ blake3_hash_many_avx512:
or eax, ebx
xor edx, edx
.p2align 5
1:
9:
movzx ebx, byte ptr [rbp+0x88]
or ebx, eax
add rdx, 64
@ -1025,7 +1025,7 @@ blake3_hash_many_avx512:
vpxord zmm6, zmm6, zmm14
vpxord zmm7, zmm7, zmm15
movzx eax, byte ptr [rbp+0x78]
jne 1b
jne 9b
mov rbx, qword ptr [rbp+0x90]
vpunpckldq zmm16, zmm0, zmm1
vpunpckhdq zmm17, zmm0, zmm1
@ -2115,7 +2115,7 @@ blake3_hash_many_avx512:
vpshufd zmm6, zmm6, 0x93
vpshufd zmm7, zmm7, 0x93
mov al, 7
1:
9:
vpaddd zmm0, zmm0, zmm4
vpaddd zmm0, zmm0, zmm1
vpxord zmm3, zmm3, zmm0
@ -2151,7 +2151,7 @@ blake3_hash_many_avx512:
vpshufd zmm3, zmm3, 0x4E
vpshufd zmm2, zmm2, 0x93
dec al
jz 1f
jz 9f
vshufps zmm8, zmm4, zmm5, 214
vpshufd zmm9, zmm4, 0x0F
vpshufd zmm4, zmm8, 0x39
@ -2165,8 +2165,8 @@ blake3_hash_many_avx512:
vpshufd zmm7, zmm6, 0x1E
vmovdqa32 zmm5, zmm9
vmovdqa32 zmm6, zmm8
jmp 1b
1:
jmp 9b
9:
vpxord zmm0, zmm0, zmm2
vpxord zmm1, zmm1, zmm3
mov eax, r13d
@ -2232,7 +2232,7 @@ blake3_hash_many_avx512:
vpshufd ymm6, ymm6, 0x93
vpshufd ymm7, ymm7, 0x93
mov al, 7
1:
9:
vpaddd ymm0, ymm0, ymm4
vpaddd ymm0, ymm0, ymm1
vpxord ymm3, ymm3, ymm0
@ -2268,7 +2268,7 @@ blake3_hash_many_avx512:
vpshufd ymm3, ymm3, 0x4E
vpshufd ymm2, ymm2, 0x93
dec al
jz 1f
jz 9f
vshufps ymm8, ymm4, ymm5, 214
vpshufd ymm9, ymm4, 0x0F
vpshufd ymm4, ymm8, 0x39
@ -2282,8 +2282,8 @@ blake3_hash_many_avx512:
vpshufd ymm7, ymm6, 0x1E
vmovdqa ymm5, ymm9
vmovdqa ymm6, ymm8
jmp 1b
1:
jmp 9b
9:
vpxor ymm0, ymm0, ymm2
vpxor ymm1, ymm1, ymm3
mov eax, r13d
@ -2335,7 +2335,7 @@ blake3_hash_many_avx512:
vpshufd xmm6, xmm6, 0x93
vpshufd xmm7, xmm7, 0x93
mov al, 7
1:
9:
vpaddd xmm0, xmm0, xmm4
vpaddd xmm0, xmm0, xmm1
vpxord xmm3, xmm3, xmm0
@ -2371,7 +2371,7 @@ blake3_hash_many_avx512:
vpshufd xmm3, xmm3, 0x4E
vpshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
vshufps xmm8, xmm4, xmm5, 214
vpshufd xmm9, xmm4, 0x0F
vpshufd xmm4, xmm8, 0x39
@ -2385,8 +2385,8 @@ blake3_hash_many_avx512:
vpshufd xmm7, xmm6, 0x1E
vmovdqa xmm5, xmm9
vmovdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
vpxor xmm0, xmm0, xmm2
vpxor xmm1, xmm1, xmm3
mov eax, r13d
@ -2426,7 +2426,7 @@ blake3_compress_in_place_avx512:
vpshufd xmm6, xmm6, 0x93
vpshufd xmm7, xmm7, 0x93
mov al, 7
1:
9:
vpaddd xmm0, xmm0, xmm4
vpaddd xmm0, xmm0, xmm1
vpxord xmm3, xmm3, xmm0
@ -2462,7 +2462,7 @@ blake3_compress_in_place_avx512:
vpshufd xmm3, xmm3, 0x4E
vpshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
vshufps xmm8, xmm4, xmm5, 214
vpshufd xmm9, xmm4, 0x0F
vpshufd xmm4, xmm8, 0x39
@ -2476,8 +2476,8 @@ blake3_compress_in_place_avx512:
vpshufd xmm7, xmm6, 0x1E
vmovdqa xmm5, xmm9
vmovdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
vpxor xmm0, xmm0, xmm2
vpxor xmm1, xmm1, xmm3
vmovdqu xmmword ptr [rcx], xmm0
@ -2520,7 +2520,7 @@ blake3_compress_xof_avx512:
vpshufd xmm6, xmm6, 0x93
vpshufd xmm7, xmm7, 0x93
mov al, 7
1:
9:
vpaddd xmm0, xmm0, xmm4
vpaddd xmm0, xmm0, xmm1
vpxord xmm3, xmm3, xmm0
@ -2556,7 +2556,7 @@ blake3_compress_xof_avx512:
vpshufd xmm3, xmm3, 0x4E
vpshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
vshufps xmm8, xmm4, xmm5, 214
vpshufd xmm9, xmm4, 0x0F
vpshufd xmm4, xmm8, 0x39
@ -2570,8 +2570,8 @@ blake3_compress_xof_avx512:
vpshufd xmm7, xmm6, 0x1E
vmovdqa xmm5, xmm9
vmovdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
vpxor xmm0, xmm0, xmm2
vpxor xmm1, xmm1, xmm3
vpxor xmm2, xmm2, xmmword ptr [rcx]

View File

@ -67,7 +67,7 @@ blake3_hash_many_sse41:
movzx eax, byte ptr [rbp+0x40]
or eax, r13d
xor edx, edx
1:
9:
mov r14d, eax
or eax, r12d
add rdx, 64
@ -1377,7 +1377,7 @@ blake3_hash_many_sse41:
pxor xmm6, xmm14
pxor xmm7, xmm15
mov eax, r13d
jne 1b
jne 9b
movdqa xmm9, xmm0
punpckldq xmm0, xmm1
punpckhdq xmm9, xmm1
@ -1496,7 +1496,7 @@ blake3_hash_many_sse41:
pinsrd xmm3, eax, 3
pinsrd xmm11, eax, 3
mov al, 7
1:
9:
paddd xmm0, xmm4
paddd xmm8, xmm12
movaps xmmword ptr [rsp+0x20], xmm4
@ -1596,7 +1596,7 @@ blake3_hash_many_sse41:
pshufd xmm2, xmm2, 0x93
pshufd xmm10, xmm10, 0x93
dec al
je 1f
je 9f
movdqa xmm12, xmmword ptr [rsp+0x20]
movdqa xmm5, xmmword ptr [rsp+0x40]
pshufd xmm13, xmm12, 0x0F
@ -1633,8 +1633,8 @@ blake3_hash_many_sse41:
movdqa xmm14, xmm5
movdqa xmm5, xmmword ptr [rsp+0x20]
movdqa xmm6, xmmword ptr [rsp+0x40]
jmp 1b
1:
jmp 9b
9:
pxor xmm0, xmm2
pxor xmm1, xmm3
pxor xmm8, xmm10
@ -1695,7 +1695,7 @@ blake3_hash_many_sse41:
shufps xmm8, xmm7, 221
pshufd xmm7, xmm8, 0x93
mov al, 7
1:
9:
paddd xmm0, xmm4
paddd xmm0, xmm1
pxor xmm3, xmm0
@ -1743,7 +1743,7 @@ blake3_hash_many_sse41:
pshufd xmm3, xmm3, 0x4E
pshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
movdqa xmm8, xmm4
shufps xmm8, xmm5, 214
pshufd xmm9, xmm4, 0x0F
@ -1760,8 +1760,8 @@ blake3_hash_many_sse41:
pshufd xmm7, xmm6, 0x1E
movdqa xmm5, xmm9
movdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
pxor xmm0, xmm2
pxor xmm1, xmm3
mov eax, r13d
@ -1798,7 +1798,7 @@ _blake3_compress_in_place_sse41:
movaps xmm14, xmmword ptr [ROT8+rip]
movaps xmm15, xmmword ptr [ROT16+rip]
mov al, 7
1:
9:
paddd xmm0, xmm4
paddd xmm0, xmm1
pxor xmm3, xmm0
@ -1846,7 +1846,7 @@ _blake3_compress_in_place_sse41:
pshufd xmm3, xmm3, 0x4E
pshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
movdqa xmm8, xmm4
shufps xmm8, xmm5, 214
pshufd xmm9, xmm4, 0x0F
@ -1863,8 +1863,8 @@ _blake3_compress_in_place_sse41:
pshufd xmm7, xmm6, 0x1E
movdqa xmm5, xmm9
movdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
pxor xmm0, xmm2
pxor xmm1, xmm3
movups xmmword ptr [rdi], xmm0
@ -1900,7 +1900,7 @@ _blake3_compress_xof_sse41:
movaps xmm14, xmmword ptr [ROT8+rip]
movaps xmm15, xmmword ptr [ROT16+rip]
mov al, 7
1:
9:
paddd xmm0, xmm4
paddd xmm0, xmm1
pxor xmm3, xmm0
@ -1948,7 +1948,7 @@ _blake3_compress_xof_sse41:
pshufd xmm3, xmm3, 0x4E
pshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
movdqa xmm8, xmm4
shufps xmm8, xmm5, 214
pshufd xmm9, xmm4, 0x0F
@ -1965,8 +1965,8 @@ _blake3_compress_xof_sse41:
pshufd xmm7, xmm6, 0x1E
movdqa xmm5, xmm9
movdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
movdqu xmm4, xmmword ptr [rdi]
movdqu xmm5, xmmword ptr [rdi+0x10]
pxor xmm0, xmm2

View File

@ -81,7 +81,7 @@ blake3_hash_many_sse41:
movzx eax, byte ptr [rbp+0x80]
or eax, r13d
xor edx, edx
1:
9:
mov r14d, eax
or eax, r12d
add rdx, 64
@ -1391,7 +1391,7 @@ blake3_hash_many_sse41:
pxor xmm6, xmm14
pxor xmm7, xmm15
mov eax, r13d
jne 1b
jne 9b
movdqa xmm9, xmm0
punpckldq xmm0, xmm1
punpckhdq xmm9, xmm1
@ -1522,7 +1522,7 @@ blake3_hash_many_sse41:
pinsrd xmm3, eax, 3
pinsrd xmm11, eax, 3
mov al, 7
1:
9:
paddd xmm0, xmm4
paddd xmm8, xmm12
movaps xmmword ptr [rsp+0x20], xmm4
@ -1622,7 +1622,7 @@ blake3_hash_many_sse41:
pshufd xmm2, xmm2, 0x93
pshufd xmm10, xmm10, 0x93
dec al
je 1f
je 9f
movdqa xmm12, xmmword ptr [rsp+0x20]
movdqa xmm5, xmmword ptr [rsp+0x40]
pshufd xmm13, xmm12, 0x0F
@ -1659,8 +1659,8 @@ blake3_hash_many_sse41:
movdqa xmm14, xmm5
movdqa xmm5, xmmword ptr [rsp+0x20]
movdqa xmm6, xmmword ptr [rsp+0x40]
jmp 1b
1:
jmp 9b
9:
pxor xmm0, xmm2
pxor xmm1, xmm3
pxor xmm8, xmm10
@ -1721,7 +1721,7 @@ blake3_hash_many_sse41:
shufps xmm8, xmm7, 221
pshufd xmm7, xmm8, 0x93
mov al, 7
1:
9:
paddd xmm0, xmm4
paddd xmm0, xmm1
pxor xmm3, xmm0
@ -1769,7 +1769,7 @@ blake3_hash_many_sse41:
pshufd xmm3, xmm3, 0x4E
pshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
movdqa xmm8, xmm4
shufps xmm8, xmm5, 214
pshufd xmm9, xmm4, 0x0F
@ -1786,8 +1786,8 @@ blake3_hash_many_sse41:
pshufd xmm7, xmm6, 0x1E
movdqa xmm5, xmm9
movdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
pxor xmm0, xmm2
pxor xmm1, xmm3
mov eax, r13d
@ -1831,7 +1831,7 @@ _blake3_compress_in_place_sse41:
movaps xmm14, xmmword ptr [ROT8+rip]
movaps xmm15, xmmword ptr [ROT16+rip]
mov al, 7
1:
9:
paddd xmm0, xmm4
paddd xmm0, xmm1
pxor xmm3, xmm0
@ -1879,7 +1879,7 @@ _blake3_compress_in_place_sse41:
pshufd xmm3, xmm3, 0x4E
pshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
movdqa xmm8, xmm4
shufps xmm8, xmm5, 214
pshufd xmm9, xmm4, 0x0F
@ -1896,8 +1896,8 @@ _blake3_compress_in_place_sse41:
pshufd xmm7, xmm6, 0x1E
movdqa xmm5, xmm9
movdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
pxor xmm0, xmm2
pxor xmm1, xmm3
movups xmmword ptr [rcx], xmm0
@ -1945,7 +1945,7 @@ blake3_compress_xof_sse41:
movaps xmm14, xmmword ptr [ROT8+rip]
movaps xmm15, xmmword ptr [ROT16+rip]
mov al, 7
1:
9:
paddd xmm0, xmm4
paddd xmm0, xmm1
pxor xmm3, xmm0
@ -1993,7 +1993,7 @@ blake3_compress_xof_sse41:
pshufd xmm3, xmm3, 0x4E
pshufd xmm2, xmm2, 0x93
dec al
jz 1f
jz 9f
movdqa xmm8, xmm4
shufps xmm8, xmm5, 214
pshufd xmm9, xmm4, 0x0F
@ -2010,8 +2010,8 @@ blake3_compress_xof_sse41:
pshufd xmm7, xmm6, 0x1E
movdqa xmm5, xmm9
movdqa xmm6, xmm8
jmp 1b
1:
jmp 9b
9:
movdqu xmm4, xmmword ptr [rcx]
movdqu xmm5, xmmword ptr [rcx+0x10]
pxor xmm0, xmm2