mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-04-25 10:45:06 +02:00
Work around clang bug 36144 by replacing anonymous label numbers.
https://bugs.llvm.org/show_bug.cgi?id=36144 Fixes #60.
This commit is contained in:
parent
fcc14c8c1b
commit
fa6f14cafa
|
@ -63,7 +63,7 @@ blake3_hash_many_avx2:
|
|||
or eax, ebx
|
||||
xor edx, edx
|
||||
.p2align 5
|
||||
1:
|
||||
9:
|
||||
movzx ebx, byte ptr [rbp+0x48]
|
||||
or ebx, eax
|
||||
add rdx, 64
|
||||
|
@ -1231,7 +1231,7 @@ blake3_hash_many_avx2:
|
|||
vpxor ymm6, ymm6, ymm14
|
||||
vpxor ymm7, ymm7, ymm15
|
||||
movzx eax, byte ptr [rbp+0x38]
|
||||
jne 1b
|
||||
jne 9b
|
||||
mov rbx, qword ptr [rbp+0x50]
|
||||
vunpcklps ymm8, ymm0, ymm1
|
||||
vunpcklps ymm9, ymm2, ymm3
|
||||
|
@ -1374,7 +1374,7 @@ blake3_hash_many_avx2:
|
|||
vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
|
||||
vmovdqa ymm10, ymm2
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd ymm0, ymm0, ymm4
|
||||
vpaddd ymm8, ymm8, ymm12
|
||||
vmovdqa ymmword ptr [rsp+0x40], ymm4
|
||||
|
@ -1470,7 +1470,7 @@ blake3_hash_many_avx2:
|
|||
vpshufd ymm2, ymm2, 0x93
|
||||
vpshufd ymm10, ymm10, 0x93
|
||||
dec al
|
||||
je 1f
|
||||
je 9f
|
||||
vmovdqa ymm4, ymmword ptr [rsp+0x40]
|
||||
vmovdqa ymm5, ymmword ptr [rsp+0x80]
|
||||
vshufps ymm12, ymm4, ymm5, 214
|
||||
|
@ -1503,8 +1503,8 @@ blake3_hash_many_avx2:
|
|||
vmovdqa ymm14, ymm5
|
||||
vmovdqa ymm5, ymmword ptr [rsp+0x40]
|
||||
vmovdqa ymm6, ymmword ptr [rsp+0x80]
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor ymm0, ymm0, ymm2
|
||||
vpxor ymm1, ymm1, ymm3
|
||||
vpxor ymm8, ymm8, ymm10
|
||||
|
@ -1577,7 +1577,7 @@ blake3_hash_many_avx2:
|
|||
vpshufd ymm6, ymm6, 0x93
|
||||
vpshufd ymm7, ymm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd ymm0, ymm0, ymm4
|
||||
vpaddd ymm0, ymm0, ymm1
|
||||
vpxor ymm3, ymm3, ymm0
|
||||
|
@ -1621,7 +1621,7 @@ blake3_hash_many_avx2:
|
|||
vpshufd ymm3, ymm3, 0x4E
|
||||
vpshufd ymm2, ymm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps ymm8, ymm4, ymm5, 214
|
||||
vpshufd ymm9, ymm4, 0x0F
|
||||
vpshufd ymm4, ymm8, 0x39
|
||||
|
@ -1635,8 +1635,8 @@ blake3_hash_many_avx2:
|
|||
vpshufd ymm7, ymm6, 0x1E
|
||||
vmovdqa ymm5, ymm9
|
||||
vmovdqa ymm6, ymm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor ymm0, ymm0, ymm2
|
||||
vpxor ymm1, ymm1, ymm3
|
||||
mov eax, r13d
|
||||
|
@ -1693,7 +1693,7 @@ blake3_hash_many_avx2:
|
|||
vpshufd xmm6, xmm6, 0x93
|
||||
vpshufd xmm7, xmm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd xmm0, xmm0, xmm4
|
||||
vpaddd xmm0, xmm0, xmm1
|
||||
vpxor xmm3, xmm3, xmm0
|
||||
|
@ -1737,7 +1737,7 @@ blake3_hash_many_avx2:
|
|||
vpshufd xmm3, xmm3, 0x4E
|
||||
vpshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps xmm8, xmm4, xmm5, 214
|
||||
vpshufd xmm9, xmm4, 0x0F
|
||||
vpshufd xmm4, xmm8, 0x39
|
||||
|
@ -1751,8 +1751,8 @@ blake3_hash_many_avx2:
|
|||
vpshufd xmm7, xmm6, 0x1E
|
||||
vmovdqa xmm5, xmm9
|
||||
vmovdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor xmm0, xmm0, xmm2
|
||||
vpxor xmm1, xmm1, xmm3
|
||||
mov eax, r13d
|
||||
|
|
|
@ -77,7 +77,7 @@ blake3_hash_many_avx2:
|
|||
or eax, ebx
|
||||
xor edx, edx
|
||||
.p2align 5
|
||||
1:
|
||||
9:
|
||||
movzx ebx, byte ptr [rbp+0x88]
|
||||
or ebx, eax
|
||||
add rdx, 64
|
||||
|
@ -1245,7 +1245,7 @@ blake3_hash_many_avx2:
|
|||
vpxor ymm6, ymm6, ymm14
|
||||
vpxor ymm7, ymm7, ymm15
|
||||
movzx eax, byte ptr [rbp+0x78]
|
||||
jne 1b
|
||||
jne 9b
|
||||
mov rbx, qword ptr [rbp+0x90]
|
||||
vunpcklps ymm8, ymm0, ymm1
|
||||
vunpcklps ymm9, ymm2, ymm3
|
||||
|
@ -1396,7 +1396,7 @@ blake3_hash_many_avx2:
|
|||
vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip]
|
||||
vmovdqa ymm10, ymm2
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd ymm0, ymm0, ymm4
|
||||
vpaddd ymm8, ymm8, ymm12
|
||||
vmovdqa ymmword ptr [rsp+0x40], ymm4
|
||||
|
@ -1492,7 +1492,7 @@ blake3_hash_many_avx2:
|
|||
vpshufd ymm2, ymm2, 0x93
|
||||
vpshufd ymm10, ymm10, 0x93
|
||||
dec al
|
||||
je 1f
|
||||
je 9f
|
||||
vmovdqa ymm4, ymmword ptr [rsp+0x40]
|
||||
vmovdqa ymm5, ymmword ptr [rsp+0x80]
|
||||
vshufps ymm12, ymm4, ymm5, 214
|
||||
|
@ -1525,8 +1525,8 @@ blake3_hash_many_avx2:
|
|||
vmovdqa ymm14, ymm5
|
||||
vmovdqa ymm5, ymmword ptr [rsp+0x40]
|
||||
vmovdqa ymm6, ymmword ptr [rsp+0x80]
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor ymm0, ymm0, ymm2
|
||||
vpxor ymm1, ymm1, ymm3
|
||||
vpxor ymm8, ymm8, ymm10
|
||||
|
@ -1599,7 +1599,7 @@ blake3_hash_many_avx2:
|
|||
vpshufd ymm6, ymm6, 0x93
|
||||
vpshufd ymm7, ymm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd ymm0, ymm0, ymm4
|
||||
vpaddd ymm0, ymm0, ymm1
|
||||
vpxor ymm3, ymm3, ymm0
|
||||
|
@ -1643,7 +1643,7 @@ blake3_hash_many_avx2:
|
|||
vpshufd ymm3, ymm3, 0x4E
|
||||
vpshufd ymm2, ymm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps ymm8, ymm4, ymm5, 214
|
||||
vpshufd ymm9, ymm4, 0x0F
|
||||
vpshufd ymm4, ymm8, 0x39
|
||||
|
@ -1657,8 +1657,8 @@ blake3_hash_many_avx2:
|
|||
vpshufd ymm7, ymm6, 0x1E
|
||||
vmovdqa ymm5, ymm9
|
||||
vmovdqa ymm6, ymm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor ymm0, ymm0, ymm2
|
||||
vpxor ymm1, ymm1, ymm3
|
||||
mov eax, r13d
|
||||
|
@ -1715,7 +1715,7 @@ blake3_hash_many_avx2:
|
|||
vpshufd xmm6, xmm6, 0x93
|
||||
vpshufd xmm7, xmm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd xmm0, xmm0, xmm4
|
||||
vpaddd xmm0, xmm0, xmm1
|
||||
vpxor xmm3, xmm3, xmm0
|
||||
|
@ -1759,7 +1759,7 @@ blake3_hash_many_avx2:
|
|||
vpshufd xmm3, xmm3, 0x4E
|
||||
vpshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps xmm8, xmm4, xmm5, 214
|
||||
vpshufd xmm9, xmm4, 0x0F
|
||||
vpshufd xmm4, xmm8, 0x39
|
||||
|
@ -1773,8 +1773,8 @@ blake3_hash_many_avx2:
|
|||
vpshufd xmm7, xmm6, 0x1E
|
||||
vmovdqa xmm5, xmm9
|
||||
vmovdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor xmm0, xmm0, xmm2
|
||||
vpxor xmm1, xmm1, xmm3
|
||||
mov eax, r13d
|
||||
|
|
|
@ -66,7 +66,7 @@ blake3_hash_many_avx512:
|
|||
or eax, ebx
|
||||
xor edx, edx
|
||||
.p2align 5
|
||||
1:
|
||||
9:
|
||||
movzx ebx, byte ptr [rbp+0x48]
|
||||
or ebx, eax
|
||||
add rdx, 64
|
||||
|
@ -1011,7 +1011,7 @@ blake3_hash_many_avx512:
|
|||
vpxord zmm6, zmm6, zmm14
|
||||
vpxord zmm7, zmm7, zmm15
|
||||
movzx eax, byte ptr [rbp+0x38]
|
||||
jne 1b
|
||||
jne 9b
|
||||
mov rbx, qword ptr [rbp+0x50]
|
||||
vpunpckldq zmm16, zmm0, zmm1
|
||||
vpunpckhdq zmm17, zmm0, zmm1
|
||||
|
@ -2089,7 +2089,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd zmm6, zmm6, 0x93
|
||||
vpshufd zmm7, zmm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd zmm0, zmm0, zmm4
|
||||
vpaddd zmm0, zmm0, zmm1
|
||||
vpxord zmm3, zmm3, zmm0
|
||||
|
@ -2125,7 +2125,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd zmm3, zmm3, 0x4E
|
||||
vpshufd zmm2, zmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps zmm8, zmm4, zmm5, 214
|
||||
vpshufd zmm9, zmm4, 0x0F
|
||||
vpshufd zmm4, zmm8, 0x39
|
||||
|
@ -2139,8 +2139,8 @@ blake3_hash_many_avx512:
|
|||
vpshufd zmm7, zmm6, 0x1E
|
||||
vmovdqa32 zmm5, zmm9
|
||||
vmovdqa32 zmm6, zmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxord zmm0, zmm0, zmm2
|
||||
vpxord zmm1, zmm1, zmm3
|
||||
mov eax, r13d
|
||||
|
@ -2206,7 +2206,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd ymm6, ymm6, 0x93
|
||||
vpshufd ymm7, ymm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd ymm0, ymm0, ymm4
|
||||
vpaddd ymm0, ymm0, ymm1
|
||||
vpxord ymm3, ymm3, ymm0
|
||||
|
@ -2242,7 +2242,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd ymm3, ymm3, 0x4E
|
||||
vpshufd ymm2, ymm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps ymm8, ymm4, ymm5, 214
|
||||
vpshufd ymm9, ymm4, 0x0F
|
||||
vpshufd ymm4, ymm8, 0x39
|
||||
|
@ -2256,8 +2256,8 @@ blake3_hash_many_avx512:
|
|||
vpshufd ymm7, ymm6, 0x1E
|
||||
vmovdqa ymm5, ymm9
|
||||
vmovdqa ymm6, ymm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor ymm0, ymm0, ymm2
|
||||
vpxor ymm1, ymm1, ymm3
|
||||
mov eax, r13d
|
||||
|
@ -2309,7 +2309,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd xmm6, xmm6, 0x93
|
||||
vpshufd xmm7, xmm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd xmm0, xmm0, xmm4
|
||||
vpaddd xmm0, xmm0, xmm1
|
||||
vpxord xmm3, xmm3, xmm0
|
||||
|
@ -2345,7 +2345,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd xmm3, xmm3, 0x4E
|
||||
vpshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps xmm8, xmm4, xmm5, 214
|
||||
vpshufd xmm9, xmm4, 0x0F
|
||||
vpshufd xmm4, xmm8, 0x39
|
||||
|
@ -2359,8 +2359,8 @@ blake3_hash_many_avx512:
|
|||
vpshufd xmm7, xmm6, 0x1E
|
||||
vmovdqa xmm5, xmm9
|
||||
vmovdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor xmm0, xmm0, xmm2
|
||||
vpxor xmm1, xmm1, xmm3
|
||||
mov eax, r13d
|
||||
|
@ -2393,7 +2393,7 @@ blake3_compress_in_place_avx512:
|
|||
vpshufd xmm6, xmm6, 0x93
|
||||
vpshufd xmm7, xmm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd xmm0, xmm0, xmm4
|
||||
vpaddd xmm0, xmm0, xmm1
|
||||
vpxord xmm3, xmm3, xmm0
|
||||
|
@ -2429,7 +2429,7 @@ blake3_compress_in_place_avx512:
|
|||
vpshufd xmm3, xmm3, 0x4E
|
||||
vpshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps xmm8, xmm4, xmm5, 214
|
||||
vpshufd xmm9, xmm4, 0x0F
|
||||
vpshufd xmm4, xmm8, 0x39
|
||||
|
@ -2443,8 +2443,8 @@ blake3_compress_in_place_avx512:
|
|||
vpshufd xmm7, xmm6, 0x1E
|
||||
vmovdqa xmm5, xmm9
|
||||
vmovdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor xmm0, xmm0, xmm2
|
||||
vpxor xmm1, xmm1, xmm3
|
||||
vmovdqu xmmword ptr [rdi], xmm0
|
||||
|
@ -2475,7 +2475,7 @@ blake3_compress_xof_avx512:
|
|||
vpshufd xmm6, xmm6, 0x93
|
||||
vpshufd xmm7, xmm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd xmm0, xmm0, xmm4
|
||||
vpaddd xmm0, xmm0, xmm1
|
||||
vpxord xmm3, xmm3, xmm0
|
||||
|
@ -2511,7 +2511,7 @@ blake3_compress_xof_avx512:
|
|||
vpshufd xmm3, xmm3, 0x4E
|
||||
vpshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps xmm8, xmm4, xmm5, 214
|
||||
vpshufd xmm9, xmm4, 0x0F
|
||||
vpshufd xmm4, xmm8, 0x39
|
||||
|
@ -2525,8 +2525,8 @@ blake3_compress_xof_avx512:
|
|||
vpshufd xmm7, xmm6, 0x1E
|
||||
vmovdqa xmm5, xmm9
|
||||
vmovdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor xmm0, xmm0, xmm2
|
||||
vpxor xmm1, xmm1, xmm3
|
||||
vpxor xmm2, xmm2, [rdi]
|
||||
|
|
|
@ -80,7 +80,7 @@ blake3_hash_many_avx512:
|
|||
or eax, ebx
|
||||
xor edx, edx
|
||||
.p2align 5
|
||||
1:
|
||||
9:
|
||||
movzx ebx, byte ptr [rbp+0x88]
|
||||
or ebx, eax
|
||||
add rdx, 64
|
||||
|
@ -1025,7 +1025,7 @@ blake3_hash_many_avx512:
|
|||
vpxord zmm6, zmm6, zmm14
|
||||
vpxord zmm7, zmm7, zmm15
|
||||
movzx eax, byte ptr [rbp+0x78]
|
||||
jne 1b
|
||||
jne 9b
|
||||
mov rbx, qword ptr [rbp+0x90]
|
||||
vpunpckldq zmm16, zmm0, zmm1
|
||||
vpunpckhdq zmm17, zmm0, zmm1
|
||||
|
@ -2115,7 +2115,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd zmm6, zmm6, 0x93
|
||||
vpshufd zmm7, zmm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd zmm0, zmm0, zmm4
|
||||
vpaddd zmm0, zmm0, zmm1
|
||||
vpxord zmm3, zmm3, zmm0
|
||||
|
@ -2151,7 +2151,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd zmm3, zmm3, 0x4E
|
||||
vpshufd zmm2, zmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps zmm8, zmm4, zmm5, 214
|
||||
vpshufd zmm9, zmm4, 0x0F
|
||||
vpshufd zmm4, zmm8, 0x39
|
||||
|
@ -2165,8 +2165,8 @@ blake3_hash_many_avx512:
|
|||
vpshufd zmm7, zmm6, 0x1E
|
||||
vmovdqa32 zmm5, zmm9
|
||||
vmovdqa32 zmm6, zmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxord zmm0, zmm0, zmm2
|
||||
vpxord zmm1, zmm1, zmm3
|
||||
mov eax, r13d
|
||||
|
@ -2232,7 +2232,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd ymm6, ymm6, 0x93
|
||||
vpshufd ymm7, ymm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd ymm0, ymm0, ymm4
|
||||
vpaddd ymm0, ymm0, ymm1
|
||||
vpxord ymm3, ymm3, ymm0
|
||||
|
@ -2268,7 +2268,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd ymm3, ymm3, 0x4E
|
||||
vpshufd ymm2, ymm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps ymm8, ymm4, ymm5, 214
|
||||
vpshufd ymm9, ymm4, 0x0F
|
||||
vpshufd ymm4, ymm8, 0x39
|
||||
|
@ -2282,8 +2282,8 @@ blake3_hash_many_avx512:
|
|||
vpshufd ymm7, ymm6, 0x1E
|
||||
vmovdqa ymm5, ymm9
|
||||
vmovdqa ymm6, ymm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor ymm0, ymm0, ymm2
|
||||
vpxor ymm1, ymm1, ymm3
|
||||
mov eax, r13d
|
||||
|
@ -2335,7 +2335,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd xmm6, xmm6, 0x93
|
||||
vpshufd xmm7, xmm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd xmm0, xmm0, xmm4
|
||||
vpaddd xmm0, xmm0, xmm1
|
||||
vpxord xmm3, xmm3, xmm0
|
||||
|
@ -2371,7 +2371,7 @@ blake3_hash_many_avx512:
|
|||
vpshufd xmm3, xmm3, 0x4E
|
||||
vpshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps xmm8, xmm4, xmm5, 214
|
||||
vpshufd xmm9, xmm4, 0x0F
|
||||
vpshufd xmm4, xmm8, 0x39
|
||||
|
@ -2385,8 +2385,8 @@ blake3_hash_many_avx512:
|
|||
vpshufd xmm7, xmm6, 0x1E
|
||||
vmovdqa xmm5, xmm9
|
||||
vmovdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor xmm0, xmm0, xmm2
|
||||
vpxor xmm1, xmm1, xmm3
|
||||
mov eax, r13d
|
||||
|
@ -2426,7 +2426,7 @@ blake3_compress_in_place_avx512:
|
|||
vpshufd xmm6, xmm6, 0x93
|
||||
vpshufd xmm7, xmm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd xmm0, xmm0, xmm4
|
||||
vpaddd xmm0, xmm0, xmm1
|
||||
vpxord xmm3, xmm3, xmm0
|
||||
|
@ -2462,7 +2462,7 @@ blake3_compress_in_place_avx512:
|
|||
vpshufd xmm3, xmm3, 0x4E
|
||||
vpshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps xmm8, xmm4, xmm5, 214
|
||||
vpshufd xmm9, xmm4, 0x0F
|
||||
vpshufd xmm4, xmm8, 0x39
|
||||
|
@ -2476,8 +2476,8 @@ blake3_compress_in_place_avx512:
|
|||
vpshufd xmm7, xmm6, 0x1E
|
||||
vmovdqa xmm5, xmm9
|
||||
vmovdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor xmm0, xmm0, xmm2
|
||||
vpxor xmm1, xmm1, xmm3
|
||||
vmovdqu xmmword ptr [rcx], xmm0
|
||||
|
@ -2520,7 +2520,7 @@ blake3_compress_xof_avx512:
|
|||
vpshufd xmm6, xmm6, 0x93
|
||||
vpshufd xmm7, xmm7, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
vpaddd xmm0, xmm0, xmm4
|
||||
vpaddd xmm0, xmm0, xmm1
|
||||
vpxord xmm3, xmm3, xmm0
|
||||
|
@ -2556,7 +2556,7 @@ blake3_compress_xof_avx512:
|
|||
vpshufd xmm3, xmm3, 0x4E
|
||||
vpshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
vshufps xmm8, xmm4, xmm5, 214
|
||||
vpshufd xmm9, xmm4, 0x0F
|
||||
vpshufd xmm4, xmm8, 0x39
|
||||
|
@ -2570,8 +2570,8 @@ blake3_compress_xof_avx512:
|
|||
vpshufd xmm7, xmm6, 0x1E
|
||||
vmovdqa xmm5, xmm9
|
||||
vmovdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
vpxor xmm0, xmm0, xmm2
|
||||
vpxor xmm1, xmm1, xmm3
|
||||
vpxor xmm2, xmm2, xmmword ptr [rcx]
|
||||
|
|
|
@ -67,7 +67,7 @@ blake3_hash_many_sse41:
|
|||
movzx eax, byte ptr [rbp+0x40]
|
||||
or eax, r13d
|
||||
xor edx, edx
|
||||
1:
|
||||
9:
|
||||
mov r14d, eax
|
||||
or eax, r12d
|
||||
add rdx, 64
|
||||
|
@ -1377,7 +1377,7 @@ blake3_hash_many_sse41:
|
|||
pxor xmm6, xmm14
|
||||
pxor xmm7, xmm15
|
||||
mov eax, r13d
|
||||
jne 1b
|
||||
jne 9b
|
||||
movdqa xmm9, xmm0
|
||||
punpckldq xmm0, xmm1
|
||||
punpckhdq xmm9, xmm1
|
||||
|
@ -1496,7 +1496,7 @@ blake3_hash_many_sse41:
|
|||
pinsrd xmm3, eax, 3
|
||||
pinsrd xmm11, eax, 3
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
paddd xmm0, xmm4
|
||||
paddd xmm8, xmm12
|
||||
movaps xmmword ptr [rsp+0x20], xmm4
|
||||
|
@ -1596,7 +1596,7 @@ blake3_hash_many_sse41:
|
|||
pshufd xmm2, xmm2, 0x93
|
||||
pshufd xmm10, xmm10, 0x93
|
||||
dec al
|
||||
je 1f
|
||||
je 9f
|
||||
movdqa xmm12, xmmword ptr [rsp+0x20]
|
||||
movdqa xmm5, xmmword ptr [rsp+0x40]
|
||||
pshufd xmm13, xmm12, 0x0F
|
||||
|
@ -1633,8 +1633,8 @@ blake3_hash_many_sse41:
|
|||
movdqa xmm14, xmm5
|
||||
movdqa xmm5, xmmword ptr [rsp+0x20]
|
||||
movdqa xmm6, xmmword ptr [rsp+0x40]
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
pxor xmm0, xmm2
|
||||
pxor xmm1, xmm3
|
||||
pxor xmm8, xmm10
|
||||
|
@ -1695,7 +1695,7 @@ blake3_hash_many_sse41:
|
|||
shufps xmm8, xmm7, 221
|
||||
pshufd xmm7, xmm8, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
paddd xmm0, xmm4
|
||||
paddd xmm0, xmm1
|
||||
pxor xmm3, xmm0
|
||||
|
@ -1743,7 +1743,7 @@ blake3_hash_many_sse41:
|
|||
pshufd xmm3, xmm3, 0x4E
|
||||
pshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
movdqa xmm8, xmm4
|
||||
shufps xmm8, xmm5, 214
|
||||
pshufd xmm9, xmm4, 0x0F
|
||||
|
@ -1760,8 +1760,8 @@ blake3_hash_many_sse41:
|
|||
pshufd xmm7, xmm6, 0x1E
|
||||
movdqa xmm5, xmm9
|
||||
movdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
pxor xmm0, xmm2
|
||||
pxor xmm1, xmm3
|
||||
mov eax, r13d
|
||||
|
@ -1798,7 +1798,7 @@ _blake3_compress_in_place_sse41:
|
|||
movaps xmm14, xmmword ptr [ROT8+rip]
|
||||
movaps xmm15, xmmword ptr [ROT16+rip]
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
paddd xmm0, xmm4
|
||||
paddd xmm0, xmm1
|
||||
pxor xmm3, xmm0
|
||||
|
@ -1846,7 +1846,7 @@ _blake3_compress_in_place_sse41:
|
|||
pshufd xmm3, xmm3, 0x4E
|
||||
pshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
movdqa xmm8, xmm4
|
||||
shufps xmm8, xmm5, 214
|
||||
pshufd xmm9, xmm4, 0x0F
|
||||
|
@ -1863,8 +1863,8 @@ _blake3_compress_in_place_sse41:
|
|||
pshufd xmm7, xmm6, 0x1E
|
||||
movdqa xmm5, xmm9
|
||||
movdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
pxor xmm0, xmm2
|
||||
pxor xmm1, xmm3
|
||||
movups xmmword ptr [rdi], xmm0
|
||||
|
@ -1900,7 +1900,7 @@ _blake3_compress_xof_sse41:
|
|||
movaps xmm14, xmmword ptr [ROT8+rip]
|
||||
movaps xmm15, xmmword ptr [ROT16+rip]
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
paddd xmm0, xmm4
|
||||
paddd xmm0, xmm1
|
||||
pxor xmm3, xmm0
|
||||
|
@ -1948,7 +1948,7 @@ _blake3_compress_xof_sse41:
|
|||
pshufd xmm3, xmm3, 0x4E
|
||||
pshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
movdqa xmm8, xmm4
|
||||
shufps xmm8, xmm5, 214
|
||||
pshufd xmm9, xmm4, 0x0F
|
||||
|
@ -1965,8 +1965,8 @@ _blake3_compress_xof_sse41:
|
|||
pshufd xmm7, xmm6, 0x1E
|
||||
movdqa xmm5, xmm9
|
||||
movdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
movdqu xmm4, xmmword ptr [rdi]
|
||||
movdqu xmm5, xmmword ptr [rdi+0x10]
|
||||
pxor xmm0, xmm2
|
||||
|
|
|
@ -81,7 +81,7 @@ blake3_hash_many_sse41:
|
|||
movzx eax, byte ptr [rbp+0x80]
|
||||
or eax, r13d
|
||||
xor edx, edx
|
||||
1:
|
||||
9:
|
||||
mov r14d, eax
|
||||
or eax, r12d
|
||||
add rdx, 64
|
||||
|
@ -1391,7 +1391,7 @@ blake3_hash_many_sse41:
|
|||
pxor xmm6, xmm14
|
||||
pxor xmm7, xmm15
|
||||
mov eax, r13d
|
||||
jne 1b
|
||||
jne 9b
|
||||
movdqa xmm9, xmm0
|
||||
punpckldq xmm0, xmm1
|
||||
punpckhdq xmm9, xmm1
|
||||
|
@ -1522,7 +1522,7 @@ blake3_hash_many_sse41:
|
|||
pinsrd xmm3, eax, 3
|
||||
pinsrd xmm11, eax, 3
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
paddd xmm0, xmm4
|
||||
paddd xmm8, xmm12
|
||||
movaps xmmword ptr [rsp+0x20], xmm4
|
||||
|
@ -1622,7 +1622,7 @@ blake3_hash_many_sse41:
|
|||
pshufd xmm2, xmm2, 0x93
|
||||
pshufd xmm10, xmm10, 0x93
|
||||
dec al
|
||||
je 1f
|
||||
je 9f
|
||||
movdqa xmm12, xmmword ptr [rsp+0x20]
|
||||
movdqa xmm5, xmmword ptr [rsp+0x40]
|
||||
pshufd xmm13, xmm12, 0x0F
|
||||
|
@ -1659,8 +1659,8 @@ blake3_hash_many_sse41:
|
|||
movdqa xmm14, xmm5
|
||||
movdqa xmm5, xmmword ptr [rsp+0x20]
|
||||
movdqa xmm6, xmmword ptr [rsp+0x40]
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
pxor xmm0, xmm2
|
||||
pxor xmm1, xmm3
|
||||
pxor xmm8, xmm10
|
||||
|
@ -1721,7 +1721,7 @@ blake3_hash_many_sse41:
|
|||
shufps xmm8, xmm7, 221
|
||||
pshufd xmm7, xmm8, 0x93
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
paddd xmm0, xmm4
|
||||
paddd xmm0, xmm1
|
||||
pxor xmm3, xmm0
|
||||
|
@ -1769,7 +1769,7 @@ blake3_hash_many_sse41:
|
|||
pshufd xmm3, xmm3, 0x4E
|
||||
pshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
movdqa xmm8, xmm4
|
||||
shufps xmm8, xmm5, 214
|
||||
pshufd xmm9, xmm4, 0x0F
|
||||
|
@ -1786,8 +1786,8 @@ blake3_hash_many_sse41:
|
|||
pshufd xmm7, xmm6, 0x1E
|
||||
movdqa xmm5, xmm9
|
||||
movdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
pxor xmm0, xmm2
|
||||
pxor xmm1, xmm3
|
||||
mov eax, r13d
|
||||
|
@ -1831,7 +1831,7 @@ _blake3_compress_in_place_sse41:
|
|||
movaps xmm14, xmmword ptr [ROT8+rip]
|
||||
movaps xmm15, xmmword ptr [ROT16+rip]
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
paddd xmm0, xmm4
|
||||
paddd xmm0, xmm1
|
||||
pxor xmm3, xmm0
|
||||
|
@ -1879,7 +1879,7 @@ _blake3_compress_in_place_sse41:
|
|||
pshufd xmm3, xmm3, 0x4E
|
||||
pshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
movdqa xmm8, xmm4
|
||||
shufps xmm8, xmm5, 214
|
||||
pshufd xmm9, xmm4, 0x0F
|
||||
|
@ -1896,8 +1896,8 @@ _blake3_compress_in_place_sse41:
|
|||
pshufd xmm7, xmm6, 0x1E
|
||||
movdqa xmm5, xmm9
|
||||
movdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
pxor xmm0, xmm2
|
||||
pxor xmm1, xmm3
|
||||
movups xmmword ptr [rcx], xmm0
|
||||
|
@ -1945,7 +1945,7 @@ blake3_compress_xof_sse41:
|
|||
movaps xmm14, xmmword ptr [ROT8+rip]
|
||||
movaps xmm15, xmmword ptr [ROT16+rip]
|
||||
mov al, 7
|
||||
1:
|
||||
9:
|
||||
paddd xmm0, xmm4
|
||||
paddd xmm0, xmm1
|
||||
pxor xmm3, xmm0
|
||||
|
@ -1993,7 +1993,7 @@ blake3_compress_xof_sse41:
|
|||
pshufd xmm3, xmm3, 0x4E
|
||||
pshufd xmm2, xmm2, 0x93
|
||||
dec al
|
||||
jz 1f
|
||||
jz 9f
|
||||
movdqa xmm8, xmm4
|
||||
shufps xmm8, xmm5, 214
|
||||
pshufd xmm9, xmm4, 0x0F
|
||||
|
@ -2010,8 +2010,8 @@ blake3_compress_xof_sse41:
|
|||
pshufd xmm7, xmm6, 0x1E
|
||||
movdqa xmm5, xmm9
|
||||
movdqa xmm6, xmm8
|
||||
jmp 1b
|
||||
1:
|
||||
jmp 9b
|
||||
9:
|
||||
movdqu xmm4, xmmword ptr [rcx]
|
||||
movdqu xmm5, xmmword ptr [rcx+0x10]
|
||||
pxor xmm0, xmm2
|
||||
|
|
Loading…
Reference in New Issue