1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-04-27 00:15:09 +02:00

Save and restore the missing clobbered registers (xmm11, xmm14, xmm15) on Windows

This commit is contained in:
Samuel Neves 2020-03-29 05:53:37 +01:00
parent be4e7babee
commit 13556be388
2 changed files with 38 additions and 14 deletions

View File

@ -1800,15 +1800,18 @@ blake3_hash_many_sse41:
.p2align 6
blake3_compress_in_place_sse41:
_blake3_compress_in_place_sse41:
sub rsp, 72
sub rsp, 120
movdqa xmmword ptr [rsp], xmm6
movdqa xmmword ptr [rsp+0x10], xmm7
movdqa xmmword ptr [rsp+0x20], xmm8
movdqa xmmword ptr [rsp+0x30], xmm9
movdqa xmmword ptr [rsp+0x40], xmm11
movdqa xmmword ptr [rsp+0x50], xmm14
movdqa xmmword ptr [rsp+0x60], xmm15
movups xmm0, xmmword ptr [rcx]
movups xmm1, xmmword ptr [rcx+0x10]
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
movzx eax, byte ptr [rsp+0x70]
movzx eax, byte ptr [rsp+0xA0]
movzx r8d, r8b
shl rax, 32
add r8, rax
@ -1906,24 +1909,30 @@ _blake3_compress_in_place_sse41:
movdqa xmm7, xmmword ptr [rsp+0x10]
movdqa xmm8, xmmword ptr [rsp+0x20]
movdqa xmm9, xmmword ptr [rsp+0x30]
add rsp, 72
movdqa xmm11, xmmword ptr [rsp+0x40]
movdqa xmm14, xmmword ptr [rsp+0x50]
movdqa xmm15, xmmword ptr [rsp+0x60]
add rsp, 120
ret
.p2align 6
_blake3_compress_xof_sse41:
blake3_compress_xof_sse41:
sub rsp, 72
sub rsp, 120
movdqa xmmword ptr [rsp], xmm6
movdqa xmmword ptr [rsp+0x10], xmm7
movdqa xmmword ptr [rsp+0x20], xmm8
movdqa xmmword ptr [rsp+0x30], xmm9
movdqa xmmword ptr [rsp+0x40], xmm11
movdqa xmmword ptr [rsp+0x50], xmm14
movdqa xmmword ptr [rsp+0x60], xmm15
movups xmm0, xmmword ptr [rcx]
movups xmm1, xmmword ptr [rcx+0x10]
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
movzx eax, byte ptr [rsp+0x70]
movzx eax, byte ptr [rsp+0xA0]
movzx r8d, r8b
mov r10, qword ptr [rsp+0x78]
mov r10, qword ptr [rsp+0xA8]
shl rax, 32
add r8, rax
movq xmm3, r9
@ -2026,7 +2035,10 @@ blake3_compress_xof_sse41:
movdqa xmm7, xmmword ptr [rsp+0x10]
movdqa xmm8, xmmword ptr [rsp+0x20]
movdqa xmm9, xmmword ptr [rsp+0x30]
add rsp, 72
movdqa xmm11, xmmword ptr [rsp+0x40]
movdqa xmm14, xmmword ptr [rsp+0x50]
movdqa xmm15, xmmword ptr [rsp+0x60]
add rsp, 120
ret

View File

@ -1802,15 +1802,18 @@ blake3_hash_many_sse41 ENDP
blake3_compress_in_place_sse41 PROC
_blake3_compress_in_place_sse41 PROC
sub rsp, 72
sub rsp, 120
movdqa xmmword ptr [rsp], xmm6
movdqa xmmword ptr [rsp+10H], xmm7
movdqa xmmword ptr [rsp+20H], xmm8
movdqa xmmword ptr [rsp+30H], xmm9
movdqa xmmword ptr [rsp+40H], xmm11
movdqa xmmword ptr [rsp+50H], xmm14
movdqa xmmword ptr [rsp+60H], xmm15
movups xmm0, xmmword ptr [rcx]
movups xmm1, xmmword ptr [rcx+10H]
movaps xmm2, xmmword ptr [BLAKE3_IV]
movzx eax, byte ptr [rsp+70H]
movzx eax, byte ptr [rsp+0A0H]
movzx r8d, r8b
shl rax, 32
add r8, rax
@ -1908,7 +1911,10 @@ _blake3_compress_in_place_sse41 PROC
movdqa xmm7, xmmword ptr [rsp+10H]
movdqa xmm8, xmmword ptr [rsp+20H]
movdqa xmm9, xmmword ptr [rsp+30H]
add rsp, 72
movdqa xmm11, xmmword ptr [rsp+40H]
movdqa xmm14, xmmword ptr [rsp+50H]
movdqa xmm15, xmmword ptr [rsp+60H]
add rsp, 120
ret
_blake3_compress_in_place_sse41 ENDP
blake3_compress_in_place_sse41 ENDP
@ -1916,17 +1922,20 @@ blake3_compress_in_place_sse41 ENDP
ALIGN 16
blake3_compress_xof_sse41 PROC
_blake3_compress_xof_sse41 PROC
sub rsp, 72
sub rsp, 120
movdqa xmmword ptr [rsp], xmm6
movdqa xmmword ptr [rsp+10H], xmm7
movdqa xmmword ptr [rsp+20H], xmm8
movdqa xmmword ptr [rsp+30H], xmm9
movdqa xmmword ptr [rsp+40H], xmm11
movdqa xmmword ptr [rsp+50H], xmm14
movdqa xmmword ptr [rsp+60H], xmm15
movups xmm0, xmmword ptr [rcx]
movups xmm1, xmmword ptr [rcx+10H]
movaps xmm2, xmmword ptr [BLAKE3_IV]
movzx eax, byte ptr [rsp+70H]
movzx eax, byte ptr [rsp+0A0H]
movzx r8d, r8b
mov r10, qword ptr [rsp+78H]
mov r10, qword ptr [rsp+0A8H]
shl rax, 32
add r8, rax
movq xmm3, r9
@ -2029,7 +2038,10 @@ _blake3_compress_xof_sse41 PROC
movdqa xmm7, xmmword ptr [rsp+10H]
movdqa xmm8, xmmword ptr [rsp+20H]
movdqa xmm9, xmmword ptr [rsp+30H]
add rsp, 72
movdqa xmm11, xmmword ptr [rsp+40H]
movdqa xmm14, xmmword ptr [rsp+50H]
movdqa xmm15, xmmword ptr [rsp+60H]
add rsp, 120
ret
_blake3_compress_xof_sse41 ENDP
blake3_compress_xof_sse41 ENDP