1
0
Fork 0
mirror of https://github.com/BLAKE3-team/BLAKE3 synced 2024-04-18 15:13:53 +02:00

More movd/movq discrepancies. Fixes #149. (#150)

This change should make no functional difference, but some toolchains will not accept movd with 64-bit arguments.
This commit is contained in:
Samuel Neves 2021-02-06 20:02:53 +00:00 committed by GitHub
parent aea29ace2d
commit 953654e25e
Signed by: GitHub
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 14 additions and 14 deletions

View File

@@ -1704,7 +1704,7 @@ blake3_hash_many_sse2:
pshufd xmm15, xmm11, 0x93
shl rax, 0x20
or rax, 0x40
-movd xmm3, rax
+movq xmm3, rax
movdqa xmmword ptr [rsp+0x20], xmm3
movaps xmm3, xmmword ptr [rsp]
movaps xmm11, xmmword ptr [rsp+0x10]
@@ -1917,7 +1917,7 @@ blake3_hash_many_sse2:
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
shl rax, 32
or rax, 64
-movd xmm12, rax
+movq xmm12, rax
movdqa xmm3, xmm13
punpcklqdq xmm3, xmm12
movups xmm4, xmmword ptr [r8+rdx-0x40]

View File

@@ -1715,7 +1715,7 @@ blake3_hash_many_sse2:
pshufd xmm15, xmm11, 0x93
shl rax, 0x20
or rax, 0x40
-movd xmm3, rax
+movq xmm3, rax
movdqa xmmword ptr [rsp+0x20], xmm3
movaps xmm3, xmmword ptr [rsp]
movaps xmm11, xmmword ptr [rsp+0x10]
@@ -1928,7 +1928,7 @@ blake3_hash_many_sse2:
movaps xmm2, xmmword ptr [BLAKE3_IV+rip]
shl rax, 32
or rax, 64
-movd xmm12, rax
+movq xmm12, rax
movdqa xmm3, xmm13
punpcklqdq xmm3, xmm12
movups xmm4, xmmword ptr [r8+rdx-0x40]

View File

@@ -1716,7 +1716,7 @@ innerloop2:
pshufd xmm15, xmm11, 93H
shl rax, 20H
or rax, 40H
-movd xmm3, rax
+movq xmm3, rax
movdqa xmmword ptr [rsp+20H], xmm3
movaps xmm3, xmmword ptr [rsp]
movaps xmm11, xmmword ptr [rsp+10H]
@@ -1929,7 +1929,7 @@ innerloop1:
movaps xmm2, xmmword ptr [BLAKE3_IV]
shl rax, 32
or rax, 64
-movd xmm12, rax
+movq xmm12, rax
movdqa xmm3, xmm13
punpcklqdq xmm3, xmm12
movups xmm4, xmmword ptr [r8+rdx-40H]
@@ -2054,8 +2054,8 @@ _blake3_compress_in_place_sse2 PROC
movzx r8d, r8b
shl rax, 32
add r8, rax
-movd xmm3, r9
-movd xmm4, r8
+movq xmm3, r9
+movq xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]
@@ -2186,8 +2186,8 @@ _blake3_compress_xof_sse2 PROC
mov r10, qword ptr [rsp+0A8H]
shl rax, 32
add r8, rax
-movd xmm3, r9
-movd xmm4, r8
+movq xmm3, r9
+movq xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]

View File

@@ -1817,8 +1817,8 @@ _blake3_compress_in_place_sse41 PROC
movzx r8d, r8b
shl rax, 32
add r8, rax
-movd xmm3, r9
-movd xmm4, r8
+movq xmm3, r9
+movq xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]
@@ -1938,8 +1938,8 @@ _blake3_compress_xof_sse41 PROC
mov r10, qword ptr [rsp+0A8H]
shl rax, 32
add r8, rax
-movd xmm3, r9
-movd xmm4, r8
+movq xmm3, r9
+movq xmm4, r8
punpcklqdq xmm3, xmm4
movups xmm4, xmmword ptr [rdx]
movups xmm5, xmmword ptr [rdx+10H]