mirror of
https://github.com/BLAKE3-team/BLAKE3
synced 2024-05-28 17:56:06 +02:00
initial blake3_xof_many_avx512
This commit is contained in:
parent
76f9339312
commit
160b753f9a
|
@ -19,6 +19,8 @@
|
|||
.global _blake3_compress_in_place_avx512
|
||||
.global blake3_compress_xof_avx512
|
||||
.global _blake3_compress_xof_avx512
|
||||
.global blake3_xof_many_avx512
|
||||
.global _blake3_xof_many_avx512
|
||||
|
||||
#ifdef __APPLE__
|
||||
.text
|
||||
|
@ -2553,6 +2555,2243 @@ blake3_compress_xof_avx512:
|
|||
vmovdqu xmmword ptr [r9+0x30], xmm3
|
||||
ret
|
||||
|
||||
.p2align 6
|
||||
blake3_xof_many_avx512:
|
||||
_blake3_xof_many_avx512:
|
||||
_CET_ENDBR
|
||||
mov r10,QWORD PTR [rsp+0x8]
|
||||
cmp r10,0x1
|
||||
ja 2f
|
||||
vmovdqu xmm0,XMMWORD PTR [rdi]
|
||||
vmovdqu xmm1,XMMWORD PTR [rdi+0x10]
|
||||
movzx eax,r8b
|
||||
movzx edx,dl
|
||||
shl rax,0x20
|
||||
add rdx,rax
|
||||
vmovq xmm3,rcx
|
||||
vmovq xmm4,rdx
|
||||
vpunpcklqdq xmm3,xmm3,xmm4
|
||||
vmovaps xmm2,XMMWORD PTR [BLAKE3_IV+rip]
|
||||
vmovups xmm8,XMMWORD PTR [rsi]
|
||||
vmovups xmm9,XMMWORD PTR [rsi+0x10]
|
||||
vshufps xmm4,xmm8,xmm9,0x88
|
||||
vshufps xmm5,xmm8,xmm9,0xdd
|
||||
vmovups xmm8,XMMWORD PTR [rsi+0x20]
|
||||
vmovups xmm9,XMMWORD PTR [rsi+0x30]
|
||||
vshufps xmm6,xmm8,xmm9,0x88
|
||||
vshufps xmm7,xmm8,xmm9,0xdd
|
||||
vpshufd xmm6,xmm6,0x93
|
||||
vpshufd xmm7,xmm7,0x93
|
||||
mov al,0x7
|
||||
3:
|
||||
vpaddd xmm0,xmm0,xmm4
|
||||
vpaddd xmm0,xmm0,xmm1
|
||||
vpxord xmm3,xmm3,xmm0
|
||||
vprord xmm3,xmm3,0x10
|
||||
vpaddd xmm2,xmm2,xmm3
|
||||
vpxord xmm1,xmm1,xmm2
|
||||
vprord xmm1,xmm1,0xc
|
||||
vpaddd xmm0,xmm0,xmm5
|
||||
vpaddd xmm0,xmm0,xmm1
|
||||
vpxord xmm3,xmm3,xmm0
|
||||
vprord xmm3,xmm3,0x8
|
||||
vpaddd xmm2,xmm2,xmm3
|
||||
vpxord xmm1,xmm1,xmm2
|
||||
vprord xmm1,xmm1,0x7
|
||||
vpshufd xmm0,xmm0,0x93
|
||||
vpshufd xmm3,xmm3,0x4e
|
||||
vpshufd xmm2,xmm2,0x39
|
||||
vpaddd xmm0,xmm0,xmm6
|
||||
vpaddd xmm0,xmm0,xmm1
|
||||
vpxord xmm3,xmm3,xmm0
|
||||
vprord xmm3,xmm3,0x10
|
||||
vpaddd xmm2,xmm2,xmm3
|
||||
vpxord xmm1,xmm1,xmm2
|
||||
vprord xmm1,xmm1,0xc
|
||||
vpaddd xmm0,xmm0,xmm7
|
||||
vpaddd xmm0,xmm0,xmm1
|
||||
vpxord xmm3,xmm3,xmm0
|
||||
vprord xmm3,xmm3,0x8
|
||||
vpaddd xmm2,xmm2,xmm3
|
||||
vpxord xmm1,xmm1,xmm2
|
||||
vprord xmm1,xmm1,0x7
|
||||
vpshufd xmm0,xmm0,0x39
|
||||
vpshufd xmm3,xmm3,0x4e
|
||||
vpshufd xmm2,xmm2,0x93
|
||||
dec al
|
||||
je 3f
|
||||
vshufps xmm8,xmm4,xmm5,0xd6
|
||||
vpshufd xmm9,xmm4,0xf
|
||||
vpshufd xmm4,xmm8,0x39
|
||||
vshufps xmm8,xmm6,xmm7,0xfa
|
||||
vpblendd xmm9,xmm9,xmm8,0xaa
|
||||
vpunpcklqdq xmm8,xmm7,xmm5
|
||||
vpblendd xmm8,xmm8,xmm6,0x88
|
||||
vpshufd xmm8,xmm8,0x78
|
||||
vpunpckhdq xmm5,xmm5,xmm7
|
||||
vpunpckldq xmm6,xmm6,xmm5
|
||||
vpshufd xmm7,xmm6,0x1e
|
||||
vmovdqa xmm5,xmm9
|
||||
vmovdqa xmm6,xmm8
|
||||
jmp 3b
|
||||
3:
|
||||
vpxor xmm0,xmm0,xmm2
|
||||
vpxor xmm1,xmm1,xmm3
|
||||
vpxor xmm2,xmm2,XMMWORD PTR [rdi]
|
||||
vpxor xmm3,xmm3,XMMWORD PTR [rdi+0x10]
|
||||
vmovdqu XMMWORD PTR [r9],xmm0
|
||||
vmovdqu XMMWORD PTR [r9+0x10],xmm1
|
||||
vmovdqu XMMWORD PTR [r9+0x20],xmm2
|
||||
vmovdqu XMMWORD PTR [r9+0x30],xmm3
|
||||
ret
|
||||
.p2align 6
|
||||
2:
|
||||
push rbp
|
||||
mov rbp,rsp
|
||||
sub rsp,0x90
|
||||
and rsp,0xffffffffffffffc0
|
||||
vpbroadcastd zmm0,ecx
|
||||
shr rcx,0x20
|
||||
vpbroadcastd zmm1,ecx
|
||||
vpaddd zmm2,zmm0,ZMMWORD PTR [ADD0+rip]
|
||||
vpcmpltud k1,zmm2,zmm0
|
||||
vpaddd zmm1{k1},zmm1,DWORD PTR [ADD1+rip]{1to16}
|
||||
vmovdqa32 ZMMWORD PTR [rsp],zmm2
|
||||
vmovdqa32 ZMMWORD PTR [rsp+0x40],zmm1
|
||||
cmp r10,0x10
|
||||
jb 2f
|
||||
3:
|
||||
vpbroadcastd zmm16,DWORD PTR [rsi]
|
||||
vpbroadcastd zmm17,DWORD PTR [rsi+0x4]
|
||||
vpbroadcastd zmm18,DWORD PTR [rsi+0x8]
|
||||
vpbroadcastd zmm19,DWORD PTR [rsi+0xc]
|
||||
vpbroadcastd zmm20,DWORD PTR [rsi+0x10]
|
||||
vpbroadcastd zmm21,DWORD PTR [rsi+0x14]
|
||||
vpbroadcastd zmm22,DWORD PTR [rsi+0x18]
|
||||
vpbroadcastd zmm23,DWORD PTR [rsi+0x1c]
|
||||
vpbroadcastd zmm24,DWORD PTR [rsi+0x20]
|
||||
vpbroadcastd zmm25,DWORD PTR [rsi+0x24]
|
||||
vpbroadcastd zmm26,DWORD PTR [rsi+0x28]
|
||||
vpbroadcastd zmm27,DWORD PTR [rsi+0x2c]
|
||||
vpbroadcastd zmm28,DWORD PTR [rsi+0x30]
|
||||
vpbroadcastd zmm29,DWORD PTR [rsi+0x34]
|
||||
vpbroadcastd zmm30,DWORD PTR [rsi+0x38]
|
||||
vpbroadcastd zmm31,DWORD PTR [rsi+0x3c]
|
||||
vpbroadcastd zmm0,DWORD PTR [rdi]
|
||||
vpbroadcastd zmm1,DWORD PTR [rdi+0x4]
|
||||
vpbroadcastd zmm2,DWORD PTR [rdi+0x8]
|
||||
vpbroadcastd zmm3,DWORD PTR [rdi+0xc]
|
||||
vpbroadcastd zmm4,DWORD PTR [rdi+0x10]
|
||||
vpbroadcastd zmm5,DWORD PTR [rdi+0x14]
|
||||
vpbroadcastd zmm6,DWORD PTR [rdi+0x18]
|
||||
vpbroadcastd zmm7,DWORD PTR [rdi+0x1c]
|
||||
vpbroadcastd zmm8,DWORD PTR [BLAKE3_IV_0+rip]
|
||||
vpbroadcastd zmm9,DWORD PTR [BLAKE3_IV_1+rip]
|
||||
vpbroadcastd zmm10,DWORD PTR [BLAKE3_IV_2+rip]
|
||||
vpbroadcastd zmm11,DWORD PTR [BLAKE3_IV_3+rip]
|
||||
vmovdqa32 zmm12,ZMMWORD PTR [rsp]
|
||||
vmovdqa32 zmm13,ZMMWORD PTR [rsp+0x40]
|
||||
vpbroadcastd zmm14,edx
|
||||
vpbroadcastd zmm15,r8d
|
||||
vpaddd zmm0,zmm0,zmm16
|
||||
vpaddd zmm1,zmm1,zmm18
|
||||
vpaddd zmm2,zmm2,zmm20
|
||||
vpaddd zmm3,zmm3,zmm22
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vprord zmm15,zmm15,0x10
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0xc
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vpaddd zmm0,zmm0,zmm17
|
||||
vpaddd zmm1,zmm1,zmm19
|
||||
vpaddd zmm2,zmm2,zmm21
|
||||
vpaddd zmm3,zmm3,zmm23
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vprord zmm15,zmm15,0x8
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0x7
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vpaddd zmm0,zmm0,zmm24
|
||||
vpaddd zmm1,zmm1,zmm26
|
||||
vpaddd zmm2,zmm2,zmm28
|
||||
vpaddd zmm3,zmm3,zmm30
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x10
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vprord zmm4,zmm4,0xc
|
||||
vpaddd zmm0,zmm0,zmm25
|
||||
vpaddd zmm1,zmm1,zmm27
|
||||
vpaddd zmm2,zmm2,zmm29
|
||||
vpaddd zmm3,zmm3,zmm31
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x8
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vprord zmm4,zmm4,0x7
|
||||
vpaddd zmm0,zmm0,zmm18
|
||||
vpaddd zmm1,zmm1,zmm19
|
||||
vpaddd zmm2,zmm2,zmm23
|
||||
vpaddd zmm3,zmm3,zmm20
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vprord zmm15,zmm15,0x10
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0xc
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vpaddd zmm0,zmm0,zmm22
|
||||
vpaddd zmm1,zmm1,zmm26
|
||||
vpaddd zmm2,zmm2,zmm16
|
||||
vpaddd zmm3,zmm3,zmm29
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vprord zmm15,zmm15,0x8
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0x7
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vpaddd zmm0,zmm0,zmm17
|
||||
vpaddd zmm1,zmm1,zmm28
|
||||
vpaddd zmm2,zmm2,zmm25
|
||||
vpaddd zmm3,zmm3,zmm31
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x10
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vprord zmm4,zmm4,0xc
|
||||
vpaddd zmm0,zmm0,zmm27
|
||||
vpaddd zmm1,zmm1,zmm21
|
||||
vpaddd zmm2,zmm2,zmm30
|
||||
vpaddd zmm3,zmm3,zmm24
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x8
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vprord zmm4,zmm4,0x7
|
||||
vpaddd zmm0,zmm0,zmm19
|
||||
vpaddd zmm1,zmm1,zmm26
|
||||
vpaddd zmm2,zmm2,zmm29
|
||||
vpaddd zmm3,zmm3,zmm23
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vprord zmm15,zmm15,0x10
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0xc
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vpaddd zmm0,zmm0,zmm20
|
||||
vpaddd zmm1,zmm1,zmm28
|
||||
vpaddd zmm2,zmm2,zmm18
|
||||
vpaddd zmm3,zmm3,zmm30
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vprord zmm15,zmm15,0x8
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0x7
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vpaddd zmm0,zmm0,zmm22
|
||||
vpaddd zmm1,zmm1,zmm25
|
||||
vpaddd zmm2,zmm2,zmm27
|
||||
vpaddd zmm3,zmm3,zmm24
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x10
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vprord zmm4,zmm4,0xc
|
||||
vpaddd zmm0,zmm0,zmm21
|
||||
vpaddd zmm1,zmm1,zmm16
|
||||
vpaddd zmm2,zmm2,zmm31
|
||||
vpaddd zmm3,zmm3,zmm17
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x8
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vprord zmm4,zmm4,0x7
|
||||
vpaddd zmm0,zmm0,zmm26
|
||||
vpaddd zmm1,zmm1,zmm28
|
||||
vpaddd zmm2,zmm2,zmm30
|
||||
vpaddd zmm3,zmm3,zmm29
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vprord zmm15,zmm15,0x10
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0xc
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vpaddd zmm0,zmm0,zmm23
|
||||
vpaddd zmm1,zmm1,zmm25
|
||||
vpaddd zmm2,zmm2,zmm19
|
||||
vpaddd zmm3,zmm3,zmm31
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vprord zmm15,zmm15,0x8
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0x7
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vpaddd zmm0,zmm0,zmm20
|
||||
vpaddd zmm1,zmm1,zmm27
|
||||
vpaddd zmm2,zmm2,zmm21
|
||||
vpaddd zmm3,zmm3,zmm17
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x10
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vprord zmm4,zmm4,0xc
|
||||
vpaddd zmm0,zmm0,zmm16
|
||||
vpaddd zmm1,zmm1,zmm18
|
||||
vpaddd zmm2,zmm2,zmm24
|
||||
vpaddd zmm3,zmm3,zmm22
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x8
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vprord zmm4,zmm4,0x7
|
||||
vpaddd zmm0,zmm0,zmm28
|
||||
vpaddd zmm1,zmm1,zmm25
|
||||
vpaddd zmm2,zmm2,zmm31
|
||||
vpaddd zmm3,zmm3,zmm30
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vprord zmm15,zmm15,0x10
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0xc
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vpaddd zmm0,zmm0,zmm29
|
||||
vpaddd zmm1,zmm1,zmm27
|
||||
vpaddd zmm2,zmm2,zmm26
|
||||
vpaddd zmm3,zmm3,zmm24
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vprord zmm15,zmm15,0x8
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0x7
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vpaddd zmm0,zmm0,zmm23
|
||||
vpaddd zmm1,zmm1,zmm21
|
||||
vpaddd zmm2,zmm2,zmm16
|
||||
vpaddd zmm3,zmm3,zmm22
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x10
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vprord zmm4,zmm4,0xc
|
||||
vpaddd zmm0,zmm0,zmm18
|
||||
vpaddd zmm1,zmm1,zmm19
|
||||
vpaddd zmm2,zmm2,zmm17
|
||||
vpaddd zmm3,zmm3,zmm20
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x8
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vprord zmm4,zmm4,0x7
|
||||
vpaddd zmm0,zmm0,zmm25
|
||||
vpaddd zmm1,zmm1,zmm27
|
||||
vpaddd zmm2,zmm2,zmm24
|
||||
vpaddd zmm3,zmm3,zmm31
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vprord zmm15,zmm15,0x10
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0xc
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vpaddd zmm0,zmm0,zmm30
|
||||
vpaddd zmm1,zmm1,zmm21
|
||||
vpaddd zmm2,zmm2,zmm28
|
||||
vpaddd zmm3,zmm3,zmm17
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vprord zmm15,zmm15,0x8
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0x7
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vpaddd zmm0,zmm0,zmm29
|
||||
vpaddd zmm1,zmm1,zmm16
|
||||
vpaddd zmm2,zmm2,zmm18
|
||||
vpaddd zmm3,zmm3,zmm20
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x10
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vprord zmm4,zmm4,0xc
|
||||
vpaddd zmm0,zmm0,zmm19
|
||||
vpaddd zmm1,zmm1,zmm26
|
||||
vpaddd zmm2,zmm2,zmm22
|
||||
vpaddd zmm3,zmm3,zmm23
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x8
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vprord zmm4,zmm4,0x7
|
||||
vpaddd zmm0,zmm0,zmm27
|
||||
vpaddd zmm1,zmm1,zmm21
|
||||
vpaddd zmm2,zmm2,zmm17
|
||||
vpaddd zmm3,zmm3,zmm24
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vprord zmm15,zmm15,0x10
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0xc
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vpaddd zmm0,zmm0,zmm31
|
||||
vpaddd zmm1,zmm1,zmm16
|
||||
vpaddd zmm2,zmm2,zmm25
|
||||
vpaddd zmm3,zmm3,zmm22
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm1,zmm1,zmm5
|
||||
vpaddd zmm2,zmm2,zmm6
|
||||
vpaddd zmm3,zmm3,zmm7
|
||||
vpxord zmm12,zmm12,zmm0
|
||||
vpxord zmm13,zmm13,zmm1
|
||||
vpxord zmm14,zmm14,zmm2
|
||||
vpxord zmm15,zmm15,zmm3
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vprord zmm15,zmm15,0x8
|
||||
vpaddd zmm8,zmm8,zmm12
|
||||
vpaddd zmm9,zmm9,zmm13
|
||||
vpaddd zmm10,zmm10,zmm14
|
||||
vpaddd zmm11,zmm11,zmm15
|
||||
vpxord zmm4,zmm4,zmm8
|
||||
vpxord zmm5,zmm5,zmm9
|
||||
vpxord zmm6,zmm6,zmm10
|
||||
vpxord zmm7,zmm7,zmm11
|
||||
vprord zmm4,zmm4,0x7
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vpaddd zmm0,zmm0,zmm30
|
||||
vpaddd zmm1,zmm1,zmm18
|
||||
vpaddd zmm2,zmm2,zmm19
|
||||
vpaddd zmm3,zmm3,zmm23
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x10
|
||||
vprord zmm12,zmm12,0x10
|
||||
vprord zmm13,zmm13,0x10
|
||||
vprord zmm14,zmm14,0x10
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0xc
|
||||
vprord zmm6,zmm6,0xc
|
||||
vprord zmm7,zmm7,0xc
|
||||
vprord zmm4,zmm4,0xc
|
||||
vpaddd zmm0,zmm0,zmm26
|
||||
vpaddd zmm1,zmm1,zmm28
|
||||
vpaddd zmm2,zmm2,zmm20
|
||||
vpaddd zmm3,zmm3,zmm29
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm1,zmm1,zmm6
|
||||
vpaddd zmm2,zmm2,zmm7
|
||||
vpaddd zmm3,zmm3,zmm4
|
||||
vpxord zmm15,zmm15,zmm0
|
||||
vpxord zmm12,zmm12,zmm1
|
||||
vpxord zmm13,zmm13,zmm2
|
||||
vpxord zmm14,zmm14,zmm3
|
||||
vprord zmm15,zmm15,0x8
|
||||
vprord zmm12,zmm12,0x8
|
||||
vprord zmm13,zmm13,0x8
|
||||
vprord zmm14,zmm14,0x8
|
||||
vpaddd zmm10,zmm10,zmm15
|
||||
vpaddd zmm11,zmm11,zmm12
|
||||
vpaddd zmm8,zmm8,zmm13
|
||||
vpaddd zmm9,zmm9,zmm14
|
||||
vpxord zmm5,zmm5,zmm10
|
||||
vpxord zmm6,zmm6,zmm11
|
||||
vpxord zmm7,zmm7,zmm8
|
||||
vpxord zmm4,zmm4,zmm9
|
||||
vprord zmm5,zmm5,0x7
|
||||
vprord zmm6,zmm6,0x7
|
||||
vprord zmm7,zmm7,0x7
|
||||
vprord zmm4,zmm4,0x7
|
||||
vpxord zmm0,zmm0,zmm8
|
||||
vpxord zmm1,zmm1,zmm9
|
||||
vpxord zmm2,zmm2,zmm10
|
||||
vpxord zmm3,zmm3,zmm11
|
||||
vpxord zmm4,zmm4,zmm12
|
||||
vpxord zmm5,zmm5,zmm13
|
||||
vpxord zmm6,zmm6,zmm14
|
||||
vpxord zmm7,zmm7,zmm15
|
||||
vpxord zmm8,zmm8,DWORD PTR [rdi]{1to16}
|
||||
vpxord zmm9,zmm9,DWORD PTR [rdi+0x4]{1to16}
|
||||
vpxord zmm10,zmm10,DWORD PTR [rdi+0x8]{1to16}
|
||||
vpxord zmm11,zmm11,DWORD PTR [rdi+0xc]{1to16}
|
||||
vpxord zmm12,zmm12,DWORD PTR [rdi+0x10]{1to16}
|
||||
vpxord zmm13,zmm13,DWORD PTR [rdi+0x14]{1to16}
|
||||
vpxord zmm14,zmm14,DWORD PTR [rdi+0x18]{1to16}
|
||||
vpxord zmm15,zmm15,DWORD PTR [rdi+0x1c]{1to16}
|
||||
vpunpckldq zmm16,zmm0,zmm1
|
||||
vpunpckhdq zmm17,zmm0,zmm1
|
||||
vpunpckldq zmm18,zmm2,zmm3
|
||||
vpunpckhdq zmm19,zmm2,zmm3
|
||||
vpunpckldq zmm20,zmm4,zmm5
|
||||
vpunpckhdq zmm21,zmm4,zmm5
|
||||
vpunpckldq zmm22,zmm6,zmm7
|
||||
vpunpckhdq zmm23,zmm6,zmm7
|
||||
vpunpckldq zmm24,zmm8,zmm9
|
||||
vpunpckhdq zmm25,zmm8,zmm9
|
||||
vpunpckldq zmm26,zmm10,zmm11
|
||||
vpunpckhdq zmm27,zmm10,zmm11
|
||||
vpunpckldq zmm28,zmm12,zmm13
|
||||
vpunpckhdq zmm29,zmm12,zmm13
|
||||
vpunpckldq zmm30,zmm14,zmm15
|
||||
vpunpckhdq zmm31,zmm14,zmm15
|
||||
vpunpcklqdq zmm0,zmm16,zmm18
|
||||
vpunpckhqdq zmm1,zmm16,zmm18
|
||||
vpunpcklqdq zmm2,zmm17,zmm19
|
||||
vpunpckhqdq zmm3,zmm17,zmm19
|
||||
vpunpcklqdq zmm4,zmm20,zmm22
|
||||
vpunpckhqdq zmm5,zmm20,zmm22
|
||||
vpunpcklqdq zmm6,zmm21,zmm23
|
||||
vpunpckhqdq zmm7,zmm21,zmm23
|
||||
vpunpcklqdq zmm8,zmm24,zmm26
|
||||
vpunpckhqdq zmm9,zmm24,zmm26
|
||||
vpunpcklqdq zmm10,zmm25,zmm27
|
||||
vpunpckhqdq zmm11,zmm25,zmm27
|
||||
vpunpcklqdq zmm12,zmm28,zmm30
|
||||
vpunpckhqdq zmm13,zmm28,zmm30
|
||||
vpunpcklqdq zmm14,zmm29,zmm31
|
||||
vpunpckhqdq zmm15,zmm29,zmm31
|
||||
vshufi32x4 zmm16,zmm0,zmm4,0x88
|
||||
vshufi32x4 zmm17,zmm1,zmm5,0x88
|
||||
vshufi32x4 zmm18,zmm2,zmm6,0x88
|
||||
vshufi32x4 zmm19,zmm3,zmm7,0x88
|
||||
vshufi32x4 zmm20,zmm0,zmm4,0xdd
|
||||
vshufi32x4 zmm21,zmm1,zmm5,0xdd
|
||||
vshufi32x4 zmm22,zmm2,zmm6,0xdd
|
||||
vshufi32x4 zmm23,zmm3,zmm7,0xdd
|
||||
vshufi32x4 zmm24,zmm8,zmm12,0x88
|
||||
vshufi32x4 zmm25,zmm9,zmm13,0x88
|
||||
vshufi32x4 zmm26,zmm10,zmm14,0x88
|
||||
vshufi32x4 zmm27,zmm11,zmm15,0x88
|
||||
vshufi32x4 zmm28,zmm8,zmm12,0xdd
|
||||
vshufi32x4 zmm29,zmm9,zmm13,0xdd
|
||||
vshufi32x4 zmm30,zmm10,zmm14,0xdd
|
||||
vshufi32x4 zmm31,zmm11,zmm15,0xdd
|
||||
vshufi32x4 zmm0,zmm16,zmm24,0x88
|
||||
vshufi32x4 zmm1,zmm17,zmm25,0x88
|
||||
vshufi32x4 zmm2,zmm18,zmm26,0x88
|
||||
vshufi32x4 zmm3,zmm19,zmm27,0x88
|
||||
vshufi32x4 zmm4,zmm20,zmm28,0x88
|
||||
vshufi32x4 zmm5,zmm21,zmm29,0x88
|
||||
vshufi32x4 zmm6,zmm22,zmm30,0x88
|
||||
vshufi32x4 zmm7,zmm23,zmm31,0x88
|
||||
vshufi32x4 zmm8,zmm16,zmm24,0xdd
|
||||
vshufi32x4 zmm9,zmm17,zmm25,0xdd
|
||||
vshufi32x4 zmm10,zmm18,zmm26,0xdd
|
||||
vshufi32x4 zmm11,zmm19,zmm27,0xdd
|
||||
vshufi32x4 zmm12,zmm20,zmm28,0xdd
|
||||
vshufi32x4 zmm13,zmm21,zmm29,0xdd
|
||||
vshufi32x4 zmm14,zmm22,zmm30,0xdd
|
||||
vshufi32x4 zmm15,zmm23,zmm31,0xdd
|
||||
vmovdqu32 ZMMWORD PTR [r9],zmm0
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x40],zmm1
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x80],zmm2
|
||||
vmovdqu32 ZMMWORD PTR [r9+0xc0],zmm3
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x100],zmm4
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x140],zmm5
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x180],zmm6
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x1c0],zmm7
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x200],zmm8
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x240],zmm9
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x280],zmm10
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x2c0],zmm11
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x300],zmm12
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x340],zmm13
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x380],zmm14
|
||||
vmovdqu32 ZMMWORD PTR [r9+0x3c0],zmm15
|
||||
vmovdqa32 zmm0,ZMMWORD PTR [rsp]
|
||||
vmovdqa32 zmm1,ZMMWORD PTR [rsp+0x40]
|
||||
vpaddd zmm2,zmm0,DWORD PTR [ADD16+rip]{1to16}
|
||||
vpcmpltud k1,zmm2,zmm0
|
||||
vpaddd zmm1{k1},zmm1,DWORD PTR [ADD1+rip]{1to16}
|
||||
vmovdqa32 ZMMWORD PTR [rsp],zmm2
|
||||
vmovdqa32 ZMMWORD PTR [rsp+0x40],zmm1
|
||||
add r9,0x400
|
||||
sub r10,0x10
|
||||
cmp r10,0x10
|
||||
jae 3b
|
||||
test r10,r10
|
||||
jne 2f
|
||||
9:
|
||||
vzeroupper
|
||||
mov rsp,rbp
|
||||
pop rbp
|
||||
ret
|
||||
2:
|
||||
test r10,0x8
|
||||
je 2f
|
||||
vpbroadcastd ymm16,DWORD PTR [rsi]
|
||||
vpbroadcastd ymm17,DWORD PTR [rsi+0x4]
|
||||
vpbroadcastd ymm18,DWORD PTR [rsi+0x8]
|
||||
vpbroadcastd ymm19,DWORD PTR [rsi+0xc]
|
||||
vpbroadcastd ymm20,DWORD PTR [rsi+0x10]
|
||||
vpbroadcastd ymm21,DWORD PTR [rsi+0x14]
|
||||
vpbroadcastd ymm22,DWORD PTR [rsi+0x18]
|
||||
vpbroadcastd ymm23,DWORD PTR [rsi+0x1c]
|
||||
vpbroadcastd ymm24,DWORD PTR [rsi+0x20]
|
||||
vpbroadcastd ymm25,DWORD PTR [rsi+0x24]
|
||||
vpbroadcastd ymm26,DWORD PTR [rsi+0x28]
|
||||
vpbroadcastd ymm27,DWORD PTR [rsi+0x2c]
|
||||
vpbroadcastd ymm28,DWORD PTR [rsi+0x30]
|
||||
vpbroadcastd ymm29,DWORD PTR [rsi+0x34]
|
||||
vpbroadcastd ymm30,DWORD PTR [rsi+0x38]
|
||||
vpbroadcastd ymm31,DWORD PTR [rsi+0x3c]
|
||||
vpbroadcastd ymm0,DWORD PTR [rdi]
|
||||
vpbroadcastd ymm1,DWORD PTR [rdi+0x4]
|
||||
vpbroadcastd ymm2,DWORD PTR [rdi+0x8]
|
||||
vpbroadcastd ymm3,DWORD PTR [rdi+0xc]
|
||||
vpbroadcastd ymm4,DWORD PTR [rdi+0x10]
|
||||
vpbroadcastd ymm5,DWORD PTR [rdi+0x14]
|
||||
vpbroadcastd ymm6,DWORD PTR [rdi+0x18]
|
||||
vpbroadcastd ymm7,DWORD PTR [rdi+0x1c]
|
||||
vpbroadcastd ymm8,DWORD PTR [BLAKE3_IV_0+rip]
|
||||
vpbroadcastd ymm9,DWORD PTR [BLAKE3_IV_1+rip]
|
||||
vpbroadcastd ymm10,DWORD PTR [BLAKE3_IV_2+rip]
|
||||
vpbroadcastd ymm11,DWORD PTR [BLAKE3_IV_3+rip]
|
||||
vmovdqa ymm12,YMMWORD PTR [rsp]
|
||||
vmovdqa ymm13,YMMWORD PTR [rsp+0x40]
|
||||
vpbroadcastd ymm14,edx
|
||||
vpbroadcastd ymm15,r8d
|
||||
vpaddd ymm0,ymm0,ymm16
|
||||
vpaddd ymm1,ymm1,ymm18
|
||||
vpaddd ymm2,ymm2,ymm20
|
||||
vpaddd ymm3,ymm3,ymm22
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vprord ymm15,ymm15,0x10
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0xc
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vpaddd ymm0,ymm0,ymm17
|
||||
vpaddd ymm1,ymm1,ymm19
|
||||
vpaddd ymm2,ymm2,ymm21
|
||||
vpaddd ymm3,ymm3,ymm23
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vprord ymm15,ymm15,0x8
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0x7
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vpaddd ymm0,ymm0,ymm24
|
||||
vpaddd ymm1,ymm1,ymm26
|
||||
vpaddd ymm2,ymm2,ymm28
|
||||
vpaddd ymm3,ymm3,ymm30
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x10
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vprord ymm4,ymm4,0xc
|
||||
vpaddd ymm0,ymm0,ymm25
|
||||
vpaddd ymm1,ymm1,ymm27
|
||||
vpaddd ymm2,ymm2,ymm29
|
||||
vpaddd ymm3,ymm3,ymm31
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x8
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vprord ymm4,ymm4,0x7
|
||||
vpaddd ymm0,ymm0,ymm18
|
||||
vpaddd ymm1,ymm1,ymm19
|
||||
vpaddd ymm2,ymm2,ymm23
|
||||
vpaddd ymm3,ymm3,ymm20
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vprord ymm15,ymm15,0x10
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0xc
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vpaddd ymm0,ymm0,ymm22
|
||||
vpaddd ymm1,ymm1,ymm26
|
||||
vpaddd ymm2,ymm2,ymm16
|
||||
vpaddd ymm3,ymm3,ymm29
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vprord ymm15,ymm15,0x8
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0x7
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vpaddd ymm0,ymm0,ymm17
|
||||
vpaddd ymm1,ymm1,ymm28
|
||||
vpaddd ymm2,ymm2,ymm25
|
||||
vpaddd ymm3,ymm3,ymm31
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x10
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vprord ymm4,ymm4,0xc
|
||||
vpaddd ymm0,ymm0,ymm27
|
||||
vpaddd ymm1,ymm1,ymm21
|
||||
vpaddd ymm2,ymm2,ymm30
|
||||
vpaddd ymm3,ymm3,ymm24
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x8
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vprord ymm4,ymm4,0x7
|
||||
vpaddd ymm0,ymm0,ymm19
|
||||
vpaddd ymm1,ymm1,ymm26
|
||||
vpaddd ymm2,ymm2,ymm29
|
||||
vpaddd ymm3,ymm3,ymm23
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vprord ymm15,ymm15,0x10
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0xc
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vpaddd ymm0,ymm0,ymm20
|
||||
vpaddd ymm1,ymm1,ymm28
|
||||
vpaddd ymm2,ymm2,ymm18
|
||||
vpaddd ymm3,ymm3,ymm30
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vprord ymm15,ymm15,0x8
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0x7
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vpaddd ymm0,ymm0,ymm22
|
||||
vpaddd ymm1,ymm1,ymm25
|
||||
vpaddd ymm2,ymm2,ymm27
|
||||
vpaddd ymm3,ymm3,ymm24
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x10
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vprord ymm4,ymm4,0xc
|
||||
vpaddd ymm0,ymm0,ymm21
|
||||
vpaddd ymm1,ymm1,ymm16
|
||||
vpaddd ymm2,ymm2,ymm31
|
||||
vpaddd ymm3,ymm3,ymm17
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x8
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vprord ymm4,ymm4,0x7
|
||||
vpaddd ymm0,ymm0,ymm26
|
||||
vpaddd ymm1,ymm1,ymm28
|
||||
vpaddd ymm2,ymm2,ymm30
|
||||
vpaddd ymm3,ymm3,ymm29
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vprord ymm15,ymm15,0x10
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0xc
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vpaddd ymm0,ymm0,ymm23
|
||||
vpaddd ymm1,ymm1,ymm25
|
||||
vpaddd ymm2,ymm2,ymm19
|
||||
vpaddd ymm3,ymm3,ymm31
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vprord ymm15,ymm15,0x8
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0x7
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vpaddd ymm0,ymm0,ymm20
|
||||
vpaddd ymm1,ymm1,ymm27
|
||||
vpaddd ymm2,ymm2,ymm21
|
||||
vpaddd ymm3,ymm3,ymm17
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x10
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vprord ymm4,ymm4,0xc
|
||||
vpaddd ymm0,ymm0,ymm16
|
||||
vpaddd ymm1,ymm1,ymm18
|
||||
vpaddd ymm2,ymm2,ymm24
|
||||
vpaddd ymm3,ymm3,ymm22
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x8
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vprord ymm4,ymm4,0x7
|
||||
vpaddd ymm0,ymm0,ymm28
|
||||
vpaddd ymm1,ymm1,ymm25
|
||||
vpaddd ymm2,ymm2,ymm31
|
||||
vpaddd ymm3,ymm3,ymm30
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vprord ymm15,ymm15,0x10
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0xc
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vpaddd ymm0,ymm0,ymm29
|
||||
vpaddd ymm1,ymm1,ymm27
|
||||
vpaddd ymm2,ymm2,ymm26
|
||||
vpaddd ymm3,ymm3,ymm24
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vprord ymm15,ymm15,0x8
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0x7
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vpaddd ymm0,ymm0,ymm23
|
||||
vpaddd ymm1,ymm1,ymm21
|
||||
vpaddd ymm2,ymm2,ymm16
|
||||
vpaddd ymm3,ymm3,ymm22
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x10
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vprord ymm4,ymm4,0xc
|
||||
vpaddd ymm0,ymm0,ymm18
|
||||
vpaddd ymm1,ymm1,ymm19
|
||||
vpaddd ymm2,ymm2,ymm17
|
||||
vpaddd ymm3,ymm3,ymm20
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x8
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vprord ymm4,ymm4,0x7
|
||||
vpaddd ymm0,ymm0,ymm25
|
||||
vpaddd ymm1,ymm1,ymm27
|
||||
vpaddd ymm2,ymm2,ymm24
|
||||
vpaddd ymm3,ymm3,ymm31
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vprord ymm15,ymm15,0x10
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0xc
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vpaddd ymm0,ymm0,ymm30
|
||||
vpaddd ymm1,ymm1,ymm21
|
||||
vpaddd ymm2,ymm2,ymm28
|
||||
vpaddd ymm3,ymm3,ymm17
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vprord ymm15,ymm15,0x8
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0x7
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vpaddd ymm0,ymm0,ymm29
|
||||
vpaddd ymm1,ymm1,ymm16
|
||||
vpaddd ymm2,ymm2,ymm18
|
||||
vpaddd ymm3,ymm3,ymm20
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x10
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vprord ymm4,ymm4,0xc
|
||||
vpaddd ymm0,ymm0,ymm19
|
||||
vpaddd ymm1,ymm1,ymm26
|
||||
vpaddd ymm2,ymm2,ymm22
|
||||
vpaddd ymm3,ymm3,ymm23
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x8
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vprord ymm4,ymm4,0x7
|
||||
vpaddd ymm0,ymm0,ymm27
|
||||
vpaddd ymm1,ymm1,ymm21
|
||||
vpaddd ymm2,ymm2,ymm17
|
||||
vpaddd ymm3,ymm3,ymm24
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vprord ymm15,ymm15,0x10
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0xc
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vpaddd ymm0,ymm0,ymm31
|
||||
vpaddd ymm1,ymm1,ymm16
|
||||
vpaddd ymm2,ymm2,ymm25
|
||||
vpaddd ymm3,ymm3,ymm22
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm1,ymm1,ymm5
|
||||
vpaddd ymm2,ymm2,ymm6
|
||||
vpaddd ymm3,ymm3,ymm7
|
||||
vpxord ymm12,ymm12,ymm0
|
||||
vpxord ymm13,ymm13,ymm1
|
||||
vpxord ymm14,ymm14,ymm2
|
||||
vpxord ymm15,ymm15,ymm3
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vprord ymm15,ymm15,0x8
|
||||
vpaddd ymm8,ymm8,ymm12
|
||||
vpaddd ymm9,ymm9,ymm13
|
||||
vpaddd ymm10,ymm10,ymm14
|
||||
vpaddd ymm11,ymm11,ymm15
|
||||
vpxord ymm4,ymm4,ymm8
|
||||
vpxord ymm5,ymm5,ymm9
|
||||
vpxord ymm6,ymm6,ymm10
|
||||
vpxord ymm7,ymm7,ymm11
|
||||
vprord ymm4,ymm4,0x7
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vpaddd ymm0,ymm0,ymm30
|
||||
vpaddd ymm1,ymm1,ymm18
|
||||
vpaddd ymm2,ymm2,ymm19
|
||||
vpaddd ymm3,ymm3,ymm23
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x10
|
||||
vprord ymm12,ymm12,0x10
|
||||
vprord ymm13,ymm13,0x10
|
||||
vprord ymm14,ymm14,0x10
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0xc
|
||||
vprord ymm6,ymm6,0xc
|
||||
vprord ymm7,ymm7,0xc
|
||||
vprord ymm4,ymm4,0xc
|
||||
vpaddd ymm0,ymm0,ymm26
|
||||
vpaddd ymm1,ymm1,ymm28
|
||||
vpaddd ymm2,ymm2,ymm20
|
||||
vpaddd ymm3,ymm3,ymm29
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm1,ymm1,ymm6
|
||||
vpaddd ymm2,ymm2,ymm7
|
||||
vpaddd ymm3,ymm3,ymm4
|
||||
vpxord ymm15,ymm15,ymm0
|
||||
vpxord ymm12,ymm12,ymm1
|
||||
vpxord ymm13,ymm13,ymm2
|
||||
vpxord ymm14,ymm14,ymm3
|
||||
vprord ymm15,ymm15,0x8
|
||||
vprord ymm12,ymm12,0x8
|
||||
vprord ymm13,ymm13,0x8
|
||||
vprord ymm14,ymm14,0x8
|
||||
vpaddd ymm10,ymm10,ymm15
|
||||
vpaddd ymm11,ymm11,ymm12
|
||||
vpaddd ymm8,ymm8,ymm13
|
||||
vpaddd ymm9,ymm9,ymm14
|
||||
vpxord ymm5,ymm5,ymm10
|
||||
vpxord ymm6,ymm6,ymm11
|
||||
vpxord ymm7,ymm7,ymm8
|
||||
vpxord ymm4,ymm4,ymm9
|
||||
vprord ymm5,ymm5,0x7
|
||||
vprord ymm6,ymm6,0x7
|
||||
vprord ymm7,ymm7,0x7
|
||||
vprord ymm4,ymm4,0x7
|
||||
vpxor ymm0,ymm0,ymm8
|
||||
vpxor ymm1,ymm1,ymm9
|
||||
vpxor ymm2,ymm2,ymm10
|
||||
vpxor ymm3,ymm3,ymm11
|
||||
vpxor ymm4,ymm4,ymm12
|
||||
vpxor ymm5,ymm5,ymm13
|
||||
vpxor ymm6,ymm6,ymm14
|
||||
vpxor ymm7,ymm7,ymm15
|
||||
vpxord ymm8,ymm8,DWORD PTR [rdi]{1to8}
|
||||
vpxord ymm9,ymm9,DWORD PTR [rdi+0x4]{1to8}
|
||||
vpxord ymm10,ymm10,DWORD PTR [rdi+0x8]{1to8}
|
||||
vpxord ymm11,ymm11,DWORD PTR [rdi+0xc]{1to8}
|
||||
vpxord ymm12,ymm12,DWORD PTR [rdi+0x10]{1to8}
|
||||
vpxord ymm13,ymm13,DWORD PTR [rdi+0x14]{1to8}
|
||||
vpxord ymm14,ymm14,DWORD PTR [rdi+0x18]{1to8}
|
||||
vpxord ymm15,ymm15,DWORD PTR [rdi+0x1c]{1to8}
|
||||
vpunpckldq ymm16,ymm0,ymm1
|
||||
vpunpckhdq ymm17,ymm0,ymm1
|
||||
vpunpckldq ymm18,ymm2,ymm3
|
||||
vpunpckhdq ymm19,ymm2,ymm3
|
||||
vpunpckldq ymm20,ymm4,ymm5
|
||||
vpunpckhdq ymm21,ymm4,ymm5
|
||||
vpunpckldq ymm22,ymm6,ymm7
|
||||
vpunpckhdq ymm23,ymm6,ymm7
|
||||
vpunpckldq ymm24,ymm8,ymm9
|
||||
vpunpckhdq ymm25,ymm8,ymm9
|
||||
vpunpckldq ymm26,ymm10,ymm11
|
||||
vpunpckhdq ymm27,ymm10,ymm11
|
||||
vpunpckldq ymm28,ymm12,ymm13
|
||||
vpunpckhdq ymm29,ymm12,ymm13
|
||||
vpunpckldq ymm30,ymm14,ymm15
|
||||
vpunpckhdq ymm31,ymm14,ymm15
|
||||
vpunpcklqdq ymm0,ymm16,ymm18
|
||||
vpunpckhqdq ymm1,ymm16,ymm18
|
||||
vpunpcklqdq ymm2,ymm17,ymm19
|
||||
vpunpckhqdq ymm3,ymm17,ymm19
|
||||
vpunpcklqdq ymm4,ymm20,ymm22
|
||||
vpunpckhqdq ymm5,ymm20,ymm22
|
||||
vpunpcklqdq ymm6,ymm21,ymm23
|
||||
vpunpckhqdq ymm7,ymm21,ymm23
|
||||
vpunpcklqdq ymm8,ymm24,ymm26
|
||||
vpunpckhqdq ymm9,ymm24,ymm26
|
||||
vpunpcklqdq ymm10,ymm25,ymm27
|
||||
vpunpckhqdq ymm11,ymm25,ymm27
|
||||
vpunpcklqdq ymm12,ymm28,ymm30
|
||||
vpunpckhqdq ymm13,ymm28,ymm30
|
||||
vpunpcklqdq ymm14,ymm29,ymm31
|
||||
vpunpckhqdq ymm15,ymm29,ymm31
|
||||
vshufi32x4 ymm16,ymm0,ymm4,0x0
|
||||
vshufi32x4 ymm17,ymm8,ymm12,0x0
|
||||
vshufi32x4 ymm18,ymm1,ymm5,0x0
|
||||
vshufi32x4 ymm19,ymm9,ymm13,0x0
|
||||
vshufi32x4 ymm20,ymm2,ymm6,0x0
|
||||
vshufi32x4 ymm21,ymm10,ymm14,0x0
|
||||
vshufi32x4 ymm22,ymm3,ymm7,0x0
|
||||
vshufi32x4 ymm23,ymm11,ymm15,0x0
|
||||
vshufi32x4 ymm24,ymm0,ymm4,0x3
|
||||
vshufi32x4 ymm25,ymm8,ymm12,0x3
|
||||
vshufi32x4 ymm26,ymm1,ymm5,0x3
|
||||
vshufi32x4 ymm27,ymm9,ymm13,0x3
|
||||
vshufi32x4 ymm28,ymm2,ymm6,0x3
|
||||
vshufi32x4 ymm29,ymm10,ymm14,0x3
|
||||
vshufi32x4 ymm30,ymm3,ymm7,0x3
|
||||
vshufi32x4 ymm31,ymm11,ymm15,0x3
|
||||
vmovdqu32 YMMWORD PTR [r9],ymm16
|
||||
vmovdqu32 YMMWORD PTR [r9+0x20],ymm17
|
||||
vmovdqu32 YMMWORD PTR [r9+0x40],ymm18
|
||||
vmovdqu32 YMMWORD PTR [r9+0x60],ymm19
|
||||
vmovdqu32 YMMWORD PTR [r9+0x80],ymm20
|
||||
vmovdqu32 YMMWORD PTR [r9+0xa0],ymm21
|
||||
vmovdqu32 YMMWORD PTR [r9+0xc0],ymm22
|
||||
vmovdqu32 YMMWORD PTR [r9+0xe0],ymm23
|
||||
vmovdqu32 YMMWORD PTR [r9+0x100],ymm24
|
||||
vmovdqu32 YMMWORD PTR [r9+0x120],ymm25
|
||||
vmovdqu32 YMMWORD PTR [r9+0x140],ymm26
|
||||
vmovdqu32 YMMWORD PTR [r9+0x160],ymm27
|
||||
vmovdqu32 YMMWORD PTR [r9+0x180],ymm28
|
||||
vmovdqu32 YMMWORD PTR [r9+0x1a0],ymm29
|
||||
vmovdqu32 YMMWORD PTR [r9+0x1c0],ymm30
|
||||
vmovdqu32 YMMWORD PTR [r9+0x1e0],ymm31
|
||||
vmovdqa ymm0,YMMWORD PTR [rsp+0x20]
|
||||
vmovdqa ymm1,YMMWORD PTR [rsp+0x60]
|
||||
vmovdqa YMMWORD PTR [rsp],ymm0
|
||||
vmovdqa YMMWORD PTR [rsp+0x40],ymm1
|
||||
add r9,0x200
|
||||
sub r10,0x8
|
||||
2:
|
||||
test r10,0x4
|
||||
je 2f
|
||||
vbroadcasti32x4 zmm0,XMMWORD PTR [rdi]
|
||||
vbroadcasti32x4 zmm1,XMMWORD PTR [rdi+0x10]
|
||||
vbroadcasti32x4 zmm2,XMMWORD PTR [BLAKE3_IV+rip]
|
||||
vmovdqa xmm12,XMMWORD PTR [rsp]
|
||||
vmovdqa xmm13,XMMWORD PTR [rsp+0x40]
|
||||
vpunpckldq xmm14,xmm12,xmm13
|
||||
vpunpckhdq xmm15,xmm12,xmm13
|
||||
vpermq ymm14,ymm14,0xdc
|
||||
vpermq ymm15,ymm15,0xdc
|
||||
vpbroadcastd zmm12,edx
|
||||
vinserti64x4 zmm13,zmm14,ymm15,0x1
|
||||
mov eax,0x4444
|
||||
kmovw k2,eax
|
||||
vpblendmd zmm13{k2},zmm13,zmm12
|
||||
vpbroadcastd zmm15,r8d
|
||||
mov eax,0x8888
|
||||
kmovw k4,eax
|
||||
vpblendmd zmm3{k4},zmm13,zmm15
|
||||
mov eax,0xaaaa
|
||||
kmovw k3,eax
|
||||
vbroadcasti32x4 zmm8,XMMWORD PTR [rsi]
|
||||
vbroadcasti32x4 zmm9,XMMWORD PTR [rsi+0x10]
|
||||
vshufps zmm4,zmm8,zmm9,0x88
|
||||
vshufps zmm5,zmm8,zmm9,0xdd
|
||||
vbroadcasti32x4 zmm8,XMMWORD PTR [rsi+0x20]
|
||||
vbroadcasti32x4 zmm9,XMMWORD PTR [rsi+0x30]
|
||||
vshufps zmm6,zmm8,zmm9,0x88
|
||||
vshufps zmm7,zmm8,zmm9,0xdd
|
||||
vpshufd zmm6,zmm6,0x93
|
||||
vpshufd zmm7,zmm7,0x93
|
||||
mov al,0x7
|
||||
3:
|
||||
vpaddd zmm0,zmm0,zmm4
|
||||
vpaddd zmm0,zmm0,zmm1
|
||||
vpxord zmm3,zmm3,zmm0
|
||||
vprord zmm3,zmm3,0x10
|
||||
vpaddd zmm2,zmm2,zmm3
|
||||
vpxord zmm1,zmm1,zmm2
|
||||
vprord zmm1,zmm1,0xc
|
||||
vpaddd zmm0,zmm0,zmm5
|
||||
vpaddd zmm0,zmm0,zmm1
|
||||
vpxord zmm3,zmm3,zmm0
|
||||
vprord zmm3,zmm3,0x8
|
||||
vpaddd zmm2,zmm2,zmm3
|
||||
vpxord zmm1,zmm1,zmm2
|
||||
vprord zmm1,zmm1,0x7
|
||||
vpshufd zmm0,zmm0,0x93
|
||||
vpshufd zmm3,zmm3,0x4e
|
||||
vpshufd zmm2,zmm2,0x39
|
||||
vpaddd zmm0,zmm0,zmm6
|
||||
vpaddd zmm0,zmm0,zmm1
|
||||
vpxord zmm3,zmm3,zmm0
|
||||
vprord zmm3,zmm3,0x10
|
||||
vpaddd zmm2,zmm2,zmm3
|
||||
vpxord zmm1,zmm1,zmm2
|
||||
vprord zmm1,zmm1,0xc
|
||||
vpaddd zmm0,zmm0,zmm7
|
||||
vpaddd zmm0,zmm0,zmm1
|
||||
vpxord zmm3,zmm3,zmm0
|
||||
vprord zmm3,zmm3,0x8
|
||||
vpaddd zmm2,zmm2,zmm3
|
||||
vpxord zmm1,zmm1,zmm2
|
||||
vprord zmm1,zmm1,0x7
|
||||
vpshufd zmm0,zmm0,0x39
|
||||
vpshufd zmm3,zmm3,0x4e
|
||||
vpshufd zmm2,zmm2,0x93
|
||||
dec al
|
||||
je 3f
|
||||
vshufps zmm8,zmm4,zmm5,0xd6
|
||||
vpshufd zmm9,zmm4,0xf
|
||||
vpshufd zmm4,zmm8,0x39
|
||||
vshufps zmm8,zmm6,zmm7,0xfa
|
||||
vpblendmd zmm9{k3},zmm9,zmm8
|
||||
vpunpcklqdq zmm8,zmm7,zmm5
|
||||
vpblendmd zmm8{k4},zmm8,zmm6
|
||||
vpshufd zmm8,zmm8,0x78
|
||||
vpunpckhdq zmm5,zmm5,zmm7
|
||||
vpunpckldq zmm6,zmm6,zmm5
|
||||
vpshufd zmm7,zmm6,0x1e
|
||||
vmovdqa32 zmm5,zmm9
|
||||
vmovdqa32 zmm6,zmm8
|
||||
jmp 3b
|
||||
3:
|
||||
vpxord zmm0,zmm0,zmm2
|
||||
vpxord zmm1,zmm1,zmm3
|
||||
vbroadcasti32x4 zmm8,XMMWORD PTR [rdi]
|
||||
vbroadcasti32x4 zmm9,XMMWORD PTR [rdi+0x10]
|
||||
vpxord zmm2,zmm2,zmm8
|
||||
vpxord zmm3,zmm3,zmm9
|
||||
vmovdqu XMMWORD PTR [r9],xmm0
|
||||
vmovdqu XMMWORD PTR [r9+0x10],xmm1
|
||||
vmovdqu XMMWORD PTR [r9+0x20],xmm2
|
||||
vmovdqu XMMWORD PTR [r9+0x30],xmm3
|
||||
vextracti128 XMMWORD PTR [r9+0x40],ymm0,0x1
|
||||
vextracti128 XMMWORD PTR [r9+0x50],ymm1,0x1
|
||||
vextracti128 XMMWORD PTR [r9+0x60],ymm2,0x1
|
||||
vextracti128 XMMWORD PTR [r9+0x70],ymm3,0x1
|
||||
vextracti32x4 XMMWORD PTR [r9+0x80],zmm0,0x2
|
||||
vextracti32x4 XMMWORD PTR [r9+0x90],zmm1,0x2
|
||||
vextracti32x4 XMMWORD PTR [r9+0xa0],zmm2,0x2
|
||||
vextracti32x4 XMMWORD PTR [r9+0xb0],zmm3,0x2
|
||||
vextracti32x4 XMMWORD PTR [r9+0xc0],zmm0,0x3
|
||||
vextracti32x4 XMMWORD PTR [r9+0xd0],zmm1,0x3
|
||||
vextracti32x4 XMMWORD PTR [r9+0xe0],zmm2,0x3
|
||||
vextracti32x4 XMMWORD PTR [r9+0xf0],zmm3,0x3
|
||||
vmovdqa xmm0,XMMWORD PTR [rsp+0x10]
|
||||
vmovdqa xmm1,XMMWORD PTR [rsp+0x50]
|
||||
vmovdqa XMMWORD PTR [rsp],xmm0
|
||||
vmovdqa XMMWORD PTR [rsp+0x40],xmm1
|
||||
add r9,0x100
|
||||
sub r10,0x4
|
||||
2:
|
||||
test r10,0x2
|
||||
je 2f
|
||||
vbroadcasti128 ymm0,XMMWORD PTR [rdi]
|
||||
vbroadcasti128 ymm1,XMMWORD PTR [rdi+0x10]
|
||||
vmovd xmm13,DWORD PTR [rsp]
|
||||
vpinsrd xmm13,xmm13,DWORD PTR [rsp+0x40],0x1
|
||||
vpinsrd xmm13,xmm13,edx,0x2
|
||||
vmovd xmm14,DWORD PTR [rsp+0x4]
|
||||
vpinsrd xmm14,xmm14,DWORD PTR [rsp+0x44],0x1
|
||||
vpinsrd xmm14,xmm14,edx,0x2
|
||||
vinserti128 ymm13,ymm13,xmm14,0x1
|
||||
vbroadcasti128 ymm2,XMMWORD PTR [BLAKE3_IV+rip]
|
||||
vpbroadcastd ymm8,r8d
|
||||
vpblendd ymm3,ymm13,ymm8,0x88
|
||||
vbroadcasti128 ymm8,XMMWORD PTR [rsi]
|
||||
vbroadcasti128 ymm9,XMMWORD PTR [rsi+0x10]
|
||||
vshufps ymm4,ymm8,ymm9,0x88
|
||||
vshufps ymm5,ymm8,ymm9,0xdd
|
||||
vbroadcasti128 ymm8,XMMWORD PTR [rsi+0x20]
|
||||
vbroadcasti128 ymm9,XMMWORD PTR [rsi+0x30]
|
||||
vshufps ymm6,ymm8,ymm9,0x88
|
||||
vshufps ymm7,ymm8,ymm9,0xdd
|
||||
vpshufd ymm6,ymm6,0x93
|
||||
vpshufd ymm7,ymm7,0x93
|
||||
mov al,0x7
|
||||
3:
|
||||
vpaddd ymm0,ymm0,ymm4
|
||||
vpaddd ymm0,ymm0,ymm1
|
||||
vpxord ymm3,ymm3,ymm0
|
||||
vprord ymm3,ymm3,0x10
|
||||
vpaddd ymm2,ymm2,ymm3
|
||||
vpxord ymm1,ymm1,ymm2
|
||||
vprord ymm1,ymm1,0xc
|
||||
vpaddd ymm0,ymm0,ymm5
|
||||
vpaddd ymm0,ymm0,ymm1
|
||||
vpxord ymm3,ymm3,ymm0
|
||||
vprord ymm3,ymm3,0x8
|
||||
vpaddd ymm2,ymm2,ymm3
|
||||
vpxord ymm1,ymm1,ymm2
|
||||
vprord ymm1,ymm1,0x7
|
||||
vpshufd ymm0,ymm0,0x93
|
||||
vpshufd ymm3,ymm3,0x4e
|
||||
vpshufd ymm2,ymm2,0x39
|
||||
vpaddd ymm0,ymm0,ymm6
|
||||
vpaddd ymm0,ymm0,ymm1
|
||||
vpxord ymm3,ymm3,ymm0
|
||||
vprord ymm3,ymm3,0x10
|
||||
vpaddd ymm2,ymm2,ymm3
|
||||
vpxord ymm1,ymm1,ymm2
|
||||
vprord ymm1,ymm1,0xc
|
||||
vpaddd ymm0,ymm0,ymm7
|
||||
vpaddd ymm0,ymm0,ymm1
|
||||
vpxord ymm3,ymm3,ymm0
|
||||
vprord ymm3,ymm3,0x8
|
||||
vpaddd ymm2,ymm2,ymm3
|
||||
vpxord ymm1,ymm1,ymm2
|
||||
vprord ymm1,ymm1,0x7
|
||||
vpshufd ymm0,ymm0,0x39
|
||||
vpshufd ymm3,ymm3,0x4e
|
||||
vpshufd ymm2,ymm2,0x93
|
||||
dec al
|
||||
je 3f
|
||||
vshufps ymm8,ymm4,ymm5,0xd6
|
||||
vpshufd ymm9,ymm4,0xf
|
||||
vpshufd ymm4,ymm8,0x39
|
||||
vshufps ymm8,ymm6,ymm7,0xfa
|
||||
vpblendd ymm9,ymm9,ymm8,0xaa
|
||||
vpunpcklqdq ymm8,ymm7,ymm5
|
||||
vpblendd ymm8,ymm8,ymm6,0x88
|
||||
vpshufd ymm8,ymm8,0x78
|
||||
vpunpckhdq ymm5,ymm5,ymm7
|
||||
vpunpckldq ymm6,ymm6,ymm5
|
||||
vpshufd ymm7,ymm6,0x1e
|
||||
vmovdqa ymm5,ymm9
|
||||
vmovdqa ymm6,ymm8
|
||||
jmp 3b
|
||||
3:
|
||||
vpxor ymm0,ymm0,ymm2
|
||||
vpxor ymm1,ymm1,ymm3
|
||||
vbroadcasti128 ymm8,XMMWORD PTR [rdi]
|
||||
vbroadcasti128 ymm9,XMMWORD PTR [rdi+0x10]
|
||||
vpxor ymm2,ymm2,ymm8
|
||||
vpxor ymm3,ymm3,ymm9
|
||||
vmovdqu XMMWORD PTR [r9],xmm0
|
||||
vmovdqu XMMWORD PTR [r9+0x10],xmm1
|
||||
vmovdqu XMMWORD PTR [r9+0x20],xmm2
|
||||
vmovdqu XMMWORD PTR [r9+0x30],xmm3
|
||||
vextracti128 XMMWORD PTR [r9+0x40],ymm0,0x1
|
||||
vextracti128 XMMWORD PTR [r9+0x50],ymm1,0x1
|
||||
vextracti128 XMMWORD PTR [r9+0x60],ymm2,0x1
|
||||
vextracti128 XMMWORD PTR [r9+0x70],ymm3,0x1
|
||||
vmovdqu xmm0,XMMWORD PTR [rsp+0x8]
|
||||
vmovdqu xmm1,XMMWORD PTR [rsp+0x48]
|
||||
vmovdqa XMMWORD PTR [rsp],xmm0
|
||||
vmovdqa XMMWORD PTR [rsp+0x40],xmm1
|
||||
add r9,0x80
|
||||
sub r10,0x2
|
||||
2:
|
||||
test r10,0x1
|
||||
je 9b
|
||||
vmovdqu xmm0,XMMWORD PTR [rdi]
|
||||
vmovdqu xmm1,XMMWORD PTR [rdi+0x10]
|
||||
vmovd xmm14,DWORD PTR [rsp]
|
||||
vpinsrd xmm14,xmm14,DWORD PTR [rsp+0x40],0x1
|
||||
vpinsrd xmm14,xmm14,edx,0x2
|
||||
vmovdqa xmm2,XMMWORD PTR [BLAKE3_IV+rip]
|
||||
vpinsrd xmm3,xmm14,r8d,0x3
|
||||
vmovups xmm8,XMMWORD PTR [rsi]
|
||||
vmovups xmm9,XMMWORD PTR [rsi+0x10]
|
||||
vshufps xmm4,xmm8,xmm9,0x88
|
||||
vshufps xmm5,xmm8,xmm9,0xdd
|
||||
vmovups xmm8,XMMWORD PTR [rsi+0x20]
|
||||
vmovups xmm9,XMMWORD PTR [rsi+0x30]
|
||||
vshufps xmm6,xmm8,xmm9,0x88
|
||||
vshufps xmm7,xmm8,xmm9,0xdd
|
||||
vpshufd xmm6,xmm6,0x93
|
||||
vpshufd xmm7,xmm7,0x93
|
||||
mov al,0x7
|
||||
3:
|
||||
vpaddd xmm0,xmm0,xmm4
|
||||
vpaddd xmm0,xmm0,xmm1
|
||||
vpxord xmm3,xmm3,xmm0
|
||||
vprord xmm3,xmm3,0x10
|
||||
vpaddd xmm2,xmm2,xmm3
|
||||
vpxord xmm1,xmm1,xmm2
|
||||
vprord xmm1,xmm1,0xc
|
||||
vpaddd xmm0,xmm0,xmm5
|
||||
vpaddd xmm0,xmm0,xmm1
|
||||
vpxord xmm3,xmm3,xmm0
|
||||
vprord xmm3,xmm3,0x8
|
||||
vpaddd xmm2,xmm2,xmm3
|
||||
vpxord xmm1,xmm1,xmm2
|
||||
vprord xmm1,xmm1,0x7
|
||||
vpshufd xmm0,xmm0,0x93
|
||||
vpshufd xmm3,xmm3,0x4e
|
||||
vpshufd xmm2,xmm2,0x39
|
||||
vpaddd xmm0,xmm0,xmm6
|
||||
vpaddd xmm0,xmm0,xmm1
|
||||
vpxord xmm3,xmm3,xmm0
|
||||
vprord xmm3,xmm3,0x10
|
||||
vpaddd xmm2,xmm2,xmm3
|
||||
vpxord xmm1,xmm1,xmm2
|
||||
vprord xmm1,xmm1,0xc
|
||||
vpaddd xmm0,xmm0,xmm7
|
||||
vpaddd xmm0,xmm0,xmm1
|
||||
vpxord xmm3,xmm3,xmm0
|
||||
vprord xmm3,xmm3,0x8
|
||||
vpaddd xmm2,xmm2,xmm3
|
||||
vpxord xmm1,xmm1,xmm2
|
||||
vprord xmm1,xmm1,0x7
|
||||
vpshufd xmm0,xmm0,0x39
|
||||
vpshufd xmm3,xmm3,0x4e
|
||||
vpshufd xmm2,xmm2,0x93
|
||||
dec al
|
||||
je 3f
|
||||
vshufps xmm8,xmm4,xmm5,0xd6
|
||||
vpshufd xmm9,xmm4,0xf
|
||||
vpshufd xmm4,xmm8,0x39
|
||||
vshufps xmm8,xmm6,xmm7,0xfa
|
||||
vpblendd xmm9,xmm9,xmm8,0xaa
|
||||
vpunpcklqdq xmm8,xmm7,xmm5
|
||||
vpblendd xmm8,xmm8,xmm6,0x88
|
||||
vpshufd xmm8,xmm8,0x78
|
||||
vpunpckhdq xmm5,xmm5,xmm7
|
||||
vpunpckldq xmm6,xmm6,xmm5
|
||||
vpshufd xmm7,xmm6,0x1e
|
||||
vmovdqa xmm5,xmm9
|
||||
vmovdqa xmm6,xmm8
|
||||
jmp 3b
|
||||
3:
|
||||
vpxor xmm0,xmm0,xmm2
|
||||
vpxor xmm1,xmm1,xmm3
|
||||
vpxor xmm2,xmm2,XMMWORD PTR [rdi]
|
||||
vpxor xmm3,xmm3,XMMWORD PTR [rdi+0x10]
|
||||
vmovdqu XMMWORD PTR [r9],xmm0
|
||||
vmovdqu XMMWORD PTR [r9+0x10],xmm1
|
||||
vmovdqu XMMWORD PTR [r9+0x20],xmm2
|
||||
vmovdqu XMMWORD PTR [r9+0x30],xmm3
|
||||
jmp 9b
|
||||
|
||||
|
||||
#ifdef __APPLE__
|
||||
.static_data
|
||||
#else
|
||||
|
|
Loading…
Reference in New Issue