Diffstat (limited to 'secure/lib/libcrypto/arch/amd64/rsaz-avx512.S')
-rw-r--r--  secure/lib/libcrypto/arch/amd64/rsaz-avx512.S  902
1 file changed, 0 insertions(+), 902 deletions(-)
diff --git a/secure/lib/libcrypto/arch/amd64/rsaz-avx512.S b/secure/lib/libcrypto/arch/amd64/rsaz-avx512.S
deleted file mode 100644
index 0ea3ae6c2a9d..000000000000
--- a/secure/lib/libcrypto/arch/amd64/rsaz-avx512.S
+++ /dev/null
@@ -1,902 +0,0 @@
-/* Do not modify. This file is auto-generated from rsaz-avx512.pl. */
-
-.globl ossl_rsaz_avx512ifma_eligible
-.type ossl_rsaz_avx512ifma_eligible,@function
-.align 32
-ossl_rsaz_avx512ifma_eligible:
- movl OPENSSL_ia32cap_P+8(%rip),%ecx
- xorl %eax,%eax
- andl $2149777408,%ecx
- cmpl $2149777408,%ecx
- cmovel %ecx,%eax
- .byte 0xf3,0xc3
-.size ossl_rsaz_avx512ifma_eligible, .-ossl_rsaz_avx512ifma_eligible
-.text
-
-.globl ossl_rsaz_amm52x20_x1_256
-.type ossl_rsaz_amm52x20_x1_256,@function
-.align 32
-ossl_rsaz_amm52x20_x1_256:
-.cfi_startproc
-.byte 243,15,30,250
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
-.Lrsaz_amm52x20_x1_256_body:
-
-
- vpxord %ymm0,%ymm0,%ymm0
- vmovdqa64 %ymm0,%ymm1
- vmovdqa64 %ymm0,%ymm16
- vmovdqa64 %ymm0,%ymm17
- vmovdqa64 %ymm0,%ymm18
- vmovdqa64 %ymm0,%ymm19
-
- xorl %r9d,%r9d
-
- movq %rdx,%r11
- movq $0xfffffffffffff,%rax
-
-
- movl $5,%ebx
-
-.align 32
-.Lloop5:
- movq 0(%r11),%r13
-
- vpbroadcastq %r13,%ymm3
- movq 0(%rsi),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r9
- movq %r12,%r10
- adcq $0,%r10
-
- movq %r8,%r13
- imulq %r9,%r13
- andq %rax,%r13
-
- vpbroadcastq %r13,%ymm4
- movq 0(%rcx),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r9
- adcq %r12,%r10
-
- shrq $52,%r9
- salq $12,%r10
- orq %r10,%r9
-
- vpmadd52luq 0(%rsi),%ymm3,%ymm1
- vpmadd52luq 32(%rsi),%ymm3,%ymm16
- vpmadd52luq 64(%rsi),%ymm3,%ymm17
- vpmadd52luq 96(%rsi),%ymm3,%ymm18
- vpmadd52luq 128(%rsi),%ymm3,%ymm19
-
- vpmadd52luq 0(%rcx),%ymm4,%ymm1
- vpmadd52luq 32(%rcx),%ymm4,%ymm16
- vpmadd52luq 64(%rcx),%ymm4,%ymm17
- vpmadd52luq 96(%rcx),%ymm4,%ymm18
- vpmadd52luq 128(%rcx),%ymm4,%ymm19
-
-
- valignq $1,%ymm1,%ymm16,%ymm1
- valignq $1,%ymm16,%ymm17,%ymm16
- valignq $1,%ymm17,%ymm18,%ymm17
- valignq $1,%ymm18,%ymm19,%ymm18
- valignq $1,%ymm19,%ymm0,%ymm19
-
- vmovq %xmm1,%r13
- addq %r13,%r9
-
- vpmadd52huq 0(%rsi),%ymm3,%ymm1
- vpmadd52huq 32(%rsi),%ymm3,%ymm16
- vpmadd52huq 64(%rsi),%ymm3,%ymm17
- vpmadd52huq 96(%rsi),%ymm3,%ymm18
- vpmadd52huq 128(%rsi),%ymm3,%ymm19
-
- vpmadd52huq 0(%rcx),%ymm4,%ymm1
- vpmadd52huq 32(%rcx),%ymm4,%ymm16
- vpmadd52huq 64(%rcx),%ymm4,%ymm17
- vpmadd52huq 96(%rcx),%ymm4,%ymm18
- vpmadd52huq 128(%rcx),%ymm4,%ymm19
- movq 8(%r11),%r13
-
- vpbroadcastq %r13,%ymm3
- movq 0(%rsi),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r9
- movq %r12,%r10
- adcq $0,%r10
-
- movq %r8,%r13
- imulq %r9,%r13
- andq %rax,%r13
-
- vpbroadcastq %r13,%ymm4
- movq 0(%rcx),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r9
- adcq %r12,%r10
-
- shrq $52,%r9
- salq $12,%r10
- orq %r10,%r9
-
- vpmadd52luq 0(%rsi),%ymm3,%ymm1
- vpmadd52luq 32(%rsi),%ymm3,%ymm16
- vpmadd52luq 64(%rsi),%ymm3,%ymm17
- vpmadd52luq 96(%rsi),%ymm3,%ymm18
- vpmadd52luq 128(%rsi),%ymm3,%ymm19
-
- vpmadd52luq 0(%rcx),%ymm4,%ymm1
- vpmadd52luq 32(%rcx),%ymm4,%ymm16
- vpmadd52luq 64(%rcx),%ymm4,%ymm17
- vpmadd52luq 96(%rcx),%ymm4,%ymm18
- vpmadd52luq 128(%rcx),%ymm4,%ymm19
-
-
- valignq $1,%ymm1,%ymm16,%ymm1
- valignq $1,%ymm16,%ymm17,%ymm16
- valignq $1,%ymm17,%ymm18,%ymm17
- valignq $1,%ymm18,%ymm19,%ymm18
- valignq $1,%ymm19,%ymm0,%ymm19
-
- vmovq %xmm1,%r13
- addq %r13,%r9
-
- vpmadd52huq 0(%rsi),%ymm3,%ymm1
- vpmadd52huq 32(%rsi),%ymm3,%ymm16
- vpmadd52huq 64(%rsi),%ymm3,%ymm17
- vpmadd52huq 96(%rsi),%ymm3,%ymm18
- vpmadd52huq 128(%rsi),%ymm3,%ymm19
-
- vpmadd52huq 0(%rcx),%ymm4,%ymm1
- vpmadd52huq 32(%rcx),%ymm4,%ymm16
- vpmadd52huq 64(%rcx),%ymm4,%ymm17
- vpmadd52huq 96(%rcx),%ymm4,%ymm18
- vpmadd52huq 128(%rcx),%ymm4,%ymm19
- movq 16(%r11),%r13
-
- vpbroadcastq %r13,%ymm3
- movq 0(%rsi),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r9
- movq %r12,%r10
- adcq $0,%r10
-
- movq %r8,%r13
- imulq %r9,%r13
- andq %rax,%r13
-
- vpbroadcastq %r13,%ymm4
- movq 0(%rcx),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r9
- adcq %r12,%r10
-
- shrq $52,%r9
- salq $12,%r10
- orq %r10,%r9
-
- vpmadd52luq 0(%rsi),%ymm3,%ymm1
- vpmadd52luq 32(%rsi),%ymm3,%ymm16
- vpmadd52luq 64(%rsi),%ymm3,%ymm17
- vpmadd52luq 96(%rsi),%ymm3,%ymm18
- vpmadd52luq 128(%rsi),%ymm3,%ymm19
-
- vpmadd52luq 0(%rcx),%ymm4,%ymm1
- vpmadd52luq 32(%rcx),%ymm4,%ymm16
- vpmadd52luq 64(%rcx),%ymm4,%ymm17
- vpmadd52luq 96(%rcx),%ymm4,%ymm18
- vpmadd52luq 128(%rcx),%ymm4,%ymm19
-
-
- valignq $1,%ymm1,%ymm16,%ymm1
- valignq $1,%ymm16,%ymm17,%ymm16
- valignq $1,%ymm17,%ymm18,%ymm17
- valignq $1,%ymm18,%ymm19,%ymm18
- valignq $1,%ymm19,%ymm0,%ymm19
-
- vmovq %xmm1,%r13
- addq %r13,%r9
-
- vpmadd52huq 0(%rsi),%ymm3,%ymm1
- vpmadd52huq 32(%rsi),%ymm3,%ymm16
- vpmadd52huq 64(%rsi),%ymm3,%ymm17
- vpmadd52huq 96(%rsi),%ymm3,%ymm18
- vpmadd52huq 128(%rsi),%ymm3,%ymm19
-
- vpmadd52huq 0(%rcx),%ymm4,%ymm1
- vpmadd52huq 32(%rcx),%ymm4,%ymm16
- vpmadd52huq 64(%rcx),%ymm4,%ymm17
- vpmadd52huq 96(%rcx),%ymm4,%ymm18
- vpmadd52huq 128(%rcx),%ymm4,%ymm19
- movq 24(%r11),%r13
-
- vpbroadcastq %r13,%ymm3
- movq 0(%rsi),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r9
- movq %r12,%r10
- adcq $0,%r10
-
- movq %r8,%r13
- imulq %r9,%r13
- andq %rax,%r13
-
- vpbroadcastq %r13,%ymm4
- movq 0(%rcx),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r9
- adcq %r12,%r10
-
- shrq $52,%r9
- salq $12,%r10
- orq %r10,%r9
-
- vpmadd52luq 0(%rsi),%ymm3,%ymm1
- vpmadd52luq 32(%rsi),%ymm3,%ymm16
- vpmadd52luq 64(%rsi),%ymm3,%ymm17
- vpmadd52luq 96(%rsi),%ymm3,%ymm18
- vpmadd52luq 128(%rsi),%ymm3,%ymm19
-
- vpmadd52luq 0(%rcx),%ymm4,%ymm1
- vpmadd52luq 32(%rcx),%ymm4,%ymm16
- vpmadd52luq 64(%rcx),%ymm4,%ymm17
- vpmadd52luq 96(%rcx),%ymm4,%ymm18
- vpmadd52luq 128(%rcx),%ymm4,%ymm19
-
-
- valignq $1,%ymm1,%ymm16,%ymm1
- valignq $1,%ymm16,%ymm17,%ymm16
- valignq $1,%ymm17,%ymm18,%ymm17
- valignq $1,%ymm18,%ymm19,%ymm18
- valignq $1,%ymm19,%ymm0,%ymm19
-
- vmovq %xmm1,%r13
- addq %r13,%r9
-
- vpmadd52huq 0(%rsi),%ymm3,%ymm1
- vpmadd52huq 32(%rsi),%ymm3,%ymm16
- vpmadd52huq 64(%rsi),%ymm3,%ymm17
- vpmadd52huq 96(%rsi),%ymm3,%ymm18
- vpmadd52huq 128(%rsi),%ymm3,%ymm19
-
- vpmadd52huq 0(%rcx),%ymm4,%ymm1
- vpmadd52huq 32(%rcx),%ymm4,%ymm16
- vpmadd52huq 64(%rcx),%ymm4,%ymm17
- vpmadd52huq 96(%rcx),%ymm4,%ymm18
- vpmadd52huq 128(%rcx),%ymm4,%ymm19
- leaq 32(%r11),%r11
- decl %ebx
- jne .Lloop5
-
- vmovdqa64 .Lmask52x4(%rip),%ymm4
-
- vpbroadcastq %r9,%ymm3
- vpblendd $3,%ymm3,%ymm1,%ymm1
-
-
-
- vpsrlq $52,%ymm1,%ymm24
- vpsrlq $52,%ymm16,%ymm25
- vpsrlq $52,%ymm17,%ymm26
- vpsrlq $52,%ymm18,%ymm27
- vpsrlq $52,%ymm19,%ymm28
-
-
- valignq $3,%ymm27,%ymm28,%ymm28
- valignq $3,%ymm26,%ymm27,%ymm27
- valignq $3,%ymm25,%ymm26,%ymm26
- valignq $3,%ymm24,%ymm25,%ymm25
- valignq $3,%ymm0,%ymm24,%ymm24
-
-
- vpandq %ymm4,%ymm1,%ymm1
- vpandq %ymm4,%ymm16,%ymm16
- vpandq %ymm4,%ymm17,%ymm17
- vpandq %ymm4,%ymm18,%ymm18
- vpandq %ymm4,%ymm19,%ymm19
-
-
- vpaddq %ymm24,%ymm1,%ymm1
- vpaddq %ymm25,%ymm16,%ymm16
- vpaddq %ymm26,%ymm17,%ymm17
- vpaddq %ymm27,%ymm18,%ymm18
- vpaddq %ymm28,%ymm19,%ymm19
-
-
-
- vpcmpuq $1,%ymm1,%ymm4,%k1
- vpcmpuq $1,%ymm16,%ymm4,%k2
- vpcmpuq $1,%ymm17,%ymm4,%k3
- vpcmpuq $1,%ymm18,%ymm4,%k4
- vpcmpuq $1,%ymm19,%ymm4,%k5
- kmovb %k1,%r14d
- kmovb %k2,%r13d
- kmovb %k3,%r12d
- kmovb %k4,%r11d
- kmovb %k5,%r10d
-
-
- vpcmpuq $0,%ymm1,%ymm4,%k1
- vpcmpuq $0,%ymm16,%ymm4,%k2
- vpcmpuq $0,%ymm17,%ymm4,%k3
- vpcmpuq $0,%ymm18,%ymm4,%k4
- vpcmpuq $0,%ymm19,%ymm4,%k5
- kmovb %k1,%r9d
- kmovb %k2,%r8d
- kmovb %k3,%ebx
- kmovb %k4,%ecx
- kmovb %k5,%edx
-
-
-
- shlb $4,%r13b
- orb %r13b,%r14b
- shlb $4,%r11b
- orb %r11b,%r12b
-
- addb %r14b,%r14b
- adcb %r12b,%r12b
- adcb %r10b,%r10b
-
- shlb $4,%r8b
- orb %r8b,%r9b
- shlb $4,%cl
- orb %cl,%bl
-
- addb %r9b,%r14b
- adcb %bl,%r12b
- adcb %dl,%r10b
-
- xorb %r9b,%r14b
- xorb %bl,%r12b
- xorb %dl,%r10b
-
- kmovb %r14d,%k1
- shrb $4,%r14b
- kmovb %r14d,%k2
- kmovb %r12d,%k3
- shrb $4,%r12b
- kmovb %r12d,%k4
- kmovb %r10d,%k5
-
-
- vpsubq %ymm4,%ymm1,%ymm1{%k1}
- vpsubq %ymm4,%ymm16,%ymm16{%k2}
- vpsubq %ymm4,%ymm17,%ymm17{%k3}
- vpsubq %ymm4,%ymm18,%ymm18{%k4}
- vpsubq %ymm4,%ymm19,%ymm19{%k5}
-
- vpandq %ymm4,%ymm1,%ymm1
- vpandq %ymm4,%ymm16,%ymm16
- vpandq %ymm4,%ymm17,%ymm17
- vpandq %ymm4,%ymm18,%ymm18
- vpandq %ymm4,%ymm19,%ymm19
-
- vmovdqu64 %ymm1,(%rdi)
- vmovdqu64 %ymm16,32(%rdi)
- vmovdqu64 %ymm17,64(%rdi)
- vmovdqu64 %ymm18,96(%rdi)
- vmovdqu64 %ymm19,128(%rdi)
-
- vzeroupper
- movq 0(%rsp),%r15
-.cfi_restore %r15
- movq 8(%rsp),%r14
-.cfi_restore %r14
- movq 16(%rsp),%r13
-.cfi_restore %r13
- movq 24(%rsp),%r12
-.cfi_restore %r12
- movq 32(%rsp),%rbp
-.cfi_restore %rbp
- movq 40(%rsp),%rbx
-.cfi_restore %rbx
- leaq 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
-.Lrsaz_amm52x20_x1_256_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ossl_rsaz_amm52x20_x1_256, .-ossl_rsaz_amm52x20_x1_256
-.data
-.align 32
-.Lmask52x4:
-.quad 0xfffffffffffff
-.quad 0xfffffffffffff
-.quad 0xfffffffffffff
-.quad 0xfffffffffffff
-.text
-
-.globl ossl_rsaz_amm52x20_x2_256
-.type ossl_rsaz_amm52x20_x2_256,@function
-.align 32
-ossl_rsaz_amm52x20_x2_256:
-.cfi_startproc
-.byte 243,15,30,250
- pushq %rbx
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbx,-16
- pushq %rbp
-.cfi_adjust_cfa_offset 8
-.cfi_offset %rbp,-24
- pushq %r12
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r12,-32
- pushq %r13
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r13,-40
- pushq %r14
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r14,-48
- pushq %r15
-.cfi_adjust_cfa_offset 8
-.cfi_offset %r15,-56
-.Lrsaz_amm52x20_x2_256_body:
-
-
- vpxord %ymm0,%ymm0,%ymm0
- vmovdqa64 %ymm0,%ymm1
- vmovdqa64 %ymm0,%ymm16
- vmovdqa64 %ymm0,%ymm17
- vmovdqa64 %ymm0,%ymm18
- vmovdqa64 %ymm0,%ymm19
- vmovdqa64 %ymm0,%ymm2
- vmovdqa64 %ymm0,%ymm20
- vmovdqa64 %ymm0,%ymm21
- vmovdqa64 %ymm0,%ymm22
- vmovdqa64 %ymm0,%ymm23
-
- xorl %r9d,%r9d
- xorl %r15d,%r15d
-
- movq %rdx,%r11
- movq $0xfffffffffffff,%rax
-
- movl $20,%ebx
-
-.align 32
-.Lloop20:
- movq 0(%r11),%r13
-
- vpbroadcastq %r13,%ymm3
- movq 0(%rsi),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r9
- movq %r12,%r10
- adcq $0,%r10
-
- movq (%r8),%r13
- imulq %r9,%r13
- andq %rax,%r13
-
- vpbroadcastq %r13,%ymm4
- movq 0(%rcx),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r9
- adcq %r12,%r10
-
- shrq $52,%r9
- salq $12,%r10
- orq %r10,%r9
-
- vpmadd52luq 0(%rsi),%ymm3,%ymm1
- vpmadd52luq 32(%rsi),%ymm3,%ymm16
- vpmadd52luq 64(%rsi),%ymm3,%ymm17
- vpmadd52luq 96(%rsi),%ymm3,%ymm18
- vpmadd52luq 128(%rsi),%ymm3,%ymm19
-
- vpmadd52luq 0(%rcx),%ymm4,%ymm1
- vpmadd52luq 32(%rcx),%ymm4,%ymm16
- vpmadd52luq 64(%rcx),%ymm4,%ymm17
- vpmadd52luq 96(%rcx),%ymm4,%ymm18
- vpmadd52luq 128(%rcx),%ymm4,%ymm19
-
-
- valignq $1,%ymm1,%ymm16,%ymm1
- valignq $1,%ymm16,%ymm17,%ymm16
- valignq $1,%ymm17,%ymm18,%ymm17
- valignq $1,%ymm18,%ymm19,%ymm18
- valignq $1,%ymm19,%ymm0,%ymm19
-
- vmovq %xmm1,%r13
- addq %r13,%r9
-
- vpmadd52huq 0(%rsi),%ymm3,%ymm1
- vpmadd52huq 32(%rsi),%ymm3,%ymm16
- vpmadd52huq 64(%rsi),%ymm3,%ymm17
- vpmadd52huq 96(%rsi),%ymm3,%ymm18
- vpmadd52huq 128(%rsi),%ymm3,%ymm19
-
- vpmadd52huq 0(%rcx),%ymm4,%ymm1
- vpmadd52huq 32(%rcx),%ymm4,%ymm16
- vpmadd52huq 64(%rcx),%ymm4,%ymm17
- vpmadd52huq 96(%rcx),%ymm4,%ymm18
- vpmadd52huq 128(%rcx),%ymm4,%ymm19
- movq 160(%r11),%r13
-
- vpbroadcastq %r13,%ymm3
- movq 160(%rsi),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r15
- movq %r12,%r10
- adcq $0,%r10
-
- movq 8(%r8),%r13
- imulq %r15,%r13
- andq %rax,%r13
-
- vpbroadcastq %r13,%ymm4
- movq 160(%rcx),%rdx
- mulxq %r13,%r13,%r12
- addq %r13,%r15
- adcq %r12,%r10
-
- shrq $52,%r15
- salq $12,%r10
- orq %r10,%r15
-
- vpmadd52luq 160(%rsi),%ymm3,%ymm2
- vpmadd52luq 192(%rsi),%ymm3,%ymm20
- vpmadd52luq 224(%rsi),%ymm3,%ymm21
- vpmadd52luq 256(%rsi),%ymm3,%ymm22
- vpmadd52luq 288(%rsi),%ymm3,%ymm23
-
- vpmadd52luq 160(%rcx),%ymm4,%ymm2
- vpmadd52luq 192(%rcx),%ymm4,%ymm20
- vpmadd52luq 224(%rcx),%ymm4,%ymm21
- vpmadd52luq 256(%rcx),%ymm4,%ymm22
- vpmadd52luq 288(%rcx),%ymm4,%ymm23
-
-
- valignq $1,%ymm2,%ymm20,%ymm2
- valignq $1,%ymm20,%ymm21,%ymm20
- valignq $1,%ymm21,%ymm22,%ymm21
- valignq $1,%ymm22,%ymm23,%ymm22
- valignq $1,%ymm23,%ymm0,%ymm23
-
- vmovq %xmm2,%r13
- addq %r13,%r15
-
- vpmadd52huq 160(%rsi),%ymm3,%ymm2
- vpmadd52huq 192(%rsi),%ymm3,%ymm20
- vpmadd52huq 224(%rsi),%ymm3,%ymm21
- vpmadd52huq 256(%rsi),%ymm3,%ymm22
- vpmadd52huq 288(%rsi),%ymm3,%ymm23
-
- vpmadd52huq 160(%rcx),%ymm4,%ymm2
- vpmadd52huq 192(%rcx),%ymm4,%ymm20
- vpmadd52huq 224(%rcx),%ymm4,%ymm21
- vpmadd52huq 256(%rcx),%ymm4,%ymm22
- vpmadd52huq 288(%rcx),%ymm4,%ymm23
- leaq 8(%r11),%r11
- decl %ebx
- jne .Lloop20
-
- vmovdqa64 .Lmask52x4(%rip),%ymm4
-
- vpbroadcastq %r9,%ymm3
- vpblendd $3,%ymm3,%ymm1,%ymm1
-
-
-
- vpsrlq $52,%ymm1,%ymm24
- vpsrlq $52,%ymm16,%ymm25
- vpsrlq $52,%ymm17,%ymm26
- vpsrlq $52,%ymm18,%ymm27
- vpsrlq $52,%ymm19,%ymm28
-
-
- valignq $3,%ymm27,%ymm28,%ymm28
- valignq $3,%ymm26,%ymm27,%ymm27
- valignq $3,%ymm25,%ymm26,%ymm26
- valignq $3,%ymm24,%ymm25,%ymm25
- valignq $3,%ymm0,%ymm24,%ymm24
-
-
- vpandq %ymm4,%ymm1,%ymm1
- vpandq %ymm4,%ymm16,%ymm16
- vpandq %ymm4,%ymm17,%ymm17
- vpandq %ymm4,%ymm18,%ymm18
- vpandq %ymm4,%ymm19,%ymm19
-
-
- vpaddq %ymm24,%ymm1,%ymm1
- vpaddq %ymm25,%ymm16,%ymm16
- vpaddq %ymm26,%ymm17,%ymm17
- vpaddq %ymm27,%ymm18,%ymm18
- vpaddq %ymm28,%ymm19,%ymm19
-
-
-
- vpcmpuq $1,%ymm1,%ymm4,%k1
- vpcmpuq $1,%ymm16,%ymm4,%k2
- vpcmpuq $1,%ymm17,%ymm4,%k3
- vpcmpuq $1,%ymm18,%ymm4,%k4
- vpcmpuq $1,%ymm19,%ymm4,%k5
- kmovb %k1,%r14d
- kmovb %k2,%r13d
- kmovb %k3,%r12d
- kmovb %k4,%r11d
- kmovb %k5,%r10d
-
-
- vpcmpuq $0,%ymm1,%ymm4,%k1
- vpcmpuq $0,%ymm16,%ymm4,%k2
- vpcmpuq $0,%ymm17,%ymm4,%k3
- vpcmpuq $0,%ymm18,%ymm4,%k4
- vpcmpuq $0,%ymm19,%ymm4,%k5
- kmovb %k1,%r9d
- kmovb %k2,%r8d
- kmovb %k3,%ebx
- kmovb %k4,%ecx
- kmovb %k5,%edx
-
-
-
- shlb $4,%r13b
- orb %r13b,%r14b
- shlb $4,%r11b
- orb %r11b,%r12b
-
- addb %r14b,%r14b
- adcb %r12b,%r12b
- adcb %r10b,%r10b
-
- shlb $4,%r8b
- orb %r8b,%r9b
- shlb $4,%cl
- orb %cl,%bl
-
- addb %r9b,%r14b
- adcb %bl,%r12b
- adcb %dl,%r10b
-
- xorb %r9b,%r14b
- xorb %bl,%r12b
- xorb %dl,%r10b
-
- kmovb %r14d,%k1
- shrb $4,%r14b
- kmovb %r14d,%k2
- kmovb %r12d,%k3
- shrb $4,%r12b
- kmovb %r12d,%k4
- kmovb %r10d,%k5
-
-
- vpsubq %ymm4,%ymm1,%ymm1{%k1}
- vpsubq %ymm4,%ymm16,%ymm16{%k2}
- vpsubq %ymm4,%ymm17,%ymm17{%k3}
- vpsubq %ymm4,%ymm18,%ymm18{%k4}
- vpsubq %ymm4,%ymm19,%ymm19{%k5}
-
- vpandq %ymm4,%ymm1,%ymm1
- vpandq %ymm4,%ymm16,%ymm16
- vpandq %ymm4,%ymm17,%ymm17
- vpandq %ymm4,%ymm18,%ymm18
- vpandq %ymm4,%ymm19,%ymm19
-
- vpbroadcastq %r15,%ymm3
- vpblendd $3,%ymm3,%ymm2,%ymm2
-
-
-
- vpsrlq $52,%ymm2,%ymm24
- vpsrlq $52,%ymm20,%ymm25
- vpsrlq $52,%ymm21,%ymm26
- vpsrlq $52,%ymm22,%ymm27
- vpsrlq $52,%ymm23,%ymm28
-
-
- valignq $3,%ymm27,%ymm28,%ymm28
- valignq $3,%ymm26,%ymm27,%ymm27
- valignq $3,%ymm25,%ymm26,%ymm26
- valignq $3,%ymm24,%ymm25,%ymm25
- valignq $3,%ymm0,%ymm24,%ymm24
-
-
- vpandq %ymm4,%ymm2,%ymm2
- vpandq %ymm4,%ymm20,%ymm20
- vpandq %ymm4,%ymm21,%ymm21
- vpandq %ymm4,%ymm22,%ymm22
- vpandq %ymm4,%ymm23,%ymm23
-
-
- vpaddq %ymm24,%ymm2,%ymm2
- vpaddq %ymm25,%ymm20,%ymm20
- vpaddq %ymm26,%ymm21,%ymm21
- vpaddq %ymm27,%ymm22,%ymm22
- vpaddq %ymm28,%ymm23,%ymm23
-
-
-
- vpcmpuq $1,%ymm2,%ymm4,%k1
- vpcmpuq $1,%ymm20,%ymm4,%k2
- vpcmpuq $1,%ymm21,%ymm4,%k3
- vpcmpuq $1,%ymm22,%ymm4,%k4
- vpcmpuq $1,%ymm23,%ymm4,%k5
- kmovb %k1,%r14d
- kmovb %k2,%r13d
- kmovb %k3,%r12d
- kmovb %k4,%r11d
- kmovb %k5,%r10d
-
-
- vpcmpuq $0,%ymm2,%ymm4,%k1
- vpcmpuq $0,%ymm20,%ymm4,%k2
- vpcmpuq $0,%ymm21,%ymm4,%k3
- vpcmpuq $0,%ymm22,%ymm4,%k4
- vpcmpuq $0,%ymm23,%ymm4,%k5
- kmovb %k1,%r9d
- kmovb %k2,%r8d
- kmovb %k3,%ebx
- kmovb %k4,%ecx
- kmovb %k5,%edx
-
-
-
- shlb $4,%r13b
- orb %r13b,%r14b
- shlb $4,%r11b
- orb %r11b,%r12b
-
- addb %r14b,%r14b
- adcb %r12b,%r12b
- adcb %r10b,%r10b
-
- shlb $4,%r8b
- orb %r8b,%r9b
- shlb $4,%cl
- orb %cl,%bl
-
- addb %r9b,%r14b
- adcb %bl,%r12b
- adcb %dl,%r10b
-
- xorb %r9b,%r14b
- xorb %bl,%r12b
- xorb %dl,%r10b
-
- kmovb %r14d,%k1
- shrb $4,%r14b
- kmovb %r14d,%k2
- kmovb %r12d,%k3
- shrb $4,%r12b
- kmovb %r12d,%k4
- kmovb %r10d,%k5
-
-
- vpsubq %ymm4,%ymm2,%ymm2{%k1}
- vpsubq %ymm4,%ymm20,%ymm20{%k2}
- vpsubq %ymm4,%ymm21,%ymm21{%k3}
- vpsubq %ymm4,%ymm22,%ymm22{%k4}
- vpsubq %ymm4,%ymm23,%ymm23{%k5}
-
- vpandq %ymm4,%ymm2,%ymm2
- vpandq %ymm4,%ymm20,%ymm20
- vpandq %ymm4,%ymm21,%ymm21
- vpandq %ymm4,%ymm22,%ymm22
- vpandq %ymm4,%ymm23,%ymm23
-
- vmovdqu64 %ymm1,(%rdi)
- vmovdqu64 %ymm16,32(%rdi)
- vmovdqu64 %ymm17,64(%rdi)
- vmovdqu64 %ymm18,96(%rdi)
- vmovdqu64 %ymm19,128(%rdi)
-
- vmovdqu64 %ymm2,160(%rdi)
- vmovdqu64 %ymm20,192(%rdi)
- vmovdqu64 %ymm21,224(%rdi)
- vmovdqu64 %ymm22,256(%rdi)
- vmovdqu64 %ymm23,288(%rdi)
-
- vzeroupper
- movq 0(%rsp),%r15
-.cfi_restore %r15
- movq 8(%rsp),%r14
-.cfi_restore %r14
- movq 16(%rsp),%r13
-.cfi_restore %r13
- movq 24(%rsp),%r12
-.cfi_restore %r12
- movq 32(%rsp),%rbp
-.cfi_restore %rbp
- movq 40(%rsp),%rbx
-.cfi_restore %rbx
- leaq 48(%rsp),%rsp
-.cfi_adjust_cfa_offset -48
-.Lrsaz_amm52x20_x2_256_epilogue:
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ossl_rsaz_amm52x20_x2_256, .-ossl_rsaz_amm52x20_x2_256
-.text
-
-.align 32
-.globl ossl_extract_multiplier_2x20_win5
-.type ossl_extract_multiplier_2x20_win5,@function
-ossl_extract_multiplier_2x20_win5:
-.cfi_startproc
-.byte 243,15,30,250
- leaq (%rcx,%rcx,4),%rax
- salq $5,%rax
- addq %rax,%rsi
-
- vmovdqa64 .Lones(%rip),%ymm23
- vpbroadcastq %rdx,%ymm22
- leaq 10240(%rsi),%rax
-
- vpxor %xmm4,%xmm4,%xmm4
- vmovdqa64 %ymm4,%ymm3
- vmovdqa64 %ymm4,%ymm2
- vmovdqa64 %ymm4,%ymm1
- vmovdqa64 %ymm4,%ymm0
- vmovdqa64 %ymm4,%ymm21
-
-.align 32
-.Lloop:
- vpcmpq $0,%ymm21,%ymm22,%k1
- addq $320,%rsi
- vpaddq %ymm23,%ymm21,%ymm21
- vmovdqu64 -320(%rsi),%ymm16
- vmovdqu64 -288(%rsi),%ymm17
- vmovdqu64 -256(%rsi),%ymm18
- vmovdqu64 -224(%rsi),%ymm19
- vmovdqu64 -192(%rsi),%ymm20
- vpblendmq %ymm16,%ymm0,%ymm0{%k1}
- vpblendmq %ymm17,%ymm1,%ymm1{%k1}
- vpblendmq %ymm18,%ymm2,%ymm2{%k1}
- vpblendmq %ymm19,%ymm3,%ymm3{%k1}
- vpblendmq %ymm20,%ymm4,%ymm4{%k1}
- cmpq %rsi,%rax
- jne .Lloop
-
- vmovdqu64 %ymm0,(%rdi)
- vmovdqu64 %ymm1,32(%rdi)
- vmovdqu64 %ymm2,64(%rdi)
- vmovdqu64 %ymm3,96(%rdi)
- vmovdqu64 %ymm4,128(%rdi)
-
- .byte 0xf3,0xc3
-.cfi_endproc
-.size ossl_extract_multiplier_2x20_win5, .-ossl_extract_multiplier_2x20_win5
-.data
-.align 32
-.Lones:
-.quad 1,1,1,1
- .section ".note.gnu.property", "a"
- .p2align 3
- .long 1f - 0f
- .long 4f - 1f
- .long 5
-0:
- # "GNU" encoded with .byte, since .asciz isn't supported
- # on Solaris.
- .byte 0x47
- .byte 0x4e
- .byte 0x55
- .byte 0
-1:
- .p2align 3
- .long 0xc0000002
- .long 3f - 2f
-2:
- .long 3
-3:
- .p2align 3
-4: