Diffstat (limited to 'module/icp/asm-x86_64/modes/ghash-x86_64.S')
-rw-r--r--	module/icp/asm-x86_64/modes/ghash-x86_64.S	70
1 file changed, 38 insertions(+), 32 deletions(-)
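
For orientation: the change below replaces hand-written linkage boilerplate
(.globl/.type/.align/.size) and the raw ".byte 0xf3,0xc3" return sequence
(the encoding of "rep ret") with the portability macros from
sys/asm_linkage.h, and switches ".align" to ".balign", whose argument is
always a byte count (GAS interprets ".align" as a power-of-two exponent on
some targets). The sketch that follows shows roughly how those macros can
expand; it is illustrative only, and the authoritative definitions live in
sys/asm_linkage.h, where they vary with build options such as CET/IBT and
return-thunk mitigations.

/*
 * Sketch only -- not the authoritative definitions. The real
 * sys/asm_linkage.h may also attach an MCOUNT profiling hook and
 * IBT/return-mitigation variants to these macros.
 */
#define	ENTRY_ALIGN(x, a)	\
	.text;			\
	.balign	a;		\
	.globl	x;		\
	.type	x, @function;	\
x:

#define	SET_SIZE(x)	.size	x, . - x
#define	SET_OBJ(x)	.type	x, @object

/* ENDBR emits endbr64 when IBT landing pads are enabled, else nothing. */
/* RET normally emits a plain ret (or a mitigated return sequence).     */
/* SECTION_STATIC selects a read-only data section such as .rodata.     */
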
diff --git a/module/icp/asm-x86_64/modes/ghash-x86_64.S b/module/icp/asm-x86_64/modes/ghash-x86_64.S
index 90cc36b43a78..f62e056d4b64 100644
--- a/module/icp/asm-x86_64/modes/ghash-x86_64.S
+++ b/module/icp/asm-x86_64/modes/ghash-x86_64.S
@@ -97,13 +97,18 @@
#if defined(__x86_64__) && defined(HAVE_AVX) && \
defined(HAVE_AES) && defined(HAVE_PCLMULQDQ)
+#define _ASM
+#include <sys/asm_linkage.h>
+
.text
-.globl gcm_gmult_clmul
-.type gcm_gmult_clmul,@function
-.align 16
-gcm_gmult_clmul:
+/* Windows userland links with OpenSSL */
+#if !defined (_WIN32) || defined (_KERNEL)
+ENTRY_ALIGN(gcm_gmult_clmul, 16)
+
.cfi_startproc
+ ENDBR
+
.L_gmult_clmul:
movdqu (%rdi),%xmm0
movdqa .Lbswap_mask(%rip),%xmm5
@@ -149,15 +154,14 @@ gcm_gmult_clmul:
pxor %xmm1,%xmm0
.byte 102,15,56,0,197
movdqu %xmm0,(%rdi)
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
-.size gcm_gmult_clmul,.-gcm_gmult_clmul
+SET_SIZE(gcm_gmult_clmul)
+#endif /* !_WIN32 || _KERNEL */
-.globl gcm_init_htab_avx
-.type gcm_init_htab_avx,@function
-.align 32
-gcm_init_htab_avx:
+ENTRY_ALIGN(gcm_init_htab_avx, 32)
.cfi_startproc
+ ENDBR
vzeroupper
vmovdqu (%rsi),%xmm2
@@ -184,7 +188,7 @@ gcm_init_htab_avx:
vpxor %xmm2,%xmm6,%xmm6
movq $4,%r10
jmp .Linit_start_avx
-.align 32
+.balign 32
.Linit_loop_avx:
vpalignr $8,%xmm3,%xmm4,%xmm5
vmovdqu %xmm5,-16(%rdi)
@@ -262,23 +266,21 @@ gcm_init_htab_avx:
vmovdqu %xmm5,-16(%rdi)
vzeroupper
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
-.size gcm_init_htab_avx,.-gcm_init_htab_avx
+SET_SIZE(gcm_init_htab_avx)
-.globl gcm_gmult_avx
-.type gcm_gmult_avx,@function
-.align 32
-gcm_gmult_avx:
+#if !defined (_WIN32) || defined (_KERNEL)
+ENTRY_ALIGN(gcm_gmult_avx, 32)
.cfi_startproc
+ ENDBR
jmp .L_gmult_clmul
.cfi_endproc
-.size gcm_gmult_avx,.-gcm_gmult_avx
-.globl gcm_ghash_avx
-.type gcm_ghash_avx,@function
-.align 32
-gcm_ghash_avx:
+SET_SIZE(gcm_gmult_avx)
+
+ENTRY_ALIGN(gcm_ghash_avx, 32)
.cfi_startproc
+ ENDBR
vzeroupper
vmovdqu (%rdi),%xmm10
@@ -384,7 +386,7 @@ gcm_ghash_avx:
subq $0x80,%rcx
jmp .Loop8x_avx
-.align 32
+.balign 32
.Loop8x_avx:
vpunpckhqdq %xmm15,%xmm15,%xmm8
vmovdqu 112(%rdx),%xmm14
@@ -504,7 +506,7 @@ gcm_ghash_avx:
addq $0x80,%rcx
jmp .Ltail_no_xor_avx
-.align 32
+.balign 32
.Lshort_avx:
vmovdqu -16(%rdx,%rcx,1),%xmm14
leaq (%rdx,%rcx,1),%rdx
@@ -608,7 +610,7 @@ gcm_ghash_avx:
subq $0x10,%rcx
jmp .Ltail_avx
-.align 32
+.balign 32
.Ltail_avx:
vpxor %xmm10,%xmm15,%xmm15
.Ltail_no_xor_avx:
@@ -649,10 +651,14 @@ gcm_ghash_avx:
vpshufb %xmm13,%xmm10,%xmm10
vmovdqu %xmm10,(%rdi)
vzeroupper
- .byte 0xf3,0xc3
+ RET
.cfi_endproc
-.size gcm_ghash_avx,.-gcm_ghash_avx
-.align 64
+SET_SIZE(gcm_ghash_avx)
+
+#endif /* !_WIN32 || _KERNEL */
+
+SECTION_STATIC
+.balign 64
.Lbswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
@@ -661,14 +667,14 @@ gcm_ghash_avx:
.long 7,0,7,0
.L7_mask_poly:
.long 7,0,450,0
-.align 64
-.type .Lrem_4bit,@object
+.balign 64
+SET_OBJ(.Lrem_4bit)
.Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
-.type .Lrem_8bit,@object
+SET_OBJ(.Lrem_8bit)
.Lrem_8bit:
.value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
@@ -704,7 +710,7 @@ gcm_ghash_avx:
.value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
-.align 64
+.balign 64
/* Mark the stack non-executable. */
#if defined(__linux__) && defined(__ELF__)