diff options
Diffstat (limited to 'module/icp/asm-x86_64/modes/ghash-x86_64.S')
-rw-r--r-- | module/icp/asm-x86_64/modes/ghash-x86_64.S | 70 |
1 files changed, 38 insertions, 32 deletions
diff --git a/module/icp/asm-x86_64/modes/ghash-x86_64.S b/module/icp/asm-x86_64/modes/ghash-x86_64.S index 90cc36b43a78..f62e056d4b64 100644 --- a/module/icp/asm-x86_64/modes/ghash-x86_64.S +++ b/module/icp/asm-x86_64/modes/ghash-x86_64.S @@ -97,13 +97,18 @@ #if defined(__x86_64__) && defined(HAVE_AVX) && \ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) +#define _ASM +#include <sys/asm_linkage.h> + .text -.globl gcm_gmult_clmul -.type gcm_gmult_clmul,@function -.align 16 -gcm_gmult_clmul: +/* Windows userland links with OpenSSL */ +#if !defined (_WIN32) || defined (_KERNEL) +ENTRY_ALIGN(gcm_gmult_clmul, 16) + .cfi_startproc + ENDBR + .L_gmult_clmul: movdqu (%rdi),%xmm0 movdqa .Lbswap_mask(%rip),%xmm5 @@ -149,15 +154,14 @@ gcm_gmult_clmul: pxor %xmm1,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) - .byte 0xf3,0xc3 + RET .cfi_endproc -.size gcm_gmult_clmul,.-gcm_gmult_clmul +SET_SIZE(gcm_gmult_clmul) +#endif /* !_WIN32 || _KERNEL */ -.globl gcm_init_htab_avx -.type gcm_init_htab_avx,@function -.align 32 -gcm_init_htab_avx: +ENTRY_ALIGN(gcm_init_htab_avx, 32) .cfi_startproc + ENDBR vzeroupper vmovdqu (%rsi),%xmm2 @@ -184,7 +188,7 @@ gcm_init_htab_avx: vpxor %xmm2,%xmm6,%xmm6 movq $4,%r10 jmp .Linit_start_avx -.align 32 +.balign 32 .Linit_loop_avx: vpalignr $8,%xmm3,%xmm4,%xmm5 vmovdqu %xmm5,-16(%rdi) @@ -262,23 +266,21 @@ gcm_init_htab_avx: vmovdqu %xmm5,-16(%rdi) vzeroupper - .byte 0xf3,0xc3 + RET .cfi_endproc -.size gcm_init_htab_avx,.-gcm_init_htab_avx +SET_SIZE(gcm_init_htab_avx) -.globl gcm_gmult_avx -.type gcm_gmult_avx,@function -.align 32 -gcm_gmult_avx: +#if !defined (_WIN32) || defined (_KERNEL) +ENTRY_ALIGN(gcm_gmult_avx, 32) .cfi_startproc + ENDBR jmp .L_gmult_clmul .cfi_endproc -.size gcm_gmult_avx,.-gcm_gmult_avx -.globl gcm_ghash_avx -.type gcm_ghash_avx,@function -.align 32 -gcm_ghash_avx: +SET_SIZE(gcm_gmult_avx) + +ENTRY_ALIGN(gcm_ghash_avx, 32) .cfi_startproc + ENDBR vzeroupper vmovdqu (%rdi),%xmm10 @@ -384,7 +386,7 @@ gcm_ghash_avx: subq $0x80,%rcx jmp .Loop8x_avx -.align 32 +.balign 32 .Loop8x_avx: vpunpckhqdq %xmm15,%xmm15,%xmm8 vmovdqu 112(%rdx),%xmm14 @@ -504,7 +506,7 @@ gcm_ghash_avx: addq $0x80,%rcx jmp .Ltail_no_xor_avx -.align 32 +.balign 32 .Lshort_avx: vmovdqu -16(%rdx,%rcx,1),%xmm14 leaq (%rdx,%rcx,1),%rdx @@ -608,7 +610,7 @@ gcm_ghash_avx: subq $0x10,%rcx jmp .Ltail_avx -.align 32 +.balign 32 .Ltail_avx: vpxor %xmm10,%xmm15,%xmm15 .Ltail_no_xor_avx: @@ -649,10 +651,14 @@ gcm_ghash_avx: vpshufb %xmm13,%xmm10,%xmm10 vmovdqu %xmm10,(%rdi) vzeroupper - .byte 0xf3,0xc3 + RET .cfi_endproc -.size gcm_ghash_avx,.-gcm_ghash_avx -.align 64 +SET_SIZE(gcm_ghash_avx) + +#endif /* !_WIN32 || _KERNEL */ + +SECTION_STATIC +.balign 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .L0x1c2_polynomial: @@ -661,14 +667,14 @@ gcm_ghash_avx: .long 7,0,7,0 .L7_mask_poly: .long 7,0,450,0 -.align 64 -.type .Lrem_4bit,@object +.balign 64 +SET_OBJ(.Lrem_4bit) .Lrem_4bit: .long 0,0,0,471859200,0,943718400,0,610271232 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 .long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 -.type .Lrem_8bit,@object +SET_OBJ(.Lrem_8bit) .Lrem_8bit: .value 0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E @@ -704,7 +710,7 @@ gcm_ghash_avx: .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.align 64 +.balign 64 /* Mark the stack non-executable. */ #if defined(__linux__) && defined(__ELF__) |