diff options
Diffstat (limited to 'crypto/sha/asm/sha1-mb-x86_64.pl')
-rwxr-xr-x | crypto/sha/asm/sha1-mb-x86_64.pl | 80 |
1 files changed, 67 insertions, 13 deletions
diff --git a/crypto/sha/asm/sha1-mb-x86_64.pl b/crypto/sha/asm/sha1-mb-x86_64.pl index a8d8708d4b75..443b649830f4 100755 --- a/crypto/sha/asm/sha1-mb-x86_64.pl +++ b/crypto/sha/asm/sha1-mb-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -19,6 +26,7 @@ # Sandy Bridge (8.16 +5.15=13.3)/n 4.99 5.98 +80% # Ivy Bridge (8.08 +5.14=13.2)/n 4.60 5.54 +68% # Haswell(iii) (8.96 +5.00=14.0)/n 3.57 4.55 +160% +# Skylake (8.70 +5.00=13.7)/n 3.64 4.20 +145% # Bulldozer (9.76 +5.76=15.5)/n 5.95 6.37 +64% # # (i) multi-block CBC encrypt with 128-bit key; @@ -62,7 +70,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([ $avx = ($2>=3.0) + ($2>3.0); } -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; # void sha1_multi_block ( @@ -87,7 +95,7 @@ $K="%xmm15"; if (1) { # Atom-specific optimization aiming to eliminate pshufb with high - # registers [and thus get rid of 48 cycles accumulated penalty] + # registers [and thus get rid of 48 cycles accumulated penalty] @Xi=map("%xmm$_",(0..4)); ($tx,$t0,$t1,$t2,$t3)=map("%xmm$_",(5..9)); @V=($A,$B,$C,$D,$E)=map("%xmm$_",(10..14)); @@ -118,7 +126,7 @@ my $k=$i+2; # ... # $i==13: 14,15,15,15, # $i==14: 15 -# +# # Then at $i==15 Xupdate is applied one iteration in advance... $code.=<<___ if ($i==0); movd (@ptr[0]),@Xi[0] @@ -355,6 +363,7 @@ $code.=<<___; .type sha1_multi_block,\@function,3 .align 32 sha1_multi_block: +.cfi_startproc mov OPENSSL_ia32cap_P+4(%rip),%rcx bt \$61,%rcx # check SHA bit jc _shaext_shortcut @@ -365,8 +374,11 @@ $code.=<<___ if ($avx); ___ $code.=<<___; mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbx ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -385,6 +397,7 @@ $code.=<<___; sub \$`$REG_SZ*18`,%rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8 .Lbody: lea K_XX_XX(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx @@ -431,7 +444,7 @@ for(;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); } $code.=<<___; movdqa (%rbx),@Xi[0] # pull counters mov \$1,%ecx - cmp 4*0(%rbx),%ecx # examinte counters + cmp 4*0(%rbx),%ecx # examine counters pxor $t2,$t2 cmovge $Tbl,@ptr[0] # cancel input cmp 4*1(%rbx),%ecx @@ -478,7 +491,8 @@ $code.=<<___; jnz .Loop_grande .Ldone: - mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps -0xb8(%rax),%xmm6 @@ -494,10 +508,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue: ret +.cfi_endproc .size sha1_multi_block,.-sha1_multi_block ___ {{{ @@ -509,10 +527,14 @@ $code.=<<___; .type sha1_multi_block_shaext,\@function,3 .align 32 sha1_multi_block_shaext: +.cfi_startproc _shaext_shortcut: mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -538,7 +560,7 @@ $code.=<<___; movdqa K_XX_XX+0x80(%rip),$BSWAP # byte-n-word swap .Loop_grande_shaext: - mov $num,`$REG_SZ*17+8`(%rsp) # orignal $num + mov $num,`$REG_SZ*17+8`(%rsp) # original $num xor $num,$num ___ for($i=0;$i<2;$i++) { @@ -748,10 +770,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_shaext: ret +.cfi_endproc .size sha1_multi_block_shaext,.-sha1_multi_block_shaext ___ }}} @@ -994,6 +1020,7 @@ $code.=<<___; .type sha1_multi_block_avx,\@function,3 .align 32 sha1_multi_block_avx: +.cfi_startproc _avx_shortcut: ___ $code.=<<___ if ($avx>1); @@ -1008,8 +1035,11 @@ $code.=<<___ if ($avx>1); ___ $code.=<<___; mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -1028,6 +1058,7 @@ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8 .Lbody_avx: lea K_XX_XX(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx @@ -1116,7 +1147,8 @@ $code.=<<___; jnz .Loop_grande_avx .Ldone_avx: - mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 vzeroupper ___ $code.=<<___ if ($win64); @@ -1133,10 +1165,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_avx: ret +.cfi_endproc .size sha1_multi_block_avx,.-sha1_multi_block_avx ___ @@ -1156,14 +1192,22 @@ $code.=<<___; .type sha1_multi_block_avx2,\@function,3 .align 32 sha1_multi_block_avx2: +.cfi_startproc _avx2_shortcut: mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -1182,6 +1226,7 @@ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8 .Lbody_avx2: lea K_XX_XX(%rip),$Tbl shr \$1,$num @@ -1271,7 +1316,8 @@ $code.=<<___; #jnz .Loop_grande_avx2 .Ldone_avx2: - mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 vzeroupper ___ $code.=<<___ if ($win64); @@ -1288,14 +1334,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_avx2: ret +.cfi_endproc .size sha1_multi_block_avx2,.-sha1_multi_block_avx2 ___ } }}} @@ -1454,10 +1508,10 @@ avx2_handler: mov -48(%rax),%r15 mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore cotnext->R12 - mov %r13,224($context) # restore cotnext->R13 - mov %r14,232($context) # restore cotnext->R14 - mov %r15,240($context) # restore cotnext->R15 + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 lea -56-10*16(%rax),%rsi lea 512($context),%rdi # &context.Xmm6 |