diff options
author | Jung-uk Kim <jkim@FreeBSD.org> | 2018-09-13 19:18:07 +0000 |
---|---|---|
committer | Jung-uk Kim <jkim@FreeBSD.org> | 2018-09-13 19:18:07 +0000 |
commit | a43ce912fc025d11e1395506111f75fc194d7ba5 (patch) | |
tree | 9794cf7720d75938ed0ea4f499c0dcd4b6eacdda /crypto/sha/asm/sha256-mb-x86_64.pl | |
parent | 02be298e504b8554caca6dc85af450e1ea44d19d (diff) | |
download | src-a43ce912fc025d11e1395506111f75fc194d7ba5.tar.gz src-a43ce912fc025d11e1395506111f75fc194d7ba5.zip |
Import OpenSSL 1.1.1. (tag: vendor/openssl/1.1.1)
Notes:
svn path=/vendor-crypto/openssl/dist/; revision=338658
svn path=/vendor-crypto/openssl/1.1.1/; revision=338659; tag=vendor/openssl/1.1.1
Diffstat (limited to 'crypto/sha/asm/sha256-mb-x86_64.pl')
-rwxr-xr-x | crypto/sha/asm/sha256-mb-x86_64.pl | 76 |
1 file changed, 65 insertions, 11 deletions
diff --git a/crypto/sha/asm/sha256-mb-x86_64.pl b/crypto/sha/asm/sha256-mb-x86_64.pl index 9770286b9596..73978dbd81d6 100755 --- a/crypto/sha/asm/sha256-mb-x86_64.pl +++ b/crypto/sha/asm/sha256-mb-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2013-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @@ -19,6 +26,7 @@ # Sandy Bridge (20.5 +5.15=25.7)/n 11.6 13.0 +103% # Ivy Bridge (20.4 +5.14=25.5)/n 10.3 11.6 +82% # Haswell(iii) (21.0 +5.00=26.0)/n 7.80 8.79 +170% +# Skylake (18.9 +5.00=23.9)/n 7.70 8.17 +170% # Bulldozer (21.6 +5.76=27.4)/n 13.6 13.7 +100% # # (i) multi-block CBC encrypt with 128-bit key; @@ -28,7 +36,7 @@ # (iii) "this" is for n=8, when we gather twice as much data, result # for n=4 is 20.3+4.44=24.7; # (iv) presented improvement coefficients are asymptotic limits and -# in real-life application are somewhat lower, e.g. for 2KB +# in real-life application are somewhat lower, e.g. 
for 2KB # fragments they range from 75% to 130% (on Haswell); $flavour = shift; @@ -63,7 +71,7 @@ if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|.*based on LLVM) ([ $avx = ($2>=3.0) + ($2>3.0); } -open OUT,"| \"$^X\" $xlate $flavour $output"; +open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""; *STDOUT=*OUT; # void sha256_multi_block ( @@ -236,6 +244,7 @@ $code.=<<___; .type sha256_multi_block,\@function,3 .align 32 sha256_multi_block: +.cfi_startproc mov OPENSSL_ia32cap_P+4(%rip),%rcx bt \$61,%rcx # check SHA bit jc _shaext_shortcut @@ -246,8 +255,11 @@ $code.=<<___ if ($avx); ___ $code.=<<___; mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -266,6 +278,7 @@ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8 .Lbody: lea K256+128(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx @@ -382,7 +395,8 @@ $code.=<<___; jnz .Loop_grande .Ldone: - mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps -0xb8(%rax),%xmm6 @@ -398,10 +412,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue: ret +.cfi_endproc .size sha256_multi_block,.-sha256_multi_block ___ {{{ @@ -413,10 +431,14 @@ $code.=<<___; .type sha256_multi_block_shaext,\@function,3 .align 32 sha256_multi_block_shaext: +.cfi_startproc _shaext_shortcut: mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -442,7 +464,7 @@ $code.=<<___; lea K256_shaext+0x80(%rip),$Tbl .Loop_grande_shaext: - mov $num,`$REG_SZ*17+8`(%rsp) # orignal $num + mov $num,`$REG_SZ*17+8`(%rsp) # original $num xor $num,$num ___ 
for($i=0;$i<2;$i++) { @@ -750,10 +772,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_shaext: ret +.cfi_endproc .size sha256_multi_block_shaext,.-sha256_multi_block_shaext ___ }}} @@ -913,6 +939,7 @@ $code.=<<___; .type sha256_multi_block_avx,\@function,3 .align 32 sha256_multi_block_avx: +.cfi_startproc _avx_shortcut: ___ $code.=<<___ if ($avx>1); @@ -927,8 +954,11 @@ $code.=<<___ if ($avx>1); ___ $code.=<<___; mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -947,6 +977,7 @@ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression %rsp+`$REG_SZ*17`,deref,+8 .Lbody_avx: lea K256+128(%rip),$Tbl lea `$REG_SZ*16`(%rsp),%rbx @@ -1061,7 +1092,8 @@ $code.=<<___; jnz .Loop_grande_avx .Ldone_avx: - mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 vzeroupper ___ $code.=<<___ if ($win64); @@ -1078,10 +1110,14 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_avx: ret +.cfi_endproc .size sha256_multi_block_avx,.-sha256_multi_block_avx ___ if ($avx>1) { @@ -1097,14 +1133,22 @@ $code.=<<___; .type sha256_multi_block_avx2,\@function,3 .align 32 sha256_multi_block_avx2: +.cfi_startproc _avx2_shortcut: mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 ___ $code.=<<___ if ($win64); lea -0xa8(%rsp),%rsp @@ -1123,6 +1167,7 @@ $code.=<<___; sub \$`$REG_SZ*18`, %rsp and \$-256,%rsp mov %rax,`$REG_SZ*17`(%rsp) # original %rsp +.cfi_cfa_expression 
%rsp+`$REG_SZ*17`,deref,+8 .Lbody_avx2: lea K256+128(%rip),$Tbl lea 0x80($ctx),$ctx # size optimization @@ -1237,7 +1282,8 @@ $code.=<<___; #jnz .Loop_grande_avx2 .Ldone_avx2: - mov `$REG_SZ*17`(%rsp),%rax # orignal %rsp + mov `$REG_SZ*17`(%rsp),%rax # original %rsp +.cfi_def_cfa %rax,8 vzeroupper ___ $code.=<<___ if ($win64); @@ -1254,14 +1300,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax),%r15 +.cfi_restore %r15 mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Lepilogue_avx2: ret +.cfi_endproc .size sha256_multi_block_avx2,.-sha256_multi_block_avx2 ___ } }}} @@ -1454,10 +1508,10 @@ avx2_handler: mov -48(%rax),%r15 mov %rbx,144($context) # restore context->Rbx mov %rbp,160($context) # restore context->Rbp - mov %r12,216($context) # restore cotnext->R12 - mov %r13,224($context) # restore cotnext->R13 - mov %r14,232($context) # restore cotnext->R14 - mov %r15,240($context) # restore cotnext->R15 + mov %r12,216($context) # restore context->R12 + mov %r13,224($context) # restore context->R13 + mov %r14,232($context) # restore context->R14 + mov %r15,240($context) # restore context->R15 lea -56-10*16(%rax),%rsi lea 512($context),%rdi # &context.Xmm6 |