src - FreeBSD source tree

diff options


context:
space:
mode:

author	Jung-uk Kim <jkim@FreeBSD.org>	2018-09-13 19:18:07 +0000
committer	Jung-uk Kim <jkim@FreeBSD.org>	2018-09-13 19:18:07 +0000
commit	a43ce912fc025d11e1395506111f75fc194d7ba5 (patch)
tree	9794cf7720d75938ed0ea4f499c0dcd4b6eacdda /crypto/aes/asm/aesni-sha1-x86_64.pl
parent	02be298e504b8554caca6dc85af450e1ea44d19d (diff)
download	src-a43ce912fc025d11e1395506111f75fc194d7ba5.tar.gz src-a43ce912fc025d11e1395506111f75fc194d7ba5.zip

Import OpenSSL 1.1.1. (tag: vendor/openssl/1.1.1)

Notes

Notes: svn path=/vendor-crypto/openssl/dist/; revision=338658 svn path=/vendor-crypto/openssl/1.1.1/; revision=338659; tag=vendor/openssl/1.1.1

Diffstat (limited to 'crypto/aes/asm/aesni-sha1-x86_64.pl')

-rwxr-xr-x

crypto/aes/asm/aesni-sha1-x86_64.pl

1 files changed, 87 insertions, 5 deletions

diff --git a/crypto/aes/asm/aesni-sha1-x86_64.pl b/crypto/aes/asm/aesni-sha1-x86_64.pl
index 7a30e893fbe6..b01a4c55c86a 100755
--- a/crypto/aes/asm/aesni-sha1-x86_64.pl
+++ b/crypto/aes/asm/aesni-sha1-x86_64.pl

@@ -1,4 +1,11 @@

-#!/usr/bin/env perl

+#! /usr/bin/env perl

+# Licensed under the OpenSSL license (the "License"). You may not use

+# this file except in compliance with the License. You can obtain a copy

+# in the file LICENSE in the source distribution or at

+# https://www.openssl.org/source/license.html

# ====================================================================

# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL

@@ -25,7 +32,10 @@

# Sandy Bridge 5.05[+5.0(6.1)] 10.06(11.15) 5.98(7.05) +68%(+58%)

# Ivy Bridge 5.05[+4.6] 9.65 5.54 +74%

# Haswell 4.43[+3.6(4.2)] 8.00(8.58) 4.55(5.21) +75%(+65%)

+# Skylake 2.63[+3.5(4.1)] 6.17(6.69) 4.23(4.44) +46%(+51%)

# Bulldozer 5.77[+6.0] 11.72 6.37 +84%

+# Ryzen(**) 2.71[+1.93] 4.64 2.74 +69%

+# Goldmont(**) 3.82[+1.70] 5.52 4.20 +31%

# AES-192-CBC

# Westmere 4.51 9.81 6.80 +44%

@@ -39,12 +49,16 @@

# Sandy Bridge 7.05 12.06(13.15) 7.12(7.72) +69%(+70%)

# Ivy Bridge 7.05 11.65 7.12 +64%

# Haswell 6.19 9.76(10.34) 6.21(6.25) +57%(+65%)

+# Skylake 3.62 7.16(7.68) 4.56(4.76) +57%(+61%)

# Bulldozer 8.00 13.95 8.25 +69%

+# Ryzen(**) 3.71 5.64 3.72 +52%

+# Goldmont(**) 5.35 7.05 5.76 +22%

# (*) There are two code paths: SSSE3 and AVX. See sha1-586.pl for

# background information. Above numbers in parentheses are SSSE3

# results collected on AVX-capable CPU, i.e. apply on OSes that

# don't support AVX.

+# (**) SHAEXT results.

# Needless to mention that it makes no sense to implement "stitched"

# *decrypt* subroutine. Because *both* AESNI-CBC decrypt and SHA1

@@ -100,7 +114,7 @@ $shaext=1; ### set to zero if compiling for 1.0.1

$stitched_decrypt=0;

-open OUT,"| \"$^X\" $xlate $flavour $output";

+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";

*STDOUT=*OUT;

# void aesni_cbc_sha1_enc(const void *inp,

@@ -177,16 +191,24 @@ $code.=<<___;

.type aesni_cbc_sha1_enc_ssse3,\@function,6

.align 32

aesni_cbc_sha1_enc_ssse3:

+.cfi_startproc

mov `($win64?56:8)`(%rsp),$inp # load 7th argument

#shr \$6,$len # debugging artefact

#jz .Lepilogue_ssse3 # debugging artefact

push %rbx

+.cfi_push %rbx

push %rbp

+.cfi_push %rbp

push %r12

+.cfi_push %r12

push %r13

+.cfi_push %r13

push %r14

+.cfi_push %r14

push %r15

+.cfi_push %r15

lea `-104-($win64?10*16:0)`(%rsp),%rsp

+.cfi_adjust_cfa_offset `104+($win64?10*16:0)`

#mov $in0,$inp # debugging artefact

#lea 64(%rsp),$ctx # debugging artefact

___

@@ -298,7 +320,7 @@ ___

$r++; unshift(@rndkey,pop(@rndkey));

};

-sub Xupdate_ssse3_16_31() # recall that $Xi starts wtih 4

+sub Xupdate_ssse3_16_31() # recall that $Xi starts with 4

{ use integer;

my $body = shift;

my @insns = (&$body,&$body,&$body,&$body); # 40 instructions

@@ -712,15 +734,24 @@ $code.=<<___ if ($win64);

___

$code.=<<___;

lea `104+($win64?10*16:0)`(%rsp),%rsi

+.cfi_def_cfa %rsi,56

mov 0(%rsi),%r15

+.cfi_restore %r15

mov 8(%rsi),%r14

+.cfi_restore %r14

mov 16(%rsi),%r13

+.cfi_restore %r13

mov 24(%rsi),%r12

+.cfi_restore %r12

mov 32(%rsi),%rbp

+.cfi_restore %rbp

mov 40(%rsi),%rbx

+.cfi_restore %rbx

lea 48(%rsi),%rsp

+.cfi_def_cfa %rsp,8

.Lepilogue_ssse3:

ret

+.cfi_endproc

.size aesni_cbc_sha1_enc_ssse3,.-aesni_cbc_sha1_enc_ssse3

___

@@ -784,7 +815,7 @@ sub body_00_19_dec () { # ((c^d)&b)^d

sub body_20_39_dec () { # b^d^c

# on entry @T[0]=b^d

return &body_40_59_dec() if ($rx==39);

my @r=@body_20_39;

unshift (@r,@aes256_dec[$rx]) if (@aes256_dec[$rx]);

@@ -828,14 +859,22 @@ $code.=<<___;

.type aesni256_cbc_sha1_dec_ssse3,\@function,6

.align 32

aesni256_cbc_sha1_dec_ssse3:

+.cfi_startproc

mov `($win64?56:8)`(%rsp),$inp # load 7th argument

push %rbx

+.cfi_push %rbx

push %rbp

+.cfi_push %rbp

push %r12

+.cfi_push %r12

push %r13

+.cfi_push %r13

push %r14

+.cfi_push %r14

push %r15

+.cfi_push %r15

lea `-104-($win64?10*16:0)`(%rsp),%rsp

+.cfi_adjust_cfa_offset `104+($win64?10*16:0)`

___

$code.=<<___ if ($win64);

movaps %xmm6,96+0(%rsp)

@@ -983,15 +1022,24 @@ $code.=<<___ if ($win64);

___

$code.=<<___;

lea `104+($win64?10*16:0)`(%rsp),%rsi

+.cfi_def_cfa %rsi,56

mov 0(%rsi),%r15

+.cfi_restore %r15

mov 8(%rsi),%r14

+.cfi_restore %r14

mov 16(%rsi),%r13

+.cfi_restore %r13

mov 24(%rsi),%r12

+.cfi_restore %r12

mov 32(%rsi),%rbp

+.cfi_restore %rbp

mov 40(%rsi),%rbx

+.cfi_restore %rbx

lea 48(%rsi),%rsp

+.cfi_def_cfa %rsp,8

.Lepilogue_dec_ssse3:

ret

+.cfi_endproc

.size aesni256_cbc_sha1_dec_ssse3,.-aesni256_cbc_sha1_dec_ssse3

___

}}}

@@ -1017,16 +1065,24 @@ $code.=<<___;

.type aesni_cbc_sha1_enc_avx,\@function,6

.align 32

aesni_cbc_sha1_enc_avx:

+.cfi_startproc

mov `($win64?56:8)`(%rsp),$inp # load 7th argument

#shr \$6,$len # debugging artefact

#jz .Lepilogue_avx # debugging artefact

push %rbx

+.cfi_push %rbx

push %rbp

+.cfi_push %rbp

push %r12

+.cfi_push %r12

push %r13

+.cfi_push %r13

push %r14

+.cfi_push %r14

push %r15

+.cfi_push %r15

lea `-104-($win64?10*16:0)`(%rsp),%rsp

+.cfi_adjust_cfa_offset `104+($win64?10*16:0)`

#mov $in0,$inp # debugging artefact

#lea 64(%rsp),$ctx # debugging artefact

___

@@ -1137,7 +1193,7 @@ ___

$r++; unshift(@rndkey,pop(@rndkey));

};

-sub Xupdate_avx_16_31() # recall that $Xi starts wtih 4

+sub Xupdate_avx_16_31() # recall that $Xi starts with 4

{ use integer;

my $body = shift;

my @insns = (&$body,&$body,&$body,&$body); # 40 instructions

@@ -1425,15 +1481,24 @@ $code.=<<___ if ($win64);

___

$code.=<<___;

lea `104+($win64?10*16:0)`(%rsp),%rsi

+.cfi_def_cfa %rsi,56

mov 0(%rsi),%r15

+.cfi_restore %r15

mov 8(%rsi),%r14

+.cfi_restore %r14

mov 16(%rsi),%r13

+.cfi_restore %r13

mov 24(%rsi),%r12

+.cfi_restore %r12

mov 32(%rsi),%rbp

+.cfi_restore %rbp

mov 40(%rsi),%rbx

+.cfi_restore %rbx

lea 48(%rsi),%rsp

+.cfi_def_cfa %rsp,8

.Lepilogue_avx:

ret

+.cfi_endproc

.size aesni_cbc_sha1_enc_avx,.-aesni_cbc_sha1_enc_avx

___

@@ -1482,14 +1547,22 @@ $code.=<<___;

.type aesni256_cbc_sha1_dec_avx,\@function,6

.align 32

aesni256_cbc_sha1_dec_avx:

+.cfi_startproc

mov `($win64?56:8)`(%rsp),$inp # load 7th argument

push %rbx

+.cfi_push %rbx

push %rbp

+.cfi_push %rbp

push %r12

+.cfi_push %r12

push %r13

+.cfi_push %r13

push %r14

+.cfi_push %r14

push %r15

+.cfi_push %r15

lea `-104-($win64?10*16:0)`(%rsp),%rsp

+.cfi_adjust_cfa_offset `104+($win64?10*16:0)`

___

$code.=<<___ if ($win64);

movaps %xmm6,96+0(%rsp)

@@ -1636,15 +1709,24 @@ $code.=<<___ if ($win64);

___

$code.=<<___;

lea `104+($win64?10*16:0)`(%rsp),%rsi

+.cfi_def_cfa %rsi,56

mov 0(%rsi),%r15

+.cfi_restore %r15

mov 8(%rsi),%r14

+.cfi_restore %r14

mov 16(%rsi),%r13

+.cfi_restore %r13

mov 24(%rsi),%r12

+.cfi_restore %r12

mov 32(%rsi),%rbp

+.cfi_restore %rbp

mov 40(%rsi),%rbx

+.cfi_restore %rbx

lea 48(%rsi),%rsp

+.cfi_def_cfa %rsp,8

.Lepilogue_dec_avx:

ret

+.cfi_endproc

.size aesni256_cbc_sha1_dec_avx,.-aesni256_cbc_sha1_dec_avx

___

}}}