aboutsummaryrefslogtreecommitdiff
path: root/crypto/x86_64cpuid.pl
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/x86_64cpuid.pl')
-rw-r--r--crypto/x86_64cpuid.pl226
1 files changed, 198 insertions, 28 deletions
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index ef3608b13495..6423e803b759 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -1,4 +1,11 @@
-#!/usr/bin/env perl
+#! /usr/bin/env perl
+# Copyright 2005-2018 The OpenSSL Project Authors. All Rights Reserved.
+#
+# Licensed under the OpenSSL license (the "License"). You may not use
+# this file except in compliance with the License. You can obtain a copy
+# in the file LICENSE in the source distribution or at
+# https://www.openssl.org/source/license.html
+
$flavour = shift;
$output = shift;
@@ -11,7 +18,7 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}perlasm/x86_64-xlate.pl" and -f $xlate) or
die "can't locate x86_64-xlate.pl";
-open OUT,"| \"$^X\" $xlate $flavour $output";
+open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"";
*STDOUT=*OUT;
($arg1,$arg2,$arg3,$arg4)=$win64?("%rcx","%rdx","%r8", "%r9") : # Win64 order
@@ -56,10 +63,12 @@ OPENSSL_rdtsc:
.type OPENSSL_ia32_cpuid,\@function,1
.align 16
OPENSSL_ia32_cpuid:
+.cfi_startproc
mov %rbx,%r8 # save %rbx
+.cfi_register %rbx,%r8
xor %eax,%eax
- mov %eax,8(%rdi) # clear extended feature flags
+ mov %rax,8(%rdi) # clear extended feature flags
cpuid
mov %eax,%r11d # max value for standard query level
@@ -130,6 +139,7 @@ OPENSSL_ia32_cpuid:
.Lnocacheinfo:
mov \$1,%eax
cpuid
+ movd %eax,%xmm0 # put aside processor id
and \$0xbfefffff,%edx # force reserved bits to 0
cmp \$0,%r9d
jne .Lnotintel
@@ -177,26 +187,45 @@ OPENSSL_ia32_cpuid:
jc .Lnotknights
and \$0xfff7ffff,%ebx # clear ADCX/ADOX flag
.Lnotknights:
+ movd %xmm0,%eax # restore processor id
+ and \$0x0fff0ff0,%eax
+ cmp \$0x00050650,%eax # Skylake-X
+ jne .Lnotskylakex
+ and \$0xfffeffff,%ebx # ~(1<<16)
+ # suppress AVX512F flag on Skylake-X
+.Lnotskylakex:
mov %ebx,8(%rdi) # save extended feature flags
+ mov %ecx,12(%rdi)
.Lno_extended_info:
bt \$27,%r9d # check OSXSAVE bit
jnc .Lclear_avx
xor %ecx,%ecx # XCR0
.byte 0x0f,0x01,0xd0 # xgetbv
+ and \$0xe6,%eax # isolate XMM, YMM and ZMM state support
+ cmp \$0xe6,%eax
+ je .Ldone
+ andl \$0x3fdeffff,8(%rdi) # ~(1<<31|1<<30|1<<21|1<<16)
+ # clear AVX512F+BW+VL+FIMA, all of
+ # them are EVEX-encoded, which requires
+ # ZMM state support even if one uses
+ # only XMM and YMM :-(
and \$6,%eax # isolate XMM and YMM state support
cmp \$6,%eax
je .Ldone
.Lclear_avx:
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
and %eax,%r9d # clear AVX, FMA and AMD XOP bits
- andl \$0xffffffdf,8(%rdi) # clear AVX2, ~(1<<5)
+ mov \$0x3fdeffdf,%eax # ~(1<<31|1<<30|1<<21|1<<16|1<<5)
+ and %eax,8(%rdi) # clear AVX2 and AVX512* bits
.Ldone:
shl \$32,%r9
mov %r10d,%eax
mov %r8,%rbx # restore %rbx
+.cfi_restore %rbx
or %r9,%rax
ret
+.cfi_endproc
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
.globl OPENSSL_cleanse
@@ -233,6 +262,40 @@ OPENSSL_cleanse:
jne .Little
ret
.size OPENSSL_cleanse,.-OPENSSL_cleanse
+
+.globl CRYPTO_memcmp
+.type CRYPTO_memcmp,\@abi-omnipotent
+.align 16
+CRYPTO_memcmp:
+ xor %rax,%rax
+ xor %r10,%r10
+ cmp \$0,$arg3
+ je .Lno_data
+ cmp \$16,$arg3
+ jne .Loop_cmp
+ mov ($arg1),%r10
+ mov 8($arg1),%r11
+ mov \$1,$arg3
+ xor ($arg2),%r10
+ xor 8($arg2),%r11
+ or %r11,%r10
+ cmovnz $arg3,%rax
+ ret
+
+.align 16
+.Loop_cmp:
+ mov ($arg1),%r10b
+ lea 1($arg1),$arg1
+ xor ($arg2),%r10b
+ lea 1($arg2),$arg2
+ or %r10b,%al
+ dec $arg3
+ jnz .Loop_cmp
+ neg %rax
+ shr \$63,%rax
+.Lno_data:
+ ret
+.size CRYPTO_memcmp,.-CRYPTO_memcmp
___
print<<___ if (!$win64);
@@ -289,37 +352,144 @@ OPENSSL_wipe_cpu:
ret
.size OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
___
+{
+my $out="%r10";
+my $cnt="%rcx";
+my $max="%r11";
+my $lasttick="%r8d";
+my $lastdiff="%r9d";
+my $redzone=win64?8:-8;
print<<___;
-.globl OPENSSL_ia32_rdrand
-.type OPENSSL_ia32_rdrand,\@abi-omnipotent
+.globl OPENSSL_instrument_bus
+.type OPENSSL_instrument_bus,\@abi-omnipotent
.align 16
-OPENSSL_ia32_rdrand:
- mov \$8,%ecx
-.Loop_rdrand:
- rdrand %rax
- jc .Lbreak_rdrand
- loop .Loop_rdrand
-.Lbreak_rdrand:
- cmp \$0,%rax
- cmove %rcx,%rax
+OPENSSL_instrument_bus:
+ mov $arg1,$out # tribute to Win64
+ mov $arg2,$cnt
+ mov $arg2,$max
+
+ rdtsc # collect 1st tick
+ mov %eax,$lasttick # lasttick = tick
+ mov \$0,$lastdiff # lastdiff = 0
+ clflush ($out)
+ .byte 0xf0 # lock
+ add $lastdiff,($out)
+ jmp .Loop
+.align 16
+.Loop: rdtsc
+ mov %eax,%edx
+ sub $lasttick,%eax
+ mov %edx,$lasttick
+ mov %eax,$lastdiff
+ clflush ($out)
+ .byte 0xf0 # lock
+ add %eax,($out)
+ lea 4($out),$out
+ sub \$1,$cnt
+ jnz .Loop
+
+ mov $max,%rax
ret
-.size OPENSSL_ia32_rdrand,.-OPENSSL_ia32_rdrand
+.size OPENSSL_instrument_bus,.-OPENSSL_instrument_bus
-.globl OPENSSL_ia32_rdseed
-.type OPENSSL_ia32_rdseed,\@abi-omnipotent
+.globl OPENSSL_instrument_bus2
+.type OPENSSL_instrument_bus2,\@abi-omnipotent
.align 16
-OPENSSL_ia32_rdseed:
- mov \$8,%ecx
-.Loop_rdseed:
- rdseed %rax
- jc .Lbreak_rdseed
- loop .Loop_rdseed
-.Lbreak_rdseed:
- cmp \$0,%rax
- cmove %rcx,%rax
+OPENSSL_instrument_bus2:
+ mov $arg1,$out # tribute to Win64
+ mov $arg2,$cnt
+ mov $arg3,$max
+ mov $cnt,$redzone(%rsp)
+
+ rdtsc # collect 1st tick
+ mov %eax,$lasttick # lasttick = tick
+ mov \$0,$lastdiff # lastdiff = 0
+
+ clflush ($out)
+ .byte 0xf0 # lock
+ add $lastdiff,($out)
+
+ rdtsc # collect 1st diff
+ mov %eax,%edx
+ sub $lasttick,%eax # diff
+ mov %edx,$lasttick # lasttick = tick
+ mov %eax,$lastdiff # lastdiff = diff
+.Loop2:
+ clflush ($out)
+ .byte 0xf0 # lock
+ add %eax,($out) # accumulate diff
+
+ sub \$1,$max
+ jz .Ldone2
+
+ rdtsc
+ mov %eax,%edx
+ sub $lasttick,%eax # diff
+ mov %edx,$lasttick # lasttick = tick
+ cmp $lastdiff,%eax
+ mov %eax,$lastdiff # lastdiff = diff
+ mov \$0,%edx
+ setne %dl
+ sub %rdx,$cnt # conditional --$cnt
+ lea ($out,%rdx,4),$out # conditional ++$out
+ jnz .Loop2
+
+.Ldone2:
+ mov $redzone(%rsp),%rax
+ sub $cnt,%rax
+ ret
+.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
+___
+}
+
+sub gen_random {
+my $rdop = shift;
+print<<___;
+.globl OPENSSL_ia32_${rdop}_bytes
+.type OPENSSL_ia32_${rdop}_bytes,\@abi-omnipotent
+.align 16
+OPENSSL_ia32_${rdop}_bytes:
+ xor %rax, %rax # return value
+ cmp \$0,$arg2
+ je .Ldone_${rdop}_bytes
+
+ mov \$8,%r11
+.Loop_${rdop}_bytes:
+ ${rdop} %r10
+ jc .Lbreak_${rdop}_bytes
+ dec %r11
+ jnz .Loop_${rdop}_bytes
+ jmp .Ldone_${rdop}_bytes
+
+.align 16
+.Lbreak_${rdop}_bytes:
+ cmp \$8,$arg2
+ jb .Ltail_${rdop}_bytes
+ mov %r10,($arg1)
+ lea 8($arg1),$arg1
+ add \$8,%rax
+ sub \$8,$arg2
+ jz .Ldone_${rdop}_bytes
+ mov \$8,%r11
+ jmp .Loop_${rdop}_bytes
+
+.align 16
+.Ltail_${rdop}_bytes:
+ mov %r10b,($arg1)
+ lea 1($arg1),$arg1
+ inc %rax
+ shr \$8,%r10
+ dec $arg2
+ jnz .Ltail_${rdop}_bytes
+
+.Ldone_${rdop}_bytes:
+ xor %r10,%r10 # Clear sensitive data from register
ret
-.size OPENSSL_ia32_rdseed,.-OPENSSL_ia32_rdseed
+.size OPENSSL_ia32_${rdop}_bytes,.-OPENSSL_ia32_${rdop}_bytes
___
+}
+gen_random("rdrand");
+gen_random("rdseed");
close STDOUT; # flush