aboutsummaryrefslogtreecommitdiff
path: root/crypto/modes/asm
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/modes/asm')
-rwxr-xr-xcrypto/modes/asm/aesni-gcm-x86_64.pl2
-rwxr-xr-xcrypto/modes/asm/ghash-armv4.pl13
-rwxr-xr-xcrypto/modes/asm/ghash-sparcv9.pl18
-rwxr-xr-xcrypto/modes/asm/ghash-x86.pl2
-rwxr-xr-xcrypto/modes/asm/ghash-x86_64.pl10
-rwxr-xr-xcrypto/modes/asm/ghashp8-ppc.pl12
-rwxr-xr-xcrypto/modes/asm/ghashv8-armx.pl22
7 files changed, 42 insertions, 37 deletions
diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl b/crypto/modes/asm/aesni-gcm-x86_64.pl
index 7e4e04ea2530..4be25571ea28 100755
--- a/crypto/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/modes/asm/aesni-gcm-x86_64.pl
@@ -56,7 +56,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
$avx = ($1>=10) + ($1>=11);
}
-if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9]\.[0-9]+)/) {
$avx = ($2>=3.0) + ($2>3.0);
}
diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl
index 77fbf34465db..8ccc963ef297 100755
--- a/crypto/modes/asm/ghash-armv4.pl
+++ b/crypto/modes/asm/ghash-armv4.pl
@@ -45,7 +45,7 @@
# processes one byte in 8.45 cycles, A9 - in 10.2, Snapdragon S4 -
# in 9.33.
#
-# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
+# Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R.: Fast Software
# Polynomial Multiplication on ARM Processors using the NEON Engine.
#
# http://conradoplg.cryptoland.net/files/2010/12/mocrysen13.pdf
@@ -126,6 +126,11 @@ $code=<<___;
.text
.code 32
+#ifdef __clang__
+#define ldrplb ldrbpl
+#define ldrneb ldrbne
+#endif
+
.type rem_4bit,%object
.align 5
rem_4bit:
@@ -432,12 +437,12 @@ gcm_ghash_neon:
veor $IN,$Xl @ inp^=Xi
.Lgmult_neon:
___
- &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
+ &clmul64x64 ($Xl,$Hlo,"$IN#lo"); # H.lo·Xi.lo
$code.=<<___;
veor $IN#lo,$IN#lo,$IN#hi @ Karatsuba pre-processing
___
- &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
- &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
+ &clmul64x64 ($Xm,$Hhl,"$IN#lo"); # (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ &clmul64x64 ($Xh,$Hhi,"$IN#hi"); # H.hi·Xi.hi
$code.=<<___;
veor $Xm,$Xm,$Xl @ Karatsuba post-processing
veor $Xm,$Xm,$Xh
diff --git a/crypto/modes/asm/ghash-sparcv9.pl b/crypto/modes/asm/ghash-sparcv9.pl
index 0365e0f1ff42..5bc28702019a 100755
--- a/crypto/modes/asm/ghash-sparcv9.pl
+++ b/crypto/modes/asm/ghash-sparcv9.pl
@@ -379,7 +379,7 @@ gcm_init_vis3:
or $V,%lo(0xA0406080),$V
or %l0,%lo(0x20C0E000),%l0
sllx $V,32,$V
- or %l0,$V,$V ! (0xE0·i)&0xff=0xA040608020C0E000
+ or %l0,$V,$V ! (0xE0·i)&0xff=0xA040608020C0E000
stx $V,[%i0+16]
ret
@@ -399,7 +399,7 @@ gcm_gmult_vis3:
mov 0xE1,%l7
sllx %l7,57,$xE1 ! 57 is not a typo
- ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
+ ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
xor $Hhi,$Hlo,$Hhl ! Karatsuba pre-processing
xmulx $Xlo,$Hlo,$C0
@@ -411,9 +411,9 @@ gcm_gmult_vis3:
xmulx $Xhi,$Hhi,$Xhi
sll $C0,3,$sqr
- srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
+ srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
xor $C0,$sqr,$sqr
- sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
+ sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
xor $C0,$C1,$C1 ! Karatsuba post-processing
xor $Xlo,$C2,$C2
@@ -423,7 +423,7 @@ gcm_gmult_vis3:
xor $Xhi,$C2,$C2
xor $Xhi,$C1,$C1
- xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
+ xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
xor $C0,$C2,$C2
xmulx $C1,$xE1,$C0
xor $C1,$C3,$C3
@@ -453,7 +453,7 @@ gcm_ghash_vis3:
mov 0xE1,%l7
sllx %l7,57,$xE1 ! 57 is not a typo
- ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
+ ldx [$Htable+16],$V ! (0xE0·i)&0xff=0xA040608020C0E000
and $inp,7,$shl
andn $inp,7,$inp
@@ -490,9 +490,9 @@ gcm_ghash_vis3:
xmulx $Xhi,$Hhi,$Xhi
sll $C0,3,$sqr
- srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
+ srlx $V,$sqr,$sqr ! ·0xE0 [implicit &(7<<3)]
xor $C0,$sqr,$sqr
- sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
+ sllx $sqr,57,$sqr ! ($C0·0xE1)<<1<<56 [implicit &0x7f]
xor $C0,$C1,$C1 ! Karatsuba post-processing
xor $Xlo,$C2,$C2
@@ -502,7 +502,7 @@ gcm_ghash_vis3:
xor $Xhi,$C2,$C2
xor $Xhi,$C1,$C1
- xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
+ xmulxhi $C0,$xE1,$Xlo ! ·0xE1<<1<<56
xor $C0,$C2,$C2
xmulx $C1,$xE1,$C0
xor $C1,$C3,$C3
diff --git a/crypto/modes/asm/ghash-x86.pl b/crypto/modes/asm/ghash-x86.pl
index 23a5527b30af..0269169fa743 100755
--- a/crypto/modes/asm/ghash-x86.pl
+++ b/crypto/modes/asm/ghash-x86.pl
@@ -358,7 +358,7 @@ $S=12; # shift factor for rem_4bit
# effective address calculation and finally merge of value to Z.hi.
# Reference to rem_4bit is scheduled so late that I had to >>4
# rem_4bit elements. This resulted in 20-45% procent improvement
-# on contemporary µ-archs.
+# on contemporary µ-archs.
{
my $cnt;
my $rem_4bit = "eax";
diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl
index 6e656ca13b80..0bcb6d4e028b 100755
--- a/crypto/modes/asm/ghash-x86_64.pl
+++ b/crypto/modes/asm/ghash-x86_64.pl
@@ -105,7 +105,7 @@ if (!$avx && $win64 && ($flavour =~ /masm/ || $ENV{ASM} =~ /ml64/) &&
$avx = ($1>=10) + ($1>=11);
}
-if (!$avx && `$ENV{CC} -v 2>&1` =~ /(^clang version|based on LLVM) ([3-9]\.[0-9]+)/) {
+if (!$avx && `$ENV{CC} -v 2>&1` =~ /((?:^clang|LLVM) version|based on LLVM) ([3-9]\.[0-9]+)/) {
$avx = ($2>=3.0) + ($2>3.0);
}
@@ -576,15 +576,15 @@ $code.=<<___ if (0 || (&reduction_alg9($Xhi,$Xi)&&0));
# experimental alternative. special thing about is that there
# no dependency between the two multiplications...
mov \$`0xE1<<1`,%eax
- mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff
+ mov \$0xA040608020C0E000,%r10 # ((7..0)·0xE0)&0xff
mov \$0x07,%r11d
movq %rax,$T1
movq %r10,$T2
movq %r11,$T3 # borrow $T3
pand $Xi,$T3
- pshufb $T3,$T2 # ($Xi&7)·0xE0
+ pshufb $T3,$T2 # ($Xi&7)·0xE0
movq %rax,$T3
- pclmulqdq \$0x00,$Xi,$T1 # ·(0xE1<<1)
+ pclmulqdq \$0x00,$Xi,$T1 # ·(0xE1<<1)
pxor $Xi,$T2
pslldq \$15,$T2
paddd $T2,$T2 # <<(64+56+1)
@@ -657,7 +657,7 @@ $code.=<<___;
je .Lskip4x
sub \$0x30,$len
- mov \$0xA040608020C0E000,%rax # ((7..0)·0xE0)&0xff
+ mov \$0xA040608020C0E000,%rax # ((7..0)·0xE0)&0xff
movdqu 0x30($Htbl),$Hkey3
movdqu 0x40($Htbl),$Hkey4
diff --git a/crypto/modes/asm/ghashp8-ppc.pl b/crypto/modes/asm/ghashp8-ppc.pl
index e76a58c343c1..71457cf4fc59 100755
--- a/crypto/modes/asm/ghashp8-ppc.pl
+++ b/crypto/modes/asm/ghashp8-ppc.pl
@@ -118,9 +118,9 @@ $code=<<___;
le?vperm $IN,$IN,$IN,$lemask
vxor $zero,$zero,$zero
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
vpmsumd $t2,$Xl,$xC2 # 1st phase
@@ -178,11 +178,11 @@ $code=<<___;
.align 5
Loop:
subic $len,$len,16
- vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
+ vpmsumd $Xl,$IN,$Hl # H.lo·Xi.lo
subfe. r0,r0,r0 # borrow?-1:0
- vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
+ vpmsumd $Xm,$IN,$H # H.hi·Xi.lo+H.lo·Xi.hi
and r0,r0,$len
- vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
+ vpmsumd $Xh,$IN,$Hh # H.hi·Xi.hi
add $inp,$inp,r0
vpmsumd $t2,$Xl,$xC2 # 1st phase
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
index 0b9cd7359fba..0886d2180702 100755
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -135,10 +135,10 @@ gcm_gmult_v8:
#endif
vext.8 $IN,$t1,$t1,#8
- vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
+ vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
veor $t1,$t1,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
- vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
+ vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh
@@ -226,7 +226,7 @@ $code.=<<___;
#endif
vext.8 $In,$t1,$t1,#8
veor $IN,$IN,$Xl @ I[i]^=Xi
- vpmull.p64 $Xln,$H,$In @ H·Ii+1
+ vpmull.p64 $Xln,$H,$In @ H·Ii+1
veor $t1,$t1,$In @ Karatsuba pre-processing
vpmull2.p64 $Xhn,$H,$In
b .Loop_mod2x_v8
@@ -235,14 +235,14 @@ $code.=<<___;
.Loop_mod2x_v8:
vext.8 $t2,$IN,$IN,#8
subs $len,$len,#32 @ is there more data?
- vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo
+ vpmull.p64 $Xl,$H2,$IN @ H^2.lo·Xi.lo
cclr $inc,lo @ is it time to zero $inc?
vpmull.p64 $Xmn,$Hhl,$t1
veor $t2,$t2,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi
+ vpmull2.p64 $Xh,$H2,$IN @ H^2.hi·Xi.hi
veor $Xl,$Xl,$Xln @ accumulate
- vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xm,$Hhl,$t2 @ (H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
vld1.64 {$t0},[$inp],$inc @ load [rotated] I[i+2]
veor $Xh,$Xh,$Xhn
@@ -267,7 +267,7 @@ $code.=<<___;
vext.8 $In,$t1,$t1,#8
vext.8 $IN,$t0,$t0,#8
veor $Xl,$Xm,$t2
- vpmull.p64 $Xln,$H,$In @ H·Ii+1
+ vpmull.p64 $Xln,$H,$In @ H·Ii+1
veor $IN,$IN,$Xh @ accumulate $IN early
vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
@@ -291,10 +291,10 @@ $code.=<<___;
veor $IN,$IN,$Xl @ inp^=Xi
veor $t1,$t0,$t2 @ $t1 is rotated inp^Xi
- vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
+ vpmull.p64 $Xl,$H,$IN @ H.lo·Xi.lo
veor $t1,$t1,$IN @ Karatsuba pre-processing
- vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
- vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
+ vpmull2.p64 $Xh,$H,$IN @ H.hi·Xi.hi
+ vpmull.p64 $Xm,$Hhl,$t1 @ (H.lo+H.hi)·(Xi.lo+Xi.hi)
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh