aboutsummaryrefslogtreecommitdiff
path: root/crypto/aes/asm/aesp8-ppc.pl
diff options
context:
space:
mode:
Diffstat (limited to 'crypto/aes/asm/aesp8-ppc.pl')
-rwxr-xr-xcrypto/aes/asm/aesp8-ppc.pl157
1 files changed, 103 insertions, 54 deletions
diff --git a/crypto/aes/asm/aesp8-ppc.pl b/crypto/aes/asm/aesp8-ppc.pl
index 22a538f9e24f..f7f78d04b0e1 100755
--- a/crypto/aes/asm/aesp8-ppc.pl
+++ b/crypto/aes/asm/aesp8-ppc.pl
@@ -1,7 +1,7 @@
#! /usr/bin/env perl
-# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
+# Copyright 2014-2024 The OpenSSL Project Authors. All Rights Reserved.
#
-# Licensed under the OpenSSL license (the "License"). You may not use
+# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
@@ -43,7 +43,10 @@
# POWER9[le] 4.02/0.86 0.84 1.05
# POWER9[be] 3.99/0.78 0.79 0.97
-$flavour = shift;
+# $output is the last argument if it looks like a file (it has an extension)
+# $flavour is the first argument if it doesn't look like a file
+$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
+$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
if ($flavour =~ /64/) {
$SIZE_T =8;
@@ -70,7 +73,8 @@ $0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";
-open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
+open STDOUT,"| $^X $xlate $flavour \"$output\""
+ or die "can't call $xlate: $!";
$FRAME=8*$SIZE_T;
$prefix="aes_p8";
@@ -95,11 +99,12 @@ rcon:
.long 0x1b000000, 0x1b000000, 0x1b000000, 0x1b000000 ?rev
.long 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c, 0x0d0e0f0c ?rev
.long 0,0,0,0 ?asis
+.long 0x0f102132, 0x43546576, 0x8798a9ba, 0xcbdcedfe
Lconsts:
mflr r0
bcl 20,31,\$+4
mflr $ptr #vvvvv "distance between . and rcon
- addi $ptr,$ptr,-0x48
+ addi $ptr,$ptr,-0x58
mtlr r0
blr
.long 0
@@ -2401,7 +2406,7 @@ ___
my $key_=$key2;
my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31));
$x00=0 if ($flavour =~ /osx/);
-my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5));
+my ($in0, $in1, $in2, $in3, $in4, $in5)=map("v$_",(0..5));
my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16));
my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22));
my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys
@@ -2456,6 +2461,18 @@ _aesp8_xts_encrypt6x:
li $x70,0x70
mtspr 256,r0
+ # Reverse eighty7 to 0x010101..87
+ xxlor 2, 32+$eighty7, 32+$eighty7
+ vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
+ xxlor 1, 32+$eighty7, 32+$eighty7
+
+ # Load XOR contents. 0xf102132435465768798a9bacbdcedfe
+ mr $x70, r6
+ bl Lconsts
+ lxvw4x 0, $x40, r6 # load XOR contents
+ mr r6, $x70
+ li $x70,0x70
+
subi $rounds,$rounds,3 # -4 in total
lvx $rndkey0,$x00,$key1 # load key schedule
@@ -2498,69 +2515,77 @@ Load_xts_enc_key:
?vperm v31,v31,$twk5,$keyperm
lvx v25,$x10,$key_ # pre-load round[2]
+ # Switch to use the following codes with 0x010101..87 to generate tweak.
+ # eighty7 = 0x010101..87
+ # vsrab tmp, tweak, seven # next tweak value, right shift 7 bits
+ # vand tmp, tmp, eighty7 # last byte with carry
+ # vaddubm tweak, tweak, tweak # left shift 1 bit (x2)
+ # xxlor vsx, 0, 0
+ # vpermxor tweak, tweak, tmp, vsx
+
vperm $in0,$inout,$inptail,$inpperm
subi $inp,$inp,31 # undo "caller"
vxor $twk0,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $out0,$in0,$twk0
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in1, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in1
lvx_u $in1,$x10,$inp
vxor $twk1,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
le?vperm $in1,$in1,$in1,$leperm
vand $tmp,$tmp,$eighty7
vxor $out1,$in1,$twk1
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in2, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in2
lvx_u $in2,$x20,$inp
andi. $taillen,$len,15
vxor $twk2,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
le?vperm $in2,$in2,$in2,$leperm
vand $tmp,$tmp,$eighty7
vxor $out2,$in2,$twk2
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in3, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in3
lvx_u $in3,$x30,$inp
sub $len,$len,$taillen
vxor $twk3,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
le?vperm $in3,$in3,$in3,$leperm
vand $tmp,$tmp,$eighty7
vxor $out3,$in3,$twk3
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in4, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in4
lvx_u $in4,$x40,$inp
subi $len,$len,0x60
vxor $twk4,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
le?vperm $in4,$in4,$in4,$leperm
vand $tmp,$tmp,$eighty7
vxor $out4,$in4,$twk4
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in5, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in5
lvx_u $in5,$x50,$inp
addi $inp,$inp,0x60
vxor $twk5,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
le?vperm $in5,$in5,$in5,$leperm
vand $tmp,$tmp,$eighty7
vxor $out5,$in5,$twk5
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in0, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in0
vxor v31,v31,$rndkey0
mtctr $rounds
@@ -2586,6 +2611,8 @@ Loop_xts_enc6x:
lvx v25,$x10,$key_ # round[4]
bdnz Loop_xts_enc6x
+ xxlor 32+$eighty7, 1, 1 # 0x010101..87
+
subic $len,$len,96 # $len-=96
vxor $in0,$twk0,v31 # xor with last round key
vcipher $out0,$out0,v24
@@ -2595,7 +2622,6 @@ Loop_xts_enc6x:
vaddubm $tweak,$tweak,$tweak
vcipher $out2,$out2,v24
vcipher $out3,$out3,v24
- vsldoi $tmp,$tmp,$tmp,15
vcipher $out4,$out4,v24
vcipher $out5,$out5,v24
@@ -2603,7 +2629,8 @@ Loop_xts_enc6x:
vand $tmp,$tmp,$eighty7
vcipher $out0,$out0,v25
vcipher $out1,$out1,v25
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in1, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in1
vcipher $out2,$out2,v25
vcipher $out3,$out3,v25
vxor $in1,$twk1,v31
@@ -2614,13 +2641,13 @@ Loop_xts_enc6x:
and r0,r0,$len
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
vcipher $out0,$out0,v26
vcipher $out1,$out1,v26
vand $tmp,$tmp,$eighty7
vcipher $out2,$out2,v26
vcipher $out3,$out3,v26
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in2, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in2
vcipher $out4,$out4,v26
vcipher $out5,$out5,v26
@@ -2634,7 +2661,6 @@ Loop_xts_enc6x:
vaddubm $tweak,$tweak,$tweak
vcipher $out0,$out0,v27
vcipher $out1,$out1,v27
- vsldoi $tmp,$tmp,$tmp,15
vcipher $out2,$out2,v27
vcipher $out3,$out3,v27
vand $tmp,$tmp,$eighty7
@@ -2642,7 +2668,8 @@ Loop_xts_enc6x:
vcipher $out5,$out5,v27
addi $key_,$sp,$FRAME+15 # rewind $key_
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in3, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in3
vcipher $out0,$out0,v28
vcipher $out1,$out1,v28
vxor $in3,$twk3,v31
@@ -2651,7 +2678,6 @@ Loop_xts_enc6x:
vcipher $out2,$out2,v28
vcipher $out3,$out3,v28
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
vcipher $out4,$out4,v28
vcipher $out5,$out5,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
@@ -2659,7 +2685,8 @@ Loop_xts_enc6x:
vcipher $out0,$out0,v29
vcipher $out1,$out1,v29
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in4, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in4
vcipher $out2,$out2,v29
vcipher $out3,$out3,v29
vxor $in4,$twk4,v31
@@ -2669,14 +2696,14 @@ Loop_xts_enc6x:
vcipher $out5,$out5,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
vcipher $out0,$out0,v30
vcipher $out1,$out1,v30
vand $tmp,$tmp,$eighty7
vcipher $out2,$out2,v30
vcipher $out3,$out3,v30
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in5, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in5
vcipher $out4,$out4,v30
vcipher $out5,$out5,v30
vxor $in5,$twk5,v31
@@ -2686,7 +2713,6 @@ Loop_xts_enc6x:
vcipherlast $out0,$out0,$in0
lvx_u $in0,$x00,$inp # load next input block
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
vcipherlast $out1,$out1,$in1
lvx_u $in1,$x10,$inp
vcipherlast $out2,$out2,$in2
@@ -2699,7 +2725,10 @@ Loop_xts_enc6x:
vcipherlast $out4,$out4,$in4
le?vperm $in2,$in2,$in2,$leperm
lvx_u $in4,$x40,$inp
- vxor $tweak,$tweak,$tmp
+ xxlor 10, 32+$in0, 32+$in0
+ xxlor 32+$in0, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in0
+ xxlor 32+$in0, 10, 10
vcipherlast $tmp,$out5,$in5 # last block might be needed
# in stealing mode
le?vperm $in3,$in3,$in3,$leperm
@@ -2732,6 +2761,8 @@ Loop_xts_enc6x:
mtctr $rounds
beq Loop_xts_enc6x # did $len-=96 borrow?
+ xxlor 32+$eighty7, 2, 2 # 0x870101..01
+
addic. $len,$len,0x60
beq Lxts_enc6x_zero
cmpwi $len,0x20
@@ -3108,6 +3139,18 @@ _aesp8_xts_decrypt6x:
li $x70,0x70
mtspr 256,r0
+ # Reverse eighty7 to 0x010101..87
+ xxlor 2, 32+$eighty7, 32+$eighty7
+ vsldoi $eighty7,$tmp,$eighty7,1 # 0x010101..87
+ xxlor 1, 32+$eighty7, 32+$eighty7
+
+ # Load XOR contents. 0xf102132435465768798a9bacbdcedfe
+ mr $x70, r6
+ bl Lconsts
+ lxvw4x 0, $x40, r6 # load XOR contents
+ mr r6, $x70
+ li $x70,0x70
+
subi $rounds,$rounds,3 # -4 in total
lvx $rndkey0,$x00,$key1 # load key schedule
@@ -3155,64 +3198,64 @@ Load_xts_dec_key:
vxor $twk0,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
vand $tmp,$tmp,$eighty7
vxor $out0,$in0,$twk0
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in1, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in1
lvx_u $in1,$x10,$inp
vxor $twk1,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
le?vperm $in1,$in1,$in1,$leperm
vand $tmp,$tmp,$eighty7
vxor $out1,$in1,$twk1
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in2, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in2
lvx_u $in2,$x20,$inp
andi. $taillen,$len,15
vxor $twk2,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
le?vperm $in2,$in2,$in2,$leperm
vand $tmp,$tmp,$eighty7
vxor $out2,$in2,$twk2
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in3, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in3
lvx_u $in3,$x30,$inp
sub $len,$len,$taillen
vxor $twk3,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
le?vperm $in3,$in3,$in3,$leperm
vand $tmp,$tmp,$eighty7
vxor $out3,$in3,$twk3
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in4, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in4
lvx_u $in4,$x40,$inp
subi $len,$len,0x60
vxor $twk4,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
le?vperm $in4,$in4,$in4,$leperm
vand $tmp,$tmp,$eighty7
vxor $out4,$in4,$twk4
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in5, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in5
lvx_u $in5,$x50,$inp
addi $inp,$inp,0x60
vxor $twk5,$tweak,$rndkey0
vsrab $tmp,$tweak,$seven # next tweak value
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
le?vperm $in5,$in5,$in5,$leperm
vand $tmp,$tmp,$eighty7
vxor $out5,$in5,$twk5
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in0, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in0
vxor v31,v31,$rndkey0
mtctr $rounds
@@ -3238,6 +3281,8 @@ Loop_xts_dec6x:
lvx v25,$x10,$key_ # round[4]
bdnz Loop_xts_dec6x
+ xxlor 32+$eighty7, 1, 1
+
subic $len,$len,96 # $len-=96
vxor $in0,$twk0,v31 # xor with last round key
vncipher $out0,$out0,v24
@@ -3247,7 +3292,6 @@ Loop_xts_dec6x:
vaddubm $tweak,$tweak,$tweak
vncipher $out2,$out2,v24
vncipher $out3,$out3,v24
- vsldoi $tmp,$tmp,$tmp,15
vncipher $out4,$out4,v24
vncipher $out5,$out5,v24
@@ -3255,7 +3299,8 @@ Loop_xts_dec6x:
vand $tmp,$tmp,$eighty7
vncipher $out0,$out0,v25
vncipher $out1,$out1,v25
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in1, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in1
vncipher $out2,$out2,v25
vncipher $out3,$out3,v25
vxor $in1,$twk1,v31
@@ -3266,13 +3311,13 @@ Loop_xts_dec6x:
and r0,r0,$len
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
vncipher $out0,$out0,v26
vncipher $out1,$out1,v26
vand $tmp,$tmp,$eighty7
vncipher $out2,$out2,v26
vncipher $out3,$out3,v26
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in2, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in2
vncipher $out4,$out4,v26
vncipher $out5,$out5,v26
@@ -3286,7 +3331,6 @@ Loop_xts_dec6x:
vaddubm $tweak,$tweak,$tweak
vncipher $out0,$out0,v27
vncipher $out1,$out1,v27
- vsldoi $tmp,$tmp,$tmp,15
vncipher $out2,$out2,v27
vncipher $out3,$out3,v27
vand $tmp,$tmp,$eighty7
@@ -3294,7 +3338,8 @@ Loop_xts_dec6x:
vncipher $out5,$out5,v27
addi $key_,$sp,$FRAME+15 # rewind $key_
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in3, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in3
vncipher $out0,$out0,v28
vncipher $out1,$out1,v28
vxor $in3,$twk3,v31
@@ -3303,7 +3348,6 @@ Loop_xts_dec6x:
vncipher $out2,$out2,v28
vncipher $out3,$out3,v28
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
vncipher $out4,$out4,v28
vncipher $out5,$out5,v28
lvx v24,$x00,$key_ # re-pre-load round[1]
@@ -3311,7 +3355,8 @@ Loop_xts_dec6x:
vncipher $out0,$out0,v29
vncipher $out1,$out1,v29
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in4, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in4
vncipher $out2,$out2,v29
vncipher $out3,$out3,v29
vxor $in4,$twk4,v31
@@ -3321,14 +3366,14 @@ Loop_xts_dec6x:
vncipher $out5,$out5,v29
lvx v25,$x10,$key_ # re-pre-load round[2]
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
vncipher $out0,$out0,v30
vncipher $out1,$out1,v30
vand $tmp,$tmp,$eighty7
vncipher $out2,$out2,v30
vncipher $out3,$out3,v30
- vxor $tweak,$tweak,$tmp
+ xxlor 32+$in5, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in5
vncipher $out4,$out4,v30
vncipher $out5,$out5,v30
vxor $in5,$twk5,v31
@@ -3338,7 +3383,6 @@ Loop_xts_dec6x:
vncipherlast $out0,$out0,$in0
lvx_u $in0,$x00,$inp # load next input block
vaddubm $tweak,$tweak,$tweak
- vsldoi $tmp,$tmp,$tmp,15
vncipherlast $out1,$out1,$in1
lvx_u $in1,$x10,$inp
vncipherlast $out2,$out2,$in2
@@ -3351,7 +3395,10 @@ Loop_xts_dec6x:
vncipherlast $out4,$out4,$in4
le?vperm $in2,$in2,$in2,$leperm
lvx_u $in4,$x40,$inp
- vxor $tweak,$tweak,$tmp
+ xxlor 10, 32+$in0, 32+$in0
+ xxlor 32+$in0, 0, 0
+ vpermxor $tweak, $tweak, $tmp, $in0
+ xxlor 32+$in0, 10, 10
vncipherlast $out5,$out5,$in5
le?vperm $in3,$in3,$in3,$leperm
lvx_u $in5,$x50,$inp
@@ -3382,6 +3429,8 @@ Loop_xts_dec6x:
mtctr $rounds
beq Loop_xts_dec6x # did $len-=96 borrow?
+ xxlor 32+$eighty7, 2, 2
+
addic. $len,$len,0x60
beq Lxts_dec6x_zero
cmpwi $len,0x20