Diffstat (limited to 'crypto/bn/asm')
-rwxr-xr-x  crypto/bn/asm/x86-mont.pl      15
-rwxr-xr-x  crypto/bn/asm/x86_64-mont.pl   40
-rwxr-xr-x  crypto/bn/asm/x86_64-mont5.pl  22
3 files changed, 74 insertions(+), 3 deletions(-)
diff --git a/crypto/bn/asm/x86-mont.pl b/crypto/bn/asm/x86-mont.pl
index e8f6b050842e..89f4de61e896 100755
--- a/crypto/bn/asm/x86-mont.pl
+++ b/crypto/bn/asm/x86-mont.pl
@@ -85,6 +85,21 @@ $frame=32; # size of above frame rounded up to 16n
&and ("esp",-64); # align to cache line
+ # Some OSes, *cough*-dows, insist on the stack being "wired" to
+ # physical memory in a strictly sequential manner, i.e. if a stack
+ # allocation spans two pages, then a reference to the farther one
+ # can be punished with SEGV. But page walking is beneficial even on
+ # other OSes, because it guarantees that a villain thread hits
+ # the guard page before it can do damage to an innocent one...
+ &mov ("eax","ebp");
+ &sub ("eax","esp");
+ &and ("eax",-4096);
+&set_label("page_walk");
+ &mov ("edx",&DWP(0,"esp","eax"));
+ &sub ("eax",4096);
+ &data_byte(0x2e);
+ &jnc (&label("page_walk"));
+
################################# load argument block...
&mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
&mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl
index 17fb94c84c9f..c8ae019da1b9 100755
--- a/crypto/bn/asm/x86_64-mont.pl
+++ b/crypto/bn/asm/x86_64-mont.pl
@@ -91,6 +91,20 @@ bn_mul_mont:
mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
.Lmul_body:
+ # Some OSes, *cough*-dows, insist on the stack being "wired" to
+ # physical memory in a strictly sequential manner, i.e. if a stack
+ # allocation spans two pages, then a reference to the farther one
+ # can be punished with SEGV. But page walking is beneficial even on
+ # other OSes, because it guarantees that a villain thread hits
+ # the guard page before it can do damage to an innocent one...
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lmul_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x66,0x2e # predict non-taken
+ jnc .Lmul_page_walk
+
mov $bp,%r12 # reassign $bp
___
$bp="%r12";
@@ -296,6 +310,14 @@ bn_mul4x_mont:
mov %r11,8(%rsp,$num,8) # tp[num+1]=%rsp
.Lmul4x_body:
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lmul4x_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lmul4x_page_walk
+
mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp
mov %rdx,%r12 # reassign $bp
___
@@ -707,6 +729,7 @@ $code.=<<___;
.align 16
bn_sqr4x_mont:
.Lsqr4x_enter:
+ mov %rsp,%rax
push %rbx
push %rbp
push %r12
@@ -715,12 +738,23 @@ bn_sqr4x_mont:
push %r15
shl \$3,${num}d # convert $num to bytes
- xor %r10,%r10
mov %rsp,%r11 # put aside %rsp
- sub $num,%r10 # -$num
+ neg $num # -$num
mov ($n0),$n0 # *n0
- lea -72(%rsp,%r10,2),%rsp # alloca(frame+2*$num)
+ lea -72(%rsp,$num,2),%rsp # alloca(frame+2*$num)
and \$-1024,%rsp # minimize TLB usage
+
+ sub %rsp,%r11
+ and \$-4096,%r11
+.Lsqr4x_page_walk:
+ mov (%rsp,%r11),%r10
+ sub \$4096,%r11
+ .byte 0x2e # predict non-taken
+ jnc .Lsqr4x_page_walk
+
+ mov $num,%r10
+ neg $num # restore $num
+ lea -48(%rax),%r11 # restore saved %rsp
##############################################################
# Stack layout
#
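
(A note on the bn_sqr4x_mont hunk: the original code built -$num in the scratch register %r10, but the page walk added here clobbers %r10 as its probe destination, so the patch instead negates $num in place for the lea, runs the walk, and only then restores the old state. %r10 is reloaded with the still-negative $num, $num is negated back to its positive byte count, and lea -48(%rax),%r11 recreates the "put aside" stack pointer from the %rsp value saved in %rax at .Lsqr4x_enter, the six intervening pushes of rbx, rbp and r12-r15 accounting for the 6 x 8 = 48 bytes.)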
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index 235979181f54..99243ae3ec2c 100755
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -84,6 +84,20 @@ bn_mul_mont_gather5:
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
.Lmul_body:
+ # Some OSes, *cough*-dows, insist on the stack being "wired" to
+ # physical memory in a strictly sequential manner, i.e. if a stack
+ # allocation spans two pages, then a reference to the farther one
+ # can be punished with SEGV. But page walking is beneficial even on
+ # other OSes, because it guarantees that a villain thread hits
+ # the guard page before it can do damage to an innocent one...
+ sub %rsp,%rax
+ and \$-4096,%rax
+.Lmul_page_walk:
+ mov (%rsp,%rax),%r11
+ sub \$4096,%rax
+ .byte 0x2e # predict non-taken
+ jnc .Lmul_page_walk
+
lea 128($bp),%r12 # reassign $bp (+size optimization)
___
$bp="%r12";
@@ -407,6 +421,14 @@ bn_mul4x_mont_gather5:
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
.Lmul4x_body:
+ sub %rsp,%rax
+ and \$-4096,%rax
+.Lmul4x_page_walk:
+ mov (%rsp,%rax),%r11
+ sub \$4096,%rax
+ .byte 0x2e # predict non-taken
+ jnc .Lmul4x_page_walk
+
mov $rp,16(%rsp,$num,8) # tp[num+2]=$rp
lea 128(%rdx),%r12 # reassign $bp (+size optimization)
___
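
These probes only matter once the tp[] scratch area spans more than one page, i.e. for fairly large operands. As a rough way to exercise the patched paths on builds that use the assembly Montgomery code (a hedged sketch, not part of this change; error checking largely omitted), one can push a big odd-modulus exponentiation through the public API, which ends up in bn_mul_mont / bn_mul4x_mont / bn_sqr4x_mont on x86_64:

    /* cc probe_test.c -lcrypto */
    #include <openssl/bn.h>
    #include <stdio.h>

    int main(void)
    {
        BN_CTX *ctx = BN_CTX_new();
        BIGNUM *a = BN_new(), *p = BN_new(), *m = BN_new(), *r = BN_new();

        /* A 32768-bit modulus makes the temporary area likely to cross
           several 4096-byte pages; a short exponent keeps the run quick,
           since only the modulus size determines the scratch-area size. */
        BN_rand(a, 32768, 0, 0);   /* base                            */
        BN_rand(p, 64,    0, 0);   /* exponent                        */
        BN_rand(m, 32768, 1, 1);   /* odd modulus -> Montgomery path  */

        if (!BN_mod_exp(r, a, p, m, ctx))
            fprintf(stderr, "BN_mod_exp failed\n");
        else
            printf("result has %d bits\n", BN_num_bits(r));

        BN_free(a); BN_free(p); BN_free(m); BN_free(r);
        BN_CTX_free(ctx);
        return 0;
    }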