aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJung-uk Kim <jkim@FreeBSD.org>2016-03-01 17:57:01 +0000
committerJung-uk Kim <jkim@FreeBSD.org>2016-03-01 17:57:01 +0000
commit9aeed18ad799c20d3accf6e1535817538dc983f6 (patch)
tree37a4bb1290ee86a2b4ce070f139b2379ee747425
parentc188d4cade9cba451816aef2371942bea4ff837f (diff)
downloadsrc-9aeed18ad799c20d3accf6e1535817538dc983f6.tar.gz
src-9aeed18ad799c20d3accf6e1535817538dc983f6.zip
Import OpenSSL 1.0.2g.vendor/openssl/1.0.2g
Notes
Notes: svn path=/vendor-crypto/openssl/dist/; revision=296273 svn path=/vendor-crypto/openssl/1.0.2g/; revision=296274; tag=vendor/openssl/1.0.2g
-rw-r--r--CHANGES134
-rwxr-xr-xConfigure8
-rw-r--r--Makefile6
-rw-r--r--Makefile.shared6
-rw-r--r--NEWS13
-rw-r--r--README2
-rw-r--r--apps/apps.c8
-rw-r--r--apps/apps.h2
-rw-r--r--apps/pkeyutl.c90
-rw-r--r--apps/req.c4
-rw-r--r--apps/rsautl.c6
-rw-r--r--apps/s_client.c2
-rw-r--r--apps/s_server.c49
-rwxr-xr-xconfig3
-rw-r--r--crypto/asn1/tasn_dec.c14
-rw-r--r--crypto/bio/b_print.c187
-rw-r--r--crypto/bio/bio.h4
-rw-r--r--crypto/bio/bss_mem.c6
-rw-r--r--crypto/bn/Makefile4
-rwxr-xr-xcrypto/bn/asm/rsaz-avx2.pl219
-rwxr-xr-xcrypto/bn/asm/rsaz-x86_64.pl375
-rwxr-xr-xcrypto/bn/asm/x86_64-mont.pl227
-rwxr-xr-xcrypto/bn/asm/x86_64-mont5.pl1276
-rw-r--r--crypto/bn/bn.h14
-rw-r--r--crypto/bn/bn_exp.c103
-rw-r--r--crypto/bn/bn_print.c17
-rw-r--r--crypto/bn/bn_recp.c1
-rw-r--r--crypto/cmac/cmac.c8
-rw-r--r--crypto/cryptlib.c6
-rw-r--r--crypto/crypto.h2
-rw-r--r--crypto/dh/dh.h2
-rw-r--r--crypto/dh/dh_check.c7
-rw-r--r--crypto/dsa/dsa_ameth.c22
-rw-r--r--crypto/dso/dso_lib.c1
-rwxr-xr-xcrypto/ec/asm/ecp_nistz256-x86_64.pl11
-rw-r--r--crypto/ec/ecp_nistp224.c4
-rw-r--r--crypto/ec/ecp_nistp256.c4
-rw-r--r--crypto/ec/ecp_nistp521.c4
-rw-r--r--crypto/ec/ectest.c9
-rw-r--r--crypto/engine/eng_dyn.c4
-rw-r--r--crypto/evp/e_des.c11
-rw-r--r--crypto/evp/e_des3.c13
-rwxr-xr-xcrypto/modes/asm/aesni-gcm-x86_64.pl4
-rwxr-xr-xcrypto/modes/asm/ghash-x86_64.pl2
-rw-r--r--crypto/modes/ctr128.c41
-rw-r--r--crypto/opensslconf.h12
-rw-r--r--crypto/opensslv.h6
-rwxr-xr-xcrypto/perlasm/x86_64-xlate.pl7
-rw-r--r--crypto/pkcs7/pk7_smime.c17
-rw-r--r--crypto/rsa/rsa_sign.c4
-rw-r--r--crypto/srp/srp.h10
-rw-r--r--crypto/srp/srp_vfy.c57
-rw-r--r--crypto/stack/stack.c2
-rw-r--r--crypto/x509/x509_vfy.c70
-rw-r--r--doc/apps/ciphers.pod59
-rw-r--r--doc/apps/pkeyutl.pod13
-rw-r--r--doc/apps/req.pod9
-rw-r--r--doc/apps/s_client.pod12
-rw-r--r--doc/apps/s_server.pod8
-rw-r--r--doc/crypto/BIO_s_mem.pod4
-rw-r--r--doc/ssl/SSL_CONF_cmd.pod33
-rw-r--r--doc/ssl/SSL_CTX_new.pod168
-rw-r--r--doc/ssl/SSL_CTX_set_options.pod10
-rw-r--r--doc/ssl/ssl.pod77
-rw-r--r--engines/e_capi.c32
-rw-r--r--ssl/Makefile69
-rw-r--r--ssl/s2_lib.c6
-rw-r--r--ssl/s3_lib.c69
-rw-r--r--ssl/ssl.h1
-rw-r--r--ssl/ssl_conf.c10
-rw-r--r--ssl/ssl_err.c1
-rw-r--r--ssl/ssl_lib.c14
-rw-r--r--ssl/sslv2conftest.c231
-rwxr-xr-xutil/libeay.num2
-rwxr-xr-xutil/mk1mf.pl4
-rw-r--r--util/pl/BC-32.pl4
-rw-r--r--util/pl/Mingw32.pl2
-rw-r--r--util/pl/OS2-EMX.pl4
-rw-r--r--util/pl/VC-32.pl10
-rw-r--r--util/pl/linux.pl2
-rw-r--r--util/pl/netware.pl8
-rw-r--r--util/pl/ultrix.pl2
-rw-r--r--util/pl/unix.pl2
83 files changed, 2668 insertions, 1318 deletions
diff --git a/CHANGES b/CHANGES
index 18693f70efe9..7578f7eb7ace 100644
--- a/CHANGES
+++ b/CHANGES
@@ -2,6 +2,138 @@
OpenSSL CHANGES
_______________
+ Changes between 1.0.2f and 1.0.2g [1 Mar 2016]
+
+ * Disable weak ciphers in SSLv3 and up in default builds of OpenSSL.
+ Builds that are not configured with "enable-weak-ssl-ciphers" will not
+ provide any "EXPORT" or "LOW" strength ciphers.
+ [Viktor Dukhovni]
+
+ * Disable SSLv2 default build, default negotiation and weak ciphers. SSLv2
+ is by default disabled at build-time. Builds that are not configured with
+ "enable-ssl2" will not support SSLv2. Even if "enable-ssl2" is used,
+ users who want to negotiate SSLv2 via the version-flexible SSLv23_method()
+ will need to explicitly call either of:
+
+ SSL_CTX_clear_options(ctx, SSL_OP_NO_SSLv2);
+ or
+ SSL_clear_options(ssl, SSL_OP_NO_SSLv2);
+
+ as appropriate. Even if either of those is used, or the application
+ explicitly uses the version-specific SSLv2_method() or its client and
+ server variants, SSLv2 ciphers vulnerable to exhaustive search key
+ recovery have been removed. Specifically, the SSLv2 40-bit EXPORT
+ ciphers, and SSLv2 56-bit DES are no longer available.
+ (CVE-2016-0800)
+ [Viktor Dukhovni]
+
+ *) Fix a double-free in DSA code
+
+ A double free bug was discovered when OpenSSL parses malformed DSA private
+ keys and could lead to a DoS attack or memory corruption for applications
+ that receive DSA private keys from untrusted sources. This scenario is
+ considered rare.
+
+ This issue was reported to OpenSSL by Adam Langley(Google/BoringSSL) using
+ libFuzzer.
+ (CVE-2016-0705)
+ [Stephen Henson]
+
+ *) Disable SRP fake user seed to address a server memory leak.
+
+ Add a new method SRP_VBASE_get1_by_user that handles the seed properly.
+
+ SRP_VBASE_get_by_user had inconsistent memory management behaviour.
+ In order to fix an unavoidable memory leak, SRP_VBASE_get_by_user
+ was changed to ignore the "fake user" SRP seed, even if the seed
+ is configured.
+
+ Users should use SRP_VBASE_get1_by_user instead. Note that in
+ SRP_VBASE_get1_by_user, caller must free the returned value. Note
+ also that even though configuring the SRP seed attempts to hide
+ invalid usernames by continuing the handshake with fake
+ credentials, this behaviour is not constant time and no strong
+ guarantees are made that the handshake is indistinguishable from
+ that of a valid user.
+ (CVE-2016-0798)
+ [Emilia Käsper]
+
+ *) Fix BN_hex2bn/BN_dec2bn NULL pointer deref/heap corruption
+
+ In the BN_hex2bn function the number of hex digits is calculated using an
+ int value |i|. Later |bn_expand| is called with a value of |i * 4|. For
+ large values of |i| this can result in |bn_expand| not allocating any
+ memory because |i * 4| is negative. This can leave the internal BIGNUM data
+ field as NULL leading to a subsequent NULL ptr deref. For very large values
+ of |i|, the calculation |i * 4| could be a positive value smaller than |i|.
+ In this case memory is allocated to the internal BIGNUM data field, but it
+ is insufficiently sized leading to heap corruption. A similar issue exists
+ in BN_dec2bn. This could have security consequences if BN_hex2bn/BN_dec2bn
+ is ever called by user applications with very large untrusted hex/dec data.
+ This is anticipated to be a rare occurrence.
+
+ All OpenSSL internal usage of these functions use data that is not expected
+ to be untrusted, e.g. config file data or application command line
+ arguments. If user developed applications generate config file data based
+ on untrusted data then it is possible that this could also lead to security
+ consequences. This is also anticipated to be rare.
+
+ This issue was reported to OpenSSL by Guido Vranken.
+ (CVE-2016-0797)
+ [Matt Caswell]
+
+ *) Fix memory issues in BIO_*printf functions
+
+ The internal |fmtstr| function used in processing a "%s" format string in
+ the BIO_*printf functions could overflow while calculating the length of a
+ string and cause an OOB read when printing very long strings.
+
+ Additionally the internal |doapr_outch| function can attempt to write to an
+ OOB memory location (at an offset from the NULL pointer) in the event of a
+ memory allocation failure. In 1.0.2 and below this could be caused where
+ the size of a buffer to be allocated is greater than INT_MAX. E.g. this
+ could be in processing a very long "%s" format string. Memory leaks can
+ also occur.
+
+ The first issue may mask the second issue dependent on compiler behaviour.
+ These problems could enable attacks where large amounts of untrusted data
+ is passed to the BIO_*printf functions. If applications use these functions
+ in this way then they could be vulnerable. OpenSSL itself uses these
+ functions when printing out human-readable dumps of ASN.1 data. Therefore
+ applications that print this data could be vulnerable if the data is from
+ untrusted sources. OpenSSL command line applications could also be
+ vulnerable where they print out ASN.1 data, or if untrusted data is passed
+ as command line arguments.
+
+ Libssl is not considered directly vulnerable. Additionally certificates etc
+ received via remote connections via libssl are also unlikely to be able to
+ trigger these issues because of message size limits enforced within libssl.
+
+ This issue was reported to OpenSSL Guido Vranken.
+ (CVE-2016-0799)
+ [Matt Caswell]
+
+ *) Side channel attack on modular exponentiation
+
+ A side-channel attack was found which makes use of cache-bank conflicts on
+ the Intel Sandy-Bridge microarchitecture which could lead to the recovery
+ of RSA keys. The ability to exploit this issue is limited as it relies on
+ an attacker who has control of code in a thread running on the same
+ hyper-threaded core as the victim thread which is performing decryptions.
+
+ This issue was reported to OpenSSL by Yuval Yarom, The University of
+ Adelaide and NICTA, Daniel Genkin, Technion and Tel Aviv University, and
+ Nadia Heninger, University of Pennsylvania with more information at
+ http://cachebleed.info.
+ (CVE-2016-0702)
+ [Andy Polyakov]
+
+ *) Change the req app to generate a 2048-bit RSA/DSA key by default,
+ if no keysize is specified with default_bits. This fixes an
+ omission in an earlier change that changed all RSA/DSA key generation
+ apps to use 2048 bits by default.
+ [Emilia Käsper]
+
Changes between 1.0.2e and 1.0.2f [28 Jan 2016]
*) DH small subgroups
@@ -105,7 +237,7 @@
[Emilia Käsper]
*) In DSA_generate_parameters_ex, if the provided seed is too short,
- return an error
+ use a random seed, as already documented.
[Rich Salz and Ismo Puustinen <ismo.puustinen@intel.com>]
Changes between 1.0.2c and 1.0.2d [9 Jul 2015]
diff --git a/Configure b/Configure
index 4a715dc43732..c98107a48718 100755
--- a/Configure
+++ b/Configure
@@ -58,6 +58,10 @@ my $usage="Usage: Configure [no-<cipher> ...] [enable-<cipher> ...] [experimenta
# library and will be loaded in run-time by the OpenSSL library.
# sctp include SCTP support
# 386 generate 80386 code
+# enable-weak-ssl-ciphers
+# Enable EXPORT and LOW SSLv3 ciphers that are disabled by
+# default. Note, weak SSLv2 ciphers are unconditionally
+# disabled.
# no-sse2 disables IA-32 SSE2 code, above option implies no-sse2
# no-<cipher> build without specified algorithm (rsa, idea, rc5, ...)
# -<xxx> +<xxx> compiler options are passed through
@@ -781,11 +785,13 @@ my %disabled = ( # "what" => "comment" [or special keyword "experimental
"md2" => "default",
"rc5" => "default",
"rfc3779" => "default",
- "sctp" => "default",
+ "sctp" => "default",
"shared" => "default",
"ssl-trace" => "default",
+ "ssl2" => "default",
"store" => "experimental",
"unit-test" => "default",
+ "weak-ssl-ciphers" => "default",
"zlib" => "default",
"zlib-dynamic" => "default"
);
diff --git a/Makefile b/Makefile
index ee04c02cc1ca..190d064d89ef 100644
--- a/Makefile
+++ b/Makefile
@@ -4,7 +4,7 @@
## Makefile for OpenSSL
##
-VERSION=1.0.2f
+VERSION=1.0.2g
MAJOR=1
MINOR=0.2
SHLIB_VERSION_NUMBER=1.0.0
@@ -13,7 +13,7 @@ SHLIB_MAJOR=1
SHLIB_MINOR=0.0
SHLIB_EXT=
PLATFORM=dist
-OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-libunbound no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-ssl-trace no-store no-unit-test no-zlib no-zlib-dynamic static-engine
+OPTIONS= no-ec_nistp_64_gcc_128 no-gmp no-jpake no-krb5 no-libunbound no-md2 no-rc5 no-rfc3779 no-sctp no-shared no-ssl-trace no-ssl2 no-store no-unit-test no-weak-ssl-ciphers no-zlib no-zlib-dynamic static-engine
CONFIGURE_ARGS=dist
SHLIB_TARGET=
@@ -61,7 +61,7 @@ OPENSSLDIR=/usr/local/ssl
CC= cc
CFLAG= -O
-DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST
+DEPFLAG= -DOPENSSL_NO_EC_NISTP_64_GCC_128 -DOPENSSL_NO_GMP -DOPENSSL_NO_JPAKE -DOPENSSL_NO_LIBUNBOUND -DOPENSSL_NO_MD2 -DOPENSSL_NO_RC5 -DOPENSSL_NO_RFC3779 -DOPENSSL_NO_SCTP -DOPENSSL_NO_SSL_TRACE -DOPENSSL_NO_SSL2 -DOPENSSL_NO_STORE -DOPENSSL_NO_UNIT_TEST -DOPENSSL_NO_WEAK_SSL_CIPHERS
PEX_LIBS=
EX_LIBS=
EXE_EXT=
diff --git a/Makefile.shared b/Makefile.shared
index e753f44e18fd..a2aa9804c1d9 100644
--- a/Makefile.shared
+++ b/Makefile.shared
@@ -272,7 +272,7 @@ link_o.cygwin:
SHLIB_SOVER=${LIBVERSION:+"-$(LIBVERSION)"}; \
ALLSYMSFLAGS='-Wl,--whole-archive'; \
NOALLSYMSFLAGS='-Wl,--no-whole-archive'; \
- SHAREDFLAGS="$(CFLAGS) $(SHARED_LDFLAGS) -shared $$base $$deffile -Wl,-s,-Bsymbolic"; \
+ SHAREDFLAGS="$(CFLAGS) $(SHARED_LDFLAGS) -shared $$base $$deffile -Wl,-Bsymbolic"; \
$(LINK_SO_O)
#for mingw target if def-file is in use dll-name should match library-name
link_a.cygwin:
@@ -289,7 +289,7 @@ link_a.cygwin:
SHLIB_SOVER=32; \
extras="$(LIBNAME).def"; \
$(PERL) util/mkdef.pl 32 $$SHLIB > $$extras; \
- base=; [ $(LIBNAME) = "crypto" ] && base=-Wl,--image-base,0x63000000; \
+ base=; [ $(LIBNAME) = "crypto" -a -n "$(FIPSCANLIB)" ] && base=-Wl,--image-base,0x63000000; \
fi; \
dll_name=$$SHLIB$$SHLIB_SOVER$$SHLIB_SUFFIX; \
$(PERL) util/mkrc.pl $$dll_name | \
@@ -297,7 +297,7 @@ link_a.cygwin:
extras="$$extras rc.o"; \
ALLSYMSFLAGS='-Wl,--whole-archive'; \
NOALLSYMSFLAGS='-Wl,--no-whole-archive'; \
- SHAREDFLAGS="$(CFLAGS) $(SHARED_LDFLAGS) -shared $$base -Wl,-s,-Bsymbolic -Wl,--out-implib,lib$(LIBNAME).dll.a $$extras"; \
+ SHAREDFLAGS="$(CFLAGS) $(SHARED_LDFLAGS) -shared $$base -Wl,-Bsymbolic -Wl,--out-implib,lib$(LIBNAME).dll.a $$extras"; \
[ -f apps/$$dll_name ] && rm apps/$$dll_name; \
[ -f test/$$dll_name ] && rm test/$$dll_name; \
$(LINK_SO_A) || exit 1; \
diff --git a/NEWS b/NEWS
index 06c77025e999..33242c83624d 100644
--- a/NEWS
+++ b/NEWS
@@ -5,6 +5,19 @@
This file gives a brief overview of the major changes between each OpenSSL
release. For more details please read the CHANGES file.
+ Major changes between OpenSSL 1.0.2f and OpenSSL 1.0.2g [1 Mar 2016]
+
+ o Disable weak ciphers in SSLv3 and up in default builds of OpenSSL.
+ o Disable SSLv2 default build, default negotiation and weak ciphers
+ (CVE-2016-0800)
+ o Fix a double-free in DSA code (CVE-2016-0705)
+ o Disable SRP fake user seed to address a server memory leak
+ (CVE-2016-0798)
+ o Fix BN_hex2bn/BN_dec2bn NULL pointer deref/heap corruption
+ (CVE-2016-0797)
+ o Fix memory issues in BIO_*printf functions (CVE-2016-0799)
+ o Fix side channel attack on modular exponentiation (CVE-2016-0702)
+
Major changes between OpenSSL 1.0.2e and OpenSSL 1.0.2f [28 Jan 2016]
o DH small subgroups (CVE-2016-0701)
diff --git a/README b/README
index 1e9869daee00..2077b04eb271 100644
--- a/README
+++ b/README
@@ -1,5 +1,5 @@
- OpenSSL 1.0.2f 28 Jan 2016
+ OpenSSL 1.0.2g 1 Mar 2016
Copyright (c) 1998-2015 The OpenSSL Project
Copyright (c) 1995-1998 Eric A. Young, Tim J. Hudson
diff --git a/apps/apps.c b/apps/apps.c
index 2e778054ca8f..b1dd97038f7d 100644
--- a/apps/apps.c
+++ b/apps/apps.c
@@ -2442,7 +2442,11 @@ int bio_to_mem(unsigned char **out, int maxlen, BIO *in)
else
len = 1024;
len = BIO_read(in, tbuf, len);
- if (len <= 0)
+ if (len < 0) {
+ BIO_free(mem);
+ return -1;
+ }
+ if (len == 0)
break;
if (BIO_write(mem, tbuf, len) != len) {
BIO_free(mem);
@@ -2459,7 +2463,7 @@ int bio_to_mem(unsigned char **out, int maxlen, BIO *in)
return ret;
}
-int pkey_ctrl_string(EVP_PKEY_CTX *ctx, char *value)
+int pkey_ctrl_string(EVP_PKEY_CTX *ctx, const char *value)
{
int rv;
char *stmp, *vtmp = NULL;
diff --git a/apps/apps.h b/apps/apps.h
index 8276e708694d..19bf5cc3337d 100644
--- a/apps/apps.h
+++ b/apps/apps.h
@@ -321,7 +321,7 @@ int args_verify(char ***pargs, int *pargc,
int *badarg, BIO *err, X509_VERIFY_PARAM **pm);
void policies_print(BIO *out, X509_STORE_CTX *ctx);
int bio_to_mem(unsigned char **out, int maxlen, BIO *in);
-int pkey_ctrl_string(EVP_PKEY_CTX *ctx, char *value);
+int pkey_ctrl_string(EVP_PKEY_CTX *ctx, const char *value);
int init_gen_str(BIO *err, EVP_PKEY_CTX **pctx,
const char *algname, ENGINE *e, int do_param);
int do_X509_sign(BIO *err, X509 *x, EVP_PKEY *pkey, const EVP_MD *md,
diff --git a/apps/pkeyutl.c b/apps/pkeyutl.c
index c8d513b44ac4..e47206c40a11 100644
--- a/apps/pkeyutl.c
+++ b/apps/pkeyutl.c
@@ -73,7 +73,7 @@ static void usage(void);
#define PROG pkeyutl_main
static EVP_PKEY_CTX *init_ctx(int *pkeysize,
- char *keyfile, int keyform, int key_type,
+ const char *keyfile, int keyform, int key_type,
char *passargin, int pkey_op, ENGINE *e,
int impl);
@@ -99,10 +99,12 @@ int MAIN(int argc, char **argv)
char *passargin = NULL;
int keysize = -1;
int engine_impl = 0;
-
unsigned char *buf_in = NULL, *buf_out = NULL, *sig = NULL;
- size_t buf_outlen;
+ size_t buf_outlen = 0;
int buf_inlen = 0, siglen = -1;
+ const char *inkey = NULL;
+ const char *peerkey = NULL;
+ STACK_OF(OPENSSL_STRING) *pkeyopts = NULL;
int ret = 1, rv = -1;
@@ -136,21 +138,13 @@ int MAIN(int argc, char **argv)
} else if (!strcmp(*argv, "-inkey")) {
if (--argc < 1)
badarg = 1;
- else {
- ctx = init_ctx(&keysize,
- *(++argv), keyform, key_type,
- passargin, pkey_op, e, engine_impl);
- if (!ctx) {
- BIO_puts(bio_err, "Error initializing context\n");
- ERR_print_errors(bio_err);
- badarg = 1;
- }
- }
+ else
+ inkey = *++argv;
} else if (!strcmp(*argv, "-peerkey")) {
if (--argc < 1)
badarg = 1;
- else if (!setup_peer(bio_err, ctx, peerform, *(++argv), e))
- badarg = 1;
+ else
+ peerkey = *++argv;
} else if (!strcmp(*argv, "-passin")) {
if (--argc < 1)
badarg = 1;
@@ -191,23 +185,21 @@ int MAIN(int argc, char **argv)
pkey_op = EVP_PKEY_OP_VERIFY;
else if (!strcmp(*argv, "-verifyrecover"))
pkey_op = EVP_PKEY_OP_VERIFYRECOVER;
- else if (!strcmp(*argv, "-rev"))
- rev = 1;
else if (!strcmp(*argv, "-encrypt"))
pkey_op = EVP_PKEY_OP_ENCRYPT;
else if (!strcmp(*argv, "-decrypt"))
pkey_op = EVP_PKEY_OP_DECRYPT;
else if (!strcmp(*argv, "-derive"))
pkey_op = EVP_PKEY_OP_DERIVE;
+ else if (!strcmp(*argv, "-rev"))
+ rev = 1;
else if (strcmp(*argv, "-pkeyopt") == 0) {
if (--argc < 1)
badarg = 1;
- else if (!ctx) {
- BIO_puts(bio_err, "-pkeyopt command before -inkey\n");
- badarg = 1;
- } else if (pkey_ctrl_string(ctx, *(++argv)) <= 0) {
- BIO_puts(bio_err, "parameter setting error\n");
- ERR_print_errors(bio_err);
+ else if ((pkeyopts == NULL &&
+ (pkeyopts = sk_OPENSSL_STRING_new_null()) == NULL) ||
+ sk_OPENSSL_STRING_push(pkeyopts, *++argv) == 0) {
+ BIO_puts(bio_err, "out of memory\n");
goto end;
}
} else
@@ -220,10 +212,37 @@ int MAIN(int argc, char **argv)
argv++;
}
- if (!ctx) {
+ if (inkey == NULL ||
+ (peerkey != NULL && pkey_op != EVP_PKEY_OP_DERIVE)) {
usage();
goto end;
}
+ ctx = init_ctx(&keysize, inkey, keyform, key_type,
+ passargin, pkey_op, e, engine_impl);
+ if (!ctx) {
+ BIO_puts(bio_err, "Error initializing context\n");
+ ERR_print_errors(bio_err);
+ goto end;
+ }
+ if (peerkey != NULL && !setup_peer(bio_err, ctx, peerform, peerkey, e)) {
+ BIO_puts(bio_err, "Error setting up peer key\n");
+ ERR_print_errors(bio_err);
+ goto end;
+ }
+ if (pkeyopts != NULL) {
+ int num = sk_OPENSSL_STRING_num(pkeyopts);
+ int i;
+
+ for (i = 0; i < num; ++i) {
+ const char *opt = sk_OPENSSL_STRING_value(pkeyopts, i);
+
+ if (pkey_ctrl_string(ctx, opt) <= 0) {
+ BIO_puts(bio_err, "parameter setting error\n");
+ ERR_print_errors(bio_err);
+ goto end;
+ }
+ }
+ }
if (sigfile && (pkey_op != EVP_PKEY_OP_VERIFY)) {
BIO_puts(bio_err, "Signature file specified for non verify\n");
@@ -273,7 +292,7 @@ int MAIN(int argc, char **argv)
}
siglen = bio_to_mem(&sig, keysize * 10, sigbio);
BIO_free(sigbio);
- if (siglen <= 0) {
+ if (siglen < 0) {
BIO_printf(bio_err, "Error reading signature data\n");
goto end;
}
@@ -282,7 +301,7 @@ int MAIN(int argc, char **argv)
if (in) {
/* Read the input data */
buf_inlen = bio_to_mem(&buf_in, keysize * 10, in);
- if (buf_inlen <= 0) {
+ if (buf_inlen < 0) {
BIO_printf(bio_err, "Error reading input Data\n");
exit(1);
}
@@ -310,7 +329,7 @@ int MAIN(int argc, char **argv)
} else {
rv = do_keyop(ctx, pkey_op, NULL, (size_t *)&buf_outlen,
buf_in, (size_t)buf_inlen);
- if (rv > 0) {
+ if (rv > 0 && buf_outlen != 0) {
buf_out = OPENSSL_malloc(buf_outlen);
if (!buf_out)
rv = -1;
@@ -340,12 +359,14 @@ int MAIN(int argc, char **argv)
EVP_PKEY_CTX_free(ctx);
BIO_free(in);
BIO_free_all(out);
- if (buf_in)
+ if (buf_in != NULL)
OPENSSL_free(buf_in);
- if (buf_out)
+ if (buf_out != NULL)
OPENSSL_free(buf_out);
- if (sig)
+ if (sig != NULL)
OPENSSL_free(sig);
+ if (pkeyopts != NULL)
+ sk_OPENSSL_STRING_free(pkeyopts);
return ret;
}
@@ -380,7 +401,7 @@ static void usage()
}
static EVP_PKEY_CTX *init_ctx(int *pkeysize,
- char *keyfile, int keyform, int key_type,
+ const char *keyfile, int keyform, int key_type,
char *passargin, int pkey_op, ENGINE *e,
int engine_impl)
{
@@ -484,14 +505,9 @@ static int setup_peer(BIO *err, EVP_PKEY_CTX *ctx, int peerform,
EVP_PKEY *peer = NULL;
ENGINE* engine = NULL;
int ret;
- if (!ctx) {
- BIO_puts(err, "-peerkey command before -inkey\n");
- return 0;
- }
if (peerform == FORMAT_ENGINE)
- engine = e;
-
+ engine = e;
peer = load_pubkey(bio_err, file, peerform, 0, NULL, engine, "Peer Key");
if (!peer) {
diff --git a/apps/req.c b/apps/req.c
index 57781c93c4ca..e818bd2976d6 100644
--- a/apps/req.c
+++ b/apps/req.c
@@ -101,8 +101,8 @@
#define STRING_MASK "string_mask"
#define UTF8_IN "utf8"
-#define DEFAULT_KEY_LENGTH 512
-#define MIN_KEY_LENGTH 384
+#define DEFAULT_KEY_LENGTH 2048
+#define MIN_KEY_LENGTH 512
#undef PROG
#define PROG req_main
diff --git a/apps/rsautl.c b/apps/rsautl.c
index d642f9ad97f3..5b6f849ea74d 100644
--- a/apps/rsautl.c
+++ b/apps/rsautl.c
@@ -250,7 +250,7 @@ int MAIN(int argc, char **argv)
if (outfile) {
if (!(out = BIO_new_file(outfile, "wb"))) {
- BIO_printf(bio_err, "Error Reading Output File\n");
+ BIO_printf(bio_err, "Error Writing Output File\n");
ERR_print_errors(bio_err);
goto end;
}
@@ -276,7 +276,7 @@ int MAIN(int argc, char **argv)
/* Read the input data */
rsa_inlen = BIO_read(in, rsa_in, keysize * 2);
- if (rsa_inlen <= 0) {
+ if (rsa_inlen < 0) {
BIO_printf(bio_err, "Error reading input Data\n");
exit(1);
}
@@ -311,7 +311,7 @@ int MAIN(int argc, char **argv)
}
- if (rsa_outlen <= 0) {
+ if (rsa_outlen < 0) {
BIO_printf(bio_err, "RSA operation error\n");
ERR_print_errors(bio_err);
goto end;
diff --git a/apps/s_client.c b/apps/s_client.c
index caf76d35dc5a..0c1102b9c36a 100644
--- a/apps/s_client.c
+++ b/apps/s_client.c
@@ -391,8 +391,6 @@ static void sc_usage(void)
BIO_printf(bio_err,
" -bugs - Switch on all SSL implementation bug workarounds\n");
BIO_printf(bio_err,
- " -serverpref - Use server's cipher preferences (only SSLv2)\n");
- BIO_printf(bio_err,
" -cipher - preferred cipher to use, use the 'openssl ciphers'\n");
BIO_printf(bio_err,
" command to see what is available\n");
diff --git a/apps/s_server.c b/apps/s_server.c
index 65cbaaf6eb9b..09c755b55cfe 100644
--- a/apps/s_server.c
+++ b/apps/s_server.c
@@ -429,6 +429,8 @@ typedef struct srpsrvparm_st {
static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg)
{
srpsrvparm *p = (srpsrvparm *) arg;
+ int ret = SSL3_AL_FATAL;
+
if (p->login == NULL && p->user == NULL) {
p->login = SSL_get_srp_username(s);
BIO_printf(bio_err, "SRP username = \"%s\"\n", p->login);
@@ -437,21 +439,25 @@ static int MS_CALLBACK ssl_srp_server_param_cb(SSL *s, int *ad, void *arg)
if (p->user == NULL) {
BIO_printf(bio_err, "User %s doesn't exist\n", p->login);
- return SSL3_AL_FATAL;
+ goto err;
}
+
if (SSL_set_srp_server_param
(s, p->user->N, p->user->g, p->user->s, p->user->v,
p->user->info) < 0) {
*ad = SSL_AD_INTERNAL_ERROR;
- return SSL3_AL_FATAL;
+ goto err;
}
BIO_printf(bio_err,
"SRP parameters set: username = \"%s\" info=\"%s\" \n",
p->login, p->user->info);
- /* need to check whether there are memory leaks */
+ ret = SSL_ERROR_NONE;
+
+err:
+ SRP_user_pwd_free(p->user);
p->user = NULL;
p->login = NULL;
- return SSL_ERROR_NONE;
+ return ret;
}
#endif
@@ -2452,9 +2458,10 @@ static int sv_body(char *hostname, int s, int stype, unsigned char *context)
#ifndef OPENSSL_NO_SRP
while (SSL_get_error(con, k) == SSL_ERROR_WANT_X509_LOOKUP) {
BIO_printf(bio_s_out, "LOOKUP renego during write\n");
+ SRP_user_pwd_free(srp_callback_parm.user);
srp_callback_parm.user =
- SRP_VBASE_get_by_user(srp_callback_parm.vb,
- srp_callback_parm.login);
+ SRP_VBASE_get1_by_user(srp_callback_parm.vb,
+ srp_callback_parm.login);
if (srp_callback_parm.user)
BIO_printf(bio_s_out, "LOOKUP done %s\n",
srp_callback_parm.user->info);
@@ -2508,9 +2515,10 @@ static int sv_body(char *hostname, int s, int stype, unsigned char *context)
#ifndef OPENSSL_NO_SRP
while (SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP) {
BIO_printf(bio_s_out, "LOOKUP renego during read\n");
+ SRP_user_pwd_free(srp_callback_parm.user);
srp_callback_parm.user =
- SRP_VBASE_get_by_user(srp_callback_parm.vb,
- srp_callback_parm.login);
+ SRP_VBASE_get1_by_user(srp_callback_parm.vb,
+ srp_callback_parm.login);
if (srp_callback_parm.user)
BIO_printf(bio_s_out, "LOOKUP done %s\n",
srp_callback_parm.user->info);
@@ -2605,9 +2613,10 @@ static int init_ssl_connection(SSL *con)
while (i <= 0 && SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP) {
BIO_printf(bio_s_out, "LOOKUP during accept %s\n",
srp_callback_parm.login);
+ SRP_user_pwd_free(srp_callback_parm.user);
srp_callback_parm.user =
- SRP_VBASE_get_by_user(srp_callback_parm.vb,
- srp_callback_parm.login);
+ SRP_VBASE_get1_by_user(srp_callback_parm.vb,
+ srp_callback_parm.login);
if (srp_callback_parm.user)
BIO_printf(bio_s_out, "LOOKUP done %s\n",
srp_callback_parm.user->info);
@@ -2849,9 +2858,10 @@ static int www_body(char *hostname, int s, int stype, unsigned char *context)
&& SSL_get_error(con, i) == SSL_ERROR_WANT_X509_LOOKUP) {
BIO_printf(bio_s_out, "LOOKUP during accept %s\n",
srp_callback_parm.login);
+ SRP_user_pwd_free(srp_callback_parm.user);
srp_callback_parm.user =
- SRP_VBASE_get_by_user(srp_callback_parm.vb,
- srp_callback_parm.login);
+ SRP_VBASE_get1_by_user(srp_callback_parm.vb,
+ srp_callback_parm.login);
if (srp_callback_parm.user)
BIO_printf(bio_s_out, "LOOKUP done %s\n",
srp_callback_parm.user->info);
@@ -2891,9 +2901,10 @@ static int www_body(char *hostname, int s, int stype, unsigned char *context)
if (BIO_should_io_special(io)
&& BIO_get_retry_reason(io) == BIO_RR_SSL_X509_LOOKUP) {
BIO_printf(bio_s_out, "LOOKUP renego during read\n");
+ SRP_user_pwd_free(srp_callback_parm.user);
srp_callback_parm.user =
- SRP_VBASE_get_by_user(srp_callback_parm.vb,
- srp_callback_parm.login);
+ SRP_VBASE_get1_by_user(srp_callback_parm.vb,
+ srp_callback_parm.login);
if (srp_callback_parm.user)
BIO_printf(bio_s_out, "LOOKUP done %s\n",
srp_callback_parm.user->info);
@@ -3236,9 +3247,10 @@ static int rev_body(char *hostname, int s, int stype, unsigned char *context)
if (BIO_should_io_special(io)
&& BIO_get_retry_reason(io) == BIO_RR_SSL_X509_LOOKUP) {
BIO_printf(bio_s_out, "LOOKUP renego during accept\n");
+ SRP_user_pwd_free(srp_callback_parm.user);
srp_callback_parm.user =
- SRP_VBASE_get_by_user(srp_callback_parm.vb,
- srp_callback_parm.login);
+ SRP_VBASE_get1_by_user(srp_callback_parm.vb,
+ srp_callback_parm.login);
if (srp_callback_parm.user)
BIO_printf(bio_s_out, "LOOKUP done %s\n",
srp_callback_parm.user->info);
@@ -3264,9 +3276,10 @@ static int rev_body(char *hostname, int s, int stype, unsigned char *context)
if (BIO_should_io_special(io)
&& BIO_get_retry_reason(io) == BIO_RR_SSL_X509_LOOKUP) {
BIO_printf(bio_s_out, "LOOKUP renego during read\n");
+ SRP_user_pwd_free(srp_callback_parm.user);
srp_callback_parm.user =
- SRP_VBASE_get_by_user(srp_callback_parm.vb,
- srp_callback_parm.login);
+ SRP_VBASE_get1_by_user(srp_callback_parm.vb,
+ srp_callback_parm.login);
if (srp_callback_parm.user)
BIO_printf(bio_s_out, "LOOKUP done %s\n",
srp_callback_parm.user->info);
diff --git a/config b/config
index 77f730f093e6..bba370c4f3f1 100755
--- a/config
+++ b/config
@@ -852,7 +852,8 @@ case "$GUESSOS" in
# *-dgux) OUT="dgux" ;;
mips-sony-newsos4) OUT="newsos4-gcc" ;;
*-*-cygwin_pre1.3) OUT="Cygwin-pre1.3" ;;
- *-*-cygwin) OUT="Cygwin" ;;
+ i[3456]86-*-cygwin) OUT="Cygwin" ;;
+ *-*-cygwin) OUT="Cygwin-${MACHINE}" ;;
t3e-cray-unicosmk) OUT="cray-t3e" ;;
j90-cray-unicos) OUT="cray-j90" ;;
nsr-tandem-nsk) OUT="tandem-c89" ;;
diff --git a/crypto/asn1/tasn_dec.c b/crypto/asn1/tasn_dec.c
index 9256049d1588..5a507967c894 100644
--- a/crypto/asn1/tasn_dec.c
+++ b/crypto/asn1/tasn_dec.c
@@ -717,7 +717,7 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval,
long plen;
char cst, inf, free_cont = 0;
const unsigned char *p;
- BUF_MEM buf;
+ BUF_MEM buf = { 0, NULL, 0 };
const unsigned char *cont = NULL;
long len;
if (!pval) {
@@ -793,7 +793,6 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval,
} else {
len = p - cont + plen;
p += plen;
- buf.data = NULL;
}
} else if (cst) {
if (utype == V_ASN1_NULL || utype == V_ASN1_BOOLEAN
@@ -802,9 +801,9 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval,
ASN1err(ASN1_F_ASN1_D2I_EX_PRIMITIVE, ASN1_R_TYPE_NOT_PRIMITIVE);
return 0;
}
- buf.length = 0;
- buf.max = 0;
- buf.data = NULL;
+
+ /* Free any returned 'buf' content */
+ free_cont = 1;
/*
* Should really check the internal tags are correct but some things
* may get this wrong. The relevant specs say that constructed string
@@ -812,18 +811,16 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval,
* So instead just check for UNIVERSAL class and ignore the tag.
*/
if (!asn1_collect(&buf, &p, plen, inf, -1, V_ASN1_UNIVERSAL, 0)) {
- free_cont = 1;
goto err;
}
len = buf.length;
/* Append a final null to string */
if (!BUF_MEM_grow_clean(&buf, len + 1)) {
ASN1err(ASN1_F_ASN1_D2I_EX_PRIMITIVE, ERR_R_MALLOC_FAILURE);
- return 0;
+ goto err;
}
buf.data[len] = 0;
cont = (const unsigned char *)buf.data;
- free_cont = 1;
} else {
cont = p;
len = plen;
@@ -831,6 +828,7 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval,
}
/* We now have content length and type: translate into a structure */
+ /* asn1_ex_c2i may reuse allocated buffer, and so sets free_cont to 0 */
if (!asn1_ex_c2i(pval, cont, len, utype, &free_cont, it))
goto err;
diff --git a/crypto/bio/b_print.c b/crypto/bio/b_print.c
index 7c81e25d482c..90248fa2aaba 100644
--- a/crypto/bio/b_print.c
+++ b/crypto/bio/b_print.c
@@ -125,16 +125,16 @@
# define LLONG long
#endif
-static void fmtstr(char **, char **, size_t *, size_t *,
- const char *, int, int, int);
-static void fmtint(char **, char **, size_t *, size_t *,
- LLONG, int, int, int, int);
-static void fmtfp(char **, char **, size_t *, size_t *,
- LDOUBLE, int, int, int);
-static void doapr_outch(char **, char **, size_t *, size_t *, int);
-static void _dopr(char **sbuffer, char **buffer,
- size_t *maxlen, size_t *retlen, int *truncated,
- const char *format, va_list args);
+static int fmtstr(char **, char **, size_t *, size_t *,
+ const char *, int, int, int);
+static int fmtint(char **, char **, size_t *, size_t *,
+ LLONG, int, int, int, int);
+static int fmtfp(char **, char **, size_t *, size_t *,
+ LDOUBLE, int, int, int);
+static int doapr_outch(char **, char **, size_t *, size_t *, int);
+static int _dopr(char **sbuffer, char **buffer,
+ size_t *maxlen, size_t *retlen, int *truncated,
+ const char *format, va_list args);
/* format read states */
#define DP_S_DEFAULT 0
@@ -165,7 +165,7 @@ static void _dopr(char **sbuffer, char **buffer,
#define char_to_int(p) (p - '0')
#define OSSL_MAX(p,q) ((p >= q) ? p : q)
-static void
+static int
_dopr(char **sbuffer,
char **buffer,
size_t *maxlen,
@@ -196,7 +196,8 @@ _dopr(char **sbuffer,
if (ch == '%')
state = DP_S_FLAGS;
else
- doapr_outch(sbuffer, buffer, &currlen, maxlen, ch);
+ if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, ch))
+ return 0;
ch = *format++;
break;
case DP_S_FLAGS:
@@ -302,8 +303,9 @@ _dopr(char **sbuffer,
value = va_arg(args, int);
break;
}
- fmtint(sbuffer, buffer, &currlen, maxlen,
- value, 10, min, max, flags);
+ if (!fmtint(sbuffer, buffer, &currlen, maxlen, value, 10, min,
+ max, flags))
+ return 0;
break;
case 'X':
flags |= DP_F_UP;
@@ -326,17 +328,19 @@ _dopr(char **sbuffer,
value = (LLONG) va_arg(args, unsigned int);
break;
}
- fmtint(sbuffer, buffer, &currlen, maxlen, value,
- ch == 'o' ? 8 : (ch == 'u' ? 10 : 16),
- min, max, flags);
+ if (!fmtint(sbuffer, buffer, &currlen, maxlen, value,
+ ch == 'o' ? 8 : (ch == 'u' ? 10 : 16),
+ min, max, flags))
+ return 0;
break;
case 'f':
if (cflags == DP_C_LDOUBLE)
fvalue = va_arg(args, LDOUBLE);
else
fvalue = va_arg(args, double);
- fmtfp(sbuffer, buffer, &currlen, maxlen,
- fvalue, min, max, flags);
+ if (!fmtfp(sbuffer, buffer, &currlen, maxlen, fvalue, min, max,
+ flags))
+ return 0;
break;
case 'E':
flags |= DP_F_UP;
@@ -355,8 +359,9 @@ _dopr(char **sbuffer,
fvalue = va_arg(args, double);
break;
case 'c':
- doapr_outch(sbuffer, buffer, &currlen, maxlen,
- va_arg(args, int));
+ if(!doapr_outch(sbuffer, buffer, &currlen, maxlen,
+ va_arg(args, int)))
+ return 0;
break;
case 's':
strvalue = va_arg(args, char *);
@@ -366,13 +371,15 @@ _dopr(char **sbuffer,
else
max = *maxlen;
}
- fmtstr(sbuffer, buffer, &currlen, maxlen, strvalue,
- flags, min, max);
+ if (!fmtstr(sbuffer, buffer, &currlen, maxlen, strvalue,
+ flags, min, max))
+ return 0;
break;
case 'p':
value = (long)va_arg(args, void *);
- fmtint(sbuffer, buffer, &currlen, maxlen,
- value, 16, min, max, flags | DP_F_NUM);
+ if (!fmtint(sbuffer, buffer, &currlen, maxlen,
+ value, 16, min, max, flags | DP_F_NUM))
+ return 0;
break;
case 'n': /* XXX */
if (cflags == DP_C_SHORT) {
@@ -394,7 +401,8 @@ _dopr(char **sbuffer,
}
break;
case '%':
- doapr_outch(sbuffer, buffer, &currlen, maxlen, ch);
+ if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, ch))
+ return 0;
break;
case 'w':
/* not supported yet, treat as next char */
@@ -418,46 +426,56 @@ _dopr(char **sbuffer,
*truncated = (currlen > *maxlen - 1);
if (*truncated)
currlen = *maxlen - 1;
- doapr_outch(sbuffer, buffer, &currlen, maxlen, '\0');
+ if(!doapr_outch(sbuffer, buffer, &currlen, maxlen, '\0'))
+ return 0;
*retlen = currlen - 1;
- return;
+ return 1;
}
-static void
+static int
fmtstr(char **sbuffer,
char **buffer,
size_t *currlen,
size_t *maxlen, const char *value, int flags, int min, int max)
{
- int padlen, strln;
+ int padlen;
+ size_t strln;
int cnt = 0;
if (value == 0)
value = "<NULL>";
- for (strln = 0; value[strln]; ++strln) ;
+
+ strln = strlen(value);
+ if (strln > INT_MAX)
+ strln = INT_MAX;
+
padlen = min - strln;
- if (padlen < 0)
+ if (min < 0 || padlen < 0)
padlen = 0;
if (flags & DP_F_MINUS)
padlen = -padlen;
while ((padlen > 0) && (cnt < max)) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, ' ');
+ if(!doapr_outch(sbuffer, buffer, currlen, maxlen, ' '))
+ return 0;
--padlen;
++cnt;
}
while (*value && (cnt < max)) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, *value++);
+ if(!doapr_outch(sbuffer, buffer, currlen, maxlen, *value++))
+ return 0;
++cnt;
}
while ((padlen < 0) && (cnt < max)) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, ' ');
+ if(!doapr_outch(sbuffer, buffer, currlen, maxlen, ' '))
+ return 0;
++padlen;
++cnt;
}
+ return 1;
}
-static void
+static int
fmtint(char **sbuffer,
char **buffer,
size_t *currlen,
@@ -517,37 +535,44 @@ fmtint(char **sbuffer,
/* spaces */
while (spadlen > 0) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, ' ');
+ if(!doapr_outch(sbuffer, buffer, currlen, maxlen, ' '))
+ return 0;
--spadlen;
}
/* sign */
if (signvalue)
- doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue);
+ if(!doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue))
+ return 0;
/* prefix */
while (*prefix) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, *prefix);
+ if(!doapr_outch(sbuffer, buffer, currlen, maxlen, *prefix))
+ return 0;
prefix++;
}
/* zeros */
if (zpadlen > 0) {
while (zpadlen > 0) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, '0');
+ if(!doapr_outch(sbuffer, buffer, currlen, maxlen, '0'))
+ return 0;
--zpadlen;
}
}
/* digits */
- while (place > 0)
- doapr_outch(sbuffer, buffer, currlen, maxlen, convert[--place]);
+ while (place > 0) {
+ if (!doapr_outch(sbuffer, buffer, currlen, maxlen, convert[--place]))
+ return 0;
+ }
/* left justified spaces */
while (spadlen < 0) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, ' ');
+ if (!doapr_outch(sbuffer, buffer, currlen, maxlen, ' '))
+ return 0;
++spadlen;
}
- return;
+ return 1;
}
static LDOUBLE abs_val(LDOUBLE value)
@@ -578,7 +603,7 @@ static long roundv(LDOUBLE value)
return intpart;
}
-static void
+static int
fmtfp(char **sbuffer,
char **buffer,
size_t *currlen,
@@ -657,47 +682,61 @@ fmtfp(char **sbuffer,
if ((flags & DP_F_ZERO) && (padlen > 0)) {
if (signvalue) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue);
+ if (!doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue))
+ return 0;
--padlen;
signvalue = 0;
}
while (padlen > 0) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, '0');
+ if (!doapr_outch(sbuffer, buffer, currlen, maxlen, '0'))
+ return 0;
--padlen;
}
}
while (padlen > 0) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, ' ');
+ if (!doapr_outch(sbuffer, buffer, currlen, maxlen, ' '))
+ return 0;
--padlen;
}
- if (signvalue)
- doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue);
+ if (signvalue && !doapr_outch(sbuffer, buffer, currlen, maxlen, signvalue))
+ return 0;
- while (iplace > 0)
- doapr_outch(sbuffer, buffer, currlen, maxlen, iconvert[--iplace]);
+ while (iplace > 0) {
+ if (!doapr_outch(sbuffer, buffer, currlen, maxlen, iconvert[--iplace]))
+ return 0;
+ }
/*
* Decimal point. This should probably use locale to find the correct
* char to print out.
*/
if (max > 0 || (flags & DP_F_NUM)) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, '.');
+ if (!doapr_outch(sbuffer, buffer, currlen, maxlen, '.'))
+ return 0;
- while (fplace > 0)
- doapr_outch(sbuffer, buffer, currlen, maxlen, fconvert[--fplace]);
+ while (fplace > 0) {
+ if(!doapr_outch(sbuffer, buffer, currlen, maxlen,
+ fconvert[--fplace]))
+ return 0;
+ }
}
while (zpadlen > 0) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, '0');
+ if (!doapr_outch(sbuffer, buffer, currlen, maxlen, '0'))
+ return 0;
--zpadlen;
}
while (padlen < 0) {
- doapr_outch(sbuffer, buffer, currlen, maxlen, ' ');
+ if (!doapr_outch(sbuffer, buffer, currlen, maxlen, ' '))
+ return 0;
++padlen;
}
+ return 1;
}
-static void
+#define BUFFER_INC 1024
+
+static int
doapr_outch(char **sbuffer,
char **buffer, size_t *currlen, size_t *maxlen, int c)
{
@@ -708,24 +747,25 @@ doapr_outch(char **sbuffer,
assert(*currlen <= *maxlen);
if (buffer && *currlen == *maxlen) {
- *maxlen += 1024;
+ if (*maxlen > INT_MAX - BUFFER_INC)
+ return 0;
+
+ *maxlen += BUFFER_INC;
if (*buffer == NULL) {
*buffer = OPENSSL_malloc(*maxlen);
- if (!*buffer) {
- /* Panic! Can't really do anything sensible. Just return */
- return;
- }
+ if (*buffer == NULL)
+ return 0;
if (*currlen > 0) {
assert(*sbuffer != NULL);
memcpy(*buffer, *sbuffer, *currlen);
}
*sbuffer = NULL;
} else {
- *buffer = OPENSSL_realloc(*buffer, *maxlen);
- if (!*buffer) {
- /* Panic! Can't really do anything sensible. Just return */
- return;
- }
+ char *tmpbuf;
+ tmpbuf = OPENSSL_realloc(*buffer, *maxlen);
+ if (tmpbuf == NULL)
+ return 0;
+ *buffer = tmpbuf;
}
}
@@ -736,7 +776,7 @@ doapr_outch(char **sbuffer,
(*buffer)[(*currlen)++] = (char)c;
}
- return;
+ return 1;
}
/***************************************************************************/
@@ -768,7 +808,11 @@ int BIO_vprintf(BIO *bio, const char *format, va_list args)
dynbuf = NULL;
CRYPTO_push_info("doapr()");
- _dopr(&hugebufp, &dynbuf, &hugebufsize, &retlen, &ignored, format, args);
+ if (!_dopr(&hugebufp, &dynbuf, &hugebufsize, &retlen, &ignored, format,
+ args)) {
+ OPENSSL_free(dynbuf);
+ return -1;
+ }
if (dynbuf) {
ret = BIO_write(bio, dynbuf, (int)retlen);
OPENSSL_free(dynbuf);
@@ -803,7 +847,8 @@ int BIO_vsnprintf(char *buf, size_t n, const char *format, va_list args)
size_t retlen;
int truncated;
- _dopr(&buf, NULL, &n, &retlen, &truncated, format, args);
+ if(!_dopr(&buf, NULL, &n, &retlen, &truncated, format, args))
+ return -1;
if (truncated)
/*
diff --git a/crypto/bio/bio.h b/crypto/bio/bio.h
index 6e2293bc66da..6790aed28e0b 100644
--- a/crypto/bio/bio.h
+++ b/crypto/bio/bio.h
@@ -479,7 +479,7 @@ struct bio_dgram_sctp_prinfo {
# define BIO_get_conn_hostname(b) BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,0)
# define BIO_get_conn_port(b) BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,1)
# define BIO_get_conn_ip(b) BIO_ptr_ctrl(b,BIO_C_GET_CONNECT,2)
-# define BIO_get_conn_int_port(b) BIO_ctrl(b,BIO_C_GET_CONNECT,3,0,NULL)
+# define BIO_get_conn_int_port(b) BIO_ctrl(b,BIO_C_GET_CONNECT,3,NULL)
# define BIO_set_nbio(b,n) BIO_ctrl(b,BIO_C_SET_NBIO,(n),NULL)
@@ -689,7 +689,7 @@ long BIO_debug_callback(BIO *bio, int cmd, const char *argp, int argi,
long argl, long ret);
BIO_METHOD *BIO_s_mem(void);
-BIO *BIO_new_mem_buf(void *buf, int len);
+BIO *BIO_new_mem_buf(const void *buf, int len);
BIO_METHOD *BIO_s_socket(void);
BIO_METHOD *BIO_s_connect(void);
BIO_METHOD *BIO_s_accept(void);
diff --git a/crypto/bio/bss_mem.c b/crypto/bio/bss_mem.c
index d190765dc201..b0394a960da1 100644
--- a/crypto/bio/bss_mem.c
+++ b/crypto/bio/bss_mem.c
@@ -91,7 +91,8 @@ BIO_METHOD *BIO_s_mem(void)
return (&mem_method);
}
-BIO *BIO_new_mem_buf(void *buf, int len)
+
+BIO *BIO_new_mem_buf(const void *buf, int len)
{
BIO *ret;
BUF_MEM *b;
@@ -105,7 +106,8 @@ BIO *BIO_new_mem_buf(void *buf, int len)
if (!(ret = BIO_new(BIO_s_mem())))
return NULL;
b = (BUF_MEM *)ret->ptr;
- b->data = buf;
+ /* Cast away const and trust in the MEM_RDONLY flag. */
+ b->data = (void *)buf;
b->length = sz;
b->max = sz;
ret->flags |= BIO_FLAGS_MEM_RDONLY;
diff --git a/crypto/bn/Makefile b/crypto/bn/Makefile
index 215855ecae91..c4c640951759 100644
--- a/crypto/bn/Makefile
+++ b/crypto/bn/Makefile
@@ -252,8 +252,8 @@ bn_exp.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
bn_exp.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
bn_exp.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
bn_exp.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
-bn_exp.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_exp.c bn_lcl.h
-bn_exp.o: rsaz_exp.h
+bn_exp.o: ../../include/openssl/symhacks.h ../constant_time_locl.h
+bn_exp.o: ../cryptlib.h bn_exp.c bn_lcl.h rsaz_exp.h
bn_exp2.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
bn_exp2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
bn_exp2.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
diff --git a/crypto/bn/asm/rsaz-avx2.pl b/crypto/bn/asm/rsaz-avx2.pl
index 3b6ccf83d13e..712a77fe8ca3 100755
--- a/crypto/bn/asm/rsaz-avx2.pl
+++ b/crypto/bn/asm/rsaz-avx2.pl
@@ -443,7 +443,7 @@ $TEMP2 = $B2;
$TEMP3 = $Y1;
$TEMP4 = $Y2;
$code.=<<___;
- #we need to fix indexes 32-39 to avoid overflow
+ # we need to fix indices 32-39 to avoid overflow
vmovdqu 32*8(%rsp), $ACC8 # 32*8-192($tp0),
vmovdqu 32*9(%rsp), $ACC1 # 32*9-192($tp0)
vmovdqu 32*10(%rsp), $ACC2 # 32*10-192($tp0)
@@ -1592,68 +1592,128 @@ rsaz_1024_scatter5_avx2:
.type rsaz_1024_gather5_avx2,\@abi-omnipotent
.align 32
rsaz_1024_gather5_avx2:
+ vzeroupper
+ mov %rsp,%r11
___
$code.=<<___ if ($win64);
lea -0x88(%rsp),%rax
- vzeroupper
.LSEH_begin_rsaz_1024_gather5:
# I can't trust assembler to use specific encoding:-(
- .byte 0x48,0x8d,0x60,0xe0 #lea -0x20(%rax),%rsp
- .byte 0xc5,0xf8,0x29,0x70,0xe0 #vmovaps %xmm6,-0x20(%rax)
- .byte 0xc5,0xf8,0x29,0x78,0xf0 #vmovaps %xmm7,-0x10(%rax)
- .byte 0xc5,0x78,0x29,0x40,0x00 #vmovaps %xmm8,0(%rax)
- .byte 0xc5,0x78,0x29,0x48,0x10 #vmovaps %xmm9,0x10(%rax)
- .byte 0xc5,0x78,0x29,0x50,0x20 #vmovaps %xmm10,0x20(%rax)
- .byte 0xc5,0x78,0x29,0x58,0x30 #vmovaps %xmm11,0x30(%rax)
- .byte 0xc5,0x78,0x29,0x60,0x40 #vmovaps %xmm12,0x40(%rax)
- .byte 0xc5,0x78,0x29,0x68,0x50 #vmovaps %xmm13,0x50(%rax)
- .byte 0xc5,0x78,0x29,0x70,0x60 #vmovaps %xmm14,0x60(%rax)
- .byte 0xc5,0x78,0x29,0x78,0x70 #vmovaps %xmm15,0x70(%rax)
+ .byte 0x48,0x8d,0x60,0xe0 # lea -0x20(%rax),%rsp
+ .byte 0xc5,0xf8,0x29,0x70,0xe0 # vmovaps %xmm6,-0x20(%rax)
+ .byte 0xc5,0xf8,0x29,0x78,0xf0 # vmovaps %xmm7,-0x10(%rax)
+ .byte 0xc5,0x78,0x29,0x40,0x00 # vmovaps %xmm8,0(%rax)
+ .byte 0xc5,0x78,0x29,0x48,0x10 # vmovaps %xmm9,0x10(%rax)
+ .byte 0xc5,0x78,0x29,0x50,0x20 # vmovaps %xmm10,0x20(%rax)
+ .byte 0xc5,0x78,0x29,0x58,0x30 # vmovaps %xmm11,0x30(%rax)
+ .byte 0xc5,0x78,0x29,0x60,0x40 # vmovaps %xmm12,0x40(%rax)
+ .byte 0xc5,0x78,0x29,0x68,0x50 # vmovaps %xmm13,0x50(%rax)
+ .byte 0xc5,0x78,0x29,0x70,0x60 # vmovaps %xmm14,0x60(%rax)
+ .byte 0xc5,0x78,0x29,0x78,0x70 # vmovaps %xmm15,0x70(%rax)
___
$code.=<<___;
- lea .Lgather_table(%rip),%r11
- mov $power,%eax
- and \$3,$power
- shr \$2,%eax # cache line number
- shl \$4,$power # offset within cache line
-
- vmovdqu -32(%r11),%ymm7 # .Lgather_permd
- vpbroadcastb 8(%r11,%rax), %xmm8
- vpbroadcastb 7(%r11,%rax), %xmm9
- vpbroadcastb 6(%r11,%rax), %xmm10
- vpbroadcastb 5(%r11,%rax), %xmm11
- vpbroadcastb 4(%r11,%rax), %xmm12
- vpbroadcastb 3(%r11,%rax), %xmm13
- vpbroadcastb 2(%r11,%rax), %xmm14
- vpbroadcastb 1(%r11,%rax), %xmm15
-
- lea 64($inp,$power),$inp
- mov \$64,%r11 # size optimization
- mov \$9,%eax
- jmp .Loop_gather_1024
+ lea -0x100(%rsp),%rsp
+ and \$-32, %rsp
+ lea .Linc(%rip), %r10
+ lea -128(%rsp),%rax # control u-op density
+
+ vmovd $power, %xmm4
+ vmovdqa (%r10),%ymm0
+ vmovdqa 32(%r10),%ymm1
+ vmovdqa 64(%r10),%ymm5
+ vpbroadcastd %xmm4,%ymm4
+
+ vpaddd %ymm5, %ymm0, %ymm2
+ vpcmpeqd %ymm4, %ymm0, %ymm0
+ vpaddd %ymm5, %ymm1, %ymm3
+ vpcmpeqd %ymm4, %ymm1, %ymm1
+ vmovdqa %ymm0, 32*0+128(%rax)
+ vpaddd %ymm5, %ymm2, %ymm0
+ vpcmpeqd %ymm4, %ymm2, %ymm2
+ vmovdqa %ymm1, 32*1+128(%rax)
+ vpaddd %ymm5, %ymm3, %ymm1
+ vpcmpeqd %ymm4, %ymm3, %ymm3
+ vmovdqa %ymm2, 32*2+128(%rax)
+ vpaddd %ymm5, %ymm0, %ymm2
+ vpcmpeqd %ymm4, %ymm0, %ymm0
+ vmovdqa %ymm3, 32*3+128(%rax)
+ vpaddd %ymm5, %ymm1, %ymm3
+ vpcmpeqd %ymm4, %ymm1, %ymm1
+ vmovdqa %ymm0, 32*4+128(%rax)
+ vpaddd %ymm5, %ymm2, %ymm8
+ vpcmpeqd %ymm4, %ymm2, %ymm2
+ vmovdqa %ymm1, 32*5+128(%rax)
+ vpaddd %ymm5, %ymm3, %ymm9
+ vpcmpeqd %ymm4, %ymm3, %ymm3
+ vmovdqa %ymm2, 32*6+128(%rax)
+ vpaddd %ymm5, %ymm8, %ymm10
+ vpcmpeqd %ymm4, %ymm8, %ymm8
+ vmovdqa %ymm3, 32*7+128(%rax)
+ vpaddd %ymm5, %ymm9, %ymm11
+ vpcmpeqd %ymm4, %ymm9, %ymm9
+ vpaddd %ymm5, %ymm10, %ymm12
+ vpcmpeqd %ymm4, %ymm10, %ymm10
+ vpaddd %ymm5, %ymm11, %ymm13
+ vpcmpeqd %ymm4, %ymm11, %ymm11
+ vpaddd %ymm5, %ymm12, %ymm14
+ vpcmpeqd %ymm4, %ymm12, %ymm12
+ vpaddd %ymm5, %ymm13, %ymm15
+ vpcmpeqd %ymm4, %ymm13, %ymm13
+ vpcmpeqd %ymm4, %ymm14, %ymm14
+ vpcmpeqd %ymm4, %ymm15, %ymm15
+
+ vmovdqa -32(%r10),%ymm7 # .Lgather_permd
+ lea 128($inp), $inp
+ mov \$9,$power
-.align 32
.Loop_gather_1024:
- vpand -64($inp), %xmm8,%xmm0
- vpand ($inp), %xmm9,%xmm1
- vpand 64($inp), %xmm10,%xmm2
- vpand ($inp,%r11,2), %xmm11,%xmm3
- vpor %xmm0,%xmm1,%xmm1
- vpand 64($inp,%r11,2), %xmm12,%xmm4
- vpor %xmm2,%xmm3,%xmm3
- vpand ($inp,%r11,4), %xmm13,%xmm5
- vpor %xmm1,%xmm3,%xmm3
- vpand 64($inp,%r11,4), %xmm14,%xmm6
- vpor %xmm4,%xmm5,%xmm5
- vpand -128($inp,%r11,8), %xmm15,%xmm2
- lea ($inp,%r11,8),$inp
- vpor %xmm3,%xmm5,%xmm5
- vpor %xmm2,%xmm6,%xmm6
- vpor %xmm5,%xmm6,%xmm6
- vpermd %ymm6,%ymm7,%ymm6
- vmovdqu %ymm6,($out)
+ vmovdqa 32*0-128($inp), %ymm0
+ vmovdqa 32*1-128($inp), %ymm1
+ vmovdqa 32*2-128($inp), %ymm2
+ vmovdqa 32*3-128($inp), %ymm3
+ vpand 32*0+128(%rax), %ymm0, %ymm0
+ vpand 32*1+128(%rax), %ymm1, %ymm1
+ vpand 32*2+128(%rax), %ymm2, %ymm2
+ vpor %ymm0, %ymm1, %ymm4
+ vpand 32*3+128(%rax), %ymm3, %ymm3
+ vmovdqa 32*4-128($inp), %ymm0
+ vmovdqa 32*5-128($inp), %ymm1
+ vpor %ymm2, %ymm3, %ymm5
+ vmovdqa 32*6-128($inp), %ymm2
+ vmovdqa 32*7-128($inp), %ymm3
+ vpand 32*4+128(%rax), %ymm0, %ymm0
+ vpand 32*5+128(%rax), %ymm1, %ymm1
+ vpand 32*6+128(%rax), %ymm2, %ymm2
+ vpor %ymm0, %ymm4, %ymm4
+ vpand 32*7+128(%rax), %ymm3, %ymm3
+ vpand 32*8-128($inp), %ymm8, %ymm0
+ vpor %ymm1, %ymm5, %ymm5
+ vpand 32*9-128($inp), %ymm9, %ymm1
+ vpor %ymm2, %ymm4, %ymm4
+ vpand 32*10-128($inp),%ymm10, %ymm2
+ vpor %ymm3, %ymm5, %ymm5
+ vpand 32*11-128($inp),%ymm11, %ymm3
+ vpor %ymm0, %ymm4, %ymm4
+ vpand 32*12-128($inp),%ymm12, %ymm0
+ vpor %ymm1, %ymm5, %ymm5
+ vpand 32*13-128($inp),%ymm13, %ymm1
+ vpor %ymm2, %ymm4, %ymm4
+ vpand 32*14-128($inp),%ymm14, %ymm2
+ vpor %ymm3, %ymm5, %ymm5
+ vpand 32*15-128($inp),%ymm15, %ymm3
+ lea 32*16($inp), $inp
+ vpor %ymm0, %ymm4, %ymm4
+ vpor %ymm1, %ymm5, %ymm5
+ vpor %ymm2, %ymm4, %ymm4
+ vpor %ymm3, %ymm5, %ymm5
+
+ vpor %ymm5, %ymm4, %ymm4
+ vextracti128 \$1, %ymm4, %xmm5 # upper half is cleared
+ vpor %xmm4, %xmm5, %xmm5
+ vpermd %ymm5,%ymm7,%ymm5
+ vmovdqu %ymm5,($out)
lea 32($out),$out
- dec %eax
+ dec $power
jnz .Loop_gather_1024
vpxor %ymm0,%ymm0,%ymm0
@@ -1661,20 +1721,20 @@ $code.=<<___;
vzeroupper
___
$code.=<<___ if ($win64);
- movaps (%rsp),%xmm6
- movaps 0x10(%rsp),%xmm7
- movaps 0x20(%rsp),%xmm8
- movaps 0x30(%rsp),%xmm9
- movaps 0x40(%rsp),%xmm10
- movaps 0x50(%rsp),%xmm11
- movaps 0x60(%rsp),%xmm12
- movaps 0x70(%rsp),%xmm13
- movaps 0x80(%rsp),%xmm14
- movaps 0x90(%rsp),%xmm15
- lea 0xa8(%rsp),%rsp
+ movaps -0xa8(%r11),%xmm6
+ movaps -0x98(%r11),%xmm7
+ movaps -0x88(%r11),%xmm8
+ movaps -0x78(%r11),%xmm9
+ movaps -0x68(%r11),%xmm10
+ movaps -0x58(%r11),%xmm11
+ movaps -0x48(%r11),%xmm12
+ movaps -0x38(%r11),%xmm13
+ movaps -0x28(%r11),%xmm14
+ movaps -0x18(%r11),%xmm15
.LSEH_end_rsaz_1024_gather5:
___
$code.=<<___;
+ lea (%r11),%rsp
ret
.size rsaz_1024_gather5_avx2,.-rsaz_1024_gather5_avx2
___
@@ -1708,8 +1768,10 @@ $code.=<<___;
.long 0,2,4,6,7,7,7,7
.Lgather_permd:
.long 0,7,1,7,2,7,3,7
-.Lgather_table:
- .byte 0,0,0,0,0,0,0,0, 0xff,0,0,0,0,0,0,0
+.Linc:
+ .long 0,0,0,0, 1,1,1,1
+ .long 2,2,2,2, 3,3,3,3
+ .long 4,4,4,4, 4,4,4,4
.align 64
___
@@ -1837,18 +1899,19 @@ rsaz_se_handler:
.rva rsaz_se_handler
.rva .Lmul_1024_body,.Lmul_1024_epilogue
.LSEH_info_rsaz_1024_gather5:
- .byte 0x01,0x33,0x16,0x00
- .byte 0x36,0xf8,0x09,0x00 #vmovaps 0x90(rsp),xmm15
- .byte 0x31,0xe8,0x08,0x00 #vmovaps 0x80(rsp),xmm14
- .byte 0x2c,0xd8,0x07,0x00 #vmovaps 0x70(rsp),xmm13
- .byte 0x27,0xc8,0x06,0x00 #vmovaps 0x60(rsp),xmm12
- .byte 0x22,0xb8,0x05,0x00 #vmovaps 0x50(rsp),xmm11
- .byte 0x1d,0xa8,0x04,0x00 #vmovaps 0x40(rsp),xmm10
- .byte 0x18,0x98,0x03,0x00 #vmovaps 0x30(rsp),xmm9
- .byte 0x13,0x88,0x02,0x00 #vmovaps 0x20(rsp),xmm8
- .byte 0x0e,0x78,0x01,0x00 #vmovaps 0x10(rsp),xmm7
- .byte 0x09,0x68,0x00,0x00 #vmovaps 0x00(rsp),xmm6
- .byte 0x04,0x01,0x15,0x00 #sub rsp,0xa8
+ .byte 0x01,0x36,0x17,0x0b
+ .byte 0x36,0xf8,0x09,0x00 # vmovaps 0x90(rsp),xmm15
+ .byte 0x31,0xe8,0x08,0x00 # vmovaps 0x80(rsp),xmm14
+ .byte 0x2c,0xd8,0x07,0x00 # vmovaps 0x70(rsp),xmm13
+ .byte 0x27,0xc8,0x06,0x00 # vmovaps 0x60(rsp),xmm12
+ .byte 0x22,0xb8,0x05,0x00 # vmovaps 0x50(rsp),xmm11
+ .byte 0x1d,0xa8,0x04,0x00 # vmovaps 0x40(rsp),xmm10
+ .byte 0x18,0x98,0x03,0x00 # vmovaps 0x30(rsp),xmm9
+ .byte 0x13,0x88,0x02,0x00 # vmovaps 0x20(rsp),xmm8
+ .byte 0x0e,0x78,0x01,0x00 # vmovaps 0x10(rsp),xmm7
+ .byte 0x09,0x68,0x00,0x00 # vmovaps 0x00(rsp),xmm6
+ .byte 0x04,0x01,0x15,0x00 # sub rsp,0xa8
+ .byte 0x00,0xb3,0x00,0x00 # set_frame r11
___
}
diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl
index 091cdc2069da..87ce2c34d90c 100755
--- a/crypto/bn/asm/rsaz-x86_64.pl
+++ b/crypto/bn/asm/rsaz-x86_64.pl
@@ -915,9 +915,76 @@ rsaz_512_mul_gather4:
push %r14
push %r15
- mov $pwr, $pwr
- subq \$128+24, %rsp
+ subq \$`128+24+($win64?0xb0:0)`, %rsp
+___
+$code.=<<___ if ($win64);
+ movaps %xmm6,0xa0(%rsp)
+ movaps %xmm7,0xb0(%rsp)
+ movaps %xmm8,0xc0(%rsp)
+ movaps %xmm9,0xd0(%rsp)
+ movaps %xmm10,0xe0(%rsp)
+ movaps %xmm11,0xf0(%rsp)
+ movaps %xmm12,0x100(%rsp)
+ movaps %xmm13,0x110(%rsp)
+ movaps %xmm14,0x120(%rsp)
+ movaps %xmm15,0x130(%rsp)
+___
+$code.=<<___;
.Lmul_gather4_body:
+ movd $pwr,%xmm8
+ movdqa .Linc+16(%rip),%xmm1 # 00000002000000020000000200000002
+ movdqa .Linc(%rip),%xmm0 # 00000001000000010000000000000000
+
+ pshufd \$0,%xmm8,%xmm8 # broadcast $power
+ movdqa %xmm1,%xmm7
+ movdqa %xmm1,%xmm2
+___
+########################################################################
+# calculate mask by comparing 0..15 to $power
+#
+for($i=0;$i<4;$i++) {
+$code.=<<___;
+ paddd %xmm`$i`,%xmm`$i+1`
+ pcmpeqd %xmm8,%xmm`$i`
+ movdqa %xmm7,%xmm`$i+3`
+___
+}
+for(;$i<7;$i++) {
+$code.=<<___;
+ paddd %xmm`$i`,%xmm`$i+1`
+ pcmpeqd %xmm8,%xmm`$i`
+___
+}
+$code.=<<___;
+ pcmpeqd %xmm8,%xmm7
+
+ movdqa 16*0($bp),%xmm8
+ movdqa 16*1($bp),%xmm9
+ movdqa 16*2($bp),%xmm10
+ movdqa 16*3($bp),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 16*4($bp),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 16*5($bp),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 16*6($bp),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 16*7($bp),%xmm15
+ leaq 128($bp), %rbp
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd \$0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
___
$code.=<<___ if ($addx);
movl \$0x80100,%r11d
@@ -926,45 +993,38 @@ $code.=<<___ if ($addx);
je .Lmulx_gather
___
$code.=<<___;
- movl 64($bp,$pwr,4), %eax
- movq $out, %xmm0 # off-load arguments
- movl ($bp,$pwr,4), %ebx
- movq $mod, %xmm1
- movq $n0, 128(%rsp)
+ movq %xmm8,%rbx
+
+ movq $n0, 128(%rsp) # off-load arguments
+ movq $out, 128+8(%rsp)
+ movq $mod, 128+16(%rsp)
- shlq \$32, %rax
- or %rax, %rbx
movq ($ap), %rax
movq 8($ap), %rcx
- leaq 128($bp,$pwr,4), %rbp
mulq %rbx # 0 iteration
movq %rax, (%rsp)
movq %rcx, %rax
movq %rdx, %r8
mulq %rbx
- movd (%rbp), %xmm4
addq %rax, %r8
movq 16($ap), %rax
movq %rdx, %r9
adcq \$0, %r9
mulq %rbx
- movd 64(%rbp), %xmm5
addq %rax, %r9
movq 24($ap), %rax
movq %rdx, %r10
adcq \$0, %r10
mulq %rbx
- pslldq \$4, %xmm5
addq %rax, %r10
movq 32($ap), %rax
movq %rdx, %r11
adcq \$0, %r11
mulq %rbx
- por %xmm5, %xmm4
addq %rax, %r11
movq 40($ap), %rax
movq %rdx, %r12
@@ -977,14 +1037,12 @@ $code.=<<___;
adcq \$0, %r13
mulq %rbx
- leaq 128(%rbp), %rbp
addq %rax, %r13
movq 56($ap), %rax
movq %rdx, %r14
adcq \$0, %r14
mulq %rbx
- movq %xmm4, %rbx
addq %rax, %r14
movq ($ap), %rax
movq %rdx, %r15
@@ -996,6 +1054,35 @@ $code.=<<___;
.align 32
.Loop_mul_gather:
+ movdqa 16*0(%rbp),%xmm8
+ movdqa 16*1(%rbp),%xmm9
+ movdqa 16*2(%rbp),%xmm10
+ movdqa 16*3(%rbp),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 16*4(%rbp),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 16*5(%rbp),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 16*6(%rbp),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 16*7(%rbp),%xmm15
+ leaq 128(%rbp), %rbp
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd \$0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
+ movq %xmm8,%rbx
+
mulq %rbx
addq %rax, %r8
movq 8($ap), %rax
@@ -1004,7 +1091,6 @@ $code.=<<___;
adcq \$0, %r8
mulq %rbx
- movd (%rbp), %xmm4
addq %rax, %r9
movq 16($ap), %rax
adcq \$0, %rdx
@@ -1013,7 +1099,6 @@ $code.=<<___;
adcq \$0, %r9
mulq %rbx
- movd 64(%rbp), %xmm5
addq %rax, %r10
movq 24($ap), %rax
adcq \$0, %rdx
@@ -1022,7 +1107,6 @@ $code.=<<___;
adcq \$0, %r10
mulq %rbx
- pslldq \$4, %xmm5
addq %rax, %r11
movq 32($ap), %rax
adcq \$0, %rdx
@@ -1031,7 +1115,6 @@ $code.=<<___;
adcq \$0, %r11
mulq %rbx
- por %xmm5, %xmm4
addq %rax, %r12
movq 40($ap), %rax
adcq \$0, %rdx
@@ -1056,7 +1139,6 @@ $code.=<<___;
adcq \$0, %r14
mulq %rbx
- movq %xmm4, %rbx
addq %rax, %r15
movq ($ap), %rax
adcq \$0, %rdx
@@ -1064,7 +1146,6 @@ $code.=<<___;
movq %rdx, %r15
adcq \$0, %r15
- leaq 128(%rbp), %rbp
leaq 8(%rdi), %rdi
decl %ecx
@@ -1079,8 +1160,8 @@ $code.=<<___;
movq %r14, 48(%rdi)
movq %r15, 56(%rdi)
- movq %xmm0, $out
- movq %xmm1, %rbp
+ movq 128+8(%rsp), $out
+ movq 128+16(%rsp), %rbp
movq (%rsp), %r8
movq 8(%rsp), %r9
@@ -1098,45 +1179,37 @@ $code.=<<___ if ($addx);
.align 32
.Lmulx_gather:
- mov 64($bp,$pwr,4), %eax
- movq $out, %xmm0 # off-load arguments
- lea 128($bp,$pwr,4), %rbp
- mov ($bp,$pwr,4), %edx
- movq $mod, %xmm1
- mov $n0, 128(%rsp)
+ movq %xmm8,%rdx
+
+ mov $n0, 128(%rsp) # off-load arguments
+ mov $out, 128+8(%rsp)
+ mov $mod, 128+16(%rsp)
- shl \$32, %rax
- or %rax, %rdx
mulx ($ap), %rbx, %r8 # 0 iteration
mov %rbx, (%rsp)
xor %edi, %edi # cf=0, of=0
mulx 8($ap), %rax, %r9
- movd (%rbp), %xmm4
mulx 16($ap), %rbx, %r10
- movd 64(%rbp), %xmm5
adcx %rax, %r8
mulx 24($ap), %rax, %r11
- pslldq \$4, %xmm5
adcx %rbx, %r9
mulx 32($ap), %rbx, %r12
- por %xmm5, %xmm4
adcx %rax, %r10
mulx 40($ap), %rax, %r13
adcx %rbx, %r11
mulx 48($ap), %rbx, %r14
- lea 128(%rbp), %rbp
adcx %rax, %r12
mulx 56($ap), %rax, %r15
- movq %xmm4, %rdx
adcx %rbx, %r13
adcx %rax, %r14
+ .byte 0x67
mov %r8, %rbx
adcx %rdi, %r15 # %rdi is 0
@@ -1145,24 +1218,48 @@ $code.=<<___ if ($addx);
.align 32
.Loop_mulx_gather:
- mulx ($ap), %rax, %r8
+ movdqa 16*0(%rbp),%xmm8
+ movdqa 16*1(%rbp),%xmm9
+ movdqa 16*2(%rbp),%xmm10
+ movdqa 16*3(%rbp),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 16*4(%rbp),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 16*5(%rbp),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 16*6(%rbp),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 16*7(%rbp),%xmm15
+ leaq 128(%rbp), %rbp
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd \$0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
+ movq %xmm8,%rdx
+
+ .byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00 # mulx ($ap), %rax, %r8
adcx %rax, %rbx
adox %r9, %r8
mulx 8($ap), %rax, %r9
- .byte 0x66,0x0f,0x6e,0xa5,0x00,0x00,0x00,0x00 # movd (%rbp), %xmm4
adcx %rax, %r8
adox %r10, %r9
mulx 16($ap), %rax, %r10
- movd 64(%rbp), %xmm5
- lea 128(%rbp), %rbp
adcx %rax, %r9
adox %r11, %r10
.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00 # mulx 24($ap), %rax, %r11
- pslldq \$4, %xmm5
- por %xmm5, %xmm4
adcx %rax, %r10
adox %r12, %r11
@@ -1176,10 +1273,10 @@ $code.=<<___ if ($addx);
.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00 # mulx 48($ap), %rax, %r14
adcx %rax, %r13
+ .byte 0x67
adox %r15, %r14
mulx 56($ap), %rax, %r15
- movq %xmm4, %rdx
mov %rbx, 64(%rsp,%rcx,8)
adcx %rax, %r14
adox %rdi, %r15
@@ -1198,10 +1295,10 @@ $code.=<<___ if ($addx);
mov %r14, 64+48(%rsp)
mov %r15, 64+56(%rsp)
- movq %xmm0, $out
- movq %xmm1, %rbp
+ mov 128(%rsp), %rdx # pull arguments
+ mov 128+8(%rsp), $out
+ mov 128+16(%rsp), %rbp
- mov 128(%rsp), %rdx # pull $n0
mov (%rsp), %r8
mov 8(%rsp), %r9
mov 16(%rsp), %r10
@@ -1229,6 +1326,21 @@ $code.=<<___;
call __rsaz_512_subtract
leaq 128+24+48(%rsp), %rax
+___
+$code.=<<___ if ($win64);
+ movaps 0xa0-0xc8(%rax),%xmm6
+ movaps 0xb0-0xc8(%rax),%xmm7
+ movaps 0xc0-0xc8(%rax),%xmm8
+ movaps 0xd0-0xc8(%rax),%xmm9
+ movaps 0xe0-0xc8(%rax),%xmm10
+ movaps 0xf0-0xc8(%rax),%xmm11
+ movaps 0x100-0xc8(%rax),%xmm12
+ movaps 0x110-0xc8(%rax),%xmm13
+ movaps 0x120-0xc8(%rax),%xmm14
+ movaps 0x130-0xc8(%rax),%xmm15
+ lea 0xb0(%rax),%rax
+___
+$code.=<<___;
movq -48(%rax), %r15
movq -40(%rax), %r14
movq -32(%rax), %r13
@@ -1258,7 +1370,7 @@ rsaz_512_mul_scatter4:
mov $pwr, $pwr
subq \$128+24, %rsp
.Lmul_scatter4_body:
- leaq ($tbl,$pwr,4), $tbl
+ leaq ($tbl,$pwr,8), $tbl
movq $out, %xmm0 # off-load arguments
movq $mod, %xmm1
movq $tbl, %xmm2
@@ -1329,30 +1441,14 @@ $code.=<<___;
call __rsaz_512_subtract
- movl %r8d, 64*0($inp) # scatter
- shrq \$32, %r8
- movl %r9d, 64*2($inp)
- shrq \$32, %r9
- movl %r10d, 64*4($inp)
- shrq \$32, %r10
- movl %r11d, 64*6($inp)
- shrq \$32, %r11
- movl %r12d, 64*8($inp)
- shrq \$32, %r12
- movl %r13d, 64*10($inp)
- shrq \$32, %r13
- movl %r14d, 64*12($inp)
- shrq \$32, %r14
- movl %r15d, 64*14($inp)
- shrq \$32, %r15
- movl %r8d, 64*1($inp)
- movl %r9d, 64*3($inp)
- movl %r10d, 64*5($inp)
- movl %r11d, 64*7($inp)
- movl %r12d, 64*9($inp)
- movl %r13d, 64*11($inp)
- movl %r14d, 64*13($inp)
- movl %r15d, 64*15($inp)
+ movq %r8, 128*0($inp) # scatter
+ movq %r9, 128*1($inp)
+ movq %r10, 128*2($inp)
+ movq %r11, 128*3($inp)
+ movq %r12, 128*4($inp)
+ movq %r13, 128*5($inp)
+ movq %r14, 128*6($inp)
+ movq %r15, 128*7($inp)
leaq 128+24+48(%rsp), %rax
movq -48(%rax), %r15
@@ -1956,16 +2052,14 @@ $code.=<<___;
.type rsaz_512_scatter4,\@abi-omnipotent
.align 16
rsaz_512_scatter4:
- leaq ($out,$power,4), $out
+ leaq ($out,$power,8), $out
movl \$8, %r9d
jmp .Loop_scatter
.align 16
.Loop_scatter:
movq ($inp), %rax
leaq 8($inp), $inp
- movl %eax, ($out)
- shrq \$32, %rax
- movl %eax, 64($out)
+ movq %rax, ($out)
leaq 128($out), $out
decl %r9d
jnz .Loop_scatter
@@ -1976,22 +2070,106 @@ rsaz_512_scatter4:
.type rsaz_512_gather4,\@abi-omnipotent
.align 16
rsaz_512_gather4:
- leaq ($inp,$power,4), $inp
+___
+$code.=<<___ if ($win64);
+.LSEH_begin_rsaz_512_gather4:
+ .byte 0x48,0x81,0xec,0xa8,0x00,0x00,0x00 # sub $0xa8,%rsp
+ .byte 0x0f,0x29,0x34,0x24 # movaps %xmm6,(%rsp)
+ .byte 0x0f,0x29,0x7c,0x24,0x10 # movaps %xmm7,0x10(%rsp)
+ .byte 0x44,0x0f,0x29,0x44,0x24,0x20 # movaps %xmm8,0x20(%rsp)
+ .byte 0x44,0x0f,0x29,0x4c,0x24,0x30 # movaps %xmm9,0x30(%rsp)
+ .byte 0x44,0x0f,0x29,0x54,0x24,0x40 # movaps %xmm10,0x40(%rsp)
+ .byte 0x44,0x0f,0x29,0x5c,0x24,0x50 # movaps %xmm11,0x50(%rsp)
+ .byte 0x44,0x0f,0x29,0x64,0x24,0x60 # movaps %xmm12,0x60(%rsp)
+ .byte 0x44,0x0f,0x29,0x6c,0x24,0x70 # movaps %xmm13,0x70(%rsp)
+ .byte 0x44,0x0f,0x29,0xb4,0x24,0x80,0,0,0 # movaps %xmm14,0x80(%rsp)
+ .byte 0x44,0x0f,0x29,0xbc,0x24,0x90,0,0,0 # movaps %xmm15,0x90(%rsp)
+___
+$code.=<<___;
+ movd $power,%xmm8
+ movdqa .Linc+16(%rip),%xmm1 # 00000002000000020000000200000002
+ movdqa .Linc(%rip),%xmm0 # 00000001000000010000000000000000
+
+ pshufd \$0,%xmm8,%xmm8 # broadcast $power
+ movdqa %xmm1,%xmm7
+ movdqa %xmm1,%xmm2
+___
+########################################################################
+# calculate mask by comparing 0..15 to $power
+#
+for($i=0;$i<4;$i++) {
+$code.=<<___;
+ paddd %xmm`$i`,%xmm`$i+1`
+ pcmpeqd %xmm8,%xmm`$i`
+ movdqa %xmm7,%xmm`$i+3`
+___
+}
+for(;$i<7;$i++) {
+$code.=<<___;
+ paddd %xmm`$i`,%xmm`$i+1`
+ pcmpeqd %xmm8,%xmm`$i`
+___
+}
+$code.=<<___;
+ pcmpeqd %xmm8,%xmm7
movl \$8, %r9d
jmp .Loop_gather
.align 16
.Loop_gather:
- movl ($inp), %eax
- movl 64($inp), %r8d
+ movdqa 16*0($inp),%xmm8
+ movdqa 16*1($inp),%xmm9
+ movdqa 16*2($inp),%xmm10
+ movdqa 16*3($inp),%xmm11
+ pand %xmm0,%xmm8
+ movdqa 16*4($inp),%xmm12
+ pand %xmm1,%xmm9
+ movdqa 16*5($inp),%xmm13
+ pand %xmm2,%xmm10
+ movdqa 16*6($inp),%xmm14
+ pand %xmm3,%xmm11
+ movdqa 16*7($inp),%xmm15
leaq 128($inp), $inp
- shlq \$32, %r8
- or %r8, %rax
- movq %rax, ($out)
+ pand %xmm4,%xmm12
+ pand %xmm5,%xmm13
+ pand %xmm6,%xmm14
+ pand %xmm7,%xmm15
+ por %xmm10,%xmm8
+ por %xmm11,%xmm9
+ por %xmm12,%xmm8
+ por %xmm13,%xmm9
+ por %xmm14,%xmm8
+ por %xmm15,%xmm9
+
+ por %xmm9,%xmm8
+ pshufd \$0x4e,%xmm8,%xmm9
+ por %xmm9,%xmm8
+ movq %xmm8,($out)
leaq 8($out), $out
decl %r9d
jnz .Loop_gather
+___
+$code.=<<___ if ($win64);
+ movaps 0x00(%rsp),%xmm6
+ movaps 0x10(%rsp),%xmm7
+ movaps 0x20(%rsp),%xmm8
+ movaps 0x30(%rsp),%xmm9
+ movaps 0x40(%rsp),%xmm10
+ movaps 0x50(%rsp),%xmm11
+ movaps 0x60(%rsp),%xmm12
+ movaps 0x70(%rsp),%xmm13
+ movaps 0x80(%rsp),%xmm14
+ movaps 0x90(%rsp),%xmm15
+ add \$0xa8,%rsp
+___
+$code.=<<___;
ret
+.LSEH_end_rsaz_512_gather4:
.size rsaz_512_gather4,.-rsaz_512_gather4
+
+.align 64
+.Linc:
+ .long 0,0, 1,1
+ .long 2,2, 2,2
___
}
@@ -2039,6 +2217,18 @@ se_handler:
lea 128+24+48(%rax),%rax
+ lea .Lmul_gather4_epilogue(%rip),%rbx
+ cmp %r10,%rbx
+ jne .Lse_not_in_mul_gather4
+
+ lea 0xb0(%rax),%rax
+
+ lea -48-0xa8(%rax),%rsi
+ lea 512($context),%rdi
+ mov \$20,%ecx
+ .long 0xa548f3fc # cld; rep movsq
+
+.Lse_not_in_mul_gather4:
mov -8(%rax),%rbx
mov -16(%rax),%rbp
mov -24(%rax),%r12
@@ -2090,7 +2280,7 @@ se_handler:
pop %rdi
pop %rsi
ret
-.size sqr_handler,.-sqr_handler
+.size se_handler,.-se_handler
.section .pdata
.align 4
@@ -2114,6 +2304,10 @@ se_handler:
.rva .LSEH_end_rsaz_512_mul_by_one
.rva .LSEH_info_rsaz_512_mul_by_one
+ .rva .LSEH_begin_rsaz_512_gather4
+ .rva .LSEH_end_rsaz_512_gather4
+ .rva .LSEH_info_rsaz_512_gather4
+
.section .xdata
.align 8
.LSEH_info_rsaz_512_sqr:
@@ -2136,6 +2330,19 @@ se_handler:
.byte 9,0,0,0
.rva se_handler
.rva .Lmul_by_one_body,.Lmul_by_one_epilogue # HandlerData[]
+.LSEH_info_rsaz_512_gather4:
+ .byte 0x01,0x46,0x16,0x00
+ .byte 0x46,0xf8,0x09,0x00 # vmovaps 0x90(rsp),xmm15
+ .byte 0x3d,0xe8,0x08,0x00 # vmovaps 0x80(rsp),xmm14
+ .byte 0x34,0xd8,0x07,0x00 # vmovaps 0x70(rsp),xmm13
+ .byte 0x2e,0xc8,0x06,0x00 # vmovaps 0x60(rsp),xmm12
+ .byte 0x28,0xb8,0x05,0x00 # vmovaps 0x50(rsp),xmm11
+ .byte 0x22,0xa8,0x04,0x00 # vmovaps 0x40(rsp),xmm10
+ .byte 0x1c,0x98,0x03,0x00 # vmovaps 0x30(rsp),xmm9
+ .byte 0x16,0x88,0x02,0x00 # vmovaps 0x20(rsp),xmm8
+ .byte 0x10,0x78,0x01,0x00 # vmovaps 0x10(rsp),xmm7
+ .byte 0x0b,0x68,0x00,0x00 # vmovaps 0x00(rsp),xmm6
+ .byte 0x07,0x01,0x15,0x00 # sub rsp,0xa8
___
}
diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl
index e82e451388c7..29ba1224e36b 100755
--- a/crypto/bn/asm/x86_64-mont.pl
+++ b/crypto/bn/asm/x86_64-mont.pl
@@ -775,100 +775,126 @@ bn_sqr8x_mont:
# 4096. this is done to allow memory disambiguation logic
# do its job.
#
- lea -64(%rsp,$num,4),%r11
+ lea -64(%rsp,$num,2),%r11
mov ($n0),$n0 # *n0
sub $aptr,%r11
and \$4095,%r11
cmp %r11,%r10
jb .Lsqr8x_sp_alt
sub %r11,%rsp # align with $aptr
- lea -64(%rsp,$num,4),%rsp # alloca(frame+4*$num)
+ lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num)
jmp .Lsqr8x_sp_done
.align 32
.Lsqr8x_sp_alt:
- lea 4096-64(,$num,4),%r10 # 4096-frame-4*$num
- lea -64(%rsp,$num,4),%rsp # alloca(frame+4*$num)
+ lea 4096-64(,$num,2),%r10 # 4096-frame-2*$num
+ lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num)
sub %r10,%r11
mov \$0,%r10
cmovc %r10,%r11
sub %r11,%rsp
.Lsqr8x_sp_done:
and \$-64,%rsp
- mov $num,%r10
+ mov $num,%r10
neg $num
- lea 64(%rsp,$num,2),%r11 # copy of modulus
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
.Lsqr8x_body:
- mov $num,$i
- movq %r11, %xmm2 # save pointer to modulus copy
- shr \$3+2,$i
- mov OPENSSL_ia32cap_P+8(%rip),%eax
- jmp .Lsqr8x_copy_n
-
-.align 32
-.Lsqr8x_copy_n:
- movq 8*0($nptr),%xmm0
- movq 8*1($nptr),%xmm1
- movq 8*2($nptr),%xmm3
- movq 8*3($nptr),%xmm4
- lea 8*4($nptr),$nptr
- movdqa %xmm0,16*0(%r11)
- movdqa %xmm1,16*1(%r11)
- movdqa %xmm3,16*2(%r11)
- movdqa %xmm4,16*3(%r11)
- lea 16*4(%r11),%r11
- dec $i
- jnz .Lsqr8x_copy_n
-
+ movq $nptr, %xmm2 # save pointer to modulus
pxor %xmm0,%xmm0
movq $rptr,%xmm1 # save $rptr
movq %r10, %xmm3 # -$num
___
$code.=<<___ if ($addx);
+ mov OPENSSL_ia32cap_P+8(%rip),%eax
and \$0x80100,%eax
cmp \$0x80100,%eax
jne .Lsqr8x_nox
call bn_sqrx8x_internal # see x86_64-mont5 module
-
- pxor %xmm0,%xmm0
- lea 48(%rsp),%rax
- lea 64(%rsp,$num,2),%rdx
- shr \$3+2,$num
- mov 40(%rsp),%rsi # restore %rsp
- jmp .Lsqr8x_zero
+ # %rax top-most carry
+ # %rbp nptr
+ # %rcx -8*num
+ # %r8 end of tp[2*num]
+ lea (%r8,%rcx),%rbx
+ mov %rcx,$num
+ mov %rcx,%rdx
+ movq %xmm1,$rptr
+ sar \$3+2,%rcx # %cf=0
+ jmp .Lsqr8x_sub
.align 32
.Lsqr8x_nox:
___
$code.=<<___;
call bn_sqr8x_internal # see x86_64-mont5 module
+ # %rax top-most carry
+ # %rbp nptr
+ # %r8 -8*num
+ # %rdi end of tp[2*num]
+ lea (%rdi,$num),%rbx
+ mov $num,%rcx
+ mov $num,%rdx
+ movq %xmm1,$rptr
+ sar \$3+2,%rcx # %cf=0
+ jmp .Lsqr8x_sub
+.align 32
+.Lsqr8x_sub:
+ mov 8*0(%rbx),%r12
+ mov 8*1(%rbx),%r13
+ mov 8*2(%rbx),%r14
+ mov 8*3(%rbx),%r15
+ lea 8*4(%rbx),%rbx
+ sbb 8*0(%rbp),%r12
+ sbb 8*1(%rbp),%r13
+ sbb 8*2(%rbp),%r14
+ sbb 8*3(%rbp),%r15
+ lea 8*4(%rbp),%rbp
+ mov %r12,8*0($rptr)
+ mov %r13,8*1($rptr)
+ mov %r14,8*2($rptr)
+ mov %r15,8*3($rptr)
+ lea 8*4($rptr),$rptr
+ inc %rcx # preserves %cf
+ jnz .Lsqr8x_sub
+
+ sbb \$0,%rax # top-most carry
+ lea (%rbx,$num),%rbx # rewind
+ lea ($rptr,$num),$rptr # rewind
+
+ movq %rax,%xmm1
pxor %xmm0,%xmm0
- lea 48(%rsp),%rax
- lea 64(%rsp,$num,2),%rdx
- shr \$3+2,$num
+ pshufd \$0,%xmm1,%xmm1
mov 40(%rsp),%rsi # restore %rsp
- jmp .Lsqr8x_zero
+ jmp .Lsqr8x_cond_copy
.align 32
-.Lsqr8x_zero:
- movdqa %xmm0,16*0(%rax) # wipe t
- movdqa %xmm0,16*1(%rax)
- movdqa %xmm0,16*2(%rax)
- movdqa %xmm0,16*3(%rax)
- lea 16*4(%rax),%rax
- movdqa %xmm0,16*0(%rdx) # wipe n
- movdqa %xmm0,16*1(%rdx)
- movdqa %xmm0,16*2(%rdx)
- movdqa %xmm0,16*3(%rdx)
- lea 16*4(%rdx),%rdx
- dec $num
- jnz .Lsqr8x_zero
+.Lsqr8x_cond_copy:
+ movdqa 16*0(%rbx),%xmm2
+ movdqa 16*1(%rbx),%xmm3
+ lea 16*2(%rbx),%rbx
+ movdqu 16*0($rptr),%xmm4
+ movdqu 16*1($rptr),%xmm5
+ lea 16*2($rptr),$rptr
+ movdqa %xmm0,-16*2(%rbx) # zero tp
+ movdqa %xmm0,-16*1(%rbx)
+ movdqa %xmm0,-16*2(%rbx,%rdx)
+ movdqa %xmm0,-16*1(%rbx,%rdx)
+ pcmpeqd %xmm1,%xmm0
+ pand %xmm1,%xmm2
+ pand %xmm1,%xmm3
+ pand %xmm0,%xmm4
+ pand %xmm0,%xmm5
+ pxor %xmm0,%xmm0
+ por %xmm2,%xmm4
+ por %xmm3,%xmm5
+ movdqu %xmm4,-16*2($rptr)
+ movdqu %xmm5,-16*1($rptr)
+ add \$32,$num
+ jnz .Lsqr8x_cond_copy
mov \$1,%rax
mov -48(%rsi),%r15
@@ -1135,64 +1161,75 @@ $code.=<<___;
adc $zero,%r15 # modulo-scheduled
sub 0*8($tptr),$zero # pull top-most carry
adc %r15,%r14
- mov -8($nptr),$mi
sbb %r15,%r15 # top-most carry
mov %r14,-1*8($tptr)
cmp 16(%rsp),$bptr
jne .Lmulx4x_outer
- sub %r14,$mi # compare top-most words
- sbb $mi,$mi
- or $mi,%r15
-
- neg $num
- xor %rdx,%rdx
+ lea 64(%rsp),$tptr
+ sub $num,$nptr # rewind $nptr
+ neg %r15
+ mov $num,%rdx
+ shr \$3+2,$num # %cf=0
mov 32(%rsp),$rptr # restore rp
+ jmp .Lmulx4x_sub
+
+.align 32
+.Lmulx4x_sub:
+ mov 8*0($tptr),%r11
+ mov 8*1($tptr),%r12
+ mov 8*2($tptr),%r13
+ mov 8*3($tptr),%r14
+ lea 8*4($tptr),$tptr
+ sbb 8*0($nptr),%r11
+ sbb 8*1($nptr),%r12
+ sbb 8*2($nptr),%r13
+ sbb 8*3($nptr),%r14
+ lea 8*4($nptr),$nptr
+ mov %r11,8*0($rptr)
+ mov %r12,8*1($rptr)
+ mov %r13,8*2($rptr)
+ mov %r14,8*3($rptr)
+ lea 8*4($rptr),$rptr
+ dec $num # preserves %cf
+ jnz .Lmulx4x_sub
+
+ sbb \$0,%r15 # top-most carry
lea 64(%rsp),$tptr
+ sub %rdx,$rptr # rewind
+ movq %r15,%xmm1
pxor %xmm0,%xmm0
- mov 0*8($nptr,$num),%r8
- mov 1*8($nptr,$num),%r9
- neg %r8
- jmp .Lmulx4x_sub_entry
+ pshufd \$0,%xmm1,%xmm1
+ mov 40(%rsp),%rsi # restore %rsp
+ jmp .Lmulx4x_cond_copy
.align 32
-.Lmulx4x_sub:
- mov 0*8($nptr,$num),%r8
- mov 1*8($nptr,$num),%r9
- not %r8
-.Lmulx4x_sub_entry:
- mov 2*8($nptr,$num),%r10
- not %r9
- and %r15,%r8
- mov 3*8($nptr,$num),%r11
- not %r10
- and %r15,%r9
- not %r11
- and %r15,%r10
- and %r15,%r11
-
- neg %rdx # mov %rdx,%cf
- adc 0*8($tptr),%r8
- adc 1*8($tptr),%r9
- movdqa %xmm0,($tptr)
- adc 2*8($tptr),%r10
- adc 3*8($tptr),%r11
- movdqa %xmm0,16($tptr)
- lea 4*8($tptr),$tptr
- sbb %rdx,%rdx # mov %cf,%rdx
+.Lmulx4x_cond_copy:
+ movdqa 16*0($tptr),%xmm2
+ movdqa 16*1($tptr),%xmm3
+ lea 16*2($tptr),$tptr
+ movdqu 16*0($rptr),%xmm4
+ movdqu 16*1($rptr),%xmm5
+ lea 16*2($rptr),$rptr
+ movdqa %xmm0,-16*2($tptr) # zero tp
+ movdqa %xmm0,-16*1($tptr)
+ pcmpeqd %xmm1,%xmm0
+ pand %xmm1,%xmm2
+ pand %xmm1,%xmm3
+ pand %xmm0,%xmm4
+ pand %xmm0,%xmm5
+ pxor %xmm0,%xmm0
+ por %xmm2,%xmm4
+ por %xmm3,%xmm5
+ movdqu %xmm4,-16*2($rptr)
+ movdqu %xmm5,-16*1($rptr)
+ sub \$32,%rdx
+ jnz .Lmulx4x_cond_copy
- mov %r8,0*8($rptr)
- mov %r9,1*8($rptr)
- mov %r10,2*8($rptr)
- mov %r11,3*8($rptr)
- lea 4*8($rptr),$rptr
+ mov %rdx,($tptr)
- add \$32,$num
- jnz .Lmulx4x_sub
-
- mov 40(%rsp),%rsi # restore %rsp
mov \$1,%rax
mov -48(%rsi),%r15
mov -40(%rsi),%r14
diff --git a/crypto/bn/asm/x86_64-mont5.pl b/crypto/bn/asm/x86_64-mont5.pl
index 292409c4ffb8..2e8c9db32cbc 100755
--- a/crypto/bn/asm/x86_64-mont5.pl
+++ b/crypto/bn/asm/x86_64-mont5.pl
@@ -99,58 +99,111 @@ $code.=<<___;
.Lmul_enter:
mov ${num}d,${num}d
mov %rsp,%rax
- mov `($win64?56:8)`(%rsp),%r10d # load 7th argument
+ movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument
+ lea .Linc(%rip),%r10
push %rbx
push %rbp
push %r12
push %r13
push %r14
push %r15
-___
-$code.=<<___ if ($win64);
- lea -0x28(%rsp),%rsp
- movaps %xmm6,(%rsp)
- movaps %xmm7,0x10(%rsp)
-___
-$code.=<<___;
+
lea 2($num),%r11
neg %r11
- lea (%rsp,%r11,8),%rsp # tp=alloca(8*(num+2))
+ lea -264(%rsp,%r11,8),%rsp # tp=alloca(8*(num+2)+256+8)
and \$-1024,%rsp # minimize TLB usage
mov %rax,8(%rsp,$num,8) # tp[num+1]=%rsp
.Lmul_body:
- mov $bp,%r12 # reassign $bp
+ lea 128($bp),%r12 # reassign $bp (+size optimization)
___
$bp="%r12";
$STRIDE=2**5*8; # 5 is "window size"
$N=$STRIDE/4; # should match cache line size
$code.=<<___;
- mov %r10,%r11
- shr \$`log($N/8)/log(2)`,%r10
- and \$`$N/8-1`,%r11
- not %r10
- lea .Lmagic_masks(%rip),%rax
- and \$`2**5/($N/8)-1`,%r10 # 5 is "window size"
- lea 96($bp,%r11,8),$bp # pointer within 1st cache line
- movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which
- movq 8(%rax,%r10,8),%xmm5 # cache line contains element
- movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument
- movq 24(%rax,%r10,8),%xmm7
-
- movq `0*$STRIDE/4-96`($bp),%xmm0
- movq `1*$STRIDE/4-96`($bp),%xmm1
- pand %xmm4,%xmm0
- movq `2*$STRIDE/4-96`($bp),%xmm2
- pand %xmm5,%xmm1
- movq `3*$STRIDE/4-96`($bp),%xmm3
- pand %xmm6,%xmm2
- por %xmm1,%xmm0
- pand %xmm7,%xmm3
+ movdqa 0(%r10),%xmm0 # 00000001000000010000000000000000
+ movdqa 16(%r10),%xmm1 # 00000002000000020000000200000002
+ lea 24-112(%rsp,$num,8),%r10# place the mask after tp[num+3] (+ICache optimization)
+ and \$-16,%r10
+
+ pshufd \$0,%xmm5,%xmm5 # broadcast index
+ movdqa %xmm1,%xmm4
+ movdqa %xmm1,%xmm2
+___
+########################################################################
+# calculate mask by comparing 0..31 to index and save result to stack
+#
+$code.=<<___;
+ paddd %xmm0,%xmm1
+ pcmpeqd %xmm5,%xmm0 # compare to 1,0
+ .byte 0x67
+ movdqa %xmm4,%xmm3
+___
+for($k=0;$k<$STRIDE/16-4;$k+=4) {
+$code.=<<___;
+ paddd %xmm1,%xmm2
+ pcmpeqd %xmm5,%xmm1 # compare to 3,2
+ movdqa %xmm0,`16*($k+0)+112`(%r10)
+ movdqa %xmm4,%xmm0
+
+ paddd %xmm2,%xmm3
+ pcmpeqd %xmm5,%xmm2 # compare to 5,4
+ movdqa %xmm1,`16*($k+1)+112`(%r10)
+ movdqa %xmm4,%xmm1
+
+ paddd %xmm3,%xmm0
+ pcmpeqd %xmm5,%xmm3 # compare to 7,6
+ movdqa %xmm2,`16*($k+2)+112`(%r10)
+ movdqa %xmm4,%xmm2
+
+ paddd %xmm0,%xmm1
+ pcmpeqd %xmm5,%xmm0
+ movdqa %xmm3,`16*($k+3)+112`(%r10)
+ movdqa %xmm4,%xmm3
+___
+}
+$code.=<<___; # last iteration can be optimized
+ paddd %xmm1,%xmm2
+ pcmpeqd %xmm5,%xmm1
+ movdqa %xmm0,`16*($k+0)+112`(%r10)
+
+ paddd %xmm2,%xmm3
+ .byte 0x67
+ pcmpeqd %xmm5,%xmm2
+ movdqa %xmm1,`16*($k+1)+112`(%r10)
+
+ pcmpeqd %xmm5,%xmm3
+ movdqa %xmm2,`16*($k+2)+112`(%r10)
+ pand `16*($k+0)-128`($bp),%xmm0 # while it's still in register
+
+ pand `16*($k+1)-128`($bp),%xmm1
+ pand `16*($k+2)-128`($bp),%xmm2
+ movdqa %xmm3,`16*($k+3)+112`(%r10)
+ pand `16*($k+3)-128`($bp),%xmm3
por %xmm2,%xmm0
+ por %xmm3,%xmm1
+___
+for($k=0;$k<$STRIDE/16-4;$k+=4) {
+$code.=<<___;
+ movdqa `16*($k+0)-128`($bp),%xmm4
+ movdqa `16*($k+1)-128`($bp),%xmm5
+ movdqa `16*($k+2)-128`($bp),%xmm2
+ pand `16*($k+0)+112`(%r10),%xmm4
+ movdqa `16*($k+3)-128`($bp),%xmm3
+ pand `16*($k+1)+112`(%r10),%xmm5
+ por %xmm4,%xmm0
+ pand `16*($k+2)+112`(%r10),%xmm2
+ por %xmm5,%xmm1
+ pand `16*($k+3)+112`(%r10),%xmm3
+ por %xmm2,%xmm0
+ por %xmm3,%xmm1
+___
+}
+$code.=<<___;
+ por %xmm1,%xmm0
+ pshufd \$0x4e,%xmm0,%xmm1
+ por %xmm1,%xmm0
lea $STRIDE($bp),$bp
- por %xmm3,%xmm0
-
movq %xmm0,$m0 # m0=bp[0]
mov ($n0),$n0 # pull n0[0] value
@@ -159,29 +212,14 @@ $code.=<<___;
xor $i,$i # i=0
xor $j,$j # j=0
- movq `0*$STRIDE/4-96`($bp),%xmm0
- movq `1*$STRIDE/4-96`($bp),%xmm1
- pand %xmm4,%xmm0
- movq `2*$STRIDE/4-96`($bp),%xmm2
- pand %xmm5,%xmm1
-
mov $n0,$m1
mulq $m0 # ap[0]*bp[0]
mov %rax,$lo0
mov ($np),%rax
- movq `3*$STRIDE/4-96`($bp),%xmm3
- pand %xmm6,%xmm2
- por %xmm1,%xmm0
- pand %xmm7,%xmm3
-
imulq $lo0,$m1 # "tp[0]"*n0
mov %rdx,$hi0
- por %xmm2,%xmm0
- lea $STRIDE($bp),$bp
- por %xmm3,%xmm0
-
mulq $m1 # np[0]*m1
add %rax,$lo0 # discarded
mov 8($ap),%rax
@@ -212,16 +250,14 @@ $code.=<<___;
mulq $m1 # np[j]*m1
cmp $num,$j
- jne .L1st
-
- movq %xmm0,$m0 # bp[1]
+ jne .L1st # note that upon exit $j==$num, so
+ # they can be used interchangeably
add %rax,$hi1
- mov ($ap),%rax # ap[0]
adc \$0,%rdx
add $hi0,$hi1 # np[j]*m1+ap[j]*bp[0]
adc \$0,%rdx
- mov $hi1,-16(%rsp,$j,8) # tp[j-1]
+ mov $hi1,-16(%rsp,$num,8) # tp[num-1]
mov %rdx,$hi1
mov $lo0,$hi0
@@ -235,33 +271,48 @@ $code.=<<___;
jmp .Louter
.align 16
.Louter:
+ lea 24+128(%rsp,$num,8),%rdx # where 256-byte mask is (+size optimization)
+ and \$-16,%rdx
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+___
+for($k=0;$k<$STRIDE/16;$k+=4) {
+$code.=<<___;
+ movdqa `16*($k+0)-128`($bp),%xmm0
+ movdqa `16*($k+1)-128`($bp),%xmm1
+ movdqa `16*($k+2)-128`($bp),%xmm2
+ movdqa `16*($k+3)-128`($bp),%xmm3
+ pand `16*($k+0)-128`(%rdx),%xmm0
+ pand `16*($k+1)-128`(%rdx),%xmm1
+ por %xmm0,%xmm4
+ pand `16*($k+2)-128`(%rdx),%xmm2
+ por %xmm1,%xmm5
+ pand `16*($k+3)-128`(%rdx),%xmm3
+ por %xmm2,%xmm4
+ por %xmm3,%xmm5
+___
+}
+$code.=<<___;
+ por %xmm5,%xmm4
+ pshufd \$0x4e,%xmm4,%xmm0
+ por %xmm4,%xmm0
+ lea $STRIDE($bp),$bp
+
+ mov ($ap),%rax # ap[0]
+ movq %xmm0,$m0 # m0=bp[i]
+
xor $j,$j # j=0
mov $n0,$m1
mov (%rsp),$lo0
- movq `0*$STRIDE/4-96`($bp),%xmm0
- movq `1*$STRIDE/4-96`($bp),%xmm1
- pand %xmm4,%xmm0
- movq `2*$STRIDE/4-96`($bp),%xmm2
- pand %xmm5,%xmm1
-
mulq $m0 # ap[0]*bp[i]
add %rax,$lo0 # ap[0]*bp[i]+tp[0]
mov ($np),%rax
adc \$0,%rdx
- movq `3*$STRIDE/4-96`($bp),%xmm3
- pand %xmm6,%xmm2
- por %xmm1,%xmm0
- pand %xmm7,%xmm3
-
imulq $lo0,$m1 # tp[0]*n0
mov %rdx,$hi0
- por %xmm2,%xmm0
- lea $STRIDE($bp),$bp
- por %xmm3,%xmm0
-
mulq $m1 # np[0]*m1
add %rax,$lo0 # discarded
mov 8($ap),%rax
@@ -295,17 +346,14 @@ $code.=<<___;
mulq $m1 # np[j]*m1
cmp $num,$j
- jne .Linner
-
- movq %xmm0,$m0 # bp[i+1]
-
+ jne .Linner # note that upon exit $j==$num, so
+ # they can be used interchangeably
add %rax,$hi1
- mov ($ap),%rax # ap[0]
adc \$0,%rdx
add $lo0,$hi1 # np[j]*m1+ap[j]*bp[i]+tp[j]
- mov (%rsp,$j,8),$lo0
+ mov (%rsp,$num,8),$lo0
adc \$0,%rdx
- mov $hi1,-16(%rsp,$j,8) # tp[j-1]
+ mov $hi1,-16(%rsp,$num,8) # tp[num-1]
mov %rdx,$hi1
xor %rdx,%rdx
@@ -352,12 +400,7 @@ $code.=<<___;
mov 8(%rsp,$num,8),%rsi # restore %rsp
mov \$1,%rax
-___
-$code.=<<___ if ($win64);
- movaps -88(%rsi),%xmm6
- movaps -72(%rsi),%xmm7
-___
-$code.=<<___;
+
mov -48(%rsi),%r15
mov -40(%rsi),%r14
mov -32(%rsi),%r13
@@ -379,8 +422,8 @@ bn_mul4x_mont_gather5:
.Lmul4x_enter:
___
$code.=<<___ if ($addx);
- and \$0x80100,%r11d
- cmp \$0x80100,%r11d
+ and \$0x80108,%r11d
+ cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1
je .Lmulx4x_enter
___
$code.=<<___;
@@ -392,39 +435,34 @@ $code.=<<___;
push %r13
push %r14
push %r15
-___
-$code.=<<___ if ($win64);
- lea -0x28(%rsp),%rsp
- movaps %xmm6,(%rsp)
- movaps %xmm7,0x10(%rsp)
-___
-$code.=<<___;
+
.byte 0x67
- mov ${num}d,%r10d
- shl \$3,${num}d
- shl \$3+2,%r10d # 4*$num
+ shl \$3,${num}d # convert $num to bytes
+ lea ($num,$num,2),%r10 # 3*$num in bytes
neg $num # -$num
##############################################################
- # ensure that stack frame doesn't alias with $aptr+4*$num
- # modulo 4096, which covers ret[num], am[num] and n[2*num]
- # (see bn_exp.c). this is done to allow memory disambiguation
- # logic do its magic. [excessive frame is allocated in order
- # to allow bn_from_mont8x to clear it.]
+ # Ensure that stack frame doesn't alias with $rptr+3*$num
+ # modulo 4096, which covers ret[num], am[num] and n[num]
+ # (see bn_exp.c). This is done to allow memory disambiguation
+ # logic do its magic. [Extra [num] is allocated in order
+ # to align with bn_power5's frame, which is cleansed after
+ # completing exponentiation. Extra 256 bytes is for power mask
+ # calculated from 7th argument, the index.]
#
- lea -64(%rsp,$num,2),%r11
- sub $ap,%r11
+ lea -320(%rsp,$num,2),%r11
+ sub $rp,%r11
and \$4095,%r11
cmp %r11,%r10
jb .Lmul4xsp_alt
- sub %r11,%rsp # align with $ap
- lea -64(%rsp,$num,2),%rsp # alloca(128+num*8)
+ sub %r11,%rsp # align with $rp
+ lea -320(%rsp,$num,2),%rsp # alloca(frame+2*num*8+256)
jmp .Lmul4xsp_done
.align 32
.Lmul4xsp_alt:
- lea 4096-64(,$num,2),%r10
- lea -64(%rsp,$num,2),%rsp # alloca(128+num*8)
+ lea 4096-320(,$num,2),%r10
+ lea -320(%rsp,$num,2),%rsp # alloca(frame+2*num*8+256)
sub %r10,%r11
mov \$0,%r10
cmovc %r10,%r11
@@ -440,12 +478,7 @@ $code.=<<___;
mov 40(%rsp),%rsi # restore %rsp
mov \$1,%rax
-___
-$code.=<<___ if ($win64);
- movaps -88(%rsi),%xmm6
- movaps -72(%rsi),%xmm7
-___
-$code.=<<___;
+
mov -48(%rsi),%r15
mov -40(%rsi),%r14
mov -32(%rsi),%r13
@@ -460,9 +493,10 @@ $code.=<<___;
.type mul4x_internal,\@abi-omnipotent
.align 32
mul4x_internal:
- shl \$5,$num
- mov `($win64?56:8)`(%rax),%r10d # load 7th argument
- lea 256(%rdx,$num),%r13
+ shl \$5,$num # $num was in bytes
+ movd `($win64?56:8)`(%rax),%xmm5 # load 7th argument, index
+ lea .Linc(%rip),%rax
+ lea 128(%rdx,$num),%r13 # end of powers table (+size optimization)
shr \$5,$num # restore $num
___
$bp="%r12";
@@ -470,44 +504,92 @@ ___
$N=$STRIDE/4; # should match cache line size
$tp=$i;
$code.=<<___;
- mov %r10,%r11
- shr \$`log($N/8)/log(2)`,%r10
- and \$`$N/8-1`,%r11
- not %r10
- lea .Lmagic_masks(%rip),%rax
- and \$`2**5/($N/8)-1`,%r10 # 5 is "window size"
- lea 96(%rdx,%r11,8),$bp # pointer within 1st cache line
- movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which
- movq 8(%rax,%r10,8),%xmm5 # cache line contains element
- add \$7,%r11
- movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument
- movq 24(%rax,%r10,8),%xmm7
- and \$7,%r11
-
- movq `0*$STRIDE/4-96`($bp),%xmm0
- lea $STRIDE($bp),$tp # borrow $tp
- movq `1*$STRIDE/4-96`($bp),%xmm1
- pand %xmm4,%xmm0
- movq `2*$STRIDE/4-96`($bp),%xmm2
- pand %xmm5,%xmm1
- movq `3*$STRIDE/4-96`($bp),%xmm3
- pand %xmm6,%xmm2
- .byte 0x67
- por %xmm1,%xmm0
- movq `0*$STRIDE/4-96`($tp),%xmm1
- .byte 0x67
- pand %xmm7,%xmm3
- .byte 0x67
- por %xmm2,%xmm0
- movq `1*$STRIDE/4-96`($tp),%xmm2
+ movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000
+ movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002
+ lea 88-112(%rsp,$num),%r10 # place the mask after tp[num+1] (+ICache optimization)
+ lea 128(%rdx),$bp # size optimization
+
+ pshufd \$0,%xmm5,%xmm5 # broadcast index
+ movdqa %xmm1,%xmm4
+ .byte 0x67,0x67
+ movdqa %xmm1,%xmm2
+___
+########################################################################
+# calculate mask by comparing 0..31 to index and save result to stack
+#
+$code.=<<___;
+ paddd %xmm0,%xmm1
+ pcmpeqd %xmm5,%xmm0 # compare to 1,0
.byte 0x67
- pand %xmm4,%xmm1
+ movdqa %xmm4,%xmm3
+___
+for($i=0;$i<$STRIDE/16-4;$i+=4) {
+$code.=<<___;
+ paddd %xmm1,%xmm2
+ pcmpeqd %xmm5,%xmm1 # compare to 3,2
+ movdqa %xmm0,`16*($i+0)+112`(%r10)
+ movdqa %xmm4,%xmm0
+
+ paddd %xmm2,%xmm3
+ pcmpeqd %xmm5,%xmm2 # compare to 5,4
+ movdqa %xmm1,`16*($i+1)+112`(%r10)
+ movdqa %xmm4,%xmm1
+
+ paddd %xmm3,%xmm0
+ pcmpeqd %xmm5,%xmm3 # compare to 7,6
+ movdqa %xmm2,`16*($i+2)+112`(%r10)
+ movdqa %xmm4,%xmm2
+
+ paddd %xmm0,%xmm1
+ pcmpeqd %xmm5,%xmm0
+ movdqa %xmm3,`16*($i+3)+112`(%r10)
+ movdqa %xmm4,%xmm3
+___
+}
+$code.=<<___; # last iteration can be optimized
+ paddd %xmm1,%xmm2
+ pcmpeqd %xmm5,%xmm1
+ movdqa %xmm0,`16*($i+0)+112`(%r10)
+
+ paddd %xmm2,%xmm3
.byte 0x67
- por %xmm3,%xmm0
- movq `2*$STRIDE/4-96`($tp),%xmm3
+ pcmpeqd %xmm5,%xmm2
+ movdqa %xmm1,`16*($i+1)+112`(%r10)
+ pcmpeqd %xmm5,%xmm3
+ movdqa %xmm2,`16*($i+2)+112`(%r10)
+ pand `16*($i+0)-128`($bp),%xmm0 # while it's still in register
+
+ pand `16*($i+1)-128`($bp),%xmm1
+ pand `16*($i+2)-128`($bp),%xmm2
+ movdqa %xmm3,`16*($i+3)+112`(%r10)
+ pand `16*($i+3)-128`($bp),%xmm3
+ por %xmm2,%xmm0
+ por %xmm3,%xmm1
+___
+for($i=0;$i<$STRIDE/16-4;$i+=4) {
+$code.=<<___;
+ movdqa `16*($i+0)-128`($bp),%xmm4
+ movdqa `16*($i+1)-128`($bp),%xmm5
+ movdqa `16*($i+2)-128`($bp),%xmm2
+ pand `16*($i+0)+112`(%r10),%xmm4
+ movdqa `16*($i+3)-128`($bp),%xmm3
+ pand `16*($i+1)+112`(%r10),%xmm5
+ por %xmm4,%xmm0
+ pand `16*($i+2)+112`(%r10),%xmm2
+ por %xmm5,%xmm1
+ pand `16*($i+3)+112`(%r10),%xmm3
+ por %xmm2,%xmm0
+ por %xmm3,%xmm1
+___
+}
+$code.=<<___;
+ por %xmm1,%xmm0
+ pshufd \$0x4e,%xmm0,%xmm1
+ por %xmm1,%xmm0
+ lea $STRIDE($bp),$bp
movq %xmm0,$m0 # m0=bp[0]
- movq `3*$STRIDE/4-96`($tp),%xmm0
+
mov %r13,16+8(%rsp) # save end of b[num]
mov $rp, 56+8(%rsp) # save $rp
@@ -521,26 +603,10 @@ $code.=<<___;
mov %rax,$A[0]
mov ($np),%rax
- pand %xmm5,%xmm2
- pand %xmm6,%xmm3
- por %xmm2,%xmm1
-
imulq $A[0],$m1 # "tp[0]"*n0
- ##############################################################
- # $tp is chosen so that writing to top-most element of the
- # vector occurs just "above" references to powers table,
- # "above" modulo cache-line size, which effectively precludes
- # possibility of memory disambiguation logic failure when
- # accessing the table.
- #
- lea 64+8(%rsp,%r11,8),$tp
+ lea 64+8(%rsp),$tp
mov %rdx,$A[1]
- pand %xmm7,%xmm0
- por %xmm3,%xmm1
- lea 2*$STRIDE($bp),$bp
- por %xmm1,%xmm0
-
mulq $m1 # np[0]*m1
add %rax,$A[0] # discarded
mov 8($ap,$num),%rax
@@ -549,7 +615,7 @@ $code.=<<___;
mulq $m0
add %rax,$A[1]
- mov 16*1($np),%rax # interleaved with 0, therefore 16*n
+ mov 8*1($np),%rax
adc \$0,%rdx
mov %rdx,$A[0]
@@ -559,7 +625,7 @@ $code.=<<___;
adc \$0,%rdx
add $A[1],$N[1]
lea 4*8($num),$j # j=4
- lea 16*4($np),$np
+ lea 8*4($np),$np
adc \$0,%rdx
mov $N[1],($tp)
mov %rdx,$N[0]
@@ -569,7 +635,7 @@ $code.=<<___;
.L1st4x:
mulq $m0 # ap[j]*bp[0]
add %rax,$A[0]
- mov -16*2($np),%rax
+ mov -8*2($np),%rax
lea 32($tp),$tp
adc \$0,%rdx
mov %rdx,$A[1]
@@ -585,7 +651,7 @@ $code.=<<___;
mulq $m0 # ap[j]*bp[0]
add %rax,$A[1]
- mov -16*1($np),%rax
+ mov -8*1($np),%rax
adc \$0,%rdx
mov %rdx,$A[0]
@@ -600,7 +666,7 @@ $code.=<<___;
mulq $m0 # ap[j]*bp[0]
add %rax,$A[0]
- mov 16*0($np),%rax
+ mov 8*0($np),%rax
adc \$0,%rdx
mov %rdx,$A[1]
@@ -615,7 +681,7 @@ $code.=<<___;
mulq $m0 # ap[j]*bp[0]
add %rax,$A[1]
- mov 16*1($np),%rax
+ mov 8*1($np),%rax
adc \$0,%rdx
mov %rdx,$A[0]
@@ -624,7 +690,7 @@ $code.=<<___;
mov 16($ap,$j),%rax
adc \$0,%rdx
add $A[1],$N[1] # np[j]*m1+ap[j]*bp[0]
- lea 16*4($np),$np
+ lea 8*4($np),$np
adc \$0,%rdx
mov $N[1],($tp) # tp[j-1]
mov %rdx,$N[0]
@@ -634,7 +700,7 @@ $code.=<<___;
mulq $m0 # ap[j]*bp[0]
add %rax,$A[0]
- mov -16*2($np),%rax
+ mov -8*2($np),%rax
lea 32($tp),$tp
adc \$0,%rdx
mov %rdx,$A[1]
@@ -650,7 +716,7 @@ $code.=<<___;
mulq $m0 # ap[j]*bp[0]
add %rax,$A[1]
- mov -16*1($np),%rax
+ mov -8*1($np),%rax
adc \$0,%rdx
mov %rdx,$A[0]
@@ -663,8 +729,7 @@ $code.=<<___;
mov $N[1],-16($tp) # tp[j-1]
mov %rdx,$N[0]
- movq %xmm0,$m0 # bp[1]
- lea ($np,$num,2),$np # rewind $np
+ lea ($np,$num),$np # rewind $np
xor $N[1],$N[1]
add $A[0],$N[0]
@@ -675,6 +740,33 @@ $code.=<<___;
.align 32
.Louter4x:
+ lea 16+128($tp),%rdx # where 256-byte mask is (+size optimization)
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+___
+for($i=0;$i<$STRIDE/16;$i+=4) {
+$code.=<<___;
+ movdqa `16*($i+0)-128`($bp),%xmm0
+ movdqa `16*($i+1)-128`($bp),%xmm1
+ movdqa `16*($i+2)-128`($bp),%xmm2
+ movdqa `16*($i+3)-128`($bp),%xmm3
+ pand `16*($i+0)-128`(%rdx),%xmm0
+ pand `16*($i+1)-128`(%rdx),%xmm1
+ por %xmm0,%xmm4
+ pand `16*($i+2)-128`(%rdx),%xmm2
+ por %xmm1,%xmm5
+ pand `16*($i+3)-128`(%rdx),%xmm3
+ por %xmm2,%xmm4
+ por %xmm3,%xmm5
+___
+}
+$code.=<<___;
+ por %xmm5,%xmm4
+ pshufd \$0x4e,%xmm4,%xmm0
+ por %xmm4,%xmm0
+ lea $STRIDE($bp),$bp
+ movq %xmm0,$m0 # m0=bp[i]
+
mov ($tp,$num),$A[0]
mov $n0,$m1
mulq $m0 # ap[0]*bp[i]
@@ -682,25 +774,11 @@ $code.=<<___;
mov ($np),%rax
adc \$0,%rdx
- movq `0*$STRIDE/4-96`($bp),%xmm0
- movq `1*$STRIDE/4-96`($bp),%xmm1
- pand %xmm4,%xmm0
- movq `2*$STRIDE/4-96`($bp),%xmm2
- pand %xmm5,%xmm1
- movq `3*$STRIDE/4-96`($bp),%xmm3
-
imulq $A[0],$m1 # tp[0]*n0
- .byte 0x67
mov %rdx,$A[1]
mov $N[1],($tp) # store upmost overflow bit
- pand %xmm6,%xmm2
- por %xmm1,%xmm0
- pand %xmm7,%xmm3
- por %xmm2,%xmm0
lea ($tp,$num),$tp # rewind $tp
- lea $STRIDE($bp),$bp
- por %xmm3,%xmm0
mulq $m1 # np[0]*m1
add %rax,$A[0] # "$N[0]", discarded
@@ -710,7 +788,7 @@ $code.=<<___;
mulq $m0 # ap[j]*bp[i]
add %rax,$A[1]
- mov 16*1($np),%rax # interleaved with 0, therefore 16*n
+ mov 8*1($np),%rax
adc \$0,%rdx
add 8($tp),$A[1] # +tp[1]
adc \$0,%rdx
@@ -722,7 +800,7 @@ $code.=<<___;
adc \$0,%rdx
add $A[1],$N[1] # np[j]*m1+ap[j]*bp[i]+tp[j]
lea 4*8($num),$j # j=4
- lea 16*4($np),$np
+ lea 8*4($np),$np
adc \$0,%rdx
mov %rdx,$N[0]
jmp .Linner4x
@@ -731,7 +809,7 @@ $code.=<<___;
.Linner4x:
mulq $m0 # ap[j]*bp[i]
add %rax,$A[0]
- mov -16*2($np),%rax
+ mov -8*2($np),%rax
adc \$0,%rdx
add 16($tp),$A[0] # ap[j]*bp[i]+tp[j]
lea 32($tp),$tp
@@ -749,7 +827,7 @@ $code.=<<___;
mulq $m0 # ap[j]*bp[i]
add %rax,$A[1]
- mov -16*1($np),%rax
+ mov -8*1($np),%rax
adc \$0,%rdx
add -8($tp),$A[1]
adc \$0,%rdx
@@ -766,7 +844,7 @@ $code.=<<___;
mulq $m0 # ap[j]*bp[i]
add %rax,$A[0]
- mov 16*0($np),%rax
+ mov 8*0($np),%rax
adc \$0,%rdx
add ($tp),$A[0] # ap[j]*bp[i]+tp[j]
adc \$0,%rdx
@@ -783,7 +861,7 @@ $code.=<<___;
mulq $m0 # ap[j]*bp[i]
add %rax,$A[1]
- mov 16*1($np),%rax
+ mov 8*1($np),%rax
adc \$0,%rdx
add 8($tp),$A[1]
adc \$0,%rdx
@@ -794,7 +872,7 @@ $code.=<<___;
mov 16($ap,$j),%rax
adc \$0,%rdx
add $A[1],$N[1]
- lea 16*4($np),$np
+ lea 8*4($np),$np
adc \$0,%rdx
mov $N[0],-8($tp) # tp[j-1]
mov %rdx,$N[0]
@@ -804,7 +882,7 @@ $code.=<<___;
mulq $m0 # ap[j]*bp[i]
add %rax,$A[0]
- mov -16*2($np),%rax
+ mov -8*2($np),%rax
adc \$0,%rdx
add 16($tp),$A[0] # ap[j]*bp[i]+tp[j]
lea 32($tp),$tp
@@ -823,7 +901,7 @@ $code.=<<___;
mulq $m0 # ap[j]*bp[i]
add %rax,$A[1]
mov $m1,%rax
- mov -16*1($np),$m1
+ mov -8*1($np),$m1
adc \$0,%rdx
add -8($tp),$A[1]
adc \$0,%rdx
@@ -838,9 +916,8 @@ $code.=<<___;
mov $N[0],-24($tp) # tp[j-1]
mov %rdx,$N[0]
- movq %xmm0,$m0 # bp[i+1]
mov $N[1],-16($tp) # tp[j-1]
- lea ($np,$num,2),$np # rewind $np
+ lea ($np,$num),$np # rewind $np
xor $N[1],$N[1]
add $A[0],$N[0]
@@ -854,16 +931,23 @@ $code.=<<___;
___
if (1) {
$code.=<<___;
+ xor %rax,%rax
sub $N[0],$m1 # compare top-most words
adc $j,$j # $j is zero
or $j,$N[1]
- xor \$1,$N[1]
+ sub $N[1],%rax # %rax=-$N[1]
lea ($tp,$num),%rbx # tptr in .sqr4x_sub
- lea ($np,$N[1],8),%rbp # nptr in .sqr4x_sub
+ mov ($np),%r12
+ lea ($np),%rbp # nptr in .sqr4x_sub
mov %r9,%rcx
- sar \$3+2,%rcx # cf=0
+ sar \$3+2,%rcx
mov 56+8(%rsp),%rdi # rptr in .sqr4x_sub
- jmp .Lsqr4x_sub
+ dec %r12 # so that after 'not' we get -n[0]
+ xor %r10,%r10
+ mov 8*1(%rbp),%r13
+ mov 8*2(%rbp),%r14
+ mov 8*3(%rbp),%r15
+ jmp .Lsqr4x_sub_entry
___
} else {
my @ri=("%rax",$bp,$m0,$m1);
@@ -930,8 +1014,8 @@ bn_power5:
___
$code.=<<___ if ($addx);
mov OPENSSL_ia32cap_P+8(%rip),%r11d
- and \$0x80100,%r11d
- cmp \$0x80100,%r11d
+ and \$0x80108,%r11d
+ cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1
je .Lpowerx5_enter
___
$code.=<<___;
@@ -942,38 +1026,32 @@ $code.=<<___;
push %r13
push %r14
push %r15
-___
-$code.=<<___ if ($win64);
- lea -0x28(%rsp),%rsp
- movaps %xmm6,(%rsp)
- movaps %xmm7,0x10(%rsp)
-___
-$code.=<<___;
- mov ${num}d,%r10d
+
shl \$3,${num}d # convert $num to bytes
- shl \$3+2,%r10d # 4*$num
+ lea ($num,$num,2),%r10d # 3*$num
neg $num
mov ($n0),$n0 # *n0
##############################################################
- # ensure that stack frame doesn't alias with $aptr+4*$num
- # modulo 4096, which covers ret[num], am[num] and n[2*num]
- # (see bn_exp.c). this is done to allow memory disambiguation
- # logic do its magic.
+ # Ensure that stack frame doesn't alias with $rptr+3*$num
+ # modulo 4096, which covers ret[num], am[num] and n[num]
+ # (see bn_exp.c). This is done to allow memory disambiguation
+ # logic do its magic. [Extra 256 bytes is for power mask
+ # calculated from 7th argument, the index.]
#
- lea -64(%rsp,$num,2),%r11
- sub $aptr,%r11
+ lea -320(%rsp,$num,2),%r11
+ sub $rptr,%r11
and \$4095,%r11
cmp %r11,%r10
jb .Lpwr_sp_alt
sub %r11,%rsp # align with $aptr
- lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num)
+ lea -320(%rsp,$num,2),%rsp # alloca(frame+2*num*8+256)
jmp .Lpwr_sp_done
.align 32
.Lpwr_sp_alt:
- lea 4096-64(,$num,2),%r10 # 4096-frame-2*$num
- lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num)
+ lea 4096-320(,$num,2),%r10
+ lea -320(%rsp,$num,2),%rsp # alloca(frame+2*num*8+256)
sub %r10,%r11
mov \$0,%r10
cmovc %r10,%r11
@@ -995,16 +1073,21 @@ $code.=<<___;
mov $n0, 32(%rsp)
mov %rax, 40(%rsp) # save original %rsp
.Lpower5_body:
- movq $rptr,%xmm1 # save $rptr
+ movq $rptr,%xmm1 # save $rptr, used in sqr8x
movq $nptr,%xmm2 # save $nptr
- movq %r10, %xmm3 # -$num
+ movq %r10, %xmm3 # -$num, used in sqr8x
movq $bptr,%xmm4
call __bn_sqr8x_internal
+ call __bn_post4x_internal
call __bn_sqr8x_internal
+ call __bn_post4x_internal
call __bn_sqr8x_internal
+ call __bn_post4x_internal
call __bn_sqr8x_internal
+ call __bn_post4x_internal
call __bn_sqr8x_internal
+ call __bn_post4x_internal
movq %xmm2,$nptr
movq %xmm4,$bptr
@@ -1565,9 +1648,9 @@ my ($nptr,$tptr,$carry,$m0)=("%rbp","%rdi","%rsi","%rbx");
$code.=<<___;
movq %xmm2,$nptr
-sqr8x_reduction:
+__bn_sqr8x_reduction:
xor %rax,%rax
- lea ($nptr,$num,2),%rcx # end of n[]
+ lea ($nptr,$num),%rcx # end of n[]
lea 48+8(%rsp,$num,2),%rdx # end of t[] buffer
mov %rcx,0+8(%rsp)
lea 48+8(%rsp,$num),$tptr # end of initial t[] window
@@ -1593,21 +1676,21 @@ sqr8x_reduction:
.byte 0x67
mov $m0,%r8
imulq 32+8(%rsp),$m0 # n0*a[0]
- mov 16*0($nptr),%rax # n[0]
+ mov 8*0($nptr),%rax # n[0]
mov \$8,%ecx
jmp .L8x_reduce
.align 32
.L8x_reduce:
mulq $m0
- mov 16*1($nptr),%rax # n[1]
+ mov 8*1($nptr),%rax # n[1]
neg %r8
mov %rdx,%r8
adc \$0,%r8
mulq $m0
add %rax,%r9
- mov 16*2($nptr),%rax
+ mov 8*2($nptr),%rax
adc \$0,%rdx
add %r9,%r8
mov $m0,48-8+8(%rsp,%rcx,8) # put aside n0*a[i]
@@ -1616,7 +1699,7 @@ sqr8x_reduction:
mulq $m0
add %rax,%r10
- mov 16*3($nptr),%rax
+ mov 8*3($nptr),%rax
adc \$0,%rdx
add %r10,%r9
mov 32+8(%rsp),$carry # pull n0, borrow $carry
@@ -1625,7 +1708,7 @@ sqr8x_reduction:
mulq $m0
add %rax,%r11
- mov 16*4($nptr),%rax
+ mov 8*4($nptr),%rax
adc \$0,%rdx
imulq %r8,$carry # modulo-scheduled
add %r11,%r10
@@ -1634,7 +1717,7 @@ sqr8x_reduction:
mulq $m0
add %rax,%r12
- mov 16*5($nptr),%rax
+ mov 8*5($nptr),%rax
adc \$0,%rdx
add %r12,%r11
mov %rdx,%r12
@@ -1642,7 +1725,7 @@ sqr8x_reduction:
mulq $m0
add %rax,%r13
- mov 16*6($nptr),%rax
+ mov 8*6($nptr),%rax
adc \$0,%rdx
add %r13,%r12
mov %rdx,%r13
@@ -1650,7 +1733,7 @@ sqr8x_reduction:
mulq $m0
add %rax,%r14
- mov 16*7($nptr),%rax
+ mov 8*7($nptr),%rax
adc \$0,%rdx
add %r14,%r13
mov %rdx,%r14
@@ -1659,7 +1742,7 @@ sqr8x_reduction:
mulq $m0
mov $carry,$m0 # n0*a[i]
add %rax,%r15
- mov 16*0($nptr),%rax # n[0]
+ mov 8*0($nptr),%rax # n[0]
adc \$0,%rdx
add %r15,%r14
mov %rdx,%r15
@@ -1668,7 +1751,7 @@ sqr8x_reduction:
dec %ecx
jnz .L8x_reduce
- lea 16*8($nptr),$nptr
+ lea 8*8($nptr),$nptr
xor %rax,%rax
mov 8+8(%rsp),%rdx # pull end of t[]
cmp 0+8(%rsp),$nptr # end of n[]?
@@ -1687,21 +1770,21 @@ sqr8x_reduction:
mov 48+56+8(%rsp),$m0 # pull n0*a[0]
mov \$8,%ecx
- mov 16*0($nptr),%rax
+ mov 8*0($nptr),%rax
jmp .L8x_tail
.align 32
.L8x_tail:
mulq $m0
add %rax,%r8
- mov 16*1($nptr),%rax
+ mov 8*1($nptr),%rax
mov %r8,($tptr) # save result
mov %rdx,%r8
adc \$0,%r8
mulq $m0
add %rax,%r9
- mov 16*2($nptr),%rax
+ mov 8*2($nptr),%rax
adc \$0,%rdx
add %r9,%r8
lea 8($tptr),$tptr # $tptr++
@@ -1710,7 +1793,7 @@ sqr8x_reduction:
mulq $m0
add %rax,%r10
- mov 16*3($nptr),%rax
+ mov 8*3($nptr),%rax
adc \$0,%rdx
add %r10,%r9
mov %rdx,%r10
@@ -1718,7 +1801,7 @@ sqr8x_reduction:
mulq $m0
add %rax,%r11
- mov 16*4($nptr),%rax
+ mov 8*4($nptr),%rax
adc \$0,%rdx
add %r11,%r10
mov %rdx,%r11
@@ -1726,7 +1809,7 @@ sqr8x_reduction:
mulq $m0
add %rax,%r12
- mov 16*5($nptr),%rax
+ mov 8*5($nptr),%rax
adc \$0,%rdx
add %r12,%r11
mov %rdx,%r12
@@ -1734,7 +1817,7 @@ sqr8x_reduction:
mulq $m0
add %rax,%r13
- mov 16*6($nptr),%rax
+ mov 8*6($nptr),%rax
adc \$0,%rdx
add %r13,%r12
mov %rdx,%r13
@@ -1742,7 +1825,7 @@ sqr8x_reduction:
mulq $m0
add %rax,%r14
- mov 16*7($nptr),%rax
+ mov 8*7($nptr),%rax
adc \$0,%rdx
add %r14,%r13
mov %rdx,%r14
@@ -1753,14 +1836,14 @@ sqr8x_reduction:
add %rax,%r15
adc \$0,%rdx
add %r15,%r14
- mov 16*0($nptr),%rax # pull n[0]
+ mov 8*0($nptr),%rax # pull n[0]
mov %rdx,%r15
adc \$0,%r15
dec %ecx
jnz .L8x_tail
- lea 16*8($nptr),$nptr
+ lea 8*8($nptr),$nptr
mov 8+8(%rsp),%rdx # pull end of t[]
cmp 0+8(%rsp),$nptr # end of n[]?
jae .L8x_tail_done # break out of loop
@@ -1806,7 +1889,7 @@ sqr8x_reduction:
adc 8*6($tptr),%r14
adc 8*7($tptr),%r15
adc \$0,%rax # top-most carry
- mov -16($nptr),%rcx # np[num-1]
+ mov -8($nptr),%rcx # np[num-1]
xor $carry,$carry
movq %xmm2,$nptr # restore $nptr
@@ -1824,6 +1907,8 @@ sqr8x_reduction:
cmp %rdx,$tptr # end of t[]?
jb .L8x_reduction_loop
+ ret
+.size bn_sqr8x_internal,.-bn_sqr8x_internal
___
}
##############################################################
@@ -1832,48 +1917,62 @@ ___
{
my ($tptr,$nptr)=("%rbx","%rbp");
$code.=<<___;
- #xor %rsi,%rsi # %rsi was $carry above
- sub %r15,%rcx # compare top-most words
+.type __bn_post4x_internal,\@abi-omnipotent
+.align 32
+__bn_post4x_internal:
+ mov 8*0($nptr),%r12
lea (%rdi,$num),$tptr # %rdi was $tptr above
- adc %rsi,%rsi
mov $num,%rcx
- or %rsi,%rax
movq %xmm1,$rptr # restore $rptr
- xor \$1,%rax
+ neg %rax
movq %xmm1,$aptr # prepare for back-to-back call
- lea ($nptr,%rax,8),$nptr
- sar \$3+2,%rcx # cf=0
- jmp .Lsqr4x_sub
+ sar \$3+2,%rcx
+ dec %r12 # so that after 'not' we get -n[0]
+ xor %r10,%r10
+ mov 8*1($nptr),%r13
+ mov 8*2($nptr),%r14
+ mov 8*3($nptr),%r15
+ jmp .Lsqr4x_sub_entry
-.align 32
+.align 16
.Lsqr4x_sub:
- .byte 0x66
- mov 8*0($tptr),%r12
- mov 8*1($tptr),%r13
- sbb 16*0($nptr),%r12
- mov 8*2($tptr),%r14
- sbb 16*1($nptr),%r13
- mov 8*3($tptr),%r15
- lea 8*4($tptr),$tptr
- sbb 16*2($nptr),%r14
+ mov 8*0($nptr),%r12
+ mov 8*1($nptr),%r13
+ mov 8*2($nptr),%r14
+ mov 8*3($nptr),%r15
+.Lsqr4x_sub_entry:
+ lea 8*4($nptr),$nptr
+ not %r12
+ not %r13
+ not %r14
+ not %r15
+ and %rax,%r12
+ and %rax,%r13
+ and %rax,%r14
+ and %rax,%r15
+
+ neg %r10 # mov %r10,%cf
+ adc 8*0($tptr),%r12
+ adc 8*1($tptr),%r13
+ adc 8*2($tptr),%r14
+ adc 8*3($tptr),%r15
mov %r12,8*0($rptr)
- sbb 16*3($nptr),%r15
- lea 16*4($nptr),$nptr
+ lea 8*4($tptr),$tptr
mov %r13,8*1($rptr)
+ sbb %r10,%r10 # mov %cf,%r10
mov %r14,8*2($rptr)
mov %r15,8*3($rptr)
lea 8*4($rptr),$rptr
inc %rcx # pass %cf
jnz .Lsqr4x_sub
-___
-}
-$code.=<<___;
+
mov $num,%r10 # prepare for back-to-back call
neg $num # restore $num
ret
-.size bn_sqr8x_internal,.-bn_sqr8x_internal
+.size __bn_post4x_internal,.-__bn_post4x_internal
___
+}
{
$code.=<<___;
.globl bn_from_montgomery
@@ -1897,39 +1996,32 @@ bn_from_mont8x:
push %r13
push %r14
push %r15
-___
-$code.=<<___ if ($win64);
- lea -0x28(%rsp),%rsp
- movaps %xmm6,(%rsp)
- movaps %xmm7,0x10(%rsp)
-___
-$code.=<<___;
- .byte 0x67
- mov ${num}d,%r10d
+
shl \$3,${num}d # convert $num to bytes
- shl \$3+2,%r10d # 4*$num
+ lea ($num,$num,2),%r10 # 3*$num in bytes
neg $num
mov ($n0),$n0 # *n0
##############################################################
- # ensure that stack frame doesn't alias with $aptr+4*$num
- # modulo 4096, which covers ret[num], am[num] and n[2*num]
- # (see bn_exp.c). this is done to allow memory disambiguation
- # logic do its magic.
+ # Ensure that stack frame doesn't alias with $rptr+3*$num
+ # modulo 4096, which covers ret[num], am[num] and n[num]
+ # (see bn_exp.c). The stack is allocated to aligned with
+ # bn_power5's frame, and as bn_from_montgomery happens to be
+ # last operation, we use the opportunity to cleanse it.
#
- lea -64(%rsp,$num,2),%r11
- sub $aptr,%r11
+ lea -320(%rsp,$num,2),%r11
+ sub $rptr,%r11
and \$4095,%r11
cmp %r11,%r10
jb .Lfrom_sp_alt
sub %r11,%rsp # align with $aptr
- lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num)
+ lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256)
jmp .Lfrom_sp_done
.align 32
.Lfrom_sp_alt:
- lea 4096-64(,$num,2),%r10 # 4096-frame-2*$num
- lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num)
+ lea 4096-320(,$num,2),%r10
+ lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256)
sub %r10,%r11
mov \$0,%r10
cmovc %r10,%r11
@@ -1983,12 +2075,13 @@ $code.=<<___;
___
$code.=<<___ if ($addx);
mov OPENSSL_ia32cap_P+8(%rip),%r11d
- and \$0x80100,%r11d
- cmp \$0x80100,%r11d
+ and \$0x80108,%r11d
+ cmp \$0x80108,%r11d # check for AD*X+BMI2+BMI1
jne .Lfrom_mont_nox
lea (%rax,$num),$rptr
- call sqrx8x_reduction
+ call __bn_sqrx8x_reduction
+ call __bn_postx4x_internal
pxor %xmm0,%xmm0
lea 48(%rsp),%rax
@@ -1999,7 +2092,8 @@ $code.=<<___ if ($addx);
.Lfrom_mont_nox:
___
$code.=<<___;
- call sqr8x_reduction
+ call __bn_sqr8x_reduction
+ call __bn_post4x_internal
pxor %xmm0,%xmm0
lea 48(%rsp),%rax
@@ -2039,7 +2133,6 @@ $code.=<<___;
.align 32
bn_mulx4x_mont_gather5:
.Lmulx4x_enter:
- .byte 0x67
mov %rsp,%rax
push %rbx
push %rbp
@@ -2047,40 +2140,33 @@ bn_mulx4x_mont_gather5:
push %r13
push %r14
push %r15
-___
-$code.=<<___ if ($win64);
- lea -0x28(%rsp),%rsp
- movaps %xmm6,(%rsp)
- movaps %xmm7,0x10(%rsp)
-___
-$code.=<<___;
- .byte 0x67
- mov ${num}d,%r10d
+
shl \$3,${num}d # convert $num to bytes
- shl \$3+2,%r10d # 4*$num
+ lea ($num,$num,2),%r10 # 3*$num in bytes
neg $num # -$num
mov ($n0),$n0 # *n0
##############################################################
- # ensure that stack frame doesn't alias with $aptr+4*$num
- # modulo 4096, which covers a[num], ret[num] and n[2*num]
- # (see bn_exp.c). this is done to allow memory disambiguation
- # logic do its magic. [excessive frame is allocated in order
- # to allow bn_from_mont8x to clear it.]
+ # Ensure that stack frame doesn't alias with $rptr+3*$num
+ # modulo 4096, which covers ret[num], am[num] and n[num]
+ # (see bn_exp.c). This is done to allow memory disambiguation
+ # logic do its magic. [Extra [num] is allocated in order
+ # to align with bn_power5's frame, which is cleansed after
+ # completing exponentiation. Extra 256 bytes is for power mask
+ # calculated from 7th argument, the index.]
#
- lea -64(%rsp,$num,2),%r11
- sub $ap,%r11
+ lea -320(%rsp,$num,2),%r11
+ sub $rp,%r11
and \$4095,%r11
cmp %r11,%r10
jb .Lmulx4xsp_alt
sub %r11,%rsp # align with $aptr
- lea -64(%rsp,$num,2),%rsp # alloca(frame+$num)
+ lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256)
jmp .Lmulx4xsp_done
-.align 32
.Lmulx4xsp_alt:
- lea 4096-64(,$num,2),%r10 # 4096-frame-$num
- lea -64(%rsp,$num,2),%rsp # alloca(frame+$num)
+ lea 4096-320(,$num,2),%r10
+ lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256)
sub %r10,%r11
mov \$0,%r10
cmovc %r10,%r11
@@ -2106,12 +2192,7 @@ $code.=<<___;
mov 40(%rsp),%rsi # restore %rsp
mov \$1,%rax
-___
-$code.=<<___ if ($win64);
- movaps -88(%rsi),%xmm6
- movaps -72(%rsi),%xmm7
-___
-$code.=<<___;
+
mov -48(%rsi),%r15
mov -40(%rsi),%r14
mov -32(%rsi),%r13
@@ -2126,14 +2207,16 @@ $code.=<<___;
.type mulx4x_internal,\@abi-omnipotent
.align 32
mulx4x_internal:
- .byte 0x4c,0x89,0x8c,0x24,0x08,0x00,0x00,0x00 # mov $num,8(%rsp) # save -$num
- .byte 0x67
+ mov $num,8(%rsp) # save -$num (it was in bytes)
+ mov $num,%r10
neg $num # restore $num
shl \$5,$num
- lea 256($bp,$num),%r13
+ neg %r10 # restore $num
+ lea 128($bp,$num),%r13 # end of powers table (+size optimization)
shr \$5+5,$num
- mov `($win64?56:8)`(%rax),%r10d # load 7th argument
+ movd `($win64?56:8)`(%rax),%xmm5 # load 7th argument
sub \$1,$num
+ lea .Linc(%rip),%rax
mov %r13,16+8(%rsp) # end of b[num]
mov $num,24+8(%rsp) # inner counter
mov $rp, 56+8(%rsp) # save $rp
@@ -2144,52 +2227,92 @@ my $rptr=$bptr;
my $STRIDE=2**5*8; # 5 is "window size"
my $N=$STRIDE/4; # should match cache line size
$code.=<<___;
- mov %r10,%r11
- shr \$`log($N/8)/log(2)`,%r10
- and \$`$N/8-1`,%r11
- not %r10
- lea .Lmagic_masks(%rip),%rax
- and \$`2**5/($N/8)-1`,%r10 # 5 is "window size"
- lea 96($bp,%r11,8),$bptr # pointer within 1st cache line
- movq 0(%rax,%r10,8),%xmm4 # set of masks denoting which
- movq 8(%rax,%r10,8),%xmm5 # cache line contains element
- add \$7,%r11
- movq 16(%rax,%r10,8),%xmm6 # denoted by 7th argument
- movq 24(%rax,%r10,8),%xmm7
- and \$7,%r11
-
- movq `0*$STRIDE/4-96`($bptr),%xmm0
- lea $STRIDE($bptr),$tptr # borrow $tptr
- movq `1*$STRIDE/4-96`($bptr),%xmm1
- pand %xmm4,%xmm0
- movq `2*$STRIDE/4-96`($bptr),%xmm2
- pand %xmm5,%xmm1
- movq `3*$STRIDE/4-96`($bptr),%xmm3
- pand %xmm6,%xmm2
- por %xmm1,%xmm0
- movq `0*$STRIDE/4-96`($tptr),%xmm1
- pand %xmm7,%xmm3
- por %xmm2,%xmm0
- movq `1*$STRIDE/4-96`($tptr),%xmm2
- por %xmm3,%xmm0
- .byte 0x67,0x67
- pand %xmm4,%xmm1
- movq `2*$STRIDE/4-96`($tptr),%xmm3
+ movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000
+ movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002
+ lea 88-112(%rsp,%r10),%r10 # place the mask after tp[num+1] (+ICache optimizaton)
+ lea 128($bp),$bptr # size optimization
+ pshufd \$0,%xmm5,%xmm5 # broadcast index
+ movdqa %xmm1,%xmm4
+ .byte 0x67
+ movdqa %xmm1,%xmm2
+___
+########################################################################
+# calculate mask by comparing 0..31 to index and save result to stack
+#
+$code.=<<___;
+ .byte 0x67
+ paddd %xmm0,%xmm1
+ pcmpeqd %xmm5,%xmm0 # compare to 1,0
+ movdqa %xmm4,%xmm3
+___
+for($i=0;$i<$STRIDE/16-4;$i+=4) {
+$code.=<<___;
+ paddd %xmm1,%xmm2
+ pcmpeqd %xmm5,%xmm1 # compare to 3,2
+ movdqa %xmm0,`16*($i+0)+112`(%r10)
+ movdqa %xmm4,%xmm0
+
+ paddd %xmm2,%xmm3
+ pcmpeqd %xmm5,%xmm2 # compare to 5,4
+ movdqa %xmm1,`16*($i+1)+112`(%r10)
+ movdqa %xmm4,%xmm1
+
+ paddd %xmm3,%xmm0
+ pcmpeqd %xmm5,%xmm3 # compare to 7,6
+ movdqa %xmm2,`16*($i+2)+112`(%r10)
+ movdqa %xmm4,%xmm2
+
+ paddd %xmm0,%xmm1
+ pcmpeqd %xmm5,%xmm0
+ movdqa %xmm3,`16*($i+3)+112`(%r10)
+ movdqa %xmm4,%xmm3
+___
+}
+$code.=<<___; # last iteration can be optimized
+ .byte 0x67
+ paddd %xmm1,%xmm2
+ pcmpeqd %xmm5,%xmm1
+ movdqa %xmm0,`16*($i+0)+112`(%r10)
+
+ paddd %xmm2,%xmm3
+ pcmpeqd %xmm5,%xmm2
+ movdqa %xmm1,`16*($i+1)+112`(%r10)
+
+ pcmpeqd %xmm5,%xmm3
+ movdqa %xmm2,`16*($i+2)+112`(%r10)
+
+ pand `16*($i+0)-128`($bptr),%xmm0 # while it's still in register
+ pand `16*($i+1)-128`($bptr),%xmm1
+ pand `16*($i+2)-128`($bptr),%xmm2
+ movdqa %xmm3,`16*($i+3)+112`(%r10)
+ pand `16*($i+3)-128`($bptr),%xmm3
+ por %xmm2,%xmm0
+ por %xmm3,%xmm1
+___
+for($i=0;$i<$STRIDE/16-4;$i+=4) {
+$code.=<<___;
+ movdqa `16*($i+0)-128`($bptr),%xmm4
+ movdqa `16*($i+1)-128`($bptr),%xmm5
+ movdqa `16*($i+2)-128`($bptr),%xmm2
+ pand `16*($i+0)+112`(%r10),%xmm4
+ movdqa `16*($i+3)-128`($bptr),%xmm3
+ pand `16*($i+1)+112`(%r10),%xmm5
+ por %xmm4,%xmm0
+ pand `16*($i+2)+112`(%r10),%xmm2
+ por %xmm5,%xmm1
+ pand `16*($i+3)+112`(%r10),%xmm3
+ por %xmm2,%xmm0
+ por %xmm3,%xmm1
+___
+}
+$code.=<<___;
+ pxor %xmm1,%xmm0
+ pshufd \$0x4e,%xmm0,%xmm1
+ por %xmm1,%xmm0
+ lea $STRIDE($bptr),$bptr
movq %xmm0,%rdx # bp[0]
- movq `3*$STRIDE/4-96`($tptr),%xmm0
- lea 2*$STRIDE($bptr),$bptr # next &b[i]
- pand %xmm5,%xmm2
- .byte 0x67,0x67
- pand %xmm6,%xmm3
- ##############################################################
- # $tptr is chosen so that writing to top-most element of the
- # vector occurs just "above" references to powers table,
- # "above" modulo cache-line size, which effectively precludes
- # possibility of memory disambiguation logic failure when
- # accessing the table.
- #
- lea 64+8*4+8(%rsp,%r11,8),$tptr
+ lea 64+8*4+8(%rsp),$tptr
mov %rdx,$bi
mulx 0*8($aptr),$mi,%rax # a[0]*b[0]
@@ -2205,37 +2328,31 @@ $code.=<<___;
xor $zero,$zero # cf=0, of=0
mov $mi,%rdx
- por %xmm2,%xmm1
- pand %xmm7,%xmm0
- por %xmm3,%xmm1
mov $bptr,8+8(%rsp) # off-load &b[i]
- por %xmm1,%xmm0
- .byte 0x48,0x8d,0xb6,0x20,0x00,0x00,0x00 # lea 4*8($aptr),$aptr
+ lea 4*8($aptr),$aptr
adcx %rax,%r13
adcx $zero,%r14 # cf=0
- mulx 0*16($nptr),%rax,%r10
+ mulx 0*8($nptr),%rax,%r10
adcx %rax,%r15 # discarded
adox %r11,%r10
- mulx 1*16($nptr),%rax,%r11
+ mulx 1*8($nptr),%rax,%r11
adcx %rax,%r10
adox %r12,%r11
- mulx 2*16($nptr),%rax,%r12
+ mulx 2*8($nptr),%rax,%r12
mov 24+8(%rsp),$bptr # counter value
- .byte 0x66
mov %r10,-8*4($tptr)
adcx %rax,%r11
adox %r13,%r12
- mulx 3*16($nptr),%rax,%r15
- .byte 0x67,0x67
+ mulx 3*8($nptr),%rax,%r15
mov $bi,%rdx
mov %r11,-8*3($tptr)
adcx %rax,%r12
adox $zero,%r15 # of=0
- .byte 0x48,0x8d,0x89,0x40,0x00,0x00,0x00 # lea 4*16($nptr),$nptr
+ lea 4*8($nptr),$nptr
mov %r12,-8*2($tptr)
- #jmp .Lmulx4x_1st
+ jmp .Lmulx4x_1st
.align 32
.Lmulx4x_1st:
@@ -2255,30 +2372,29 @@ $code.=<<___;
lea 4*8($tptr),$tptr
adox %r15,%r10
- mulx 0*16($nptr),%rax,%r15
+ mulx 0*8($nptr),%rax,%r15
adcx %rax,%r10
adox %r15,%r11
- mulx 1*16($nptr),%rax,%r15
+ mulx 1*8($nptr),%rax,%r15
adcx %rax,%r11
adox %r15,%r12
- mulx 2*16($nptr),%rax,%r15
+ mulx 2*8($nptr),%rax,%r15
mov %r10,-5*8($tptr)
adcx %rax,%r12
mov %r11,-4*8($tptr)
adox %r15,%r13
- mulx 3*16($nptr),%rax,%r15
+ mulx 3*8($nptr),%rax,%r15
mov $bi,%rdx
mov %r12,-3*8($tptr)
adcx %rax,%r13
adox $zero,%r15
- lea 4*16($nptr),$nptr
+ lea 4*8($nptr),$nptr
mov %r13,-2*8($tptr)
dec $bptr # of=0, pass cf
jnz .Lmulx4x_1st
mov 8(%rsp),$num # load -num
- movq %xmm0,%rdx # bp[1]
adc $zero,%r15 # modulo-scheduled
lea ($aptr,$num),$aptr # rewind $aptr
add %r15,%r14
@@ -2289,6 +2405,34 @@ $code.=<<___;
.align 32
.Lmulx4x_outer:
+ lea 16-256($tptr),%r10 # where 256-byte mask is (+density control)
+ pxor %xmm4,%xmm4
+ .byte 0x67,0x67
+ pxor %xmm5,%xmm5
+___
+for($i=0;$i<$STRIDE/16;$i+=4) {
+$code.=<<___;
+ movdqa `16*($i+0)-128`($bptr),%xmm0
+ movdqa `16*($i+1)-128`($bptr),%xmm1
+ movdqa `16*($i+2)-128`($bptr),%xmm2
+ pand `16*($i+0)+256`(%r10),%xmm0
+ movdqa `16*($i+3)-128`($bptr),%xmm3
+ pand `16*($i+1)+256`(%r10),%xmm1
+ por %xmm0,%xmm4
+ pand `16*($i+2)+256`(%r10),%xmm2
+ por %xmm1,%xmm5
+ pand `16*($i+3)+256`(%r10),%xmm3
+ por %xmm2,%xmm4
+ por %xmm3,%xmm5
+___
+}
+$code.=<<___;
+ por %xmm5,%xmm4
+ pshufd \$0x4e,%xmm4,%xmm0
+ por %xmm4,%xmm0
+ lea $STRIDE($bptr),$bptr
+ movq %xmm0,%rdx # m0=bp[i]
+
mov $zero,($tptr) # save top-most carry
lea 4*8($tptr,$num),$tptr # rewind $tptr
mulx 0*8($aptr),$mi,%r11 # a[0]*b[i]
@@ -2303,54 +2447,37 @@ $code.=<<___;
mulx 3*8($aptr),%rdx,%r14
adox -2*8($tptr),%r12
adcx %rdx,%r13
- lea ($nptr,$num,2),$nptr # rewind $nptr
+ lea ($nptr,$num),$nptr # rewind $nptr
lea 4*8($aptr),$aptr
adox -1*8($tptr),%r13
adcx $zero,%r14
adox $zero,%r14
- .byte 0x67
mov $mi,%r15
imulq 32+8(%rsp),$mi # "t[0]"*n0
- movq `0*$STRIDE/4-96`($bptr),%xmm0
- .byte 0x67,0x67
mov $mi,%rdx
- movq `1*$STRIDE/4-96`($bptr),%xmm1
- .byte 0x67
- pand %xmm4,%xmm0
- movq `2*$STRIDE/4-96`($bptr),%xmm2
- .byte 0x67
- pand %xmm5,%xmm1
- movq `3*$STRIDE/4-96`($bptr),%xmm3
- add \$$STRIDE,$bptr # next &b[i]
- .byte 0x67
- pand %xmm6,%xmm2
- por %xmm1,%xmm0
- pand %xmm7,%xmm3
xor $zero,$zero # cf=0, of=0
mov $bptr,8+8(%rsp) # off-load &b[i]
- mulx 0*16($nptr),%rax,%r10
+ mulx 0*8($nptr),%rax,%r10
adcx %rax,%r15 # discarded
adox %r11,%r10
- mulx 1*16($nptr),%rax,%r11
+ mulx 1*8($nptr),%rax,%r11
adcx %rax,%r10
adox %r12,%r11
- mulx 2*16($nptr),%rax,%r12
+ mulx 2*8($nptr),%rax,%r12
adcx %rax,%r11
adox %r13,%r12
- mulx 3*16($nptr),%rax,%r15
+ mulx 3*8($nptr),%rax,%r15
mov $bi,%rdx
- por %xmm2,%xmm0
mov 24+8(%rsp),$bptr # counter value
mov %r10,-8*4($tptr)
- por %xmm3,%xmm0
adcx %rax,%r12
mov %r11,-8*3($tptr)
adox $zero,%r15 # of=0
mov %r12,-8*2($tptr)
- lea 4*16($nptr),$nptr
+ lea 4*8($nptr),$nptr
jmp .Lmulx4x_inner
.align 32
@@ -2375,20 +2502,20 @@ $code.=<<___;
adcx $zero,%r14 # cf=0
adox %r15,%r10
- mulx 0*16($nptr),%rax,%r15
+ mulx 0*8($nptr),%rax,%r15
adcx %rax,%r10
adox %r15,%r11
- mulx 1*16($nptr),%rax,%r15
+ mulx 1*8($nptr),%rax,%r15
adcx %rax,%r11
adox %r15,%r12
- mulx 2*16($nptr),%rax,%r15
+ mulx 2*8($nptr),%rax,%r15
mov %r10,-5*8($tptr)
adcx %rax,%r12
adox %r15,%r13
mov %r11,-4*8($tptr)
- mulx 3*16($nptr),%rax,%r15
+ mulx 3*8($nptr),%rax,%r15
mov $bi,%rdx
- lea 4*16($nptr),$nptr
+ lea 4*8($nptr),$nptr
mov %r12,-3*8($tptr)
adcx %rax,%r13
adox $zero,%r15
@@ -2398,7 +2525,6 @@ $code.=<<___;
jnz .Lmulx4x_inner
mov 0+8(%rsp),$num # load -num
- movq %xmm0,%rdx # bp[i+1]
adc $zero,%r15 # modulo-scheduled
sub 0*8($tptr),$bptr # pull top-most carry to %cf
mov 8+8(%rsp),$bptr # re-load &b[i]
@@ -2411,20 +2537,26 @@ $code.=<<___;
cmp %r10,$bptr
jb .Lmulx4x_outer
- mov -16($nptr),%r10
+ mov -8($nptr),%r10
+ mov $zero,%r8
+ mov ($nptr,$num),%r12
+ lea ($nptr,$num),%rbp # rewind $nptr
+ mov $num,%rcx
+ lea ($tptr,$num),%rdi # rewind $tptr
+ xor %eax,%eax
xor %r15,%r15
sub %r14,%r10 # compare top-most words
adc %r15,%r15
- or %r15,$zero
- xor \$1,$zero
- lea ($tptr,$num),%rdi # rewind $tptr
- lea ($nptr,$num,2),$nptr # rewind $nptr
- .byte 0x67,0x67
- sar \$3+2,$num # cf=0
- lea ($nptr,$zero,8),%rbp
+ or %r15,%r8
+ sar \$3+2,%rcx
+ sub %r8,%rax # %rax=-%r8
mov 56+8(%rsp),%rdx # restore rp
- mov $num,%rcx
- jmp .Lsqrx4x_sub # common post-condition
+ dec %r12 # so that after 'not' we get -n[0]
+ mov 8*1(%rbp),%r13
+ xor %r8,%r8
+ mov 8*2(%rbp),%r14
+ mov 8*3(%rbp),%r15
+ jmp .Lsqrx4x_sub_entry # common post-condition
.size mulx4x_internal,.-mulx4x_internal
___
} {
@@ -2448,7 +2580,6 @@ $code.=<<___;
.align 32
bn_powerx5:
.Lpowerx5_enter:
- .byte 0x67
mov %rsp,%rax
push %rbx
push %rbp
@@ -2456,39 +2587,32 @@ bn_powerx5:
push %r13
push %r14
push %r15
-___
-$code.=<<___ if ($win64);
- lea -0x28(%rsp),%rsp
- movaps %xmm6,(%rsp)
- movaps %xmm7,0x10(%rsp)
-___
-$code.=<<___;
- .byte 0x67
- mov ${num}d,%r10d
+
shl \$3,${num}d # convert $num to bytes
- shl \$3+2,%r10d # 4*$num
+ lea ($num,$num,2),%r10 # 3*$num in bytes
neg $num
mov ($n0),$n0 # *n0
##############################################################
- # ensure that stack frame doesn't alias with $aptr+4*$num
- # modulo 4096, which covers ret[num], am[num] and n[2*num]
- # (see bn_exp.c). this is done to allow memory disambiguation
- # logic do its magic.
+ # Ensure that stack frame doesn't alias with $rptr+3*$num
+ # modulo 4096, which covers ret[num], am[num] and n[num]
+ # (see bn_exp.c). This is done to allow memory disambiguation
+ # logic do its magic. [Extra 256 bytes is for power mask
+ # calculated from 7th argument, the index.]
#
- lea -64(%rsp,$num,2),%r11
- sub $aptr,%r11
+ lea -320(%rsp,$num,2),%r11
+ sub $rptr,%r11
and \$4095,%r11
cmp %r11,%r10
jb .Lpwrx_sp_alt
sub %r11,%rsp # align with $aptr
- lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num)
+ lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256)
jmp .Lpwrx_sp_done
.align 32
.Lpwrx_sp_alt:
- lea 4096-64(,$num,2),%r10 # 4096-frame-2*$num
- lea -64(%rsp,$num,2),%rsp # alloca(frame+2*$num)
+ lea 4096-320(,$num,2),%r10
+ lea -320(%rsp,$num,2),%rsp # alloca(frame+2*$num*8+256)
sub %r10,%r11
mov \$0,%r10
cmovc %r10,%r11
@@ -2519,10 +2643,15 @@ $code.=<<___;
.Lpowerx5_body:
call __bn_sqrx8x_internal
+ call __bn_postx4x_internal
call __bn_sqrx8x_internal
+ call __bn_postx4x_internal
call __bn_sqrx8x_internal
+ call __bn_postx4x_internal
call __bn_sqrx8x_internal
+ call __bn_postx4x_internal
call __bn_sqrx8x_internal
+ call __bn_postx4x_internal
mov %r10,$num # -num
mov $aptr,$rptr
@@ -2534,12 +2663,7 @@ $code.=<<___;
mov 40(%rsp),%rsi # restore %rsp
mov \$1,%rax
-___
-$code.=<<___ if ($win64);
- movaps -88(%rsi),%xmm6
- movaps -72(%rsi),%xmm7
-___
-$code.=<<___;
+
mov -48(%rsi),%r15
mov -40(%rsi),%r14
mov -32(%rsi),%r13
@@ -2973,11 +3097,11 @@ my ($nptr,$carry,$m0)=("%rbp","%rsi","%rdx");
$code.=<<___;
movq %xmm2,$nptr
-sqrx8x_reduction:
+__bn_sqrx8x_reduction:
xor %eax,%eax # initial top-most carry bit
mov 32+8(%rsp),%rbx # n0
mov 48+8(%rsp),%rdx # "%r8", 8*0($tptr)
- lea -128($nptr,$num,2),%rcx # end of n[]
+ lea -8*8($nptr,$num),%rcx # end of n[]
#lea 48+8(%rsp,$num,2),$tptr # end of t[] buffer
mov %rcx, 0+8(%rsp) # save end of n[]
mov $tptr,8+8(%rsp) # save end of t[]
@@ -3006,23 +3130,23 @@ sqrx8x_reduction:
.align 32
.Lsqrx8x_reduce:
mov %r8, %rbx
- mulx 16*0($nptr),%rax,%r8 # n[0]
+ mulx 8*0($nptr),%rax,%r8 # n[0]
adcx %rbx,%rax # discarded
adox %r9,%r8
- mulx 16*1($nptr),%rbx,%r9 # n[1]
+ mulx 8*1($nptr),%rbx,%r9 # n[1]
adcx %rbx,%r8
adox %r10,%r9
- mulx 16*2($nptr),%rbx,%r10
+ mulx 8*2($nptr),%rbx,%r10
adcx %rbx,%r9
adox %r11,%r10
- mulx 16*3($nptr),%rbx,%r11
+ mulx 8*3($nptr),%rbx,%r11
adcx %rbx,%r10
adox %r12,%r11
- .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x40,0x00,0x00,0x00 # mulx 16*4($nptr),%rbx,%r12
+ .byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 # mulx 8*4($nptr),%rbx,%r12
mov %rdx,%rax
mov %r8,%rdx
adcx %rbx,%r11
@@ -3032,15 +3156,15 @@ sqrx8x_reduction:
mov %rax,%rdx
mov %rax,64+48+8(%rsp,%rcx,8) # put aside n0*a[i]
- mulx 16*5($nptr),%rax,%r13
+ mulx 8*5($nptr),%rax,%r13
adcx %rax,%r12
adox %r14,%r13
- mulx 16*6($nptr),%rax,%r14
+ mulx 8*6($nptr),%rax,%r14
adcx %rax,%r13
adox %r15,%r14
- mulx 16*7($nptr),%rax,%r15
+ mulx 8*7($nptr),%rax,%r15
mov %rbx,%rdx
adcx %rax,%r14
adox $carry,%r15 # $carry is 0
@@ -3056,7 +3180,7 @@ sqrx8x_reduction:
mov 48+8(%rsp),%rdx # pull n0*a[0]
add 8*0($tptr),%r8
- lea 16*8($nptr),$nptr
+ lea 8*8($nptr),$nptr
mov \$-8,%rcx
adcx 8*1($tptr),%r9
adcx 8*2($tptr),%r10
@@ -3075,35 +3199,35 @@ sqrx8x_reduction:
.align 32
.Lsqrx8x_tail:
mov %r8,%rbx
- mulx 16*0($nptr),%rax,%r8
+ mulx 8*0($nptr),%rax,%r8
adcx %rax,%rbx
adox %r9,%r8
- mulx 16*1($nptr),%rax,%r9
+ mulx 8*1($nptr),%rax,%r9
adcx %rax,%r8
adox %r10,%r9
- mulx 16*2($nptr),%rax,%r10
+ mulx 8*2($nptr),%rax,%r10
adcx %rax,%r9
adox %r11,%r10
- mulx 16*3($nptr),%rax,%r11
+ mulx 8*3($nptr),%rax,%r11
adcx %rax,%r10
adox %r12,%r11
- .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x40,0x00,0x00,0x00 # mulx 16*4($nptr),%rax,%r12
+ .byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 # mulx 8*4($nptr),%rax,%r12
adcx %rax,%r11
adox %r13,%r12
- mulx 16*5($nptr),%rax,%r13
+ mulx 8*5($nptr),%rax,%r13
adcx %rax,%r12
adox %r14,%r13
- mulx 16*6($nptr),%rax,%r14
+ mulx 8*6($nptr),%rax,%r14
adcx %rax,%r13
adox %r15,%r14
- mulx 16*7($nptr),%rax,%r15
+ mulx 8*7($nptr),%rax,%r15
mov 72+48+8(%rsp,%rcx,8),%rdx # pull n0*a[i]
adcx %rax,%r14
adox $carry,%r15
@@ -3119,7 +3243,7 @@ sqrx8x_reduction:
sub 16+8(%rsp),$carry # mov 16(%rsp),%cf
mov 48+8(%rsp),%rdx # pull n0*a[0]
- lea 16*8($nptr),$nptr
+ lea 8*8($nptr),$nptr
adc 8*0($tptr),%r8
adc 8*1($tptr),%r9
adc 8*2($tptr),%r10
@@ -3155,7 +3279,7 @@ sqrx8x_reduction:
adc 8*0($tptr),%r8
movq %xmm3,%rcx
adc 8*1($tptr),%r9
- mov 16*7($nptr),$carry
+ mov 8*7($nptr),$carry
movq %xmm2,$nptr # restore $nptr
adc 8*2($tptr),%r10
adc 8*3($tptr),%r11
@@ -3181,6 +3305,8 @@ sqrx8x_reduction:
lea 8*8($tptr,%rcx),$tptr # start of current t[] window
cmp 8+8(%rsp),%r8 # end of t[]?
jb .Lsqrx8x_reduction_loop
+ ret
+.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
___
}
##############################################################
@@ -3188,52 +3314,59 @@ ___
#
{
my ($rptr,$nptr)=("%rdx","%rbp");
-my @ri=map("%r$_",(10..13));
-my @ni=map("%r$_",(14..15));
$code.=<<___;
- xor %ebx,%ebx
- sub %r15,%rsi # compare top-most words
- adc %rbx,%rbx
+.align 32
+__bn_postx4x_internal:
+ mov 8*0($nptr),%r12
mov %rcx,%r10 # -$num
- or %rbx,%rax
mov %rcx,%r9 # -$num
- xor \$1,%rax
- sar \$3+2,%rcx # cf=0
+ neg %rax
+ sar \$3+2,%rcx
#lea 48+8(%rsp,%r9),$tptr
- lea ($nptr,%rax,8),$nptr
movq %xmm1,$rptr # restore $rptr
movq %xmm1,$aptr # prepare for back-to-back call
- jmp .Lsqrx4x_sub
+ dec %r12 # so that after 'not' we get -n[0]
+ mov 8*1($nptr),%r13
+ xor %r8,%r8
+ mov 8*2($nptr),%r14
+ mov 8*3($nptr),%r15
+ jmp .Lsqrx4x_sub_entry
-.align 32
+.align 16
.Lsqrx4x_sub:
- .byte 0x66
- mov 8*0($tptr),%r12
- mov 8*1($tptr),%r13
- sbb 16*0($nptr),%r12
- mov 8*2($tptr),%r14
- sbb 16*1($nptr),%r13
- mov 8*3($tptr),%r15
- lea 8*4($tptr),$tptr
- sbb 16*2($nptr),%r14
+ mov 8*0($nptr),%r12
+ mov 8*1($nptr),%r13
+ mov 8*2($nptr),%r14
+ mov 8*3($nptr),%r15
+.Lsqrx4x_sub_entry:
+ andn %rax,%r12,%r12
+ lea 8*4($nptr),$nptr
+ andn %rax,%r13,%r13
+ andn %rax,%r14,%r14
+ andn %rax,%r15,%r15
+
+ neg %r8 # mov %r8,%cf
+ adc 8*0($tptr),%r12
+ adc 8*1($tptr),%r13
+ adc 8*2($tptr),%r14
+ adc 8*3($tptr),%r15
mov %r12,8*0($rptr)
- sbb 16*3($nptr),%r15
- lea 16*4($nptr),$nptr
+ lea 8*4($tptr),$tptr
mov %r13,8*1($rptr)
+ sbb %r8,%r8 # mov %cf,%r8
mov %r14,8*2($rptr)
mov %r15,8*3($rptr)
lea 8*4($rptr),$rptr
inc %rcx
jnz .Lsqrx4x_sub
-___
-}
-$code.=<<___;
+
neg %r9 # restore $num
ret
-.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
+.size __bn_postx4x_internal,.-__bn_postx4x_internal
___
+}
}}}
{
my ($inp,$num,$tbl,$idx)=$win64?("%rcx","%edx","%r8", "%r9d") : # Win64 order
@@ -3282,56 +3415,91 @@ bn_scatter5:
.globl bn_gather5
.type bn_gather5,\@abi-omnipotent
-.align 16
+.align 32
bn_gather5:
-___
-$code.=<<___ if ($win64);
-.LSEH_begin_bn_gather5:
+.LSEH_begin_bn_gather5: # Win64 thing, but harmless in other cases
# I can't trust assembler to use specific encoding:-(
- .byte 0x48,0x83,0xec,0x28 #sub \$0x28,%rsp
- .byte 0x0f,0x29,0x34,0x24 #movaps %xmm6,(%rsp)
- .byte 0x0f,0x29,0x7c,0x24,0x10 #movdqa %xmm7,0x10(%rsp)
+ .byte 0x4c,0x8d,0x14,0x24 #lea (%rsp),%r10
+ .byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 #sub $0x108,%rsp
+ lea .Linc(%rip),%rax
+ and \$-16,%rsp # shouldn't be formally required
+
+ movd $idx,%xmm5
+ movdqa 0(%rax),%xmm0 # 00000001000000010000000000000000
+ movdqa 16(%rax),%xmm1 # 00000002000000020000000200000002
+ lea 128($tbl),%r11 # size optimization
+ lea 128(%rsp),%rax # size optimization
+
+ pshufd \$0,%xmm5,%xmm5 # broadcast $idx
+ movdqa %xmm1,%xmm4
+ movdqa %xmm1,%xmm2
___
+########################################################################
+# calculate mask by comparing 0..31 to $idx and save result to stack
+#
+for($i=0;$i<$STRIDE/16;$i+=4) {
$code.=<<___;
- mov $idx,%r11d
- shr \$`log($N/8)/log(2)`,$idx
- and \$`$N/8-1`,%r11
- not $idx
- lea .Lmagic_masks(%rip),%rax
- and \$`2**5/($N/8)-1`,$idx # 5 is "window size"
- lea 128($tbl,%r11,8),$tbl # pointer within 1st cache line
- movq 0(%rax,$idx,8),%xmm4 # set of masks denoting which
- movq 8(%rax,$idx,8),%xmm5 # cache line contains element
- movq 16(%rax,$idx,8),%xmm6 # denoted by 7th argument
- movq 24(%rax,$idx,8),%xmm7
+ paddd %xmm0,%xmm1
+ pcmpeqd %xmm5,%xmm0 # compare to 1,0
+___
+$code.=<<___ if ($i);
+ movdqa %xmm3,`16*($i-1)-128`(%rax)
+___
+$code.=<<___;
+ movdqa %xmm4,%xmm3
+
+ paddd %xmm1,%xmm2
+ pcmpeqd %xmm5,%xmm1 # compare to 3,2
+ movdqa %xmm0,`16*($i+0)-128`(%rax)
+ movdqa %xmm4,%xmm0
+
+ paddd %xmm2,%xmm3
+ pcmpeqd %xmm5,%xmm2 # compare to 5,4
+ movdqa %xmm1,`16*($i+1)-128`(%rax)
+ movdqa %xmm4,%xmm1
+
+ paddd %xmm3,%xmm0
+ pcmpeqd %xmm5,%xmm3 # compare to 7,6
+ movdqa %xmm2,`16*($i+2)-128`(%rax)
+ movdqa %xmm4,%xmm2
+___
+}
+$code.=<<___;
+ movdqa %xmm3,`16*($i-1)-128`(%rax)
jmp .Lgather
-.align 16
-.Lgather:
- movq `0*$STRIDE/4-128`($tbl),%xmm0
- movq `1*$STRIDE/4-128`($tbl),%xmm1
- pand %xmm4,%xmm0
- movq `2*$STRIDE/4-128`($tbl),%xmm2
- pand %xmm5,%xmm1
- movq `3*$STRIDE/4-128`($tbl),%xmm3
- pand %xmm6,%xmm2
- por %xmm1,%xmm0
- pand %xmm7,%xmm3
- .byte 0x67,0x67
- por %xmm2,%xmm0
- lea $STRIDE($tbl),$tbl
- por %xmm3,%xmm0
+.align 32
+.Lgather:
+ pxor %xmm4,%xmm4
+ pxor %xmm5,%xmm5
+___
+for($i=0;$i<$STRIDE/16;$i+=4) {
+$code.=<<___;
+ movdqa `16*($i+0)-128`(%r11),%xmm0
+ movdqa `16*($i+1)-128`(%r11),%xmm1
+ movdqa `16*($i+2)-128`(%r11),%xmm2
+ pand `16*($i+0)-128`(%rax),%xmm0
+ movdqa `16*($i+3)-128`(%r11),%xmm3
+ pand `16*($i+1)-128`(%rax),%xmm1
+ por %xmm0,%xmm4
+ pand `16*($i+2)-128`(%rax),%xmm2
+ por %xmm1,%xmm5
+ pand `16*($i+3)-128`(%rax),%xmm3
+ por %xmm2,%xmm4
+ por %xmm3,%xmm5
+___
+}
+$code.=<<___;
+ por %xmm5,%xmm4
+ lea $STRIDE(%r11),%r11
+ pshufd \$0x4e,%xmm4,%xmm0
+ por %xmm4,%xmm0
movq %xmm0,($out) # m0=bp[0]
lea 8($out),$out
sub \$1,$num
jnz .Lgather
-___
-$code.=<<___ if ($win64);
- movaps (%rsp),%xmm6
- movaps 0x10(%rsp),%xmm7
- lea 0x28(%rsp),%rsp
-___
-$code.=<<___;
+
+ lea (%r10),%rsp
ret
.LSEH_end_bn_gather5:
.size bn_gather5,.-bn_gather5
@@ -3339,9 +3507,9 @@ ___
}
$code.=<<___;
.align 64
-.Lmagic_masks:
- .long 0,0, 0,0, 0,0, -1,-1
- .long 0,0, 0,0, 0,0, 0,0
+.Linc:
+ .long 0,0, 1,1
+ .long 2,2, 2,2
.asciz "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
___
@@ -3389,19 +3557,16 @@ mul_handler:
lea .Lmul_epilogue(%rip),%r10
cmp %r10,%rbx
- jb .Lbody_40
+ ja .Lbody_40
mov 192($context),%r10 # pull $num
mov 8(%rax,%r10,8),%rax # pull saved stack pointer
+
jmp .Lbody_proceed
.Lbody_40:
mov 40(%rax),%rax # pull saved stack pointer
.Lbody_proceed:
-
- movaps -88(%rax),%xmm0
- movaps -72(%rax),%xmm1
-
mov -8(%rax),%rbx
mov -16(%rax),%rbp
mov -24(%rax),%r12
@@ -3414,8 +3579,6 @@ mul_handler:
mov %r13,224($context) # restore context->R13
mov %r14,232($context) # restore context->R14
mov %r15,240($context) # restore context->R15
- movups %xmm0,512($context) # restore context->Xmm6
- movups %xmm1,528($context) # restore context->Xmm7
.Lcommon_seh_tail:
mov 8(%rax),%rdi
@@ -3526,10 +3689,9 @@ ___
$code.=<<___;
.align 8
.LSEH_info_bn_gather5:
- .byte 0x01,0x0d,0x05,0x00
- .byte 0x0d,0x78,0x01,0x00 #movaps 0x10(rsp),xmm7
- .byte 0x08,0x68,0x00,0x00 #movaps (rsp),xmm6
- .byte 0x04,0x42,0x00,0x00 #sub rsp,0x28
+ .byte 0x01,0x0b,0x03,0x0a
+ .byte 0x0b,0x01,0x21,0x00 # sub rsp,0x108
+ .byte 0x04,0xa3,0x00,0x00 # lea r10,(rsp)
.align 8
___
}
diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h
index 5696965e9a09..86264ae6315f 100644
--- a/crypto/bn/bn.h
+++ b/crypto/bn/bn.h
@@ -125,6 +125,7 @@
#ifndef HEADER_BN_H
# define HEADER_BN_H
+# include <limits.h>
# include <openssl/e_os2.h>
# ifndef OPENSSL_NO_FP_API
# include <stdio.h> /* FILE */
@@ -721,8 +722,17 @@ const BIGNUM *BN_get0_nist_prime_521(void);
/* library internal functions */
-# define bn_expand(a,bits) ((((((bits+BN_BITS2-1))/BN_BITS2)) <= (a)->dmax)?\
- (a):bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2))
+# define bn_expand(a,bits) \
+ ( \
+ bits > (INT_MAX - BN_BITS2 + 1) ? \
+ NULL \
+ : \
+ (((bits+BN_BITS2-1)/BN_BITS2) <= (a)->dmax) ? \
+ (a) \
+ : \
+ bn_expand2((a),(bits+BN_BITS2-1)/BN_BITS2) \
+ )
+
# define bn_wexpand(a,words) (((words) <= (a)->dmax)?(a):bn_expand2((a),(words)))
BIGNUM *bn_expand2(BIGNUM *a, int words);
# ifndef OPENSSL_NO_DEPRECATED
diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c
index 6d30d1e0fff5..1670f01d1d8c 100644
--- a/crypto/bn/bn_exp.c
+++ b/crypto/bn/bn_exp.c
@@ -110,6 +110,7 @@
*/
#include "cryptlib.h"
+#include "constant_time_locl.h"
#include "bn_lcl.h"
#include <stdlib.h>
@@ -606,15 +607,17 @@ static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos)
static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
unsigned char *buf, int idx,
- int width)
+ int window)
{
- size_t i, j;
+ int i, j;
+ int width = 1 << window;
+ BN_ULONG *table = (BN_ULONG *)buf;
if (top > b->top)
top = b->top; /* this works because 'buf' is explicitly
* zeroed */
- for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
- buf[j] = ((unsigned char *)b->d)[i];
+ for (i = 0, j = idx; i < top; i++, j += width) {
+ table[j] = b->d[i];
}
return 1;
@@ -622,15 +625,51 @@ static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top,
static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top,
unsigned char *buf, int idx,
- int width)
+ int window)
{
- size_t i, j;
+ int i, j;
+ int width = 1 << window;
+ volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
if (bn_wexpand(b, top) == NULL)
return 0;
- for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) {
- ((unsigned char *)b->d)[i] = buf[j];
+ if (window <= 3) {
+ for (i = 0; i < top; i++, table += width) {
+ BN_ULONG acc = 0;
+
+ for (j = 0; j < width; j++) {
+ acc |= table[j] &
+ ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
+ }
+
+ b->d[i] = acc;
+ }
+ } else {
+ int xstride = 1 << (window - 2);
+ BN_ULONG y0, y1, y2, y3;
+
+ i = idx >> (window - 2); /* equivalent of idx / xstride */
+ idx &= xstride - 1; /* equivalent of idx % xstride */
+
+ y0 = (BN_ULONG)0 - (constant_time_eq_int(i,0)&1);
+ y1 = (BN_ULONG)0 - (constant_time_eq_int(i,1)&1);
+ y2 = (BN_ULONG)0 - (constant_time_eq_int(i,2)&1);
+ y3 = (BN_ULONG)0 - (constant_time_eq_int(i,3)&1);
+
+ for (i = 0; i < top; i++, table += width) {
+ BN_ULONG acc = 0;
+
+ for (j = 0; j < xstride; j++) {
+ acc |= ( (table[j + 0 * xstride] & y0) |
+ (table[j + 1 * xstride] & y1) |
+ (table[j + 2 * xstride] & y2) |
+ (table[j + 3 * xstride] & y3) )
+ & ((BN_ULONG)0 - (constant_time_eq_int(j,idx)&1));
+ }
+
+ b->d[i] = acc;
+ }
}
b->top = top;
@@ -749,8 +788,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
if (window >= 5) {
window = 5; /* ~5% improvement for RSA2048 sign, and even
* for RSA4096 */
- if ((top & 7) == 0)
- powerbufLen += 2 * top * sizeof(m->d[0]);
+ /* reserve space for mont->N.d[] copy */
+ powerbufLen += top * sizeof(mont->N.d[0]);
}
#endif
(void)0;
@@ -971,7 +1010,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
const BN_ULONG *not_used, const BN_ULONG *np,
const BN_ULONG *n0, int num);
- BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2;
+ BN_ULONG *n0 = mont->n0, *np;
/*
* BN_to_montgomery can contaminate words above .top [in
@@ -982,11 +1021,11 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
for (i = tmp.top; i < top; i++)
tmp.d[i] = 0;
- if (top & 7)
- np2 = np;
- else
- for (np2 = am.d + top, i = 0; i < top; i++)
- np2[2 * i] = np[i];
+ /*
+ * copy mont->N.d[] to improve cache locality
+ */
+ for (np = am.d + top, i = 0; i < top; i++)
+ np[i] = mont->N.d[i];
bn_scatter5(tmp.d, top, powerbuf, 0);
bn_scatter5(am.d, am.top, powerbuf, 1);
@@ -996,7 +1035,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
# if 0
for (i = 3; i < 32; i++) {
/* Calculate a^i = a^(i-1) * a */
- bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
+ bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
bn_scatter5(tmp.d, top, powerbuf, i);
}
# else
@@ -1007,7 +1046,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
for (i = 3; i < 8; i += 2) {
int j;
- bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
+ bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
bn_scatter5(tmp.d, top, powerbuf, i);
for (j = 2 * i; j < 32; j *= 2) {
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
@@ -1015,13 +1054,13 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
}
for (; i < 16; i += 2) {
- bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
+ bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
bn_scatter5(tmp.d, top, powerbuf, i);
bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
bn_scatter5(tmp.d, top, powerbuf, 2 * i);
}
for (; i < 32; i += 2) {
- bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1);
+ bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
bn_scatter5(tmp.d, top, powerbuf, i);
}
# endif
@@ -1050,11 +1089,11 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
while (bits >= 0) {
wvalue = bn_get_bits5(p->d, bits - 4);
bits -= 5;
- bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue);
+ bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
}
}
- ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top);
+ ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top);
tmp.top = top;
bn_correct_top(&tmp);
if (ret) {
@@ -1065,9 +1104,9 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
} else
#endif
{
- if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers))
+ if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, window))
goto err;
- if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, numPowers))
+ if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am, top, powerbuf, 1, window))
goto err;
/*
@@ -1079,15 +1118,15 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
if (window > 1) {
if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx))
goto err;
- if (!MOD_EXP_CTIME_COPY_TO_PREBUF
- (&tmp, top, powerbuf, 2, numPowers))
+ if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2,
+ window))
goto err;
for (i = 3; i < numPowers; i++) {
/* Calculate a^i = a^(i-1) * a */
if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx))
goto err;
- if (!MOD_EXP_CTIME_COPY_TO_PREBUF
- (&tmp, top, powerbuf, i, numPowers))
+ if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i,
+ window))
goto err;
}
}
@@ -1095,8 +1134,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
bits--;
for (wvalue = 0, i = bits % window; i >= 0; i--, bits--)
wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
- if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
- (&tmp, top, powerbuf, wvalue, numPowers))
+ if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue,
+ window))
goto err;
/*
@@ -1116,8 +1155,8 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
/*
* Fetch the appropriate pre-computed value from the pre-buf
*/
- if (!MOD_EXP_CTIME_COPY_FROM_PREBUF
- (&am, top, powerbuf, wvalue, numPowers))
+ if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue,
+ window))
goto err;
/* Multiply the result into the intermediate result */
diff --git a/crypto/bn/bn_print.c b/crypto/bn/bn_print.c
index ab10b957ba27..bfa31efc5621 100644
--- a/crypto/bn/bn_print.c
+++ b/crypto/bn/bn_print.c
@@ -58,6 +58,7 @@
#include <stdio.h>
#include <ctype.h>
+#include <limits.h>
#include "cryptlib.h"
#include <openssl/buffer.h>
#include "bn_lcl.h"
@@ -189,7 +190,11 @@ int BN_hex2bn(BIGNUM **bn, const char *a)
a++;
}
- for (i = 0; isxdigit((unsigned char)a[i]); i++) ;
+ for (i = 0; i <= (INT_MAX/4) && isxdigit((unsigned char)a[i]); i++)
+ continue;
+
+ if (i > INT_MAX/4)
+ goto err;
num = i + neg;
if (bn == NULL)
@@ -204,7 +209,7 @@ int BN_hex2bn(BIGNUM **bn, const char *a)
BN_zero(ret);
}
- /* i is the number of hex digests; */
+ /* i is the number of hex digits */
if (bn_expand(ret, i * 4) == NULL)
goto err;
@@ -260,7 +265,11 @@ int BN_dec2bn(BIGNUM **bn, const char *a)
a++;
}
- for (i = 0; isdigit((unsigned char)a[i]); i++) ;
+ for (i = 0; i <= (INT_MAX/4) && isdigit((unsigned char)a[i]); i++)
+ continue;
+
+ if (i > INT_MAX/4)
+ goto err;
num = i + neg;
if (bn == NULL)
@@ -278,7 +287,7 @@ int BN_dec2bn(BIGNUM **bn, const char *a)
BN_zero(ret);
}
- /* i is the number of digests, a bit of an over expand; */
+ /* i is the number of digits, a bit of an over expand */
if (bn_expand(ret, i * 4) == NULL)
goto err;
diff --git a/crypto/bn/bn_recp.c b/crypto/bn/bn_recp.c
index 7497ac624d94..f047040efe03 100644
--- a/crypto/bn/bn_recp.c
+++ b/crypto/bn/bn_recp.c
@@ -65,6 +65,7 @@ void BN_RECP_CTX_init(BN_RECP_CTX *recp)
BN_init(&(recp->N));
BN_init(&(recp->Nr));
recp->num_bits = 0;
+ recp->shift = 0;
recp->flags = 0;
}
diff --git a/crypto/cmac/cmac.c b/crypto/cmac/cmac.c
index 774e6dc91905..2954b6eb7dcf 100644
--- a/crypto/cmac/cmac.c
+++ b/crypto/cmac/cmac.c
@@ -160,6 +160,14 @@ int CMAC_Init(CMAC_CTX *ctx, const void *key, size_t keylen,
EVPerr(EVP_F_CMAC_INIT, EVP_R_DISABLED_FOR_FIPS);
return 0;
}
+
+ /* Switch to FIPS cipher implementation if possible */
+ if (cipher != NULL) {
+ const EVP_CIPHER *fcipher;
+ fcipher = FIPS_get_cipherbynid(EVP_CIPHER_nid(cipher));
+ if (fcipher != NULL)
+ cipher = fcipher;
+ }
/*
* Other algorithm blocking will be done in FIPS_cmac_init, via
* FIPS_cipherinit().
diff --git a/crypto/cryptlib.c b/crypto/cryptlib.c
index c9f674ba8e62..1925428f5ec5 100644
--- a/crypto/cryptlib.c
+++ b/crypto/cryptlib.c
@@ -1016,11 +1016,11 @@ void *OPENSSL_stderr(void)
return stderr;
}
-int CRYPTO_memcmp(const void *in_a, const void *in_b, size_t len)
+int CRYPTO_memcmp(const volatile void *in_a, const volatile void *in_b, size_t len)
{
size_t i;
- const unsigned char *a = in_a;
- const unsigned char *b = in_b;
+ const volatile unsigned char *a = in_a;
+ const volatile unsigned char *b = in_b;
unsigned char x = 0;
for (i = 0; i < len; i++)
diff --git a/crypto/crypto.h b/crypto/crypto.h
index c450d7a3c374..6c644ce12a82 100644
--- a/crypto/crypto.h
+++ b/crypto/crypto.h
@@ -628,7 +628,7 @@ void OPENSSL_init(void);
* into a defined order as the return value when a != b is undefined, other
* than to be non-zero.
*/
-int CRYPTO_memcmp(const void *a, const void *b, size_t len);
+int CRYPTO_memcmp(const volatile void *a, const volatile void *b, size_t len);
/* BEGIN ERROR CODES */
/*
diff --git a/crypto/dh/dh.h b/crypto/dh/dh.h
index 5498a9dc1060..a5bd9016aae8 100644
--- a/crypto/dh/dh.h
+++ b/crypto/dh/dh.h
@@ -174,7 +174,7 @@ struct dh_st {
/* DH_check_pub_key error codes */
# define DH_CHECK_PUBKEY_TOO_SMALL 0x01
# define DH_CHECK_PUBKEY_TOO_LARGE 0x02
-# define DH_CHECK_PUBKEY_INVALID 0x03
+# define DH_CHECK_PUBKEY_INVALID 0x04
/*
* primes p where (p-1)/2 is prime too are called "safe"; we define this for
diff --git a/crypto/dh/dh_check.c b/crypto/dh/dh_check.c
index 5adedc0d264e..027704111432 100644
--- a/crypto/dh/dh_check.c
+++ b/crypto/dh/dh_check.c
@@ -160,13 +160,12 @@ int DH_check_pub_key(const DH *dh, const BIGNUM *pub_key, int *ret)
goto err;
BN_CTX_start(ctx);
tmp = BN_CTX_get(ctx);
- if (tmp == NULL)
+ if (tmp == NULL || !BN_set_word(tmp, 1))
goto err;
- BN_set_word(tmp, 1);
if (BN_cmp(pub_key, tmp) <= 0)
*ret |= DH_CHECK_PUBKEY_TOO_SMALL;
- BN_copy(tmp, dh->p);
- BN_sub_word(tmp, 1);
+ if (BN_copy(tmp, dh->p) == NULL || !BN_sub_word(tmp, 1))
+ goto err;
if (BN_cmp(pub_key, tmp) >= 0)
*ret |= DH_CHECK_PUBKEY_TOO_LARGE;
diff --git a/crypto/dsa/dsa_ameth.c b/crypto/dsa/dsa_ameth.c
index c40e1777ade1..cc83d6e6ad3b 100644
--- a/crypto/dsa/dsa_ameth.c
+++ b/crypto/dsa/dsa_ameth.c
@@ -191,6 +191,8 @@ static int dsa_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8)
STACK_OF(ASN1_TYPE) *ndsa = NULL;
DSA *dsa = NULL;
+ int ret = 0;
+
if (!PKCS8_pkey_get0(NULL, &p, &pklen, &palg, p8))
return 0;
X509_ALGOR_get0(NULL, &ptype, &pval, palg);
@@ -262,23 +264,21 @@ static int dsa_priv_decode(EVP_PKEY *pkey, PKCS8_PRIV_KEY_INFO *p8)
}
EVP_PKEY_assign_DSA(pkey, dsa);
- BN_CTX_free(ctx);
- if (ndsa)
- sk_ASN1_TYPE_pop_free(ndsa, ASN1_TYPE_free);
- else
- ASN1_STRING_clear_free(privkey);
- return 1;
+ ret = 1;
+ goto done;
decerr:
- DSAerr(DSA_F_DSA_PRIV_DECODE, EVP_R_DECODE_ERROR);
+ DSAerr(DSA_F_DSA_PRIV_DECODE, DSA_R_DECODE_ERROR);
dsaerr:
+ DSA_free(dsa);
+ done:
BN_CTX_free(ctx);
- if (privkey)
+ if (ndsa)
+ sk_ASN1_TYPE_pop_free(ndsa, ASN1_TYPE_free);
+ else
ASN1_STRING_clear_free(privkey);
- sk_ASN1_TYPE_pop_free(ndsa, ASN1_TYPE_free);
- DSA_free(dsa);
- return 0;
+ return ret;
}
static int dsa_priv_encode(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pkey)
diff --git a/crypto/dso/dso_lib.c b/crypto/dso/dso_lib.c
index 3312450eae67..2beb7c1ba542 100644
--- a/crypto/dso/dso_lib.c
+++ b/crypto/dso/dso_lib.c
@@ -122,6 +122,7 @@ DSO *DSO_new_method(DSO_METHOD *meth)
ret->meth = meth;
ret->references = 1;
if ((ret->meth->init != NULL) && !ret->meth->init(ret)) {
+ sk_void_free(ret->meth_data);
OPENSSL_free(ret);
ret = NULL;
}
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index e6acfd59f0d4..7140860e245b 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -2001,6 +2001,7 @@ $code.=<<___;
push %r15
sub \$32*5+8, %rsp
+.Lpoint_double_shortcut$x:
movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr.x
mov $a_ptr, $b_ptr # backup copy
movdqu 0x10($a_ptr), %xmm1
@@ -2291,6 +2292,7 @@ $code.=<<___;
mov 0x40+8*1($b_ptr), $acc6
mov 0x40+8*2($b_ptr), $acc7
mov 0x40+8*3($b_ptr), $acc0
+ movq $b_ptr, %xmm1
lea 0x40-$bias($b_ptr), $a_ptr
lea $Z1sqr(%rsp), $r_ptr # Z1^2
@@ -2346,7 +2348,7 @@ $code.=<<___;
test $acc0, $acc0
jnz .Ladd_proceed$x # (in1infty || in2infty)?
test $acc1, $acc1
- jz .Ladd_proceed$x # is_equal(S1,S2)?
+ jz .Ladd_double$x # is_equal(S1,S2)?
movq %xmm0, $r_ptr # restore $r_ptr
pxor %xmm0, %xmm0
@@ -2359,6 +2361,13 @@ $code.=<<___;
jmp .Ladd_done$x
.align 32
+.Ladd_double$x:
+ movq %xmm1, $a_ptr # restore $a_ptr
+ movq %xmm0, $r_ptr # restore $r_ptr
+ add \$`32*(18-5)`, %rsp # difference in frame sizes
+ jmp .Lpoint_double_shortcut$x
+
+.align 32
.Ladd_proceed$x:
`&load_for_sqr("$R(%rsp)", "$src0")`
lea $Rsqr(%rsp), $r_ptr # R^2
diff --git a/crypto/ec/ecp_nistp224.c b/crypto/ec/ecp_nistp224.c
index ed09f97ade68..d81cc9ce6b1a 100644
--- a/crypto/ec/ecp_nistp224.c
+++ b/crypto/ec/ecp_nistp224.c
@@ -1657,8 +1657,7 @@ int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
*/
if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
- ret = 1;
- goto err;
+ goto done;
}
if ((!BN_to_felem(pre->g_pre_comp[0][1][0], &group->generator->X)) ||
(!BN_to_felem(pre->g_pre_comp[0][1][1], &group->generator->Y)) ||
@@ -1736,6 +1735,7 @@ int ec_GFp_nistp224_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
}
make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_felems);
+ done:
if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp224_pre_comp_dup,
nistp224_pre_comp_free,
nistp224_pre_comp_clear_free))
diff --git a/crypto/ec/ecp_nistp256.c b/crypto/ec/ecp_nistp256.c
index a5887086c638..78d191aac7af 100644
--- a/crypto/ec/ecp_nistp256.c
+++ b/crypto/ec/ecp_nistp256.c
@@ -2249,8 +2249,7 @@ int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
*/
if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
- ret = 1;
- goto err;
+ goto done;
}
if ((!BN_to_felem(x_tmp, &group->generator->X)) ||
(!BN_to_felem(y_tmp, &group->generator->Y)) ||
@@ -2337,6 +2336,7 @@ int ec_GFp_nistp256_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
}
make_points_affine(31, &(pre->g_pre_comp[0][1]), tmp_smallfelems);
+ done:
if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp256_pre_comp_dup,
nistp256_pre_comp_free,
nistp256_pre_comp_clear_free))
diff --git a/crypto/ec/ecp_nistp521.c b/crypto/ec/ecp_nistp521.c
index 360b9a3516f6..c53a61bbfb69 100644
--- a/crypto/ec/ecp_nistp521.c
+++ b/crypto/ec/ecp_nistp521.c
@@ -2056,8 +2056,7 @@ int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
*/
if (0 == EC_POINT_cmp(group, generator, group->generator, ctx)) {
memcpy(pre->g_pre_comp, gmul, sizeof(pre->g_pre_comp));
- ret = 1;
- goto err;
+ goto done;
}
if ((!BN_to_felem(pre->g_pre_comp[1][0], &group->generator->X)) ||
(!BN_to_felem(pre->g_pre_comp[1][1], &group->generator->Y)) ||
@@ -2115,6 +2114,7 @@ int ec_GFp_nistp521_precompute_mult(EC_GROUP *group, BN_CTX *ctx)
}
make_points_affine(15, &(pre->g_pre_comp[1]), tmp_felems);
+ done:
if (!EC_EX_DATA_set_data(&group->extra_data, pre, nistp521_pre_comp_dup,
nistp521_pre_comp_free,
nistp521_pre_comp_clear_free))
diff --git a/crypto/ec/ectest.c b/crypto/ec/ectest.c
index efab0b07b1d2..40a1f003259f 100644
--- a/crypto/ec/ectest.c
+++ b/crypto/ec/ectest.c
@@ -1758,9 +1758,18 @@ static void nistp_single_test(const struct nistp_test_params *test)
if (0 != EC_POINT_cmp(NISTP, Q, Q_CHECK, ctx))
ABORT;
+ /*
+ * We have not performed precomputation so have_precompute mult should be
+ * false
+ */
+ if (EC_GROUP_have_precompute_mult(NISTP))
+ ABORT;
+
/* now repeat all tests with precomputation */
if (!EC_GROUP_precompute_mult(NISTP, ctx))
ABORT;
+ if (!EC_GROUP_have_precompute_mult(NISTP))
+ ABORT;
/* fixed point multiplication */
EC_POINT_mul(NISTP, Q, m, NULL, NULL, ctx);
diff --git a/crypto/engine/eng_dyn.c b/crypto/engine/eng_dyn.c
index 3169b09ad865..40f30e9d585e 100644
--- a/crypto/engine/eng_dyn.c
+++ b/crypto/engine/eng_dyn.c
@@ -243,8 +243,10 @@ static int dynamic_set_data_ctx(ENGINE *e, dynamic_data_ctx **ctx)
* If we lost the race to set the context, c is non-NULL and *ctx is the
* context of the thread that won.
*/
- if (c)
+ if (c) {
+ sk_OPENSSL_STRING_free(c->dirs);
OPENSSL_free(c);
+ }
return 1;
}
diff --git a/crypto/evp/e_des.c b/crypto/evp/e_des.c
index aae13a675694..8ca65cd03ae1 100644
--- a/crypto/evp/e_des.c
+++ b/crypto/evp/e_des.c
@@ -71,12 +71,13 @@ typedef struct {
DES_key_schedule ks;
} ks;
union {
- void (*cbc) (const void *, void *, size_t, const void *, void *);
+ void (*cbc) (const void *, void *, size_t,
+ const DES_key_schedule *, unsigned char *);
} stream;
} EVP_DES_KEY;
# if defined(AES_ASM) && (defined(__sparc) || defined(__sparc__))
-/* ---------^^^ this is not a typo, just a way to detect that
+/* ----------^^^ this is not a typo, just a way to detect that
* assembler support was in general requested... */
# include "sparc_arch.h"
@@ -86,9 +87,9 @@ extern unsigned int OPENSSL_sparcv9cap_P[];
void des_t4_key_expand(const void *key, DES_key_schedule *ks);
void des_t4_cbc_encrypt(const void *inp, void *out, size_t len,
- DES_key_schedule *ks, unsigned char iv[8]);
+ const DES_key_schedule *ks, unsigned char iv[8]);
void des_t4_cbc_decrypt(const void *inp, void *out, size_t len,
- DES_key_schedule *ks, unsigned char iv[8]);
+ const DES_key_schedule *ks, unsigned char iv[8]);
# endif
static int des_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
@@ -130,7 +131,7 @@ static int des_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
{
EVP_DES_KEY *dat = (EVP_DES_KEY *) ctx->cipher_data;
- if (dat->stream.cbc) {
+ if (dat->stream.cbc != NULL) {
(*dat->stream.cbc) (in, out, inl, &dat->ks.ks, ctx->iv);
return 1;
}
diff --git a/crypto/evp/e_des3.c b/crypto/evp/e_des3.c
index bf6c1d2d3d39..0e910d6d8085 100644
--- a/crypto/evp/e_des3.c
+++ b/crypto/evp/e_des3.c
@@ -75,7 +75,8 @@ typedef struct {
DES_key_schedule ks[3];
} ks;
union {
- void (*cbc) (const void *, void *, size_t, const void *, void *);
+ void (*cbc) (const void *, void *, size_t,
+ const DES_key_schedule *, unsigned char *);
} stream;
} DES_EDE_KEY;
# define ks1 ks.ks[0]
@@ -93,9 +94,9 @@ extern unsigned int OPENSSL_sparcv9cap_P[];
void des_t4_key_expand(const void *key, DES_key_schedule *ks);
void des_t4_ede3_cbc_encrypt(const void *inp, void *out, size_t len,
- DES_key_schedule *ks, unsigned char iv[8]);
+ const DES_key_schedule ks[3], unsigned char iv[8]);
void des_t4_ede3_cbc_decrypt(const void *inp, void *out, size_t len,
- DES_key_schedule *ks, unsigned char iv[8]);
+ const DES_key_schedule ks[3], unsigned char iv[8]);
# endif
static int des_ede_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
@@ -162,7 +163,7 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
}
# endif /* KSSL_DEBUG */
if (dat->stream.cbc) {
- (*dat->stream.cbc) (in, out, inl, &dat->ks, ctx->iv);
+ (*dat->stream.cbc) (in, out, inl, dat->ks.ks, ctx->iv);
return 1;
}
@@ -395,7 +396,7 @@ static int des_ede3_unwrap(EVP_CIPHER_CTX *ctx, unsigned char *out,
int rv = -1;
if (inl < 24)
return -1;
- if (!out)
+ if (out == NULL)
return inl - 16;
memcpy(ctx->iv, wrap_iv, 8);
/* Decrypt first block which will end up as icv */
@@ -438,7 +439,7 @@ static int des_ede3_wrap(EVP_CIPHER_CTX *ctx, unsigned char *out,
const unsigned char *in, size_t inl)
{
unsigned char sha1tmp[SHA_DIGEST_LENGTH];
- if (!out)
+ if (out == NULL)
return inl + 16;
/* Copy input to output buffer + 8 so we have space for IV */
memmove(out + 8, in, inl);
diff --git a/crypto/modes/asm/aesni-gcm-x86_64.pl b/crypto/modes/asm/aesni-gcm-x86_64.pl
index bd6bf72fe487..980cfd23efe3 100755
--- a/crypto/modes/asm/aesni-gcm-x86_64.pl
+++ b/crypto/modes/asm/aesni-gcm-x86_64.pl
@@ -43,7 +43,7 @@ die "can't locate x86_64-xlate.pl";
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
=~ /GNU assembler version ([2-9]\.[0-9]+)/) {
- $avx = ($1>=2.19) + ($1>=2.22);
+ $avx = ($1>=2.20) + ($1>=2.22);
}
if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
@@ -489,7 +489,7 @@ $code.=<<___;
___
$code.=<<___ if ($win64);
movaps -0xd8(%rax),%xmm6
- movaps -0xd8(%rax),%xmm7
+ movaps -0xc8(%rax),%xmm7
movaps -0xb8(%rax),%xmm8
movaps -0xa8(%rax),%xmm9
movaps -0x98(%rax),%xmm10
diff --git a/crypto/modes/asm/ghash-x86_64.pl b/crypto/modes/asm/ghash-x86_64.pl
index 4ff2d39aa7b2..f889f2018789 100755
--- a/crypto/modes/asm/ghash-x86_64.pl
+++ b/crypto/modes/asm/ghash-x86_64.pl
@@ -92,7 +92,7 @@ die "can't locate x86_64-xlate.pl";
if (`$ENV{CC} -Wa,-v -c -o /dev/null -x assembler /dev/null 2>&1`
=~ /GNU assembler version ([2-9]\.[0-9]+)/) {
- $avx = ($1>=2.19) + ($1>=2.22);
+ $avx = ($1>=2.20) + ($1>=2.22);
}
if (!$avx && $win64 && ($flavour =~ /nasm/ || $ENV{ASM} =~ /nasm/) &&
diff --git a/crypto/modes/ctr128.c b/crypto/modes/ctr128.c
index f3bbcbf72376..bcafd6b6bfb1 100644
--- a/crypto/modes/ctr128.c
+++ b/crypto/modes/ctr128.c
@@ -67,23 +67,20 @@
/* increment counter (128-bit int) by 1 */
static void ctr128_inc(unsigned char *counter)
{
- u32 n = 16;
- u8 c;
+ u32 n = 16, c = 1;
do {
--n;
- c = counter[n];
- ++c;
- counter[n] = c;
- if (c)
- return;
+ c += counter[n];
+ counter[n] = (u8)c;
+ c >>= 8;
} while (n);
}
#if !defined(OPENSSL_SMALL_FOOTPRINT)
static void ctr128_inc_aligned(unsigned char *counter)
{
- size_t *data, c, n;
+ size_t *data, c, d, n;
const union {
long one;
char little;
@@ -91,20 +88,19 @@ static void ctr128_inc_aligned(unsigned char *counter)
1
};
- if (is_endian.little) {
+ if (is_endian.little || ((size_t)counter % sizeof(size_t)) != 0) {
ctr128_inc(counter);
return;
}
data = (size_t *)counter;
+ c = 1;
n = 16 / sizeof(size_t);
do {
--n;
- c = data[n];
- ++c;
- data[n] = c;
- if (c)
- return;
+ d = data[n] += c;
+ /* did addition carry? */
+ c = ((d - c) ^ d) >> (sizeof(size_t) * 8 - 1);
} while (n);
}
#endif
@@ -144,14 +140,14 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
}
# if defined(STRICT_ALIGNMENT)
- if (((size_t)in | (size_t)out | (size_t)ivec) % sizeof(size_t) !=
- 0)
+ if (((size_t)in | (size_t)out | (size_t)ecount_buf)
+ % sizeof(size_t) != 0)
break;
# endif
while (len >= 16) {
(*block) (ivec, ecount_buf, key);
ctr128_inc_aligned(ivec);
- for (; n < 16; n += sizeof(size_t))
+ for (n = 0; n < 16; n += sizeof(size_t))
*(size_t *)(out + n) =
*(size_t *)(in + n) ^ *(size_t *)(ecount_buf + n);
len -= 16;
@@ -189,16 +185,13 @@ void CRYPTO_ctr128_encrypt(const unsigned char *in, unsigned char *out,
/* increment upper 96 bits of 128-bit counter by 1 */
static void ctr96_inc(unsigned char *counter)
{
- u32 n = 12;
- u8 c;
+ u32 n = 12, c = 1;
do {
--n;
- c = counter[n];
- ++c;
- counter[n] = c;
- if (c)
- return;
+ c += counter[n];
+ counter[n] = (u8)c;
+ c >>= 8;
} while (n);
}
diff --git a/crypto/opensslconf.h b/crypto/opensslconf.h
index b4d522e68505..f533508b152c 100644
--- a/crypto/opensslconf.h
+++ b/crypto/opensslconf.h
@@ -38,12 +38,18 @@ extern "C" {
#ifndef OPENSSL_NO_SSL_TRACE
# define OPENSSL_NO_SSL_TRACE
#endif
+#ifndef OPENSSL_NO_SSL2
+# define OPENSSL_NO_SSL2
+#endif
#ifndef OPENSSL_NO_STORE
# define OPENSSL_NO_STORE
#endif
#ifndef OPENSSL_NO_UNIT_TEST
# define OPENSSL_NO_UNIT_TEST
#endif
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
+# define OPENSSL_NO_WEAK_SSL_CIPHERS
+#endif
#endif /* OPENSSL_DOING_MAKEDEPEND */
@@ -86,12 +92,18 @@ extern "C" {
# if defined(OPENSSL_NO_SSL_TRACE) && !defined(NO_SSL_TRACE)
# define NO_SSL_TRACE
# endif
+# if defined(OPENSSL_NO_SSL2) && !defined(NO_SSL2)
+# define NO_SSL2
+# endif
# if defined(OPENSSL_NO_STORE) && !defined(NO_STORE)
# define NO_STORE
# endif
# if defined(OPENSSL_NO_UNIT_TEST) && !defined(NO_UNIT_TEST)
# define NO_UNIT_TEST
# endif
+# if defined(OPENSSL_NO_WEAK_SSL_CIPHERS) && !defined(NO_WEAK_SSL_CIPHERS)
+# define NO_WEAK_SSL_CIPHERS
+# endif
#endif
/* crypto/opensslconf.h.in */
diff --git a/crypto/opensslv.h b/crypto/opensslv.h
index 03b8c4843784..4334fd15cd87 100644
--- a/crypto/opensslv.h
+++ b/crypto/opensslv.h
@@ -30,11 +30,11 @@ extern "C" {
* (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for
* major minor fix final patch/beta)
*/
-# define OPENSSL_VERSION_NUMBER 0x1000206fL
+# define OPENSSL_VERSION_NUMBER 0x1000207fL
# ifdef OPENSSL_FIPS
-# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2f-fips 28 Jan 2016"
+# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2g-fips 1 Mar 2016"
# else
-# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2f 28 Jan 2016"
+# define OPENSSL_VERSION_TEXT "OpenSSL 1.0.2g 1 Mar 2016"
# endif
# define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT
diff --git a/crypto/perlasm/x86_64-xlate.pl b/crypto/perlasm/x86_64-xlate.pl
index 9c70b8c2c6e9..ee04221c7e1d 100755
--- a/crypto/perlasm/x86_64-xlate.pl
+++ b/crypto/perlasm/x86_64-xlate.pl
@@ -198,8 +198,11 @@ my %globals;
if ($gas) {
# Solaris /usr/ccs/bin/as can't handle multiplications
# in $self->{value}
- $self->{value} =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi;
- $self->{value} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg;
+ my $value = $self->{value};
+ $value =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi;
+ if ($value =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg) {
+ $self->{value} = $value;
+ }
sprintf "\$%s",$self->{value};
} else {
$self->{value} =~ s/(0b[0-1]+)/oct($1)/eig;
diff --git a/crypto/pkcs7/pk7_smime.c b/crypto/pkcs7/pk7_smime.c
index c4d3724d2a48..dc9b484078af 100644
--- a/crypto/pkcs7/pk7_smime.c
+++ b/crypto/pkcs7/pk7_smime.c
@@ -274,12 +274,29 @@ int PKCS7_verify(PKCS7 *p7, STACK_OF(X509) *certs, X509_STORE *store,
PKCS7err(PKCS7_F_PKCS7_VERIFY, PKCS7_R_NO_CONTENT);
return 0;
}
+#if 0
+ /*
+ * NB: this test commented out because some versions of Netscape
+ * illegally include zero length content when signing data. Also
+ * Microsoft Authenticode includes a SpcIndirectDataContent data
+ * structure which describes the content to be protected by the
+ * signature, rather than directly embedding that content. So
+ * Authenticode implementations are also expected to use
+ * PKCS7_verify() with explicit external data, on non-detached
+ * PKCS#7 signatures.
+ *
+ * In OpenSSL 1.1 a new flag PKCS7_NO_DUAL_CONTENT has been
+ * introduced to disable this sanity check. For the 1.0.2 branch
+ * this change is not acceptable, so the check remains completely
+ * commented out (as it has been for a long time).
+ */
/* Check for data and content: two sets of data */
if (!PKCS7_get_detached(p7) && indata) {
PKCS7err(PKCS7_F_PKCS7_VERIFY, PKCS7_R_CONTENT_AND_DATA_PRESENT);
return 0;
}
+#endif
sinfos = PKCS7_get_signer_info(p7);
diff --git a/crypto/rsa/rsa_sign.c b/crypto/rsa/rsa_sign.c
index ed63a1d8b0e3..82ca8324dfbc 100644
--- a/crypto/rsa/rsa_sign.c
+++ b/crypto/rsa/rsa_sign.c
@@ -84,7 +84,7 @@ int RSA_sign(int type, const unsigned char *m, unsigned int m_len,
return 0;
}
#endif
- if (rsa->meth->rsa_sign) {
+ if ((rsa->flags & RSA_FLAG_SIGN_VER) && rsa->meth->rsa_sign) {
return rsa->meth->rsa_sign(type, m, m_len, sigret, siglen, rsa);
}
/* Special case: SSL signature, just check the length */
@@ -293,7 +293,7 @@ int RSA_verify(int dtype, const unsigned char *m, unsigned int m_len,
const unsigned char *sigbuf, unsigned int siglen, RSA *rsa)
{
- if (rsa->meth->rsa_verify) {
+ if ((rsa->flags & RSA_FLAG_SIGN_VER) && rsa->meth->rsa_verify) {
return rsa->meth->rsa_verify(dtype, m, m_len, sigbuf, siglen, rsa);
}
diff --git a/crypto/srp/srp.h b/crypto/srp/srp.h
index d072536fec9b..028892a1ff5e 100644
--- a/crypto/srp/srp.h
+++ b/crypto/srp/srp.h
@@ -82,16 +82,21 @@ typedef struct SRP_gN_cache_st {
DECLARE_STACK_OF(SRP_gN_cache)
typedef struct SRP_user_pwd_st {
+ /* Owned by us. */
char *id;
BIGNUM *s;
BIGNUM *v;
+ /* Not owned by us. */
const BIGNUM *g;
const BIGNUM *N;
+ /* Owned by us. */
char *info;
} SRP_user_pwd;
DECLARE_STACK_OF(SRP_user_pwd)
+void SRP_user_pwd_free(SRP_user_pwd *user_pwd);
+
typedef struct SRP_VBASE_st {
STACK_OF(SRP_user_pwd) *users_pwd;
STACK_OF(SRP_gN_cache) *gN_cache;
@@ -115,7 +120,12 @@ DECLARE_STACK_OF(SRP_gN)
SRP_VBASE *SRP_VBASE_new(char *seed_key);
int SRP_VBASE_free(SRP_VBASE *vb);
int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file);
+
+/* This method ignores the configured seed and fails for an unknown user. */
SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username);
+/* NOTE: unlike in SRP_VBASE_get_by_user, caller owns the returned pointer.*/
+SRP_user_pwd *SRP_VBASE_get1_by_user(SRP_VBASE *vb, char *username);
+
char *SRP_create_verifier(const char *user, const char *pass, char **salt,
char **verifier, const char *N, const char *g);
int SRP_create_verifier_BN(const char *user, const char *pass, BIGNUM **salt,
diff --git a/crypto/srp/srp_vfy.c b/crypto/srp/srp_vfy.c
index a3f1a8a0a4d5..26ad3e07b4bb 100644
--- a/crypto/srp/srp_vfy.c
+++ b/crypto/srp/srp_vfy.c
@@ -185,7 +185,7 @@ static char *t_tob64(char *dst, const unsigned char *src, int size)
return olddst;
}
-static void SRP_user_pwd_free(SRP_user_pwd *user_pwd)
+void SRP_user_pwd_free(SRP_user_pwd *user_pwd)
{
if (user_pwd == NULL)
return;
@@ -247,6 +247,24 @@ static int SRP_user_pwd_set_sv_BN(SRP_user_pwd *vinfo, BIGNUM *s, BIGNUM *v)
return (vinfo->s != NULL && vinfo->v != NULL);
}
+static SRP_user_pwd *srp_user_pwd_dup(SRP_user_pwd *src)
+{
+ SRP_user_pwd *ret;
+
+ if (src == NULL)
+ return NULL;
+ if ((ret = SRP_user_pwd_new()) == NULL)
+ return NULL;
+
+ SRP_user_pwd_set_gN(ret, src->g, src->N);
+ if (!SRP_user_pwd_set_ids(ret, src->id, src->info)
+ || !SRP_user_pwd_set_sv_BN(ret, BN_dup(src->s), BN_dup(src->v))) {
+ SRP_user_pwd_free(ret);
+ return NULL;
+ }
+ return ret;
+}
+
SRP_VBASE *SRP_VBASE_new(char *seed_key)
{
SRP_VBASE *vb = (SRP_VBASE *)OPENSSL_malloc(sizeof(SRP_VBASE));
@@ -468,21 +486,50 @@ int SRP_VBASE_init(SRP_VBASE *vb, char *verifier_file)
}
-SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username)
+static SRP_user_pwd *find_user(SRP_VBASE *vb, char *username)
{
int i;
SRP_user_pwd *user;
- unsigned char digv[SHA_DIGEST_LENGTH];
- unsigned char digs[SHA_DIGEST_LENGTH];
- EVP_MD_CTX ctxt;
if (vb == NULL)
return NULL;
+
for (i = 0; i < sk_SRP_user_pwd_num(vb->users_pwd); i++) {
user = sk_SRP_user_pwd_value(vb->users_pwd, i);
if (strcmp(user->id, username) == 0)
return user;
}
+
+ return NULL;
+}
+
+/*
+ * This method ignores the configured seed and fails for an unknown user.
+ * Ownership of the returned pointer is not released to the caller.
+ * In other words, caller must not free the result.
+ */
+SRP_user_pwd *SRP_VBASE_get_by_user(SRP_VBASE *vb, char *username)
+{
+ return find_user(vb, username);
+}
+
+/*
+ * Ownership of the returned pointer is released to the caller.
+ * In other words, caller must free the result once done.
+ */
+SRP_user_pwd *SRP_VBASE_get1_by_user(SRP_VBASE *vb, char *username)
+{
+ SRP_user_pwd *user;
+ unsigned char digv[SHA_DIGEST_LENGTH];
+ unsigned char digs[SHA_DIGEST_LENGTH];
+ EVP_MD_CTX ctxt;
+
+ if (vb == NULL)
+ return NULL;
+
+ if ((user = find_user(vb, username)) != NULL)
+ return srp_user_pwd_dup(user);
+
if ((vb->seed_key == NULL) ||
(vb->default_g == NULL) || (vb->default_N == NULL))
return NULL;
diff --git a/crypto/stack/stack.c b/crypto/stack/stack.c
index de437acf6a5c..fa50083e22b3 100644
--- a/crypto/stack/stack.c
+++ b/crypto/stack/stack.c
@@ -360,7 +360,7 @@ void *sk_set(_STACK *st, int i, void *value)
void sk_sort(_STACK *st)
{
- if (st && !st->sorted) {
+ if (st && !st->sorted && st->comp != NULL) {
int (*comp_func) (const void *, const void *);
/*
diff --git a/crypto/x509/x509_vfy.c b/crypto/x509/x509_vfy.c
index 0429767032fd..4d34dbac9314 100644
--- a/crypto/x509/x509_vfy.c
+++ b/crypto/x509/x509_vfy.c
@@ -194,6 +194,9 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
int num, j, retry;
int (*cb) (int xok, X509_STORE_CTX *xctx);
STACK_OF(X509) *sktmp = NULL;
+ int trust = X509_TRUST_UNTRUSTED;
+ int err;
+
if (ctx->cert == NULL) {
X509err(X509_F_X509_VERIFY_CERT, X509_R_NO_CERT_SET_FOR_US_TO_VERIFY);
return -1;
@@ -216,7 +219,8 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
if (((ctx->chain = sk_X509_new_null()) == NULL) ||
(!sk_X509_push(ctx->chain, ctx->cert))) {
X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE);
- goto end;
+ ok = -1;
+ goto err;
}
CRYPTO_add(&ctx->cert->references, 1, CRYPTO_LOCK_X509);
ctx->last_untrusted = 1;
@@ -225,7 +229,8 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
if (ctx->untrusted != NULL
&& (sktmp = sk_X509_dup(ctx->untrusted)) == NULL) {
X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE);
- goto end;
+ ok = -1;
+ goto err;
}
num = sk_X509_num(ctx->chain);
@@ -249,7 +254,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
if (ctx->param->flags & X509_V_FLAG_TRUSTED_FIRST) {
ok = ctx->get_issuer(&xtmp, ctx, x);
if (ok < 0)
- goto end;
+ goto err;
/*
* If successful for now free up cert so it will be picked up
* again later.
@@ -266,7 +271,8 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
if (xtmp != NULL) {
if (!sk_X509_push(ctx->chain, xtmp)) {
X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE);
- goto end;
+ ok = -1;
+ goto err;
}
CRYPTO_add(&xtmp->references, 1, CRYPTO_LOCK_X509);
(void)sk_X509_delete_ptr(sktmp, xtmp);
@@ -314,7 +320,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
bad_chain = 1;
ok = cb(0, ctx);
if (!ok)
- goto end;
+ goto err;
} else {
/*
* We have a match: replace certificate with store
@@ -347,25 +353,26 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
ok = ctx->get_issuer(&xtmp, ctx, x);
if (ok < 0)
- goto end;
+ goto err;
if (ok == 0)
break;
x = xtmp;
if (!sk_X509_push(ctx->chain, x)) {
X509_free(xtmp);
X509err(X509_F_X509_VERIFY_CERT, ERR_R_MALLOC_FAILURE);
- ok = 0;
- goto end;
+ ok = -1;
+ goto err;
}
num++;
}
/* we now have our chain, lets check it... */
- i = check_trust(ctx);
+ if ((trust = check_trust(ctx)) == X509_TRUST_REJECTED) {
+ /* Callback already issued */
+ ok = 0;
+ goto err;
+ }
- /* If explicitly rejected error */
- if (i == X509_TRUST_REJECTED)
- goto end;
/*
* If it's not explicitly trusted then check if there is an alternative
* chain that could be used. We only do this if we haven't already
@@ -373,14 +380,14 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
* chain checking
*/
retry = 0;
- if (i != X509_TRUST_TRUSTED
+ if (trust != X509_TRUST_TRUSTED
&& !(ctx->param->flags & X509_V_FLAG_TRUSTED_FIRST)
&& !(ctx->param->flags & X509_V_FLAG_NO_ALT_CHAINS)) {
while (j-- > 1) {
xtmp2 = sk_X509_value(ctx->chain, j - 1);
ok = ctx->get_issuer(&xtmp, ctx, xtmp2);
if (ok < 0)
- goto end;
+ goto err;
/* Check if we found an alternate chain */
if (ok > 0) {
/*
@@ -410,7 +417,7 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
* self signed certificate in which case we've indicated an error already
* and set bad_chain == 1
*/
- if (i != X509_TRUST_TRUSTED && !bad_chain) {
+ if (trust != X509_TRUST_TRUSTED && !bad_chain) {
if ((chain_ss == NULL) || !ctx->check_issued(ctx, x, chain_ss)) {
if (ctx->last_untrusted >= num)
ctx->error = X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY;
@@ -431,26 +438,26 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
bad_chain = 1;
ok = cb(0, ctx);
if (!ok)
- goto end;
+ goto err;
}
/* We have the chain complete: now we need to check its purpose */
ok = check_chain_extensions(ctx);
if (!ok)
- goto end;
+ goto err;
/* Check name constraints */
ok = check_name_constraints(ctx);
if (!ok)
- goto end;
+ goto err;
ok = check_id(ctx);
if (!ok)
- goto end;
+ goto err;
/* We may as well copy down any DSA parameters that are required */
X509_get_pubkey_parameters(NULL, ctx->chain);
@@ -462,16 +469,16 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
ok = ctx->check_revocation(ctx);
if (!ok)
- goto end;
+ goto err;
- i = X509_chain_check_suiteb(&ctx->error_depth, NULL, ctx->chain,
- ctx->param->flags);
- if (i != X509_V_OK) {
- ctx->error = i;
+ err = X509_chain_check_suiteb(&ctx->error_depth, NULL, ctx->chain,
+ ctx->param->flags);
+ if (err != X509_V_OK) {
+ ctx->error = err;
ctx->current_cert = sk_X509_value(ctx->chain, ctx->error_depth);
ok = cb(0, ctx);
if (!ok)
- goto end;
+ goto err;
}
/* At this point, we have a chain and need to verify it */
@@ -480,25 +487,28 @@ int X509_verify_cert(X509_STORE_CTX *ctx)
else
ok = internal_verify(ctx);
if (!ok)
- goto end;
+ goto err;
#ifndef OPENSSL_NO_RFC3779
/* RFC 3779 path validation, now that CRL check has been done */
ok = v3_asid_validate_path(ctx);
if (!ok)
- goto end;
+ goto err;
ok = v3_addr_validate_path(ctx);
if (!ok)
- goto end;
+ goto err;
#endif
/* If we get this far evaluate policies */
if (!bad_chain && (ctx->param->flags & X509_V_FLAG_POLICY_CHECK))
ok = ctx->check_policy(ctx);
if (!ok)
- goto end;
+ goto err;
if (0) {
- end:
+ err:
+ /* Ensure we return an error */
+ if (ok > 0)
+ ok = 0;
X509_get_pubkey_parameters(NULL, ctx->chain);
}
if (sktmp != NULL)
diff --git a/doc/apps/ciphers.pod b/doc/apps/ciphers.pod
index 1c26e3b3da36..9643b4d48ca8 100644
--- a/doc/apps/ciphers.pod
+++ b/doc/apps/ciphers.pod
@@ -38,25 +38,21 @@ SSL v2 and for SSL v3/TLS v1.
Like B<-v>, but include cipher suite codes in output (hex format).
-=item B<-ssl3>
+=item B<-ssl3>, B<-tls1>
-only include SSL v3 ciphers.
+This lists ciphers compatible with any of SSLv3, TLSv1, TLSv1.1 or TLSv1.2.
=item B<-ssl2>
-only include SSL v2 ciphers.
-
-=item B<-tls1>
-
-only include TLS v1 ciphers.
+Only include SSLv2 ciphers.
=item B<-h>, B<-?>
-print a brief usage message.
+Print a brief usage message.
=item B<cipherlist>
-a cipher list to convert to a cipher preference list. If it is not included
+A cipher list to convert to a cipher preference list. If it is not included
then the default cipher list will be used. The format is described below.
=back
@@ -109,9 +105,10 @@ The following is a list of all permitted cipher strings and their meanings.
=item B<DEFAULT>
-the default cipher list. This is determined at compile time and
-is normally B<ALL:!EXPORT:!aNULL:!eNULL:!SSLv2>. This must be the firstcipher string
-specified.
+The default cipher list.
+This is determined at compile time and is normally
+B<ALL:!EXPORT:!aNULL:!eNULL:!SSLv2>.
+When used, this must be the first cipherstring specified.
=item B<COMPLEMENTOFDEFAULT>
@@ -139,34 +136,46 @@ than 128 bits, and some cipher suites with 128-bit keys.
=item B<LOW>
-"low" encryption cipher suites, currently those using 64 or 56 bit encryption algorithms
-but excluding export cipher suites.
+Low strength encryption cipher suites, currently those using 64 or 56 bit
+encryption algorithms but excluding export cipher suites.
+As of OpenSSL 1.0.2g, these are disabled in default builds.
=item B<EXP>, B<EXPORT>
-export encryption algorithms. Including 40 and 56 bits algorithms.
+Export strength encryption algorithms. Including 40 and 56 bits algorithms.
+As of OpenSSL 1.0.2g, these are disabled in default builds.
=item B<EXPORT40>
-40 bit export encryption algorithms
+40-bit export encryption algorithms
+As of OpenSSL 1.0.2g, these are disabled in default builds.
=item B<EXPORT56>
-56 bit export encryption algorithms. In OpenSSL 0.9.8c and later the set of
+56-bit export encryption algorithms. In OpenSSL 0.9.8c and later the set of
56 bit export ciphers is empty unless OpenSSL has been explicitly configured
with support for experimental ciphers.
+As of OpenSSL 1.0.2g, these are disabled in default builds.
=item B<eNULL>, B<NULL>
-the "NULL" ciphers that is those offering no encryption. Because these offer no
-encryption at all and are a security risk they are disabled unless explicitly
-included.
+The "NULL" ciphers that is those offering no encryption. Because these offer no
+encryption at all and are a security risk they are not enabled via either the
+B<DEFAULT> or B<ALL> cipher strings.
+Be careful when building cipherlists out of lower-level primitives such as
+B<kRSA> or B<aECDSA> as these do overlap with the B<eNULL> ciphers.
+When in doubt, include B<!eNULL> in your cipherlist.
=item B<aNULL>
-the cipher suites offering no authentication. This is currently the anonymous
+The cipher suites offering no authentication. This is currently the anonymous
DH algorithms and anonymous ECDH algorithms. These cipher suites are vulnerable
to a "man in the middle" attack and so their use is normally discouraged.
+These are excluded from the B<DEFAULT> ciphers, but included in the B<ALL>
+ciphers.
+Be careful when building cipherlists out of lower-level primitives such as
+B<kDHE> or B<AES> as these do overlap with the B<aNULL> ciphers.
+When in doubt, include B<!aNULL> in your cipherlist.
=item B<kRSA>, B<RSA>
@@ -582,11 +591,11 @@ Note: these ciphers can also be used in SSL v3.
=head2 Deprecated SSL v2.0 cipher suites.
SSL_CK_RC4_128_WITH_MD5 RC4-MD5
- SSL_CK_RC4_128_EXPORT40_WITH_MD5 EXP-RC4-MD5
- SSL_CK_RC2_128_CBC_WITH_MD5 RC2-MD5
- SSL_CK_RC2_128_CBC_EXPORT40_WITH_MD5 EXP-RC2-MD5
+ SSL_CK_RC4_128_EXPORT40_WITH_MD5 Not implemented.
+ SSL_CK_RC2_128_CBC_WITH_MD5 RC2-CBC-MD5
+ SSL_CK_RC2_128_CBC_EXPORT40_WITH_MD5 Not implemented.
SSL_CK_IDEA_128_CBC_WITH_MD5 IDEA-CBC-MD5
- SSL_CK_DES_64_CBC_WITH_MD5 DES-CBC-MD5
+ SSL_CK_DES_64_CBC_WITH_MD5 Not implemented.
SSL_CK_DES_192_EDE3_CBC_WITH_MD5 DES-CBC3-MD5
=head1 NOTES
diff --git a/doc/apps/pkeyutl.pod b/doc/apps/pkeyutl.pod
index 27be9a90079f..5da347c97d32 100644
--- a/doc/apps/pkeyutl.pod
+++ b/doc/apps/pkeyutl.pod
@@ -137,6 +137,19 @@ Unless otherwise mentioned all algorithms support the B<digest:alg> option
which specifies the digest in use for sign, verify and verifyrecover operations.
The value B<alg> should represent a digest name as used in the
EVP_get_digestbyname() function for example B<sha1>.
+This value is used only for sanity-checking the lengths of data passed in to
+the B<pkeyutl> and for creating the structures that make up the signature
+(e.g. B<DigestInfo> in RSASSA PKCS#1 v1.5 signatures).
+In case of RSA, ECDSA and DSA signatures, this utility
+will not perform hashing on input data but rather use the data directly as
+input of signature algorithm. Depending on key type, signature type and mode
+of padding, the maximum acceptable lengths of input data differ. In general,
+with RSA the signed data can't be longer than the key modulus, in case of ECDSA
+and DSA the data shouldn't be longer than field size, otherwise it will be
+silently truncated to field size.
+
+In other words, if the value of digest is B<sha1> the input should be 20 bytes
+long binary encoding of SHA-1 hash function output.
=head1 RSA ALGORITHM
diff --git a/doc/apps/req.pod b/doc/apps/req.pod
index 54a4d394d282..30653e509357 100644
--- a/doc/apps/req.pod
+++ b/doc/apps/req.pod
@@ -347,9 +347,12 @@ configuration file values.
=item B<default_bits>
-This specifies the default key size in bits. If not specified then
-512 is used. It is used if the B<-new> option is used. It can be
-overridden by using the B<-newkey> option.
+Specifies the default key size in bits.
+
+This option is used in conjunction with the B<-new> option to generate
+a new key. It can be overridden by specifying an explicit key size in
+the B<-newkey> option. The smallest accepted key size is 512 bits. If
+no key size is specified then 2048 bits is used.
=item B<default_keyfile>
diff --git a/doc/apps/s_client.pod b/doc/apps/s_client.pod
index 84d052706941..618df9659d3b 100644
--- a/doc/apps/s_client.pod
+++ b/doc/apps/s_client.pod
@@ -201,15 +201,11 @@ Use the PSK key B<key> when using a PSK cipher suite. The key is
given as a hexadecimal number without leading 0x, for example -psk
1a2b3c4d.
-=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2>
+=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-tls1_1>, B<-tls1_2>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2>
-these options disable the use of certain SSL or TLS protocols. By default
-the initial handshake uses a method which should be compatible with all
-servers and permit them to use SSL v3, SSL v2 or TLS as appropriate.
-
-Unfortunately there are still ancient and broken servers in use which
-cannot handle this technique and will fail to connect. Some servers only
-work if TLS is turned off.
+These options require or disable the use of the specified SSL or TLS protocols.
+By default the initial handshake uses a I<version-flexible> method which will
+negotiate the highest mutually supported protocol version.
=item B<-fallback_scsv>
diff --git a/doc/apps/s_server.pod b/doc/apps/s_server.pod
index baca7792446f..6f4acb7006ff 100644
--- a/doc/apps/s_server.pod
+++ b/doc/apps/s_server.pod
@@ -217,11 +217,11 @@ Use the PSK key B<key> when using a PSK cipher suite. The key is
given as a hexadecimal number without leading 0x, for example -psk
1a2b3c4d.
-=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>
+=item B<-ssl2>, B<-ssl3>, B<-tls1>, B<-tls1_1>, B<-tls1_2>, B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2>
-these options disable the use of certain SSL or TLS protocols. By default
-the initial handshake uses a method which should be compatible with all
-servers and permit them to use SSL v3, SSL v2 or TLS as appropriate.
+These options require or disable the use of the specified SSL or TLS protocols.
+By default the initial handshake uses a I<version-flexible> method which will
+negotiate the highest mutually supported protocol version.
=item B<-bugs>
diff --git a/doc/crypto/BIO_s_mem.pod b/doc/crypto/BIO_s_mem.pod
index 8f85e0dceeb7..9f239648d752 100644
--- a/doc/crypto/BIO_s_mem.pod
+++ b/doc/crypto/BIO_s_mem.pod
@@ -16,7 +16,7 @@ BIO_get_mem_ptr, BIO_new_mem_buf - memory BIO
BIO_set_mem_buf(BIO *b,BUF_MEM *bm,int c)
BIO_get_mem_ptr(BIO *b,BUF_MEM **pp)
- BIO *BIO_new_mem_buf(void *buf, int len);
+ BIO *BIO_new_mem_buf(const void *buf, int len);
=head1 DESCRIPTION
@@ -61,7 +61,7 @@ BIO_get_mem_ptr() places the underlying BUF_MEM structure in B<pp>. It is
a macro.
BIO_new_mem_buf() creates a memory BIO using B<len> bytes of data at B<buf>,
-if B<len> is -1 then the B<buf> is assumed to be null terminated and its
+if B<len> is -1 then the B<buf> is assumed to be nul terminated and its
length is determined by B<strlen>. The BIO is set to a read only state and
as a result cannot be written to. This is useful when some data needs to be
made available from a static area of memory in the form of a BIO. The
diff --git a/doc/ssl/SSL_CONF_cmd.pod b/doc/ssl/SSL_CONF_cmd.pod
index 2bf1a60e9013..e81d76ae779a 100644
--- a/doc/ssl/SSL_CONF_cmd.pod
+++ b/doc/ssl/SSL_CONF_cmd.pod
@@ -74,7 +74,7 @@ B<prime256v1>). Curve names are case sensitive.
=item B<-named_curve>
-This sets the temporary curve used for ephemeral ECDH modes. Only used by
+This sets the temporary curve used for ephemeral ECDH modes. Only used by
servers
The B<value> argument is a curve name or the special value B<auto> which
@@ -85,7 +85,7 @@ can be either the B<NIST> name (e.g. B<P-256>) or an OpenSSL OID name
=item B<-cipher>
Sets the cipher suite list to B<value>. Note: syntax checking of B<value> is
-currently not performed unless a B<SSL> or B<SSL_CTX> structure is
+currently not performed unless a B<SSL> or B<SSL_CTX> structure is
associated with B<cctx>.
=item B<-cert>
@@ -111,9 +111,9 @@ operations are permitted.
=item B<-no_ssl2>, B<-no_ssl3>, B<-no_tls1>, B<-no_tls1_1>, B<-no_tls1_2>
-Disables protocol support for SSLv2, SSLv3, TLS 1.0, TLS 1.1 or TLS 1.2
-by setting the corresponding options B<SSL_OP_NO_SSL2>, B<SSL_OP_NO_SSL3>,
-B<SSL_OP_NO_TLS1>, B<SSL_OP_NO_TLS1_1> and B<SSL_OP_NO_TLS1_2> respectively.
+Disables protocol support for SSLv2, SSLv3, TLSv1.0, TLSv1.1 or TLSv1.2
+by setting the corresponding options B<SSL_OP_NO_SSLv2>, B<SSL_OP_NO_SSLv3>,
+B<SSL_OP_NO_TLSv1>, B<SSL_OP_NO_TLSv1_1> and B<SSL_OP_NO_TLSv1_2> respectively.
=item B<-bugs>
@@ -177,7 +177,7 @@ Note: the command prefix (if set) alters the recognised B<cmd> values.
=item B<CipherString>
Sets the cipher suite list to B<value>. Note: syntax checking of B<value> is
-currently not performed unless an B<SSL> or B<SSL_CTX> structure is
+currently not performed unless an B<SSL> or B<SSL_CTX> structure is
associated with B<cctx>.
=item B<Certificate>
@@ -244,7 +244,7 @@ B<prime256v1>). Curve names are case sensitive.
=item B<ECDHParameters>
-This sets the temporary curve used for ephemeral ECDH modes. Only used by
+This sets the temporary curve used for ephemeral ECDH modes. Only used by
servers
The B<value> argument is a curve name or the special value B<Automatic> which
@@ -258,10 +258,11 @@ The supported versions of the SSL or TLS protocol.
The B<value> argument is a comma separated list of supported protocols to
enable or disable. If an protocol is preceded by B<-> that version is disabled.
-All versions are enabled by default, though applications may choose to
-explicitly disable some. Currently supported protocol values are B<SSLv2>,
-B<SSLv3>, B<TLSv1>, B<TLSv1.1> and B<TLSv1.2>. The special value B<ALL> refers
-to all supported versions.
+Currently supported protocol values are B<SSLv2>, B<SSLv3>, B<TLSv1>,
+B<TLSv1.1> and B<TLSv1.2>.
+All protocol versions other than B<SSLv2> are enabled by default.
+To avoid inadvertent enabling of B<SSLv2>, when SSLv2 is disabled, it is not
+possible to enable it via the B<Protocol> command.
=item B<Options>
@@ -339,16 +340,16 @@ The value is a directory name.
The order of operations is significant. This can be used to set either defaults
or values which cannot be overridden. For example if an application calls:
- SSL_CONF_cmd(ctx, "Protocol", "-SSLv2");
+ SSL_CONF_cmd(ctx, "Protocol", "-SSLv3");
SSL_CONF_cmd(ctx, userparam, uservalue);
-it will disable SSLv2 support by default but the user can override it. If
+it will disable SSLv3 support by default but the user can override it. If
however the call sequence is:
SSL_CONF_cmd(ctx, userparam, uservalue);
- SSL_CONF_cmd(ctx, "Protocol", "-SSLv2");
+ SSL_CONF_cmd(ctx, "Protocol", "-SSLv3");
-SSLv2 is B<always> disabled and attempt to override this by the user are
+then SSLv3 is B<always> disabled and attempt to override this by the user are
ignored.
By checking the return code of SSL_CTX_cmd() it is possible to query if a
@@ -372,7 +373,7 @@ can be checked instead. If -3 is returned a required argument is missing
and an error is indicated. If 0 is returned some other error occurred and
this can be reported back to the user.
-The function SSL_CONF_cmd_value_type() can be used by applications to
+The function SSL_CONF_cmd_value_type() can be used by applications to
check for the existence of a command or to perform additional syntax
checking or translation of the command value. For example if the return
value is B<SSL_CONF_TYPE_FILE> an application could translate a relative
diff --git a/doc/ssl/SSL_CTX_new.pod b/doc/ssl/SSL_CTX_new.pod
index 491ac8c172cb..b8cc87978451 100644
--- a/doc/ssl/SSL_CTX_new.pod
+++ b/doc/ssl/SSL_CTX_new.pod
@@ -2,13 +2,55 @@
=head1 NAME
-SSL_CTX_new - create a new SSL_CTX object as framework for TLS/SSL enabled functions
+SSL_CTX_new,
+SSLv23_method, SSLv23_server_method, SSLv23_client_method,
+TLSv1_2_method, TLSv1_2_server_method, TLSv1_2_client_method,
+TLSv1_1_method, TLSv1_1_server_method, TLSv1_1_client_method,
+TLSv1_method, TLSv1_server_method, TLSv1_client_method,
+SSLv3_method, SSLv3_server_method, SSLv3_client_method,
+SSLv2_method, SSLv2_server_method, SSLv2_client_method,
+DTLS_method, DTLS_server_method, DTLS_client_method,
+DTLSv1_2_method, DTLSv1_2_server_method, DTLSv1_2_client_method,
+DTLSv1_method, DTLSv1_server_method, DTLSv1_client_method -
+create a new SSL_CTX object as framework for TLS/SSL enabled functions
=head1 SYNOPSIS
#include <openssl/ssl.h>
SSL_CTX *SSL_CTX_new(const SSL_METHOD *method);
+ const SSL_METHOD *SSLv23_method(void);
+ const SSL_METHOD *SSLv23_server_method(void);
+ const SSL_METHOD *SSLv23_client_method(void);
+ const SSL_METHOD *TLSv1_2_method(void);
+ const SSL_METHOD *TLSv1_2_server_method(void);
+ const SSL_METHOD *TLSv1_2_client_method(void);
+ const SSL_METHOD *TLSv1_1_method(void);
+ const SSL_METHOD *TLSv1_1_server_method(void);
+ const SSL_METHOD *TLSv1_1_client_method(void);
+ const SSL_METHOD *TLSv1_method(void);
+ const SSL_METHOD *TLSv1_server_method(void);
+ const SSL_METHOD *TLSv1_client_method(void);
+ #ifndef OPENSSL_NO_SSL3_METHOD
+ const SSL_METHOD *SSLv3_method(void);
+ const SSL_METHOD *SSLv3_server_method(void);
+ const SSL_METHOD *SSLv3_client_method(void);
+ #endif
+ #ifndef OPENSSL_NO_SSL2
+ const SSL_METHOD *SSLv2_method(void);
+ const SSL_METHOD *SSLv2_server_method(void);
+ const SSL_METHOD *SSLv2_client_method(void);
+ #endif
+
+ const SSL_METHOD *DTLS_method(void);
+ const SSL_METHOD *DTLS_server_method(void);
+ const SSL_METHOD *DTLS_client_method(void);
+ const SSL_METHOD *DTLSv1_2_method(void);
+ const SSL_METHOD *DTLSv1_2_server_method(void);
+ const SSL_METHOD *DTLSv1_2_client_method(void);
+ const SSL_METHOD *DTLSv1_method(void);
+ const SSL_METHOD *DTLSv1_server_method(void);
+ const SSL_METHOD *DTLSv1_client_method(void);
=head1 DESCRIPTION
@@ -23,65 +65,88 @@ client only type. B<method> can be of the following types:
=over 4
-=item SSLv2_method(void), SSLv2_server_method(void), SSLv2_client_method(void)
+=item SSLv23_method(), SSLv23_server_method(), SSLv23_client_method()
+
+These are the general-purpose I<version-flexible> SSL/TLS methods.
+The actual protocol version used will be negotiated to the highest version
+mutually supported by the client and the server.
+The supported protocols are SSLv2, SSLv3, TLSv1, TLSv1.1 and TLSv1.2.
+Most applications should use these method, and avoid the version specific
+methods described below.
+
+The list of protocols available can be further limited using the
+B<SSL_OP_NO_SSLv2>, B<SSL_OP_NO_SSLv3>, B<SSL_OP_NO_TLSv1>,
+B<SSL_OP_NO_TLSv1_1> and B<SSL_OP_NO_TLSv1_2> options of the
+L<SSL_CTX_set_options(3)> or L<SSL_set_options(3)> functions.
+Clients should avoid creating "holes" in the set of protocols they support,
+when disabling a protocol, make sure that you also disable either all previous
+or all subsequent protocol versions.
+In clients, when a protocol version is disabled without disabling I<all>
+previous protocol versions, the effect is to also disable all subsequent
+protocol versions.
+
+The SSLv2 and SSLv3 protocols are deprecated and should generally not be used.
+Applications should typically use L<SSL_CTX_set_options(3)> in combination with
+the B<SSL_OP_NO_SSLv3> flag to disable negotiation of SSLv3 via the above
+I<version-flexible> SSL/TLS methods.
+The B<SSL_OP_NO_SSLv2> option is set by default, and would need to be cleared
+via L<SSL_CTX_clear_options(3)> in order to enable negotiation of SSLv2.
+
+=item TLSv1_2_method(), TLSv1_2_server_method(), TLSv1_2_client_method()
-A TLS/SSL connection established with these methods will only understand
-the SSLv2 protocol. A client will send out SSLv2 client hello messages
-and will also indicate that it only understand SSLv2. A server will only
-understand SSLv2 client hello messages.
+A TLS/SSL connection established with these methods will only understand the
+TLSv1.2 protocol. A client will send out TLSv1.2 client hello messages and
+will also indicate that it only understand TLSv1.2. A server will only
+understand TLSv1.2 client hello messages.
-=item SSLv3_method(void), SSLv3_server_method(void), SSLv3_client_method(void)
+=item TLSv1_1_method(), TLSv1_1_server_method(), TLSv1_1_client_method()
A TLS/SSL connection established with these methods will only understand the
-SSLv3 protocol. A client will send out SSLv3 client hello messages
-and will indicate that it only understands SSLv3. A server will only understand
-SSLv3 client hello messages. This especially means, that it will
-not understand SSLv2 client hello messages which are widely used for
-compatibility reasons, see SSLv23_*_method().
+TLSv1.1 protocol. A client will send out TLSv1.1 client hello messages and
+will also indicate that it only understand TLSv1.1. A server will only
+understand TLSv1.1 client hello messages.
-=item TLSv1_method(void), TLSv1_server_method(void), TLSv1_client_method(void)
+=item TLSv1_method(), TLSv1_server_method(), TLSv1_client_method()
A TLS/SSL connection established with these methods will only understand the
-TLSv1 protocol. A client will send out TLSv1 client hello messages
-and will indicate that it only understands TLSv1. A server will only understand
-TLSv1 client hello messages. This especially means, that it will
-not understand SSLv2 client hello messages which are widely used for
-compatibility reasons, see SSLv23_*_method(). It will also not understand
-SSLv3 client hello messages.
-
-=item SSLv23_method(void), SSLv23_server_method(void), SSLv23_client_method(void)
-
-A TLS/SSL connection established with these methods may understand the SSLv2,
-SSLv3, TLSv1, TLSv1.1 and TLSv1.2 protocols.
-
-If the cipher list does not contain any SSLv2 ciphersuites (the default
-cipher list does not) or extensions are required (for example server name)
-a client will send out TLSv1 client hello messages including extensions and
-will indicate that it also understands TLSv1.1, TLSv1.2 and permits a
-fallback to SSLv3. A server will support SSLv3, TLSv1, TLSv1.1 and TLSv1.2
-protocols. This is the best choice when compatibility is a concern.
-
-If any SSLv2 ciphersuites are included in the cipher list and no extensions
-are required then SSLv2 compatible client hellos will be used by clients and
-SSLv2 will be accepted by servers. This is B<not> recommended due to the
-insecurity of SSLv2 and the limited nature of the SSLv2 client hello
-prohibiting the use of extensions.
+TLSv1 protocol. A client will send out TLSv1 client hello messages and will
+indicate that it only understands TLSv1. A server will only understand TLSv1
+client hello messages.
-=back
+=item SSLv3_method(), SSLv3_server_method(), SSLv3_client_method()
+
+A TLS/SSL connection established with these methods will only understand the
+SSLv3 protocol. A client will send out SSLv3 client hello messages and will
+indicate that it only understands SSLv3. A server will only understand SSLv3
+client hello messages. The SSLv3 protocol is deprecated and should not be
+used.
+
+=item SSLv2_method(), SSLv2_server_method(), SSLv2_client_method()
+
+A TLS/SSL connection established with these methods will only understand the
+SSLv2 protocol. A client will send out SSLv2 client hello messages and will
+also indicate that it only understand SSLv2. A server will only understand
+SSLv2 client hello messages. The SSLv2 protocol offers little to no security
+and should not be used.
+As of OpenSSL 1.0.2g, EXPORT ciphers and 56-bit DES are no longer available
+with SSLv2.
-The list of protocols available can later be limited using the SSL_OP_NO_SSLv2,
-SSL_OP_NO_SSLv3, SSL_OP_NO_TLSv1, SSL_OP_NO_TLSv1_1 and SSL_OP_NO_TLSv1_2
-options of the SSL_CTX_set_options() or SSL_set_options() functions.
-Using these options it is possible to choose e.g. SSLv23_server_method() and
-be able to negotiate with all possible clients, but to only allow newer
-protocols like TLSv1, TLSv1.1 or TLS v1.2.
+=item DTLS_method(), DTLS_server_method(), DTLS_client_method()
-Applications which never want to support SSLv2 (even is the cipher string
-is configured to use SSLv2 ciphersuites) can set SSL_OP_NO_SSLv2.
+These are the version-flexible DTLS methods.
+
+=item DTLSv1_2_method(), DTLSv1_2_server_method(), DTLSv1_2_client_method()
+
+These are the version-specific methods for DTLSv1.2.
+
+=item DTLSv1_method(), DTLSv1_server_method(), DTLSv1_client_method()
+
+These are the version-specific methods for DTLSv1.
+
+=back
-SSL_CTX_new() initializes the list of ciphers, the session cache setting,
-the callbacks, the keys and certificates and the options to its default
-values.
+SSL_CTX_new() initializes the list of ciphers, the session cache setting, the
+callbacks, the keys and certificates and the options to its default values.
=head1 RETURN VALUES
@@ -91,8 +156,8 @@ The following return values can occur:
=item NULL
-The creation of a new SSL_CTX object failed. Check the error stack to
-find out the reason.
+The creation of a new SSL_CTX object failed. Check the error stack to find out
+the reason.
=item Pointer to an SSL_CTX object
@@ -102,6 +167,7 @@ The return value points to an allocated SSL_CTX object.
=head1 SEE ALSO
+L<SSL_CTX_set_options(3)>, L<SSL_CTX_clear_options(3)>, L<SSL_set_options(3)>,
L<SSL_CTX_free(3)|SSL_CTX_free(3)>, L<SSL_accept(3)|SSL_accept(3)>,
L<ssl(3)|ssl(3)>, L<SSL_set_connect_state(3)|SSL_set_connect_state(3)>
diff --git a/doc/ssl/SSL_CTX_set_options.pod b/doc/ssl/SSL_CTX_set_options.pod
index e80a72cd4d06..9a7e98c1d414 100644
--- a/doc/ssl/SSL_CTX_set_options.pod
+++ b/doc/ssl/SSL_CTX_set_options.pod
@@ -189,15 +189,25 @@ browser has a cert, it will crash/hang. Works for 3.x and 4.xbeta
=item SSL_OP_NO_SSLv2
Do not use the SSLv2 protocol.
+As of OpenSSL 1.0.2g the B<SSL_OP_NO_SSLv2> option is set by default.
=item SSL_OP_NO_SSLv3
Do not use the SSLv3 protocol.
+It is recommended that applications should set this option.
=item SSL_OP_NO_TLSv1
Do not use the TLSv1 protocol.
+=item SSL_OP_NO_TLSv1_1
+
+Do not use the TLSv1.1 protocol.
+
+=item SSL_OP_NO_TLSv1_2
+
+Do not use the TLSv1.2 protocol.
+
=item SSL_OP_NO_SESSION_RESUMPTION_ON_RENEGOTIATION
When performing renegotiation as a server, always start a new session
diff --git a/doc/ssl/ssl.pod b/doc/ssl/ssl.pod
index 242087e691e3..70cca178a204 100644
--- a/doc/ssl/ssl.pod
+++ b/doc/ssl/ssl.pod
@@ -130,41 +130,86 @@ protocol methods defined in B<SSL_METHOD> structures.
=over 4
-=item const SSL_METHOD *B<SSLv2_client_method>(void);
+=item const SSL_METHOD *B<SSLv23_method>(void);
-Constructor for the SSLv2 SSL_METHOD structure for a dedicated client.
+Constructor for the I<version-flexible> SSL_METHOD structure for
+clients, servers or both.
+See L<SSL_CTX_new(3)> for details.
-=item const SSL_METHOD *B<SSLv2_server_method>(void);
+=item const SSL_METHOD *B<SSLv23_client_method>(void);
-Constructor for the SSLv2 SSL_METHOD structure for a dedicated server.
+Constructor for the I<version-flexible> SSL_METHOD structure for
+clients.
-=item const SSL_METHOD *B<SSLv2_method>(void);
+=item const SSL_METHOD *B<SSLv23_client_method>(void);
-Constructor for the SSLv2 SSL_METHOD structure for combined client and server.
+Constructor for the I<version-flexible> SSL_METHOD structure for
+servers.
-=item const SSL_METHOD *B<SSLv3_client_method>(void);
+=item const SSL_METHOD *B<TLSv1_2_method>(void);
-Constructor for the SSLv3 SSL_METHOD structure for a dedicated client.
+Constructor for the TLSv1.2 SSL_METHOD structure for clients, servers
+or both.
-=item const SSL_METHOD *B<SSLv3_server_method>(void);
+=item const SSL_METHOD *B<TLSv1_2_client_method>(void);
-Constructor for the SSLv3 SSL_METHOD structure for a dedicated server.
+Constructor for the TLSv1.2 SSL_METHOD structure for clients.
-=item const SSL_METHOD *B<SSLv3_method>(void);
+=item const SSL_METHOD *B<TLSv1_2_server_method>(void);
+
+Constructor for the TLSv1.2 SSL_METHOD structure for servers.
+
+=item const SSL_METHOD *B<TLSv1_1_method>(void);
-Constructor for the SSLv3 SSL_METHOD structure for combined client and server.
+Constructor for the TLSv1.1 SSL_METHOD structure for clients, servers
+or both.
+
+=item const SSL_METHOD *B<TLSv1_1_client_method>(void);
+
+Constructor for the TLSv1.1 SSL_METHOD structure for clients.
+
+=item const SSL_METHOD *B<TLSv1_1_server_method>(void);
+
+Constructor for the TLSv1.1 SSL_METHOD structure for servers.
+
+=item const SSL_METHOD *B<TLSv1_method>(void);
+
+Constructor for the TLSv1 SSL_METHOD structure for clients, servers
+or both.
=item const SSL_METHOD *B<TLSv1_client_method>(void);
-Constructor for the TLSv1 SSL_METHOD structure for a dedicated client.
+Constructor for the TLSv1 SSL_METHOD structure for clients.
=item const SSL_METHOD *B<TLSv1_server_method>(void);
-Constructor for the TLSv1 SSL_METHOD structure for a dedicated server.
+Constructor for the TLSv1 SSL_METHOD structure for servers.
-=item const SSL_METHOD *B<TLSv1_method>(void);
+=item const SSL_METHOD *B<SSLv3_method>(void);
+
+Constructor for the SSLv3 SSL_METHOD structure for clients, servers
+or both.
+
+=item const SSL_METHOD *B<SSLv3_client_method>(void);
+
+Constructor for the SSLv3 SSL_METHOD structure for clients.
+
+=item const SSL_METHOD *B<SSLv3_server_method>(void);
+
+Constructor for the SSLv3 SSL_METHOD structure for servers.
+
+=item const SSL_METHOD *B<SSLv2_method>(void);
+
+Constructor for the SSLv2 SSL_METHOD structure for clients, servers
+or both.
+
+=item const SSL_METHOD *B<SSLv2_client_method>(void);
+
+Constructor for the SSLv2 SSL_METHOD structure for clients.
+
+=item const SSL_METHOD *B<SSLv2_server_method>(void);
-Constructor for the TLSv1 SSL_METHOD structure for combined client and server.
+Constructor for the SSLv2 SSL_METHOD structure for servers.
=back
diff --git a/engines/e_capi.c b/engines/e_capi.c
index f4cd2ffe7fa1..6e524633f3f0 100644
--- a/engines/e_capi.c
+++ b/engines/e_capi.c
@@ -114,6 +114,26 @@
# define CERT_SYSTEM_STORE_CURRENT_USER 0x00010000
# endif
+# ifndef ALG_SID_SHA_256
+# define ALG_SID_SHA_256 12
+# endif
+# ifndef ALG_SID_SHA_384
+# define ALG_SID_SHA_384 13
+# endif
+# ifndef ALG_SID_SHA_512
+# define ALG_SID_SHA_512 14
+# endif
+
+# ifndef CALG_SHA_256
+# define CALG_SHA_256 (ALG_CLASS_HASH | ALG_TYPE_ANY | ALG_SID_SHA_256)
+# endif
+# ifndef CALG_SHA_384
+# define CALG_SHA_384 (ALG_CLASS_HASH | ALG_TYPE_ANY | ALG_SID_SHA_384)
+# endif
+# ifndef CALG_SHA_512
+# define CALG_SHA_512 (ALG_CLASS_HASH | ALG_TYPE_ANY | ALG_SID_SHA_512)
+# endif
+
# include <openssl/engine.h>
# include <openssl/pem.h>
# include <openssl/x509v3.h>
@@ -800,6 +820,18 @@ int capi_rsa_sign(int dtype, const unsigned char *m, unsigned int m_len,
}
/* Convert the signature type to a CryptoAPI algorithm ID */
switch (dtype) {
+ case NID_sha256:
+ alg = CALG_SHA_256;
+ break;
+
+ case NID_sha384:
+ alg = CALG_SHA_384;
+ break;
+
+ case NID_sha512:
+ alg = CALG_SHA_512;
+ break;
+
case NID_sha1:
alg = CALG_SHA1;
break;
diff --git a/ssl/Makefile b/ssl/Makefile
index 7b90fb037550..b6dee5b5ea52 100644
--- a/ssl/Makefile
+++ b/ssl/Makefile
@@ -15,7 +15,7 @@ KRB5_INCLUDES=
CFLAGS= $(INCLUDES) $(CFLAG)
GENERAL=Makefile README ssl-lib.com install.com
-TEST=ssltest.c heartbeat_test.c clienthellotest.c
+TEST=ssltest.c heartbeat_test.c clienthellotest.c sslv2conftest.c
APPS=
LIB=$(TOP)/libssl.a
@@ -399,14 +399,14 @@ s2_clnt.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h
s2_clnt.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h
s2_clnt.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h
s2_clnt.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h
-s2_clnt.o: ../include/openssl/pqueue.h ../include/openssl/rand.h
-s2_clnt.o: ../include/openssl/rsa.h ../include/openssl/safestack.h
-s2_clnt.o: ../include/openssl/sha.h ../include/openssl/srtp.h
-s2_clnt.o: ../include/openssl/ssl.h ../include/openssl/ssl2.h
-s2_clnt.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h
-s2_clnt.o: ../include/openssl/stack.h ../include/openssl/symhacks.h
-s2_clnt.o: ../include/openssl/tls1.h ../include/openssl/x509.h
-s2_clnt.o: ../include/openssl/x509_vfy.h s2_clnt.c ssl_locl.h
+s2_clnt.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h
+s2_clnt.o: ../include/openssl/safestack.h ../include/openssl/sha.h
+s2_clnt.o: ../include/openssl/srtp.h ../include/openssl/ssl.h
+s2_clnt.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h
+s2_clnt.o: ../include/openssl/ssl3.h ../include/openssl/stack.h
+s2_clnt.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h
+s2_clnt.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h s2_clnt.c
+s2_clnt.o: ssl_locl.h
s2_enc.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
s2_enc.o: ../include/openssl/buffer.h ../include/openssl/comp.h
s2_enc.o: ../include/openssl/crypto.h ../include/openssl/dsa.h
@@ -435,18 +435,18 @@ s2_lib.o: ../include/openssl/ec.h ../include/openssl/ecdh.h
s2_lib.o: ../include/openssl/ecdsa.h ../include/openssl/err.h
s2_lib.o: ../include/openssl/evp.h ../include/openssl/hmac.h
s2_lib.o: ../include/openssl/kssl.h ../include/openssl/lhash.h
-s2_lib.o: ../include/openssl/md5.h ../include/openssl/obj_mac.h
-s2_lib.o: ../include/openssl/objects.h ../include/openssl/opensslconf.h
-s2_lib.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h
-s2_lib.o: ../include/openssl/pem.h ../include/openssl/pem2.h
-s2_lib.o: ../include/openssl/pkcs7.h ../include/openssl/pqueue.h
-s2_lib.o: ../include/openssl/rsa.h ../include/openssl/safestack.h
-s2_lib.o: ../include/openssl/sha.h ../include/openssl/srtp.h
-s2_lib.o: ../include/openssl/ssl.h ../include/openssl/ssl2.h
-s2_lib.o: ../include/openssl/ssl23.h ../include/openssl/ssl3.h
-s2_lib.o: ../include/openssl/stack.h ../include/openssl/symhacks.h
-s2_lib.o: ../include/openssl/tls1.h ../include/openssl/x509.h
-s2_lib.o: ../include/openssl/x509_vfy.h s2_lib.c ssl_locl.h
+s2_lib.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h
+s2_lib.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h
+s2_lib.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h
+s2_lib.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h
+s2_lib.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h
+s2_lib.o: ../include/openssl/safestack.h ../include/openssl/sha.h
+s2_lib.o: ../include/openssl/srtp.h ../include/openssl/ssl.h
+s2_lib.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h
+s2_lib.o: ../include/openssl/ssl3.h ../include/openssl/stack.h
+s2_lib.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h
+s2_lib.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h s2_lib.c
+s2_lib.o: ssl_locl.h
s2_meth.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
s2_meth.o: ../include/openssl/buffer.h ../include/openssl/comp.h
s2_meth.o: ../include/openssl/crypto.h ../include/openssl/dsa.h
@@ -487,20 +487,19 @@ s2_pkt.o: ../include/openssl/ssl3.h ../include/openssl/stack.h
s2_pkt.o: ../include/openssl/symhacks.h ../include/openssl/tls1.h
s2_pkt.o: ../include/openssl/x509.h ../include/openssl/x509_vfy.h s2_pkt.c
s2_pkt.o: ssl_locl.h
-s2_srvr.o: ../crypto/constant_time_locl.h ../e_os.h ../include/openssl/asn1.h
-s2_srvr.o: ../include/openssl/bio.h ../include/openssl/buffer.h
-s2_srvr.o: ../include/openssl/comp.h ../include/openssl/crypto.h
-s2_srvr.o: ../include/openssl/dsa.h ../include/openssl/dtls1.h
-s2_srvr.o: ../include/openssl/e_os2.h ../include/openssl/ec.h
-s2_srvr.o: ../include/openssl/ecdh.h ../include/openssl/ecdsa.h
-s2_srvr.o: ../include/openssl/err.h ../include/openssl/evp.h
-s2_srvr.o: ../include/openssl/hmac.h ../include/openssl/kssl.h
-s2_srvr.o: ../include/openssl/lhash.h ../include/openssl/obj_mac.h
-s2_srvr.o: ../include/openssl/objects.h ../include/openssl/opensslconf.h
-s2_srvr.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h
-s2_srvr.o: ../include/openssl/pem.h ../include/openssl/pem2.h
-s2_srvr.o: ../include/openssl/pkcs7.h ../include/openssl/pqueue.h
-s2_srvr.o: ../include/openssl/rand.h ../include/openssl/rsa.h
+s2_srvr.o: ../e_os.h ../include/openssl/asn1.h ../include/openssl/bio.h
+s2_srvr.o: ../include/openssl/buffer.h ../include/openssl/comp.h
+s2_srvr.o: ../include/openssl/crypto.h ../include/openssl/dsa.h
+s2_srvr.o: ../include/openssl/dtls1.h ../include/openssl/e_os2.h
+s2_srvr.o: ../include/openssl/ec.h ../include/openssl/ecdh.h
+s2_srvr.o: ../include/openssl/ecdsa.h ../include/openssl/err.h
+s2_srvr.o: ../include/openssl/evp.h ../include/openssl/hmac.h
+s2_srvr.o: ../include/openssl/kssl.h ../include/openssl/lhash.h
+s2_srvr.o: ../include/openssl/obj_mac.h ../include/openssl/objects.h
+s2_srvr.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h
+s2_srvr.o: ../include/openssl/ossl_typ.h ../include/openssl/pem.h
+s2_srvr.o: ../include/openssl/pem2.h ../include/openssl/pkcs7.h
+s2_srvr.o: ../include/openssl/pqueue.h ../include/openssl/rsa.h
s2_srvr.o: ../include/openssl/safestack.h ../include/openssl/sha.h
s2_srvr.o: ../include/openssl/srtp.h ../include/openssl/ssl.h
s2_srvr.o: ../include/openssl/ssl2.h ../include/openssl/ssl23.h
diff --git a/ssl/s2_lib.c b/ssl/s2_lib.c
index d55b93f76bb7..a8036b357f0e 100644
--- a/ssl/s2_lib.c
+++ b/ssl/s2_lib.c
@@ -156,6 +156,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = {
128,
},
+# if 0
/* RC4_128_EXPORT40_WITH_MD5 */
{
1,
@@ -171,6 +172,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = {
40,
128,
},
+# endif
/* RC2_128_CBC_WITH_MD5 */
{
@@ -188,6 +190,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = {
128,
},
+# if 0
/* RC2_128_CBC_EXPORT40_WITH_MD5 */
{
1,
@@ -203,6 +206,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = {
40,
128,
},
+# endif
# ifndef OPENSSL_NO_IDEA
/* IDEA_128_CBC_WITH_MD5 */
@@ -222,6 +226,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = {
},
# endif
+# if 0
/* DES_64_CBC_WITH_MD5 */
{
1,
@@ -237,6 +242,7 @@ OPENSSL_GLOBAL const SSL_CIPHER ssl2_ciphers[] = {
56,
56,
},
+# endif
/* DES_192_EDE3_CBC_WITH_MD5 */
{
diff --git a/ssl/s3_lib.c b/ssl/s3_lib.c
index f846cb5b7b01..4aac3b279280 100644
--- a/ssl/s3_lib.c
+++ b/ssl/s3_lib.c
@@ -198,6 +198,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
},
/* Cipher 03 */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_RSA_RC4_40_MD5,
@@ -212,6 +213,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
128,
},
+#endif
/* Cipher 04 */
{
@@ -246,6 +248,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
},
/* Cipher 06 */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_RSA_RC2_40_MD5,
@@ -260,6 +263,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
128,
},
+#endif
/* Cipher 07 */
#ifndef OPENSSL_NO_IDEA
@@ -280,6 +284,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
#endif
/* Cipher 08 */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_RSA_DES_40_CBC_SHA,
@@ -294,8 +299,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
56,
},
+#endif
/* Cipher 09 */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_RSA_DES_64_CBC_SHA,
@@ -310,6 +317,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
56,
},
+#endif
/* Cipher 0A */
{
@@ -329,6 +337,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
/* The DH ciphers */
/* Cipher 0B */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
0,
SSL3_TXT_DH_DSS_DES_40_CBC_SHA,
@@ -343,8 +352,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
56,
},
+#endif
/* Cipher 0C */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_DH_DSS_DES_64_CBC_SHA,
@@ -359,6 +370,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
56,
},
+#endif
/* Cipher 0D */
{
@@ -377,6 +389,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
},
/* Cipher 0E */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
0,
SSL3_TXT_DH_RSA_DES_40_CBC_SHA,
@@ -391,8 +404,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
56,
},
+#endif
/* Cipher 0F */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_DH_RSA_DES_64_CBC_SHA,
@@ -407,6 +422,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
56,
},
+#endif
/* Cipher 10 */
{
@@ -426,6 +442,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
/* The Ephemeral DH ciphers */
/* Cipher 11 */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_EDH_DSS_DES_40_CBC_SHA,
@@ -440,8 +457,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
56,
},
+#endif
/* Cipher 12 */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_EDH_DSS_DES_64_CBC_SHA,
@@ -456,6 +475,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
56,
},
+#endif
/* Cipher 13 */
{
@@ -474,6 +494,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
},
/* Cipher 14 */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_EDH_RSA_DES_40_CBC_SHA,
@@ -488,8 +509,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
56,
},
+#endif
/* Cipher 15 */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_EDH_RSA_DES_64_CBC_SHA,
@@ -504,6 +527,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
56,
},
+#endif
/* Cipher 16 */
{
@@ -522,6 +546,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
},
/* Cipher 17 */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_ADH_RC4_40_MD5,
@@ -536,6 +561,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
128,
},
+#endif
/* Cipher 18 */
{
@@ -554,6 +580,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
},
/* Cipher 19 */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_ADH_DES_40_CBC_SHA,
@@ -568,8 +595,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
128,
},
+#endif
/* Cipher 1A */
+#ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_ADH_DES_64_CBC_SHA,
@@ -584,6 +613,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
56,
},
+#endif
/* Cipher 1B */
{
@@ -655,6 +685,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
#ifndef OPENSSL_NO_KRB5
/* The Kerberos ciphers*/
/* Cipher 1E */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_KRB5_DES_64_CBC_SHA,
@@ -669,6 +700,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
56,
},
+# endif
/* Cipher 1F */
{
@@ -719,6 +751,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
},
/* Cipher 22 */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_KRB5_DES_64_CBC_MD5,
@@ -733,6 +766,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
56,
},
+# endif
/* Cipher 23 */
{
@@ -783,6 +817,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
},
/* Cipher 26 */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_KRB5_DES_40_CBC_SHA,
@@ -797,8 +832,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
56,
},
+# endif
/* Cipher 27 */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_KRB5_RC2_40_CBC_SHA,
@@ -813,8 +850,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
128,
},
+# endif
/* Cipher 28 */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_KRB5_RC4_40_SHA,
@@ -829,8 +868,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
128,
},
+# endif
/* Cipher 29 */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_KRB5_DES_40_CBC_MD5,
@@ -845,8 +886,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
56,
},
+# endif
/* Cipher 2A */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_KRB5_RC2_40_CBC_MD5,
@@ -861,8 +904,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
128,
},
+# endif
/* Cipher 2B */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
SSL3_TXT_KRB5_RC4_40_MD5,
@@ -877,6 +922,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
40,
128,
},
+# endif
#endif /* OPENSSL_NO_KRB5 */
/* New AES ciphersuites */
@@ -1300,6 +1346,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
# endif
/* Cipher 62 */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
TLS1_TXT_RSA_EXPORT1024_WITH_DES_CBC_SHA,
@@ -1314,8 +1361,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
56,
},
+# endif
/* Cipher 63 */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
TLS1_TXT_DHE_DSS_EXPORT1024_WITH_DES_CBC_SHA,
@@ -1330,8 +1379,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
56,
},
+# endif
/* Cipher 64 */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
TLS1_TXT_RSA_EXPORT1024_WITH_RC4_56_SHA,
@@ -1346,8 +1397,10 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
128,
},
+# endif
/* Cipher 65 */
+# ifndef OPENSSL_NO_WEAK_SSL_CIPHERS
{
1,
TLS1_TXT_DHE_DSS_EXPORT1024_WITH_RC4_56_SHA,
@@ -1362,6 +1415,7 @@ OPENSSL_GLOBAL SSL_CIPHER ssl3_ciphers[] = {
56,
128,
},
+# endif
/* Cipher 66 */
{
@@ -4326,21 +4380,6 @@ int ssl3_shutdown(SSL *s)
}
#endif
} else if (!(s->shutdown & SSL_RECEIVED_SHUTDOWN)) {
- if (SSL_in_init(s)) {
- /*
- * We can't shutdown properly if we are in the middle of a
- * handshake. Doing so is problematic because the peer may send a
- * CCS before it acts on our close_notify. However we should not
- * continue to process received handshake messages or CCS once our
- * close_notify has been sent. Therefore any close_notify from
- * the peer will be unreadable because we have not moved to the next
- * cipher state. Its best just to avoid this can-of-worms. Return
- * an error if we are wanting to wait for a close_notify from the
- * peer and we are in init.
- */
- SSLerr(SSL_F_SSL3_SHUTDOWN, SSL_R_SHUTDOWN_WHILE_IN_INIT);
- return -1;
- }
/*
* If we are waiting for a close from our peer, we are closed
*/
diff --git a/ssl/ssl.h b/ssl/ssl.h
index ae8c92575e03..04d4007eeb8e 100644
--- a/ssl/ssl.h
+++ b/ssl/ssl.h
@@ -2713,7 +2713,6 @@ void ERR_load_SSL_strings(void);
# define SSL_F_SSL3_SETUP_KEY_BLOCK 157
# define SSL_F_SSL3_SETUP_READ_BUFFER 156
# define SSL_F_SSL3_SETUP_WRITE_BUFFER 291
-# define SSL_F_SSL3_SHUTDOWN 396
# define SSL_F_SSL3_WRITE_BYTES 158
# define SSL_F_SSL3_WRITE_PENDING 159
# define SSL_F_SSL_ADD_CERT_CHAIN 318
diff --git a/ssl/ssl_conf.c b/ssl/ssl_conf.c
index 5478840deae9..8d3709d2b62c 100644
--- a/ssl/ssl_conf.c
+++ b/ssl/ssl_conf.c
@@ -330,11 +330,19 @@ static int cmd_Protocol(SSL_CONF_CTX *cctx, const char *value)
SSL_FLAG_TBL_INV("TLSv1.1", SSL_OP_NO_TLSv1_1),
SSL_FLAG_TBL_INV("TLSv1.2", SSL_OP_NO_TLSv1_2)
};
+ int ret;
+ int sslv2off;
+
if (!(cctx->flags & SSL_CONF_FLAG_FILE))
return -2;
cctx->tbl = ssl_protocol_list;
cctx->ntbl = sizeof(ssl_protocol_list) / sizeof(ssl_flag_tbl);
- return CONF_parse_list(value, ',', 1, ssl_set_option_list, cctx);
+
+ sslv2off = *cctx->poptions & SSL_OP_NO_SSLv2;
+ ret = CONF_parse_list(value, ',', 1, ssl_set_option_list, cctx);
+ /* Never turn on SSLv2 through configuration */
+ *cctx->poptions |= sslv2off;
+ return ret;
}
static int cmd_Options(SSL_CONF_CTX *cctx, const char *value)
diff --git a/ssl/ssl_err.c b/ssl/ssl_err.c
index dd3b2afd1ea6..704088dc469e 100644
--- a/ssl/ssl_err.c
+++ b/ssl/ssl_err.c
@@ -206,7 +206,6 @@ static ERR_STRING_DATA SSL_str_functs[] = {
{ERR_FUNC(SSL_F_SSL3_SETUP_KEY_BLOCK), "ssl3_setup_key_block"},
{ERR_FUNC(SSL_F_SSL3_SETUP_READ_BUFFER), "ssl3_setup_read_buffer"},
{ERR_FUNC(SSL_F_SSL3_SETUP_WRITE_BUFFER), "ssl3_setup_write_buffer"},
- {ERR_FUNC(SSL_F_SSL3_SHUTDOWN), "ssl3_shutdown"},
{ERR_FUNC(SSL_F_SSL3_WRITE_BYTES), "ssl3_write_bytes"},
{ERR_FUNC(SSL_F_SSL3_WRITE_PENDING), "ssl3_write_pending"},
{ERR_FUNC(SSL_F_SSL_ADD_CERT_CHAIN), "ssl_add_cert_chain"},
diff --git a/ssl/ssl_lib.c b/ssl/ssl_lib.c
index 2744be8ad8ce..f1279bbf9103 100644
--- a/ssl/ssl_lib.c
+++ b/ssl/ssl_lib.c
@@ -1060,7 +1060,12 @@ int SSL_shutdown(SSL *s)
return -1;
}
- return s->method->ssl_shutdown(s);
+ if (!SSL_in_init(s)) {
+ return s->method->ssl_shutdown(s);
+ } else {
+ SSLerr(SSL_F_SSL_SHUTDOWN, SSL_R_SHUTDOWN_WHILE_IN_INIT);
+ return -1;
+ }
}
int SSL_renegotiate(SSL *s)
@@ -2049,6 +2054,13 @@ SSL_CTX *SSL_CTX_new(const SSL_METHOD *meth)
*/
ret->options |= SSL_OP_LEGACY_SERVER_CONNECT;
+ /*
+ * Disable SSLv2 by default, callers that want to enable SSLv2 will have to
+ * explicitly clear this option via either of SSL_CTX_clear_options() or
+ * SSL_clear_options().
+ */
+ ret->options |= SSL_OP_NO_SSLv2;
+
return (ret);
err:
SSLerr(SSL_F_SSL_CTX_NEW, ERR_R_MALLOC_FAILURE);
diff --git a/ssl/sslv2conftest.c b/ssl/sslv2conftest.c
new file mode 100644
index 000000000000..1fd748b11866
--- /dev/null
+++ b/ssl/sslv2conftest.c
@@ -0,0 +1,231 @@
+/* Written by Matt Caswell for the OpenSSL Project */
+/* ====================================================================
+ * Copyright (c) 2016 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com). This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+#include <stdlib.h>
+#include <openssl/bio.h>
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+
+
+#define TOTAL_NUM_TESTS 2
+#define TEST_SSL_CTX 0
+
+#define SSLV2ON 1
+#define SSLV2OFF 0
+
+SSL_CONF_CTX *confctx;
+SSL_CTX *ctx;
+SSL *ssl;
+
+static int checksslv2(int test, int sslv2)
+{
+ int options;
+ if (test == TEST_SSL_CTX) {
+ options = SSL_CTX_get_options(ctx);
+ } else {
+ options = SSL_get_options(ssl);
+ }
+ return ((options & SSL_OP_NO_SSLv2) == 0) ^ (sslv2 == SSLV2OFF);
+}
+
+int main(int argc, char *argv[])
+{
+ BIO *err;
+ int testresult = 0;
+ int currtest;
+
+ SSL_library_init();
+ SSL_load_error_strings();
+
+ err = BIO_new_fp(stderr, BIO_NOCLOSE | BIO_FP_TEXT);
+
+ CRYPTO_malloc_debug_init();
+ CRYPTO_set_mem_debug_options(V_CRYPTO_MDEBUG_ALL);
+ CRYPTO_mem_ctrl(CRYPTO_MEM_CHECK_ON);
+
+
+ confctx = SSL_CONF_CTX_new();
+ ctx = SSL_CTX_new(SSLv23_method());
+ ssl = SSL_new(ctx);
+ if (confctx == NULL || ctx == NULL)
+ goto end;
+
+ SSL_CONF_CTX_set_flags(confctx, SSL_CONF_FLAG_FILE
+ | SSL_CONF_FLAG_CLIENT
+ | SSL_CONF_FLAG_SERVER);
+
+ /*
+ * For each test set up an SSL_CTX and SSL and see whether SSLv2 is enabled
+ * as expected after various SSL_CONF_cmd("Protocol", ...) calls.
+ */
+ for (currtest = 0; currtest < TOTAL_NUM_TESTS; currtest++) {
+ BIO_printf(err, "SSLv2 CONF Test number %d\n", currtest);
+ if (currtest == TEST_SSL_CTX)
+ SSL_CONF_CTX_set_ssl_ctx(confctx, ctx);
+ else
+ SSL_CONF_CTX_set_ssl(confctx, ssl);
+
+ /* SSLv2 should be off by default */
+ if (!checksslv2(currtest, SSLV2OFF)) {
+ BIO_printf(err, "SSLv2 CONF Test: Off by default test FAIL\n");
+ goto end;
+ }
+
+ if (SSL_CONF_cmd(confctx, "Protocol", "ALL") != 2
+ || !SSL_CONF_CTX_finish(confctx)) {
+ BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n");
+ goto end;
+ }
+
+ /* Should still be off even after ALL Protocols on */
+ if (!checksslv2(currtest, SSLV2OFF)) {
+ BIO_printf(err, "SSLv2 CONF Test: Off after config #1 FAIL\n");
+ goto end;
+ }
+
+ if (SSL_CONF_cmd(confctx, "Protocol", "SSLv2") != 2
+ || !SSL_CONF_CTX_finish(confctx)) {
+ BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n");
+ goto end;
+ }
+
+ /* Should still be off even if explicitly asked for */
+ if (!checksslv2(currtest, SSLV2OFF)) {
+ BIO_printf(err, "SSLv2 CONF Test: Off after config #2 FAIL\n");
+ goto end;
+ }
+
+ if (SSL_CONF_cmd(confctx, "Protocol", "-SSLv2") != 2
+ || !SSL_CONF_CTX_finish(confctx)) {
+ BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n");;
+ goto end;
+ }
+
+ if (!checksslv2(currtest, SSLV2OFF)) {
+ BIO_printf(err, "SSLv2 CONF Test: Off after config #3 FAIL\n");
+ goto end;
+ }
+
+ if (currtest == TEST_SSL_CTX)
+ SSL_CTX_clear_options(ctx, SSL_OP_NO_SSLv2);
+ else
+ SSL_clear_options(ssl, SSL_OP_NO_SSLv2);
+
+ if (!checksslv2(currtest, SSLV2ON)) {
+ BIO_printf(err, "SSLv2 CONF Test: On after clear FAIL\n");
+ goto end;
+ }
+
+ if (SSL_CONF_cmd(confctx, "Protocol", "ALL") != 2
+ || !SSL_CONF_CTX_finish(confctx)) {
+ BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n");
+ goto end;
+ }
+
+ /* Option has been cleared and config says have SSLv2 so should be on */
+ if (!checksslv2(currtest, SSLV2ON)) {
+ BIO_printf(err, "SSLv2 CONF Test: On after config #1 FAIL\n");
+ goto end;
+ }
+
+ if (SSL_CONF_cmd(confctx, "Protocol", "SSLv2") != 2
+ || !SSL_CONF_CTX_finish(confctx)) {
+ BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n");
+ goto end;
+ }
+
+ /* Option has been cleared and config says have SSLv2 so should be on */
+ if (!checksslv2(currtest, SSLV2ON)) {
+ BIO_printf(err, "SSLv2 CONF Test: On after config #2 FAIL\n");
+ goto end;
+ }
+
+ if (SSL_CONF_cmd(confctx, "Protocol", "-SSLv2") != 2
+ || !SSL_CONF_CTX_finish(confctx)) {
+ BIO_printf(err, "SSLv2 CONF Test: SSL_CONF command FAIL\n");
+ goto end;
+ }
+
+ /* Option has been cleared but config says no SSLv2 so should be off */
+ if (!checksslv2(currtest, SSLV2OFF)) {
+ BIO_printf(err, "SSLv2 CONF Test: Off after config #4 FAIL\n");
+ goto end;
+ }
+
+ }
+
+ testresult = 1;
+
+ end:
+ SSL_free(ssl);
+ SSL_CTX_free(ctx);
+ SSL_CONF_CTX_free(confctx);
+
+ if (!testresult) {
+ printf("SSLv2 CONF test: FAILED (Test %d)\n", currtest);
+ ERR_print_errors(err);
+ } else {
+ printf("SSLv2 CONF test: PASSED\n");
+ }
+
+ ERR_free_strings();
+ ERR_remove_thread_state(NULL);
+ EVP_cleanup();
+ CRYPTO_cleanup_all_ex_data();
+ CRYPTO_mem_leaks(err);
+ BIO_free(err);
+
+ return testresult ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/util/libeay.num b/util/libeay.num
index 7f7487df5044..e5b3c6ea841c 100755
--- a/util/libeay.num
+++ b/util/libeay.num
@@ -1807,6 +1807,8 @@ ASN1_UTCTIME_get 2350 NOEXIST::FUNCTION:
X509_REQ_digest 2362 EXIST::FUNCTION:EVP
X509_CRL_digest 2391 EXIST::FUNCTION:EVP
ASN1_STRING_clear_free 2392 EXIST::FUNCTION:
+SRP_VBASE_get1_by_user 2393 EXIST::FUNCTION:SRP
+SRP_user_pwd_free 2394 EXIST::FUNCTION:SRP
d2i_ASN1_SET_OF_PKCS7 2397 NOEXIST::FUNCTION:
X509_ALGOR_cmp 2398 EXIST::FUNCTION:
EVP_CIPHER_CTX_set_key_length 2399 EXIST::FUNCTION:
diff --git a/util/mk1mf.pl b/util/mk1mf.pl
index 99652aff918c..2629a1c5dd64 100755
--- a/util/mk1mf.pl
+++ b/util/mk1mf.pl
@@ -290,6 +290,7 @@ $cflags.=" -DOPENSSL_NO_HW" if $no_hw;
$cflags.=" -DOPENSSL_FIPS" if $fips;
$cflags.=" -DOPENSSL_NO_JPAKE" if $no_jpake;
$cflags.=" -DOPENSSL_NO_EC2M" if $no_ec2m;
+$cflags.=" -DOPENSSL_NO_WEAK_SSL_CIPHERS" if $no_weak_ssl;
$cflags.= " -DZLIB" if $zlib_opt;
$cflags.= " -DZLIB_SHARED" if $zlib_opt == 2;
@@ -482,7 +483,7 @@ EX_LIBS=$ex_libs
# The OpenSSL directory
SRC_D=$src_dir
-LINK=$link
+LINK_CMD=$link
LFLAGS=$lflags
RSC=$rsc
@@ -1205,6 +1206,7 @@ sub read_options
"no-jpake" => \$no_jpake,
"no-ec2m" => \$no_ec2m,
"no-ec_nistp_64_gcc_128" => 0,
+ "no-weak-ssl-ciphers" => \$no_weak_ssl,
"no-err" => \$no_err,
"no-sock" => \$no_sock,
"no-krb5" => \$no_krb5,
diff --git a/util/pl/BC-32.pl b/util/pl/BC-32.pl
index f7161d7bfe2d..375b0a76dfcb 100644
--- a/util/pl/BC-32.pl
+++ b/util/pl/BC-32.pl
@@ -118,7 +118,7 @@ ___
{
local($ex)=($target =~ /O_SSL/)?' $(L_CRYPTO)':'';
$ex.=' ws2_32.lib gdi32.lib';
- $ret.="\t\$(LINK) \$(MLFLAGS) $efile$target /def:ms/${Name}.def @<<\n \$(SHLIB_EX_OBJ) $objs $ex\n<<\n";
+ $ret.="\t\$(LINK_CMD) \$(MLFLAGS) $efile$target /def:ms/${Name}.def @<<\n \$(SHLIB_EX_OBJ) $objs $ex\n<<\n";
}
$ret.="\n";
return($ret);
@@ -132,7 +132,7 @@ sub do_link_rule
$file =~ s/\//$o/g if $o ne '/';
$n=&bname($target);
$ret.="$target: $files $dep_libs\n";
- $ret.="\t\$(LINK) \$(LFLAGS) $files \$(APP_EX_OBJ), $target,, $libs\n\n";
+ $ret.="\t\$(LINK_CMD) \$(LFLAGS) $files \$(APP_EX_OBJ), $target,, $libs\n\n";
return($ret);
}
diff --git a/util/pl/Mingw32.pl b/util/pl/Mingw32.pl
index fe3fb27a7860..55c85f644714 100644
--- a/util/pl/Mingw32.pl
+++ b/util/pl/Mingw32.pl
@@ -98,7 +98,7 @@ sub do_link_rule
$file =~ s/\//$o/g if $o ne '/';
$n=&bname($target);
$ret.="$target: $files $dep_libs\n";
- $ret.="\t\$(LINK) ${efile}$target \$(LFLAGS) $files $libs\n\n";
+ $ret.="\t\$(LINK_CMD) ${efile}$target \$(LFLAGS) $files $libs\n\n";
return($ret);
}
1;
diff --git a/util/pl/OS2-EMX.pl b/util/pl/OS2-EMX.pl
index 28cd1169079a..92a332e6e906 100644
--- a/util/pl/OS2-EMX.pl
+++ b/util/pl/OS2-EMX.pl
@@ -99,7 +99,7 @@ sub do_lib_rule
{
local($ex)=($target =~ /O_SSL/)?' $(L_CRYPTO)':'';
$ex.=' -lsocket';
- $ret.="\t\$(LINK) \$(SHLIB_CFLAGS) \$(MLFLAGS) $efile$target \$(SHLIB_EX_OBJ) \$(${Name}OBJ) $ex os2/${Name}.def\n";
+ $ret.="\t\$(LINK_CMD) \$(SHLIB_CFLAGS) \$(MLFLAGS) $efile$target \$(SHLIB_EX_OBJ) \$(${Name}OBJ) $ex os2/${Name}.def\n";
$ret.="\temximp -o $out_def/$name.a os2/${Name}.def\n";
$ret.="\temximp -o $out_def/$name.lib os2/${Name}.def\n\n";
}
@@ -113,7 +113,7 @@ sub do_link_rule
$file =~ s/\//$o/g if $o ne '/';
$n=&bname($target);
$ret.="$target: $files $dep_libs\n";
- $ret.="\t\$(LINK) ${efile}$target \$(CFLAG) \$(LFLAGS) $files $libs\n\n";
+ $ret.="\t\$(LINK_CMD) ${efile}$target \$(CFLAG) \$(LFLAGS) $files $libs\n\n";
return($ret);
}
diff --git a/util/pl/VC-32.pl b/util/pl/VC-32.pl
index 0f5547f056c2..dba96cba5e75 100644
--- a/util/pl/VC-32.pl
+++ b/util/pl/VC-32.pl
@@ -330,7 +330,7 @@ sub do_lib_rule
if ($fips && $target =~ /O_CRYPTO/)
{
$ret.="$target: $objs \$(PREMAIN_DSO_EXE)";
- $ret.="\n\tSET FIPS_LINK=\$(LINK)\n";
+ $ret.="\n\tSET FIPS_LINK=\$(LINK_CMD)\n";
$ret.="\tSET FIPS_CC=\$(CC)\n";
$ret.="\tSET FIPS_CC_ARGS=/Fo\$(OBJ_D)${o}fips_premain.obj \$(SHLIB_CFLAGS) -c\n";
$ret.="\tSET PREMAIN_DSO_EXE=\$(PREMAIN_DSO_EXE)\n";
@@ -344,7 +344,7 @@ sub do_lib_rule
else
{
$ret.="$target: $objs";
- $ret.="\n\t\$(LINK) \$(MLFLAGS) $efile$target $name @<<\n \$(SHLIB_EX_OBJ) $objs $ex \$(EX_LIBS)\n<<\n";
+ $ret.="\n\t\$(LINK_CMD) \$(MLFLAGS) $efile$target $name @<<\n \$(SHLIB_EX_OBJ) $objs $ex \$(EX_LIBS)\n<<\n";
}
$ret.="\tIF EXIST \$@.manifest mt -nologo -manifest \$@.manifest -outputresource:\$@;2\n\n";
}
@@ -363,7 +363,7 @@ sub do_link_rule
{
$ret.=" \$(OBJ_D)${o}applink.obj" if $shlib;
$ret.="\n";
- $ret.=" \$(LINK) \$(LFLAGS) $efile$target @<<\n\t";
+ $ret.=" \$(LINK_CMD) \$(LFLAGS) $efile$target @<<\n\t";
if ($files =~ /O_FIPSCANISTER/ && !$fipscanisterbuild) {
$ret.= "\$(EX_LIBS) ";
$ret.= "\$(OBJ_D)${o}applink.obj " if $shlib;
@@ -373,7 +373,7 @@ sub do_link_rule
elsif ($standalone == 2)
{
$ret.="\n";
- $ret.="\tSET FIPS_LINK=\$(LINK)\n";
+ $ret.="\tSET FIPS_LINK=\$(LINK_CMD)\n";
$ret.="\tSET FIPS_CC=\$(CC)\n";
$ret.="\tSET FIPS_CC_ARGS=/Fo\$(OBJ_D)${o}fips_premain.obj \$(SHLIB_CFLAGS) -c\n";
$ret.="\tSET PREMAIN_DSO_EXE=\n";
@@ -386,7 +386,7 @@ sub do_link_rule
else
{
$ret.="\n";
- $ret.="\t\$(LINK) \$(LFLAGS) $efile$target @<<\n";
+ $ret.="\t\$(LINK_CMD) \$(LFLAGS) $efile$target @<<\n";
$ret.="\t\$(APP_EX_OBJ) $files $libs\n<<\n";
}
$ret.="\tIF EXIST \$@.manifest mt -nologo -manifest \$@.manifest -outputresource:\$@;1\n\n";
diff --git a/util/pl/linux.pl b/util/pl/linux.pl
index d24f7b72913c..3362941f7bf3 100644
--- a/util/pl/linux.pl
+++ b/util/pl/linux.pl
@@ -78,7 +78,7 @@ sub do_link_rule
$file =~ s/\//$o/g if $o ne '/';
$n=&bname($target);
$ret.="$target: $files $dep_libs\n";
- $ret.="\t\$(LINK) ${efile}$target \$(LFLAGS) $files $libs\n\n";
+ $ret.="\t\$(LINK_CMD) ${efile}$target \$(LFLAGS) $files $libs\n\n";
return($ret);
}
diff --git a/util/pl/netware.pl b/util/pl/netware.pl
index fe80a9bb8990..16f4f4ee37c5 100644
--- a/util/pl/netware.pl
+++ b/util/pl/netware.pl
@@ -506,22 +506,22 @@ sub do_link_rule
if ($gnuc)
{
$ret.="\t\$(MKLIB) $lib_flags \$(TMP_D)${o}\$(E_EXE).a \$(filter-out \$(TMP_D)${o}\$(E_EXE)${obj},$files)\n";
- $ret.="\t\$(LINK) \$(LFLAGS) $def_file2\n";
+ $ret.="\t\$(LINK_CMD) \$(LFLAGS) $def_file2\n";
$ret.="\t\@$mv \$(E_EXE)2.nlm \$(TEST_D)\n";
}
else
{
- $ret.="\t\$(LINK) \$(LFLAGS) $def_file2 $files \"$prelude\" $libs -o $target2\n";
+ $ret.="\t\$(LINK_CMD) \$(LFLAGS) $def_file2 $files \"$prelude\" $libs -o $target2\n";
}
}
if ($gnuc)
{
- $ret.="\t\$(LINK) \$(LFLAGS) $def_file\n";
+ $ret.="\t\$(LINK_CMD) \$(LFLAGS) $def_file\n";
$ret.="\t\@$mv \$(\@F) \$(TEST_D)\n";
}
else
{
- $ret.="\t\$(LINK) \$(LFLAGS) $def_file $files \"$prelude\" $libs -o $target\n";
+ $ret.="\t\$(LINK_CMD) \$(LFLAGS) $def_file $files \"$prelude\" $libs -o $target\n";
}
$ret.="\n";
diff --git a/util/pl/ultrix.pl b/util/pl/ultrix.pl
index ea370c71f968..0c76c83b4a73 100644
--- a/util/pl/ultrix.pl
+++ b/util/pl/ultrix.pl
@@ -31,7 +31,7 @@ sub do_link_rule
$file =~ s/\//$o/g if $o ne '/';
$n=&bname($target);
$ret.="$target: $files $dep_libs\n";
- $ret.="\t\$(LINK) ${efile}$target \$(LFLAGS) $files $libs\n\n";
+ $ret.="\t\$(LINK_CMD) ${efile}$target \$(LFLAGS) $files $libs\n\n";
return($ret);
}
diff --git a/util/pl/unix.pl b/util/pl/unix.pl
index 1d4e9dc5df19..8818c5bcb1b2 100644
--- a/util/pl/unix.pl
+++ b/util/pl/unix.pl
@@ -164,7 +164,7 @@ sub do_link_rule
$file =~ s/\//$o/g if $o ne '/';
$n=&bname($target);
$ret.="$target: $files $dep_libs\n";
- $ret.="\t\$(LINK) ${efile}$target \$(LFLAGS) $files $libs\n\n";
+ $ret.="\t\$(LINK_CMD) ${efile}$target \$(LFLAGS) $files $libs\n\n";
return($ret);
}