aboutsummaryrefslogtreecommitdiff
path: root/crypto
diff options
context:
space:
mode:
Diffstat (limited to 'crypto')
-rw-r--r--crypto/Makefile30
-rw-r--r--crypto/aes/Makefile5
-rw-r--r--crypto/aes/aes.h4
-rw-r--r--crypto/aes/aes_cbc.c2
-rw-r--r--crypto/aes/aes_core.c8
-rw-r--r--crypto/aes/aes_x86core.c1063
-rwxr-xr-xcrypto/aes/asm/aes-586.pl3
-rwxr-xr-xcrypto/aes/asm/aes-armv4.pl1030
-rwxr-xr-xcrypto/aes/asm/aes-ppc.pl1176
-rwxr-xr-xcrypto/aes/asm/aes-s390x.pl1333
-rwxr-xr-xcrypto/aes/asm/aes-sparcv9.pl1181
-rwxr-xr-xcrypto/aes/asm/aes-x86_64.pl7
-rw-r--r--crypto/asn1/Makefile469
-rw-r--r--crypto/asn1/a_bytes.c2
-rw-r--r--crypto/asn1/a_mbstr.c2
-rw-r--r--crypto/asn1/a_sign.c7
-rw-r--r--crypto/asn1/a_strex.c2
-rw-r--r--crypto/asn1/a_strnid.c2
-rw-r--r--crypto/asn1/a_verify.c7
-rw-r--r--crypto/asn1/ameth_lib.c446
-rw-r--r--crypto/asn1/asn1.h3
-rw-r--r--crypto/asn1/asn1_err.c2
-rw-r--r--crypto/asn1/asn1_gen.c2
-rw-r--r--crypto/asn1/asn1_locl.h134
-rw-r--r--crypto/asn1/asn1_par.c2
-rw-r--r--crypto/asn1/asn1t.h2
-rw-r--r--crypto/asn1/asn_mime.c2
-rw-r--r--crypto/asn1/asn_moid.c2
-rw-r--r--crypto/asn1/asn_pack.c2
-rw-r--r--crypto/asn1/bio_asn1.c495
-rw-r--r--crypto/asn1/bio_ndef.c246
-rw-r--r--crypto/asn1/nsseq.c2
-rw-r--r--crypto/asn1/p5_pbe.c2
-rw-r--r--crypto/asn1/p5_pbev2.c2
-rw-r--r--crypto/asn1/p8_pkey.c2
-rw-r--r--crypto/asn1/t_bitst.c2
-rw-r--r--crypto/asn1/t_crl.c2
-rw-r--r--crypto/asn1/t_spki.c2
-rw-r--r--crypto/asn1/t_x509.c6
-rw-r--r--crypto/asn1/t_x509a.c2
-rw-r--r--crypto/asn1/tasn_dec.c46
-rw-r--r--crypto/asn1/tasn_enc.c2
-rw-r--r--crypto/asn1/tasn_fre.c2
-rw-r--r--crypto/asn1/tasn_new.c2
-rw-r--r--crypto/asn1/tasn_prn.c2
-rw-r--r--crypto/asn1/tasn_typ.c2
-rw-r--r--crypto/asn1/tasn_utl.c2
-rw-r--r--crypto/asn1/x_algor.c2
-rw-r--r--crypto/asn1/x_bignum.c2
-rw-r--r--crypto/asn1/x_exten.c2
-rw-r--r--crypto/asn1/x_long.c2
-rw-r--r--crypto/asn1/x_nx509.c72
-rw-r--r--crypto/asn1/x_x509a.c2
-rw-r--r--crypto/bf/Makefile10
-rw-r--r--crypto/bf/bf_skey.c7
-rw-r--r--crypto/bf/blowfish.h4
-rw-r--r--crypto/bio/Makefile2
-rw-r--r--crypto/bio/bss_bio.c2
-rw-r--r--crypto/bio/bss_file.c2
-rw-r--r--crypto/bio/bss_mem.c22
-rw-r--r--crypto/bio/bss_sock.c5
-rw-r--r--crypto/bn/Makefile16
-rwxr-xr-xcrypto/bn/asm/alpha-mont.pl317
-rwxr-xr-xcrypto/bn/asm/armv4-mont.pl200
-rwxr-xr-xcrypto/bn/asm/mips3-mont.pl327
-rwxr-xr-xcrypto/bn/asm/ppc-mont.pl323
-rwxr-xr-xcrypto/bn/asm/ppc64-mont.pl918
-rwxr-xr-xcrypto/bn/asm/s390x-mont.pl225
-rwxr-xr-xcrypto/bn/asm/s390x.S678
-rwxr-xr-xcrypto/bn/asm/sparcv9-mont.pl606
-rwxr-xr-xcrypto/bn/asm/sparcv9a-mont.pl882
-rwxr-xr-xcrypto/bn/asm/via-mont.pl242
-rwxr-xr-xcrypto/bn/asm/x86-mont.pl591
-rw-r--r--crypto/bn/bn.h15
-rw-r--r--crypto/bn/bn_lib.c19
-rw-r--r--crypto/bn/bn_nist.c706
-rw-r--r--crypto/bn/bn_opt.c87
-rw-r--r--crypto/bn/bn_rand.c6
-rw-r--r--crypto/bn/bn_shift.c2
-rw-r--r--crypto/bn/bn_x931p.c272
-rw-r--r--crypto/bn/bntest.c4
-rw-r--r--crypto/buffer/Makefile13
-rw-r--r--crypto/buffer/buf_str.c116
-rw-r--r--crypto/buffer/buffer.c58
-rw-r--r--crypto/camellia/Makefile2
-rwxr-xr-xcrypto/camellia/asm/cmll-x86.pl1138
-rwxr-xr-xcrypto/camellia/asm/cmll-x86_64.pl1080
-rw-r--r--crypto/camellia/camellia.h5
-rw-r--r--crypto/camellia/cmll_misc.c13
-rw-r--r--crypto/cast/Makefile9
-rw-r--r--crypto/cast/c_skey.c7
-rw-r--r--crypto/cast/cast.h4
-rw-r--r--crypto/cms/Makefile2
-rw-r--r--crypto/cms/cms_sd.c2
-rw-r--r--crypto/cms/cms_smime.c7
-rw-r--r--crypto/comp/Makefile2
-rw-r--r--crypto/comp/c_zlib.c4
-rw-r--r--crypto/conf/Makefile17
-rw-r--r--crypto/conf/conf_mall.c4
-rw-r--r--crypto/conf/conf_mod.c2
-rw-r--r--crypto/conf/conf_sap.c2
-rw-r--r--crypto/cryptlib.c351
-rw-r--r--crypto/crypto.h84
-rw-r--r--crypto/des/Makefile33
-rw-r--r--crypto/des/asm/des_enc.m4345
-rw-r--r--crypto/des/des_enc.c4
-rw-r--r--crypto/des/des_lib.c106
-rw-r--r--crypto/des/ecb_enc.c47
-rw-r--r--crypto/des/enc_read.c4
-rw-r--r--crypto/des/enc_writ.c4
-rw-r--r--crypto/des/set_key.c9
-rw-r--r--crypto/des/times/usparc.cc2
-rw-r--r--crypto/dh/Makefile18
-rw-r--r--crypto/dh/dh.h11
-rw-r--r--crypto/dh/dh_asn1.c2
-rw-r--r--crypto/dh/dh_check.c4
-rw-r--r--crypto/dh/dh_err.c6
-rw-r--r--crypto/dh/dh_gen.c4
-rw-r--r--crypto/dh/dh_key.c4
-rw-r--r--crypto/dsa/Makefile76
-rw-r--r--crypto/dsa/dsa.h39
-rw-r--r--crypto/dsa/dsa_asn1.c82
-rw-r--r--crypto/dsa/dsa_err.c10
-rw-r--r--crypto/dsa/dsa_gen.c3
-rw-r--r--crypto/dsa/dsa_key.c4
-rw-r--r--crypto/dsa/dsa_lib.c49
-rw-r--r--crypto/dsa/dsa_ossl.c3
-rw-r--r--crypto/dsa/dsa_sign.c31
-rw-r--r--crypto/dsa/dsa_utl.c95
-rw-r--r--crypto/dsa/dsa_vrf.c32
-rw-r--r--crypto/dso/Makefile2
-rw-r--r--crypto/dyn_lck.c428
-rw-r--r--crypto/ec/Makefile2
-rw-r--r--crypto/ec/ec_key.c16
-rw-r--r--crypto/ecdh/Makefile33
-rw-r--r--crypto/ecdsa/Makefile37
-rw-r--r--crypto/engine/Makefile231
-rw-r--r--crypto/engine/eng_cnf.c18
-rw-r--r--crypto/engine/eng_padlock.c4
-rw-r--r--crypto/engine/enginetest.c2
-rw-r--r--crypto/err/Makefile49
-rw-r--r--crypto/err/err.c782
-rw-r--r--crypto/err/err.h8
-rw-r--r--crypto/err/err_all.c13
-rw-r--r--crypto/err/err_bio.c75
-rw-r--r--crypto/err/err_def.c665
-rw-r--r--crypto/err/err_prn.c70
-rw-r--r--crypto/err/err_str.c295
-rw-r--r--crypto/err/openssl.ec2
-rw-r--r--crypto/evp/Makefile687
-rw-r--r--crypto/evp/bio_md.c9
-rw-r--r--crypto/evp/dig_eng.c180
-rw-r--r--crypto/evp/digest.c154
-rw-r--r--crypto/evp/e_aes.c35
-rw-r--r--crypto/evp/e_camellia.c2
-rw-r--r--crypto/evp/e_des.c9
-rw-r--r--crypto/evp/e_des3.c29
-rw-r--r--crypto/evp/e_null.c2
-rw-r--r--crypto/evp/e_rc4.c1
-rw-r--r--crypto/evp/enc_min.c390
-rw-r--r--crypto/evp/evp.h78
-rw-r--r--crypto/evp/evp_acnf.c2
-rw-r--r--crypto/evp/evp_cnf.c125
-rw-r--r--crypto/evp/evp_enc.c254
-rw-r--r--crypto/evp/evp_err.c16
-rw-r--r--crypto/evp/evp_lib.c39
-rw-r--r--crypto/evp/evp_locl.h30
-rw-r--r--crypto/evp/evp_pbe.c2
-rw-r--r--crypto/evp/evp_pkey.c2
-rw-r--r--crypto/evp/evp_test.c10
-rw-r--r--crypto/evp/m_dss.c2
-rw-r--r--crypto/evp/m_dss1.c3
-rw-r--r--crypto/evp/m_md2.c1
-rw-r--r--crypto/evp/m_md4.c1
-rw-r--r--crypto/evp/m_md5.c1
-rw-r--r--crypto/evp/m_mdc2.c1
-rw-r--r--crypto/evp/m_sha.c1
-rw-r--r--crypto/evp/m_sha1.c7
-rw-r--r--crypto/evp/names.c7
-rw-r--r--crypto/evp/p5_crpt.c2
-rw-r--r--crypto/evp/p5_crpt2.c2
-rw-r--r--crypto/evp/p_sign.c24
-rw-r--r--crypto/evp/p_verify.c26
-rw-r--r--crypto/fips_err.c7
-rw-r--r--crypto/fips_err.h137
-rw-r--r--crypto/hmac/Makefile15
-rw-r--r--crypto/hmac/hmac.c3
-rw-r--r--crypto/idea/Makefile8
-rw-r--r--crypto/idea/i_skey.c17
-rw-r--r--crypto/idea/idea.h3
-rw-r--r--crypto/jpake/Makefile64
-rw-r--r--crypto/jpake/jpake.c483
-rw-r--r--crypto/jpake/jpake.h129
-rw-r--r--crypto/jpake/jpake_err.c105
-rw-r--r--crypto/jpake/jpaketest.c192
-rw-r--r--crypto/krb5/Makefile2
-rw-r--r--crypto/lhash/Makefile2
-rw-r--r--crypto/md2/Makefile6
-rw-r--r--crypto/md2/md2.h3
-rw-r--r--crypto/md2/md2_dgst.c7
-rw-r--r--crypto/md4/Makefile12
-rw-r--r--crypto/md4/md4.h3
-rw-r--r--crypto/md4/md4_dgst.c7
-rw-r--r--crypto/md5/Makefile12
-rw-r--r--crypto/md5/md5.h3
-rw-r--r--crypto/md5/md5_dgst.c7
-rw-r--r--crypto/mdc2/Makefile2
-rw-r--r--crypto/mdc2/mdc2.h4
-rw-r--r--crypto/mdc2/mdc2dgst.c7
-rw-r--r--crypto/mem.c47
-rw-r--r--crypto/mem_dbg.c28
-rw-r--r--crypto/o_init.c86
-rw-r--r--crypto/objects/Makefile2
-rw-r--r--crypto/objects/obj_dat.h22
-rw-r--r--crypto/objects/obj_dat.pl4
-rw-r--r--crypto/objects/obj_mac.h10
-rw-r--r--crypto/objects/obj_mac.num2
-rw-r--r--crypto/objects/objects.txt3
-rw-r--r--crypto/ocsp/Makefile81
-rw-r--r--crypto/ocsp/ocsp_asn.c2
-rw-r--r--crypto/ocsp/ocsp_ht.c5
-rwxr-xr-xcrypto/ocsp/ocsp_srv.c2
-rw-r--r--crypto/ocsp/ocsp_vfy.c2
-rw-r--r--crypto/opensslconf.h29
-rw-r--r--crypto/opensslconf.h.in15
-rw-r--r--crypto/opensslv.h6
-rw-r--r--crypto/ossl_typ.h2
-rw-r--r--crypto/pem/Makefile134
-rw-r--r--crypto/pem/pem.h5
-rw-r--r--crypto/pem/pem_all.c174
-rw-r--r--crypto/pem/pem_lib.c3
-rw-r--r--crypto/pem/pem_x509.c2
-rw-r--r--crypto/pem/pem_xaux.c2
-rw-r--r--crypto/pkcs12/Makefile177
-rw-r--r--crypto/pkcs12/p12_add.c2
-rw-r--r--crypto/pkcs12/p12_asn.c2
-rw-r--r--crypto/pkcs12/p12_attr.c2
-rw-r--r--crypto/pkcs12/p12_crpt.c2
-rw-r--r--crypto/pkcs12/p12_crt.c16
-rw-r--r--crypto/pkcs12/p12_decr.c2
-rw-r--r--crypto/pkcs12/p12_init.c2
-rw-r--r--crypto/pkcs12/p12_key.c2
-rw-r--r--crypto/pkcs12/p12_kiss.c2
-rw-r--r--crypto/pkcs12/p12_mutl.c2
-rw-r--r--crypto/pkcs12/p12_npas.c2
-rw-r--r--crypto/pkcs12/p12_p8d.c2
-rw-r--r--crypto/pkcs12/p12_p8e.c2
-rw-r--r--crypto/pkcs12/p12_utl.c2
-rw-r--r--crypto/pkcs12/pkcs12.h2
-rw-r--r--crypto/pkcs7/Makefile43
-rw-r--r--crypto/pkcs7/pk7_asn1.c2
-rw-r--r--crypto/pkcs7/pk7_attr.c2
-rw-r--r--crypto/pkcs7/pk7_mime.c2
-rw-r--r--crypto/pkcs7/pk7_smime.c6
-rwxr-xr-xcrypto/ppccpuid.pl94
-rw-r--r--crypto/pqueue/Makefile2
-rw-r--r--crypto/pqueue/pq_compat.h5
-rw-r--r--crypto/rand/Makefile71
-rw-r--r--crypto/rand/md_rand.c12
-rw-r--r--crypto/rand/rand.h29
-rw-r--r--crypto/rand/rand_eng.c152
-rw-r--r--crypto/rand/rand_err.c20
-rwxr-xr-xcrypto/rand/rand_lcl.h11
-rw-r--r--crypto/rand/rand_lib.c71
-rw-r--r--crypto/rand/rand_unix.c2
-rw-r--r--crypto/rand/randfile.c66
-rw-r--r--crypto/rc2/Makefile8
-rw-r--r--crypto/rc2/rc2.h4
-rw-r--r--crypto/rc2/rc2_skey.c17
-rw-r--r--crypto/rc4/Makefile18
-rwxr-xr-xcrypto/rc4/asm/rc4-x86_64.pl2
-rw-r--r--crypto/rc4/rc4.h3
-rw-r--r--crypto/rc4/rc4_fblk.c75
-rw-r--r--crypto/rc4/rc4_skey.c16
-rw-r--r--crypto/rc5/Makefile2
-rw-r--r--crypto/rc5/rc5.h5
-rw-r--r--crypto/rc5/rc5_skey.c17
-rw-r--r--crypto/ripemd/Makefile11
-rw-r--r--crypto/ripemd/README2
-rw-r--r--crypto/ripemd/ripemd.h4
-rw-r--r--crypto/ripemd/rmd_dgst.c7
-rw-r--r--crypto/ripemd/rmd_locl.h2
-rw-r--r--crypto/rsa/Makefile78
-rw-r--r--crypto/rsa/rsa.h39
-rw-r--r--crypto/rsa/rsa_asn1.c2
-rw-r--r--crypto/rsa/rsa_eay.c2
-rw-r--r--crypto/rsa/rsa_eng.c348
-rw-r--r--crypto/rsa/rsa_err.c10
-rw-r--r--crypto/rsa/rsa_gen.c3
-rw-r--r--crypto/rsa/rsa_lib.c272
-rw-r--r--crypto/rsa/rsa_null.c2
-rw-r--r--crypto/rsa/rsa_oaep.c2
-rw-r--r--crypto/rsa/rsa_pss.c6
-rw-r--r--crypto/rsa/rsa_sign.c24
-rw-r--r--crypto/rsa/rsa_x931.c2
-rw-r--r--crypto/rsa/rsa_x931g.c255
-rw-r--r--crypto/s390xcpuid.S90
-rw-r--r--crypto/seed/Makefile2
-rw-r--r--crypto/sha/Makefile35
-rw-r--r--crypto/sha/asm/sha1-ia64.pl1
-rw-r--r--crypto/sha/sha.h3
-rw-r--r--crypto/sha/sha1_one.c2
-rw-r--r--crypto/sha/sha1dgst.c4
-rw-r--r--crypto/sha/sha256.c10
-rw-r--r--crypto/sha/sha512.c12
-rw-r--r--crypto/sha/sha_dgst.c6
-rw-r--r--crypto/sha/sha_locl.h7
-rw-r--r--crypto/sparcv9cap.c154
-rw-r--r--crypto/stack/Makefile2
-rw-r--r--crypto/store/Makefile18
-rw-r--r--crypto/symhacks.h9
-rw-r--r--crypto/txt_db/Makefile2
-rw-r--r--crypto/ui/Makefile2
-rw-r--r--crypto/ui/ui_lib.c1
-rw-r--r--crypto/ui/ui_openssl.c2
-rw-r--r--crypto/x509/Makefile199
-rw-r--r--crypto/x509/by_dir.c4
-rw-r--r--crypto/x509/x509_cmp.c11
-rw-r--r--crypto/x509/x509_trs.c2
-rw-r--r--crypto/x509/x509_vpm.c18
-rw-r--r--crypto/x509/x509cset.c2
-rw-r--r--crypto/x509/x509spki.c2
-rw-r--r--crypto/x509v3/Makefile457
-rw-r--r--crypto/x509v3/ext_dat.h2
-rw-r--r--crypto/x509v3/pcy_cache.c2
-rw-r--r--crypto/x509v3/pcy_data.c2
-rw-r--r--crypto/x509v3/pcy_int.h2
-rw-r--r--crypto/x509v3/pcy_lib.c2
-rw-r--r--crypto/x509v3/pcy_map.c2
-rw-r--r--crypto/x509v3/pcy_node.c2
-rw-r--r--crypto/x509v3/pcy_tree.c2
-rw-r--r--crypto/x509v3/tabtest.c2
-rw-r--r--crypto/x509v3/v3_addr.c34
-rw-r--r--crypto/x509v3/v3_akey.c2
-rw-r--r--crypto/x509v3/v3_akeya.c2
-rw-r--r--crypto/x509v3/v3_alt.c5
-rw-r--r--crypto/x509v3/v3_bcons.c2
-rw-r--r--crypto/x509v3/v3_bitst.c2
-rw-r--r--crypto/x509v3/v3_conf.c2
-rw-r--r--crypto/x509v3/v3_cpols.c9
-rw-r--r--crypto/x509v3/v3_crld.c2
-rw-r--r--crypto/x509v3/v3_enum.c2
-rw-r--r--crypto/x509v3/v3_extku.c2
-rw-r--r--crypto/x509v3/v3_genn.c2
-rw-r--r--crypto/x509v3/v3_ia5.c2
-rw-r--r--crypto/x509v3/v3_info.c2
-rw-r--r--crypto/x509v3/v3_int.c2
-rw-r--r--crypto/x509v3/v3_lib.c2
-rw-r--r--crypto/x509v3/v3_ncons.c2
-rw-r--r--crypto/x509v3/v3_ocsp.c2
-rw-r--r--crypto/x509v3/v3_pcons.c2
-rw-r--r--crypto/x509v3/v3_pku.c2
-rw-r--r--crypto/x509v3/v3_pmaps.c2
-rw-r--r--crypto/x509v3/v3_prn.c2
-rw-r--r--crypto/x509v3/v3_purp.c2
-rw-r--r--crypto/x509v3/v3_skey.c2
-rw-r--r--crypto/x509v3/v3_sxnet.c2
-rw-r--r--crypto/x509v3/v3_utl.c25
-rw-r--r--crypto/x509v3/v3conf.c2
-rw-r--r--crypto/x509v3/v3prin.c2
-rw-r--r--crypto/x509v3/x509v3.h2
361 files changed, 24367 insertions, 3926 deletions
diff --git a/crypto/Makefile b/crypto/Makefile
index bb8eaa036c55..6557f2b4e1dc 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -33,8 +33,8 @@ GENERAL=Makefile README crypto-lib.com install.com
LIB= $(TOP)/libcrypto.a
SHARED_LIB= libcrypto$(SHLIB_EXT)
-LIBSRC= cryptlib.c mem.c mem_clr.c mem_dbg.c cversion.c ex_data.c tmdiff.c cpt_err.c ebcdic.c uid.c o_time.c o_str.c o_dir.c
-LIBOBJ= cryptlib.o mem.o mem_clr.o mem_dbg.o cversion.o ex_data.o tmdiff.o cpt_err.o ebcdic.o uid.o o_time.o o_str.o o_dir.o $(CPUID_OBJ)
+LIBSRC= cryptlib.c dyn_lck.c mem.c mem_clr.c mem_dbg.c cversion.c ex_data.c tmdiff.c cpt_err.c ebcdic.c uid.c o_time.c o_str.c o_dir.c o_init.c fips_err.c
+LIBOBJ= cryptlib.o dyn_lck.o mem.o mem_clr.o mem_dbg.o cversion.o ex_data.o tmdiff.o cpt_err.o ebcdic.o uid.o o_time.o o_str.o o_dir.o o_init.o fips_err.o $(CPUID_OBJ)
SRC= $(LIBSRC)
@@ -47,7 +47,7 @@ ALL= $(GENERAL) $(SRC) $(HEADER)
top:
@(cd ..; $(MAKE) DIRS=$(DIR) all)
-all: shared
+all: lib
buildinf.h: ../Makefile
( echo "#ifndef MK1MF_BUILD"; \
@@ -95,10 +95,10 @@ links:
@target=links; $(RECURSIVE_MAKE)
# lib: and $(LIB): are splitted to avoid end-less loop
-lib: $(LIB)
+lib: buildinf.h $(LIB) subdirs
@touch lib
$(LIB): $(LIBOBJ)
- $(AR) $(LIB) $(LIBOBJ)
+ $(ARX) $(LIB) $(LIBOBJ)
$(RANLIB) $(LIB) || echo Never mind.
shared: buildinf.h lib subdirs
@@ -159,6 +159,13 @@ cversion.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h
cversion.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h
cversion.o: ../include/openssl/stack.h ../include/openssl/symhacks.h buildinf.h
cversion.o: cryptlib.h cversion.c
+dyn_lck.o: ../e_os.h ../include/openssl/bio.h ../include/openssl/buffer.h
+dyn_lck.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h
+dyn_lck.o: ../include/openssl/err.h ../include/openssl/lhash.h
+dyn_lck.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h
+dyn_lck.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h
+dyn_lck.o: ../include/openssl/stack.h ../include/openssl/symhacks.h cryptlib.h
+dyn_lck.o: dyn_lck.c
ebcdic.o: ../include/openssl/e_os2.h ../include/openssl/opensslconf.h ebcdic.c
ex_data.o: ../e_os.h ../include/openssl/bio.h ../include/openssl/buffer.h
ex_data.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h
@@ -167,6 +174,13 @@ ex_data.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h
ex_data.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h
ex_data.o: ../include/openssl/stack.h ../include/openssl/symhacks.h cryptlib.h
ex_data.o: ex_data.c
+fips_err.o: ../include/openssl/bio.h ../include/openssl/crypto.h
+fips_err.o: ../include/openssl/e_os2.h ../include/openssl/err.h
+fips_err.o: ../include/openssl/fips.h ../include/openssl/lhash.h
+fips_err.o: ../include/openssl/opensslconf.h ../include/openssl/opensslv.h
+fips_err.o: ../include/openssl/ossl_typ.h ../include/openssl/safestack.h
+fips_err.o: ../include/openssl/stack.h ../include/openssl/symhacks.h fips_err.c
+fips_err.o: fips_err.h
mem.o: ../e_os.h ../include/openssl/bio.h ../include/openssl/buffer.h
mem.o: ../include/openssl/crypto.h ../include/openssl/e_os2.h
mem.o: ../include/openssl/err.h ../include/openssl/lhash.h
@@ -187,6 +201,12 @@ mem_dbg.o: ../include/openssl/stack.h ../include/openssl/symhacks.h cryptlib.h
mem_dbg.o: mem_dbg.c
o_dir.o: ../e_os.h ../include/openssl/e_os2.h ../include/openssl/opensslconf.h
o_dir.o: LPdir_unix.c o_dir.c o_dir.h
+o_init.o: ../e_os.h ../include/openssl/bio.h ../include/openssl/crypto.h
+o_init.o: ../include/openssl/e_os2.h ../include/openssl/err.h
+o_init.o: ../include/openssl/lhash.h ../include/openssl/opensslconf.h
+o_init.o: ../include/openssl/opensslv.h ../include/openssl/ossl_typ.h
+o_init.o: ../include/openssl/safestack.h ../include/openssl/stack.h
+o_init.o: ../include/openssl/symhacks.h o_init.c
o_str.o: ../e_os.h ../include/openssl/e_os2.h ../include/openssl/opensslconf.h
o_str.o: o_str.c o_str.h
o_time.o: ../include/openssl/e_os2.h ../include/openssl/opensslconf.h o_time.c
diff --git a/crypto/aes/Makefile b/crypto/aes/Makefile
index 22c7203dbb2e..9d174f4c3ef8 100644
--- a/crypto/aes/Makefile
+++ b/crypto/aes/Makefile
@@ -41,7 +41,7 @@ top:
all: lib
lib: $(LIBOBJ)
- $(AR) $(LIB) $(LIBOBJ)
+ $(ARX) $(LIB) $(LIBOBJ)
$(RANLIB) $(LIB) || echo Never mind.
@touch lib
@@ -103,7 +103,8 @@ aes_cfb.o: ../../e_os.h ../../include/openssl/aes.h
aes_cfb.o: ../../include/openssl/e_os2.h ../../include/openssl/opensslconf.h
aes_cfb.o: aes_cfb.c aes_locl.h
aes_core.o: ../../include/openssl/aes.h ../../include/openssl/e_os2.h
-aes_core.o: ../../include/openssl/opensslconf.h aes_core.c aes_locl.h
+aes_core.o: ../../include/openssl/fips.h ../../include/openssl/opensslconf.h
+aes_core.o: aes_core.c aes_locl.h
aes_ctr.o: ../../include/openssl/aes.h ../../include/openssl/e_os2.h
aes_ctr.o: ../../include/openssl/opensslconf.h aes_ctr.c aes_locl.h
aes_ecb.o: ../../include/openssl/aes.h ../../include/openssl/e_os2.h
diff --git a/crypto/aes/aes.h b/crypto/aes/aes.h
index baf0222d49d3..450f2b4051b2 100644
--- a/crypto/aes/aes.h
+++ b/crypto/aes/aes.h
@@ -66,6 +66,10 @@
#define AES_MAXNR 14
#define AES_BLOCK_SIZE 16
+#ifdef OPENSSL_FIPS
+#define FIPS_AES_SIZE_T int
+#endif
+
#ifdef __cplusplus
extern "C" {
#endif
diff --git a/crypto/aes/aes_cbc.c b/crypto/aes/aes_cbc.c
index d2ba6bcdb465..373864cd4bdb 100644
--- a/crypto/aes/aes_cbc.c
+++ b/crypto/aes/aes_cbc.c
@@ -59,6 +59,7 @@
#include <openssl/aes.h>
#include "aes_locl.h"
+#if !defined(OPENSSL_FIPS_AES_ASM)
void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
const unsigned long length, const AES_KEY *key,
unsigned char *ivec, const int enc) {
@@ -129,3 +130,4 @@ void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
}
}
}
+#endif
diff --git a/crypto/aes/aes_core.c b/crypto/aes/aes_core.c
index 3a80e18b0a48..cffdd4daec4d 100644
--- a/crypto/aes/aes_core.c
+++ b/crypto/aes/aes_core.c
@@ -37,6 +37,10 @@
#include <stdlib.h>
#include <openssl/aes.h>
+#ifdef OPENSSL_FIPS
+#include <openssl/fips.h>
+#endif
+
#include "aes_locl.h"
/*
@@ -631,6 +635,10 @@ int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
int i = 0;
u32 temp;
+#ifdef OPENSSL_FIPS
+ FIPS_selftest_check();
+#endif
+
if (!userKey || !key)
return -1;
if (bits != 128 && bits != 192 && bits != 256)
diff --git a/crypto/aes/aes_x86core.c b/crypto/aes/aes_x86core.c
new file mode 100644
index 000000000000..d323e265c05a
--- /dev/null
+++ b/crypto/aes/aes_x86core.c
@@ -0,0 +1,1063 @@
+/* crypto/aes/aes_core.c -*- mode:C; c-file-style: "eay" -*- */
+/**
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This is experimental x86[_64] derivative. It assumes little-endian
+ * byte order and expects CPU to sustain unaligned memory references.
+ * It is used as playground for cache-time attack mitigations and
+ * serves as reference C implementation for x86[_64] assembler.
+ *
+ * <appro@fy.chalmers.se>
+ */
+
+
+#ifndef AES_DEBUG
+# ifndef NDEBUG
+# define NDEBUG
+# endif
+#endif
+#include <assert.h>
+
+#include <stdlib.h>
+#include <openssl/aes.h>
+#include "aes_locl.h"
+
+/*
+ * These two parameters control which table, 256-byte or 2KB, is
+ * referenced in outer and respectively inner rounds.
+ */
+#define AES_COMPACT_IN_OUTER_ROUNDS
+#ifdef AES_COMPACT_IN_OUTER_ROUNDS
+/* AES_COMPACT_IN_OUTER_ROUNDS costs ~30% in performance, while
+ * adding AES_COMPACT_IN_INNER_ROUNDS reduces benchmark *further*
+ * by factor of ~2. */
+# undef AES_COMPACT_IN_INNER_ROUNDS
+#endif
+
+#if 1
+static void prefetch256(const void *table)
+{
+ volatile unsigned long *t=(void *)table,ret;
+ unsigned long sum;
+ int i;
+
+ /* 32 is common least cache-line size */
+ for (sum=0,i=0;i<256/sizeof(t[0]);i+=32/sizeof(t[0])) sum ^= t[i];
+
+ ret = sum;
+}
+#else
+# define prefetch256(t)
+#endif
+
+#undef GETU32
+#define GETU32(p) (*((u32*)(p)))
+
+#if (defined(_WIN32) || defined(_WIN64)) && !defined(__MINGW32__)
+typedef unsigned __int64 u64;
+#define U64(C) C##UI64
+#elif defined(__arch64__)
+typedef unsigned long u64;
+#define U64(C) C##UL
+#else
+typedef unsigned long long u64;
+#define U64(C) C##ULL
+#endif
+
+#undef ROTATE
+#if defined(_MSC_VER) || defined(__ICC)
+# define ROTATE(a,n) _lrotl(a,n)
+#elif defined(__GNUC__) && __GNUC__>=2
+# if defined(__i386) || defined(__i386__) || defined(__x86_64) || defined(__x86_64__)
+# define ROTATE(a,n) ({ register unsigned int ret; \
+ asm ( \
+ "roll %1,%0" \
+ : "=r"(ret) \
+ : "I"(n), "0"(a) \
+ : "cc"); \
+ ret; \
+ })
+# endif
+#endif
+/*
+Te [x] = S [x].[02, 01, 01, 03, 02, 01, 01, 03];
+Te0[x] = S [x].[02, 01, 01, 03];
+Te1[x] = S [x].[03, 02, 01, 01];
+Te2[x] = S [x].[01, 03, 02, 01];
+Te3[x] = S [x].[01, 01, 03, 02];
+*/
+#define Te0 (u32)((u64*)((u8*)Te+0))
+#define Te1 (u32)((u64*)((u8*)Te+3))
+#define Te2 (u32)((u64*)((u8*)Te+2))
+#define Te3 (u32)((u64*)((u8*)Te+1))
+/*
+Td [x] = Si[x].[0e, 09, 0d, 0b, 0e, 09, 0d, 0b];
+Td0[x] = Si[x].[0e, 09, 0d, 0b];
+Td1[x] = Si[x].[0b, 0e, 09, 0d];
+Td2[x] = Si[x].[0d, 0b, 0e, 09];
+Td3[x] = Si[x].[09, 0d, 0b, 0e];
+Td4[x] = Si[x].[01];
+*/
+#define Td0 (u32)((u64*)((u8*)Td+0))
+#define Td1 (u32)((u64*)((u8*)Td+3))
+#define Td2 (u32)((u64*)((u8*)Td+2))
+#define Td3 (u32)((u64*)((u8*)Td+1))
+
+static const u64 Te[256] = {
+ U64(0xa56363c6a56363c6), U64(0x847c7cf8847c7cf8),
+ U64(0x997777ee997777ee), U64(0x8d7b7bf68d7b7bf6),
+ U64(0x0df2f2ff0df2f2ff), U64(0xbd6b6bd6bd6b6bd6),
+ U64(0xb16f6fdeb16f6fde), U64(0x54c5c59154c5c591),
+ U64(0x5030306050303060), U64(0x0301010203010102),
+ U64(0xa96767cea96767ce), U64(0x7d2b2b567d2b2b56),
+ U64(0x19fefee719fefee7), U64(0x62d7d7b562d7d7b5),
+ U64(0xe6abab4de6abab4d), U64(0x9a7676ec9a7676ec),
+ U64(0x45caca8f45caca8f), U64(0x9d82821f9d82821f),
+ U64(0x40c9c98940c9c989), U64(0x877d7dfa877d7dfa),
+ U64(0x15fafaef15fafaef), U64(0xeb5959b2eb5959b2),
+ U64(0xc947478ec947478e), U64(0x0bf0f0fb0bf0f0fb),
+ U64(0xecadad41ecadad41), U64(0x67d4d4b367d4d4b3),
+ U64(0xfda2a25ffda2a25f), U64(0xeaafaf45eaafaf45),
+ U64(0xbf9c9c23bf9c9c23), U64(0xf7a4a453f7a4a453),
+ U64(0x967272e4967272e4), U64(0x5bc0c09b5bc0c09b),
+ U64(0xc2b7b775c2b7b775), U64(0x1cfdfde11cfdfde1),
+ U64(0xae93933dae93933d), U64(0x6a26264c6a26264c),
+ U64(0x5a36366c5a36366c), U64(0x413f3f7e413f3f7e),
+ U64(0x02f7f7f502f7f7f5), U64(0x4fcccc834fcccc83),
+ U64(0x5c3434685c343468), U64(0xf4a5a551f4a5a551),
+ U64(0x34e5e5d134e5e5d1), U64(0x08f1f1f908f1f1f9),
+ U64(0x937171e2937171e2), U64(0x73d8d8ab73d8d8ab),
+ U64(0x5331316253313162), U64(0x3f15152a3f15152a),
+ U64(0x0c0404080c040408), U64(0x52c7c79552c7c795),
+ U64(0x6523234665232346), U64(0x5ec3c39d5ec3c39d),
+ U64(0x2818183028181830), U64(0xa1969637a1969637),
+ U64(0x0f05050a0f05050a), U64(0xb59a9a2fb59a9a2f),
+ U64(0x0907070e0907070e), U64(0x3612122436121224),
+ U64(0x9b80801b9b80801b), U64(0x3de2e2df3de2e2df),
+ U64(0x26ebebcd26ebebcd), U64(0x6927274e6927274e),
+ U64(0xcdb2b27fcdb2b27f), U64(0x9f7575ea9f7575ea),
+ U64(0x1b0909121b090912), U64(0x9e83831d9e83831d),
+ U64(0x742c2c58742c2c58), U64(0x2e1a1a342e1a1a34),
+ U64(0x2d1b1b362d1b1b36), U64(0xb26e6edcb26e6edc),
+ U64(0xee5a5ab4ee5a5ab4), U64(0xfba0a05bfba0a05b),
+ U64(0xf65252a4f65252a4), U64(0x4d3b3b764d3b3b76),
+ U64(0x61d6d6b761d6d6b7), U64(0xceb3b37dceb3b37d),
+ U64(0x7b2929527b292952), U64(0x3ee3e3dd3ee3e3dd),
+ U64(0x712f2f5e712f2f5e), U64(0x9784841397848413),
+ U64(0xf55353a6f55353a6), U64(0x68d1d1b968d1d1b9),
+ U64(0x0000000000000000), U64(0x2cededc12cededc1),
+ U64(0x6020204060202040), U64(0x1ffcfce31ffcfce3),
+ U64(0xc8b1b179c8b1b179), U64(0xed5b5bb6ed5b5bb6),
+ U64(0xbe6a6ad4be6a6ad4), U64(0x46cbcb8d46cbcb8d),
+ U64(0xd9bebe67d9bebe67), U64(0x4b3939724b393972),
+ U64(0xde4a4a94de4a4a94), U64(0xd44c4c98d44c4c98),
+ U64(0xe85858b0e85858b0), U64(0x4acfcf854acfcf85),
+ U64(0x6bd0d0bb6bd0d0bb), U64(0x2aefefc52aefefc5),
+ U64(0xe5aaaa4fe5aaaa4f), U64(0x16fbfbed16fbfbed),
+ U64(0xc5434386c5434386), U64(0xd74d4d9ad74d4d9a),
+ U64(0x5533336655333366), U64(0x9485851194858511),
+ U64(0xcf45458acf45458a), U64(0x10f9f9e910f9f9e9),
+ U64(0x0602020406020204), U64(0x817f7ffe817f7ffe),
+ U64(0xf05050a0f05050a0), U64(0x443c3c78443c3c78),
+ U64(0xba9f9f25ba9f9f25), U64(0xe3a8a84be3a8a84b),
+ U64(0xf35151a2f35151a2), U64(0xfea3a35dfea3a35d),
+ U64(0xc0404080c0404080), U64(0x8a8f8f058a8f8f05),
+ U64(0xad92923fad92923f), U64(0xbc9d9d21bc9d9d21),
+ U64(0x4838387048383870), U64(0x04f5f5f104f5f5f1),
+ U64(0xdfbcbc63dfbcbc63), U64(0xc1b6b677c1b6b677),
+ U64(0x75dadaaf75dadaaf), U64(0x6321214263212142),
+ U64(0x3010102030101020), U64(0x1affffe51affffe5),
+ U64(0x0ef3f3fd0ef3f3fd), U64(0x6dd2d2bf6dd2d2bf),
+ U64(0x4ccdcd814ccdcd81), U64(0x140c0c18140c0c18),
+ U64(0x3513132635131326), U64(0x2fececc32fececc3),
+ U64(0xe15f5fbee15f5fbe), U64(0xa2979735a2979735),
+ U64(0xcc444488cc444488), U64(0x3917172e3917172e),
+ U64(0x57c4c49357c4c493), U64(0xf2a7a755f2a7a755),
+ U64(0x827e7efc827e7efc), U64(0x473d3d7a473d3d7a),
+ U64(0xac6464c8ac6464c8), U64(0xe75d5dbae75d5dba),
+ U64(0x2b1919322b191932), U64(0x957373e6957373e6),
+ U64(0xa06060c0a06060c0), U64(0x9881811998818119),
+ U64(0xd14f4f9ed14f4f9e), U64(0x7fdcdca37fdcdca3),
+ U64(0x6622224466222244), U64(0x7e2a2a547e2a2a54),
+ U64(0xab90903bab90903b), U64(0x8388880b8388880b),
+ U64(0xca46468cca46468c), U64(0x29eeeec729eeeec7),
+ U64(0xd3b8b86bd3b8b86b), U64(0x3c1414283c141428),
+ U64(0x79dedea779dedea7), U64(0xe25e5ebce25e5ebc),
+ U64(0x1d0b0b161d0b0b16), U64(0x76dbdbad76dbdbad),
+ U64(0x3be0e0db3be0e0db), U64(0x5632326456323264),
+ U64(0x4e3a3a744e3a3a74), U64(0x1e0a0a141e0a0a14),
+ U64(0xdb494992db494992), U64(0x0a06060c0a06060c),
+ U64(0x6c2424486c242448), U64(0xe45c5cb8e45c5cb8),
+ U64(0x5dc2c29f5dc2c29f), U64(0x6ed3d3bd6ed3d3bd),
+ U64(0xefacac43efacac43), U64(0xa66262c4a66262c4),
+ U64(0xa8919139a8919139), U64(0xa4959531a4959531),
+ U64(0x37e4e4d337e4e4d3), U64(0x8b7979f28b7979f2),
+ U64(0x32e7e7d532e7e7d5), U64(0x43c8c88b43c8c88b),
+ U64(0x5937376e5937376e), U64(0xb76d6ddab76d6dda),
+ U64(0x8c8d8d018c8d8d01), U64(0x64d5d5b164d5d5b1),
+ U64(0xd24e4e9cd24e4e9c), U64(0xe0a9a949e0a9a949),
+ U64(0xb46c6cd8b46c6cd8), U64(0xfa5656acfa5656ac),
+ U64(0x07f4f4f307f4f4f3), U64(0x25eaeacf25eaeacf),
+ U64(0xaf6565caaf6565ca), U64(0x8e7a7af48e7a7af4),
+ U64(0xe9aeae47e9aeae47), U64(0x1808081018080810),
+ U64(0xd5baba6fd5baba6f), U64(0x887878f0887878f0),
+ U64(0x6f25254a6f25254a), U64(0x722e2e5c722e2e5c),
+ U64(0x241c1c38241c1c38), U64(0xf1a6a657f1a6a657),
+ U64(0xc7b4b473c7b4b473), U64(0x51c6c69751c6c697),
+ U64(0x23e8e8cb23e8e8cb), U64(0x7cdddda17cdddda1),
+ U64(0x9c7474e89c7474e8), U64(0x211f1f3e211f1f3e),
+ U64(0xdd4b4b96dd4b4b96), U64(0xdcbdbd61dcbdbd61),
+ U64(0x868b8b0d868b8b0d), U64(0x858a8a0f858a8a0f),
+ U64(0x907070e0907070e0), U64(0x423e3e7c423e3e7c),
+ U64(0xc4b5b571c4b5b571), U64(0xaa6666ccaa6666cc),
+ U64(0xd8484890d8484890), U64(0x0503030605030306),
+ U64(0x01f6f6f701f6f6f7), U64(0x120e0e1c120e0e1c),
+ U64(0xa36161c2a36161c2), U64(0x5f35356a5f35356a),
+ U64(0xf95757aef95757ae), U64(0xd0b9b969d0b9b969),
+ U64(0x9186861791868617), U64(0x58c1c19958c1c199),
+ U64(0x271d1d3a271d1d3a), U64(0xb99e9e27b99e9e27),
+ U64(0x38e1e1d938e1e1d9), U64(0x13f8f8eb13f8f8eb),
+ U64(0xb398982bb398982b), U64(0x3311112233111122),
+ U64(0xbb6969d2bb6969d2), U64(0x70d9d9a970d9d9a9),
+ U64(0x898e8e07898e8e07), U64(0xa7949433a7949433),
+ U64(0xb69b9b2db69b9b2d), U64(0x221e1e3c221e1e3c),
+ U64(0x9287871592878715), U64(0x20e9e9c920e9e9c9),
+ U64(0x49cece8749cece87), U64(0xff5555aaff5555aa),
+ U64(0x7828285078282850), U64(0x7adfdfa57adfdfa5),
+ U64(0x8f8c8c038f8c8c03), U64(0xf8a1a159f8a1a159),
+ U64(0x8089890980898909), U64(0x170d0d1a170d0d1a),
+ U64(0xdabfbf65dabfbf65), U64(0x31e6e6d731e6e6d7),
+ U64(0xc6424284c6424284), U64(0xb86868d0b86868d0),
+ U64(0xc3414182c3414182), U64(0xb0999929b0999929),
+ U64(0x772d2d5a772d2d5a), U64(0x110f0f1e110f0f1e),
+ U64(0xcbb0b07bcbb0b07b), U64(0xfc5454a8fc5454a8),
+ U64(0xd6bbbb6dd6bbbb6d), U64(0x3a16162c3a16162c)
+};
+
+static const u8 Te4[256] = {
+ 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U,
+ 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U,
+ 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U,
+ 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U,
+ 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU,
+ 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U,
+ 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU,
+ 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U,
+ 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U,
+ 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U,
+ 0x53U, 0xd1U, 0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU,
+ 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU,
+ 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U,
+ 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U,
+ 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U,
+ 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U,
+ 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U,
+ 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U,
+ 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U,
+ 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU,
+ 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU,
+ 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U,
+ 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U,
+ 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U,
+ 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U,
+ 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU,
+ 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU,
+ 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU,
+ 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U,
+ 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU,
+ 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U,
+ 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U
+};
+
+static const u64 Td[256] = {
+ U64(0x50a7f45150a7f451), U64(0x5365417e5365417e),
+ U64(0xc3a4171ac3a4171a), U64(0x965e273a965e273a),
+ U64(0xcb6bab3bcb6bab3b), U64(0xf1459d1ff1459d1f),
+ U64(0xab58faacab58faac), U64(0x9303e34b9303e34b),
+ U64(0x55fa302055fa3020), U64(0xf66d76adf66d76ad),
+ U64(0x9176cc889176cc88), U64(0x254c02f5254c02f5),
+ U64(0xfcd7e54ffcd7e54f), U64(0xd7cb2ac5d7cb2ac5),
+ U64(0x8044352680443526), U64(0x8fa362b58fa362b5),
+ U64(0x495ab1de495ab1de), U64(0x671bba25671bba25),
+ U64(0x980eea45980eea45), U64(0xe1c0fe5de1c0fe5d),
+ U64(0x02752fc302752fc3), U64(0x12f04c8112f04c81),
+ U64(0xa397468da397468d), U64(0xc6f9d36bc6f9d36b),
+ U64(0xe75f8f03e75f8f03), U64(0x959c9215959c9215),
+ U64(0xeb7a6dbfeb7a6dbf), U64(0xda595295da595295),
+ U64(0x2d83bed42d83bed4), U64(0xd3217458d3217458),
+ U64(0x2969e0492969e049), U64(0x44c8c98e44c8c98e),
+ U64(0x6a89c2756a89c275), U64(0x78798ef478798ef4),
+ U64(0x6b3e58996b3e5899), U64(0xdd71b927dd71b927),
+ U64(0xb64fe1beb64fe1be), U64(0x17ad88f017ad88f0),
+ U64(0x66ac20c966ac20c9), U64(0xb43ace7db43ace7d),
+ U64(0x184adf63184adf63), U64(0x82311ae582311ae5),
+ U64(0x6033519760335197), U64(0x457f5362457f5362),
+ U64(0xe07764b1e07764b1), U64(0x84ae6bbb84ae6bbb),
+ U64(0x1ca081fe1ca081fe), U64(0x942b08f9942b08f9),
+ U64(0x5868487058684870), U64(0x19fd458f19fd458f),
+ U64(0x876cde94876cde94), U64(0xb7f87b52b7f87b52),
+ U64(0x23d373ab23d373ab), U64(0xe2024b72e2024b72),
+ U64(0x578f1fe3578f1fe3), U64(0x2aab55662aab5566),
+ U64(0x0728ebb20728ebb2), U64(0x03c2b52f03c2b52f),
+ U64(0x9a7bc5869a7bc586), U64(0xa50837d3a50837d3),
+ U64(0xf2872830f2872830), U64(0xb2a5bf23b2a5bf23),
+ U64(0xba6a0302ba6a0302), U64(0x5c8216ed5c8216ed),
+ U64(0x2b1ccf8a2b1ccf8a), U64(0x92b479a792b479a7),
+ U64(0xf0f207f3f0f207f3), U64(0xa1e2694ea1e2694e),
+ U64(0xcdf4da65cdf4da65), U64(0xd5be0506d5be0506),
+ U64(0x1f6234d11f6234d1), U64(0x8afea6c48afea6c4),
+ U64(0x9d532e349d532e34), U64(0xa055f3a2a055f3a2),
+ U64(0x32e18a0532e18a05), U64(0x75ebf6a475ebf6a4),
+ U64(0x39ec830b39ec830b), U64(0xaaef6040aaef6040),
+ U64(0x069f715e069f715e), U64(0x51106ebd51106ebd),
+ U64(0xf98a213ef98a213e), U64(0x3d06dd963d06dd96),
+ U64(0xae053eddae053edd), U64(0x46bde64d46bde64d),
+ U64(0xb58d5491b58d5491), U64(0x055dc471055dc471),
+ U64(0x6fd406046fd40604), U64(0xff155060ff155060),
+ U64(0x24fb981924fb9819), U64(0x97e9bdd697e9bdd6),
+ U64(0xcc434089cc434089), U64(0x779ed967779ed967),
+ U64(0xbd42e8b0bd42e8b0), U64(0x888b8907888b8907),
+ U64(0x385b19e7385b19e7), U64(0xdbeec879dbeec879),
+ U64(0x470a7ca1470a7ca1), U64(0xe90f427ce90f427c),
+ U64(0xc91e84f8c91e84f8), U64(0x0000000000000000),
+ U64(0x8386800983868009), U64(0x48ed2b3248ed2b32),
+ U64(0xac70111eac70111e), U64(0x4e725a6c4e725a6c),
+ U64(0xfbff0efdfbff0efd), U64(0x5638850f5638850f),
+ U64(0x1ed5ae3d1ed5ae3d), U64(0x27392d3627392d36),
+ U64(0x64d90f0a64d90f0a), U64(0x21a65c6821a65c68),
+ U64(0xd1545b9bd1545b9b), U64(0x3a2e36243a2e3624),
+ U64(0xb1670a0cb1670a0c), U64(0x0fe757930fe75793),
+ U64(0xd296eeb4d296eeb4), U64(0x9e919b1b9e919b1b),
+ U64(0x4fc5c0804fc5c080), U64(0xa220dc61a220dc61),
+ U64(0x694b775a694b775a), U64(0x161a121c161a121c),
+ U64(0x0aba93e20aba93e2), U64(0xe52aa0c0e52aa0c0),
+ U64(0x43e0223c43e0223c), U64(0x1d171b121d171b12),
+ U64(0x0b0d090e0b0d090e), U64(0xadc78bf2adc78bf2),
+ U64(0xb9a8b62db9a8b62d), U64(0xc8a91e14c8a91e14),
+ U64(0x8519f1578519f157), U64(0x4c0775af4c0775af),
+ U64(0xbbdd99eebbdd99ee), U64(0xfd607fa3fd607fa3),
+ U64(0x9f2601f79f2601f7), U64(0xbcf5725cbcf5725c),
+ U64(0xc53b6644c53b6644), U64(0x347efb5b347efb5b),
+ U64(0x7629438b7629438b), U64(0xdcc623cbdcc623cb),
+ U64(0x68fcedb668fcedb6), U64(0x63f1e4b863f1e4b8),
+ U64(0xcadc31d7cadc31d7), U64(0x1085634210856342),
+ U64(0x4022971340229713), U64(0x2011c6842011c684),
+ U64(0x7d244a857d244a85), U64(0xf83dbbd2f83dbbd2),
+ U64(0x1132f9ae1132f9ae), U64(0x6da129c76da129c7),
+ U64(0x4b2f9e1d4b2f9e1d), U64(0xf330b2dcf330b2dc),
+ U64(0xec52860dec52860d), U64(0xd0e3c177d0e3c177),
+ U64(0x6c16b32b6c16b32b), U64(0x99b970a999b970a9),
+ U64(0xfa489411fa489411), U64(0x2264e9472264e947),
+ U64(0xc48cfca8c48cfca8), U64(0x1a3ff0a01a3ff0a0),
+ U64(0xd82c7d56d82c7d56), U64(0xef903322ef903322),
+ U64(0xc74e4987c74e4987), U64(0xc1d138d9c1d138d9),
+ U64(0xfea2ca8cfea2ca8c), U64(0x360bd498360bd498),
+ U64(0xcf81f5a6cf81f5a6), U64(0x28de7aa528de7aa5),
+ U64(0x268eb7da268eb7da), U64(0xa4bfad3fa4bfad3f),
+ U64(0xe49d3a2ce49d3a2c), U64(0x0d9278500d927850),
+ U64(0x9bcc5f6a9bcc5f6a), U64(0x62467e5462467e54),
+ U64(0xc2138df6c2138df6), U64(0xe8b8d890e8b8d890),
+ U64(0x5ef7392e5ef7392e), U64(0xf5afc382f5afc382),
+ U64(0xbe805d9fbe805d9f), U64(0x7c93d0697c93d069),
+ U64(0xa92dd56fa92dd56f), U64(0xb31225cfb31225cf),
+ U64(0x3b99acc83b99acc8), U64(0xa77d1810a77d1810),
+ U64(0x6e639ce86e639ce8), U64(0x7bbb3bdb7bbb3bdb),
+ U64(0x097826cd097826cd), U64(0xf418596ef418596e),
+ U64(0x01b79aec01b79aec), U64(0xa89a4f83a89a4f83),
+ U64(0x656e95e6656e95e6), U64(0x7ee6ffaa7ee6ffaa),
+ U64(0x08cfbc2108cfbc21), U64(0xe6e815efe6e815ef),
+ U64(0xd99be7bad99be7ba), U64(0xce366f4ace366f4a),
+ U64(0xd4099fead4099fea), U64(0xd67cb029d67cb029),
+ U64(0xafb2a431afb2a431), U64(0x31233f2a31233f2a),
+ U64(0x3094a5c63094a5c6), U64(0xc066a235c066a235),
+ U64(0x37bc4e7437bc4e74), U64(0xa6ca82fca6ca82fc),
+ U64(0xb0d090e0b0d090e0), U64(0x15d8a73315d8a733),
+ U64(0x4a9804f14a9804f1), U64(0xf7daec41f7daec41),
+ U64(0x0e50cd7f0e50cd7f), U64(0x2ff691172ff69117),
+ U64(0x8dd64d768dd64d76), U64(0x4db0ef434db0ef43),
+ U64(0x544daacc544daacc), U64(0xdf0496e4df0496e4),
+ U64(0xe3b5d19ee3b5d19e), U64(0x1b886a4c1b886a4c),
+ U64(0xb81f2cc1b81f2cc1), U64(0x7f5165467f516546),
+ U64(0x04ea5e9d04ea5e9d), U64(0x5d358c015d358c01),
+ U64(0x737487fa737487fa), U64(0x2e410bfb2e410bfb),
+ U64(0x5a1d67b35a1d67b3), U64(0x52d2db9252d2db92),
+ U64(0x335610e9335610e9), U64(0x1347d66d1347d66d),
+ U64(0x8c61d79a8c61d79a), U64(0x7a0ca1377a0ca137),
+ U64(0x8e14f8598e14f859), U64(0x893c13eb893c13eb),
+ U64(0xee27a9ceee27a9ce), U64(0x35c961b735c961b7),
+ U64(0xede51ce1ede51ce1), U64(0x3cb1477a3cb1477a),
+ U64(0x59dfd29c59dfd29c), U64(0x3f73f2553f73f255),
+ U64(0x79ce141879ce1418), U64(0xbf37c773bf37c773),
+ U64(0xeacdf753eacdf753), U64(0x5baafd5f5baafd5f),
+ U64(0x146f3ddf146f3ddf), U64(0x86db447886db4478),
+ U64(0x81f3afca81f3afca), U64(0x3ec468b93ec468b9),
+ U64(0x2c3424382c342438), U64(0x5f40a3c25f40a3c2),
+ U64(0x72c31d1672c31d16), U64(0x0c25e2bc0c25e2bc),
+ U64(0x8b493c288b493c28), U64(0x41950dff41950dff),
+ U64(0x7101a8397101a839), U64(0xdeb30c08deb30c08),
+ U64(0x9ce4b4d89ce4b4d8), U64(0x90c1566490c15664),
+ U64(0x6184cb7b6184cb7b), U64(0x70b632d570b632d5),
+ U64(0x745c6c48745c6c48), U64(0x4257b8d04257b8d0)
+};
+static const u8 Td4[256] = {
+ 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U,
+ 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU,
+ 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U,
+ 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU,
+ 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU,
+ 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU,
+ 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U,
+ 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U,
+ 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U,
+ 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U,
+ 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU,
+ 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U,
+ 0x90U, 0xd8U, 0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU,
+ 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U,
+ 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U,
+ 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU,
+ 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU,
+ 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U,
+ 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U,
+ 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU,
+ 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U,
+ 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU,
+ 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U,
+ 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U,
+ 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U,
+ 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU,
+ 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU,
+ 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU,
+ 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U,
+ 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U,
+ 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U,
+ 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU
+};
+
+static const u32 rcon[] = {
+ 0x00000001U, 0x00000002U, 0x00000004U, 0x00000008U,
+ 0x00000010U, 0x00000020U, 0x00000040U, 0x00000080U,
+ 0x0000001bU, 0x00000036U, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ */
+int AES_set_encrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key) {
+
+ u32 *rk;
+ int i = 0;
+ u32 temp;
+
+ if (!userKey || !key)
+ return -1;
+ if (bits != 128 && bits != 192 && bits != 256)
+ return -2;
+
+ rk = key->rd_key;
+
+ if (bits==128)
+ key->rounds = 10;
+ else if (bits==192)
+ key->rounds = 12;
+ else
+ key->rounds = 14;
+
+ rk[0] = GETU32(userKey );
+ rk[1] = GETU32(userKey + 4);
+ rk[2] = GETU32(userKey + 8);
+ rk[3] = GETU32(userKey + 12);
+ if (bits == 128) {
+ while (1) {
+ temp = rk[3];
+ rk[4] = rk[0] ^
+ (Te4[(temp >> 8) & 0xff] ) ^
+ (Te4[(temp >> 16) & 0xff] << 8) ^
+ (Te4[(temp >> 24) ] << 16) ^
+ (Te4[(temp ) & 0xff] << 24) ^
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+ if (++i == 10) {
+ return 0;
+ }
+ rk += 4;
+ }
+ }
+ rk[4] = GETU32(userKey + 16);
+ rk[5] = GETU32(userKey + 20);
+ if (bits == 192) {
+ while (1) {
+ temp = rk[ 5];
+ rk[ 6] = rk[ 0] ^
+ (Te4[(temp >> 8) & 0xff] ) ^
+ (Te4[(temp >> 16) & 0xff] << 8) ^
+ (Te4[(temp >> 24) ] << 16) ^
+ (Te4[(temp ) & 0xff] << 24) ^
+ rcon[i];
+ rk[ 7] = rk[ 1] ^ rk[ 6];
+ rk[ 8] = rk[ 2] ^ rk[ 7];
+ rk[ 9] = rk[ 3] ^ rk[ 8];
+ if (++i == 8) {
+ return 0;
+ }
+ rk[10] = rk[ 4] ^ rk[ 9];
+ rk[11] = rk[ 5] ^ rk[10];
+ rk += 6;
+ }
+ }
+ rk[6] = GETU32(userKey + 24);
+ rk[7] = GETU32(userKey + 28);
+ if (bits == 256) {
+ while (1) {
+ temp = rk[ 7];
+ rk[ 8] = rk[ 0] ^
+ (Te4[(temp >> 8) & 0xff] ) ^
+ (Te4[(temp >> 16) & 0xff] << 8) ^
+ (Te4[(temp >> 24) ] << 16) ^
+ (Te4[(temp ) & 0xff] << 24) ^
+ rcon[i];
+ rk[ 9] = rk[ 1] ^ rk[ 8];
+ rk[10] = rk[ 2] ^ rk[ 9];
+ rk[11] = rk[ 3] ^ rk[10];
+ if (++i == 7) {
+ return 0;
+ }
+ temp = rk[11];
+ rk[12] = rk[ 4] ^
+ (Te4[(temp ) & 0xff] ) ^
+ (Te4[(temp >> 8) & 0xff] << 8) ^
+ (Te4[(temp >> 16) & 0xff] << 16) ^
+ (Te4[(temp >> 24) ] << 24);
+ rk[13] = rk[ 5] ^ rk[12];
+ rk[14] = rk[ 6] ^ rk[13];
+ rk[15] = rk[ 7] ^ rk[14];
+
+ rk += 8;
+ }
+ }
+ return 0;
+}
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ */
+int AES_set_decrypt_key(const unsigned char *userKey, const int bits,
+ AES_KEY *key) {
+
+ u32 *rk;
+ int i, j, status;
+ u32 temp;
+
+ /* first, start with an encryption schedule */
+ status = AES_set_encrypt_key(userKey, bits, key);
+ if (status < 0)
+ return status;
+
+ rk = key->rd_key;
+
+ /* invert the order of the round keys: */
+ for (i = 0, j = 4*(key->rounds); i < j; i += 4, j -= 4) {
+ temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
+ temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
+ temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
+ temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
+ }
+ /* apply the inverse MixColumn transform to all round keys but the first and the last: */
+ for (i = 1; i < (key->rounds); i++) {
+ rk += 4;
+#if 1
+ for (j = 0; j < 4; j++) {
+ u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
+
+ tp1 = rk[j];
+ m = tp1 & 0x80808080;
+ tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ m = tp2 & 0x80808080;
+ tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ m = tp4 & 0x80808080;
+ tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ tp9 = tp8 ^ tp1;
+ tpb = tp9 ^ tp2;
+ tpd = tp9 ^ tp4;
+ tpe = tp8 ^ tp4 ^ tp2;
+#if defined(ROTATE)
+ rk[j] = tpe ^ ROTATE(tpd,16) ^
+ ROTATE(tp9,8) ^ ROTATE(tpb,24);
+#else
+ rk[j] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
+ (tp9 >> 24) ^ (tp9 << 8) ^
+ (tpb >> 8) ^ (tpb << 24);
+#endif
+ }
+#else
+ rk[0] =
+ Td0[Te2[(rk[0] ) & 0xff] & 0xff] ^
+ Td1[Te2[(rk[0] >> 8) & 0xff] & 0xff] ^
+ Td2[Te2[(rk[0] >> 16) & 0xff] & 0xff] ^
+ Td3[Te2[(rk[0] >> 24) ] & 0xff];
+ rk[1] =
+ Td0[Te2[(rk[1] ) & 0xff] & 0xff] ^
+ Td1[Te2[(rk[1] >> 8) & 0xff] & 0xff] ^
+ Td2[Te2[(rk[1] >> 16) & 0xff] & 0xff] ^
+ Td3[Te2[(rk[1] >> 24) ] & 0xff];
+ rk[2] =
+ Td0[Te2[(rk[2] ) & 0xff] & 0xff] ^
+ Td1[Te2[(rk[2] >> 8) & 0xff] & 0xff] ^
+ Td2[Te2[(rk[2] >> 16) & 0xff] & 0xff] ^
+ Td3[Te2[(rk[2] >> 24) ] & 0xff];
+ rk[3] =
+ Td0[Te2[(rk[3] ) & 0xff] & 0xff] ^
+ Td1[Te2[(rk[3] >> 8) & 0xff] & 0xff] ^
+ Td2[Te2[(rk[3] >> 16) & 0xff] & 0xff] ^
+ Td3[Te2[(rk[3] >> 24) ] & 0xff];
+#endif
+ }
+ return 0;
+}
+
+/*
+ * Encrypt a single block
+ * in and out can overlap
+ */
+void AES_encrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key) {
+
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t[4];
+ int r;
+
+ assert(in && out && key);
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+
+#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
+ prefetch256(Te4);
+
+ t[0] = Te4[(s0 ) & 0xff] ^
+ Te4[(s1 >> 8) & 0xff] << 8 ^
+ Te4[(s2 >> 16) & 0xff] << 16 ^
+ Te4[(s3 >> 24) ] << 24;
+ t[1] = Te4[(s1 ) & 0xff] ^
+ Te4[(s2 >> 8) & 0xff] << 8 ^
+ Te4[(s3 >> 16) & 0xff] << 16 ^
+ Te4[(s0 >> 24) ] << 24;
+ t[2] = Te4[(s2 ) & 0xff] ^
+ Te4[(s3 >> 8) & 0xff] << 8 ^
+ Te4[(s0 >> 16) & 0xff] << 16 ^
+ Te4[(s1 >> 24) ] << 24;
+ t[3] = Te4[(s3 ) & 0xff] ^
+ Te4[(s0 >> 8) & 0xff] << 8 ^
+ Te4[(s1 >> 16) & 0xff] << 16 ^
+ Te4[(s2 >> 24) ] << 24;
+
+ /* now do the linear transform using words */
+ { int i;
+ u32 r0, r1, r2;
+
+ for (i = 0; i < 4; i++) {
+ r0 = t[i];
+ r1 = r0 & 0x80808080;
+ r2 = ((r0 & 0x7f7f7f7f) << 1) ^
+ ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
+#if defined(ROTATE)
+ t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
+ ROTATE(r0,16) ^ ROTATE(r0,8);
+#else
+ t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
+ (r0 << 16) ^ (r0 >> 16) ^
+ (r0 << 8) ^ (r0 >> 24);
+#endif
+ t[i] ^= rk[4+i];
+ }
+ }
+#else
+ t[0] = Te0[(s0 ) & 0xff] ^
+ Te1[(s1 >> 8) & 0xff] ^
+ Te2[(s2 >> 16) & 0xff] ^
+ Te3[(s3 >> 24) ] ^
+ rk[4];
+ t[1] = Te0[(s1 ) & 0xff] ^
+ Te1[(s2 >> 8) & 0xff] ^
+ Te2[(s3 >> 16) & 0xff] ^
+ Te3[(s0 >> 24) ] ^
+ rk[5];
+ t[2] = Te0[(s2 ) & 0xff] ^
+ Te1[(s3 >> 8) & 0xff] ^
+ Te2[(s0 >> 16) & 0xff] ^
+ Te3[(s1 >> 24) ] ^
+ rk[6];
+ t[3] = Te0[(s3 ) & 0xff] ^
+ Te1[(s0 >> 8) & 0xff] ^
+ Te2[(s1 >> 16) & 0xff] ^
+ Te3[(s2 >> 24) ] ^
+ rk[7];
+#endif
+ s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
+
+ /*
+ * Nr - 2 full rounds:
+ */
+ for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
+#if defined(AES_COMPACT_IN_INNER_ROUNDS)
+ t[0] = Te4[(s0 ) & 0xff] ^
+ Te4[(s1 >> 8) & 0xff] << 8 ^
+ Te4[(s2 >> 16) & 0xff] << 16 ^
+ Te4[(s3 >> 24) ] << 24;
+ t[1] = Te4[(s1 ) & 0xff] ^
+ Te4[(s2 >> 8) & 0xff] << 8 ^
+ Te4[(s3 >> 16) & 0xff] << 16 ^
+ Te4[(s0 >> 24) ] << 24;
+ t[2] = Te4[(s2 ) & 0xff] ^
+ Te4[(s3 >> 8) & 0xff] << 8 ^
+ Te4[(s0 >> 16) & 0xff] << 16 ^
+ Te4[(s1 >> 24) ] << 24;
+ t[3] = Te4[(s3 ) & 0xff] ^
+ Te4[(s0 >> 8) & 0xff] << 8 ^
+ Te4[(s1 >> 16) & 0xff] << 16 ^
+ Te4[(s2 >> 24) ] << 24;
+
+ /* now do the linear transform using words */
+ { int i;
+ u32 r0, r1, r2;
+
+ for (i = 0; i < 4; i++) {
+ r0 = t[i];
+ r1 = r0 & 0x80808080;
+ r2 = ((r0 & 0x7f7f7f7f) << 1) ^
+ ((r1 - (r1 >> 7)) & 0x1b1b1b1b);
+#if defined(ROTATE)
+ t[i] = r2 ^ ROTATE(r2,24) ^ ROTATE(r0,24) ^
+ ROTATE(r0,16) ^ ROTATE(r0,8);
+#else
+ t[i] = r2 ^ ((r2 ^ r0) << 24) ^ ((r2 ^ r0) >> 8) ^
+ (r0 << 16) ^ (r0 >> 16) ^
+ (r0 << 8) ^ (r0 >> 24);
+#endif
+ t[i] ^= rk[i];
+ }
+ }
+#else
+ t[0] = Te0[(s0 ) & 0xff] ^
+ Te1[(s1 >> 8) & 0xff] ^
+ Te2[(s2 >> 16) & 0xff] ^
+ Te3[(s3 >> 24) ] ^
+ rk[0];
+ t[1] = Te0[(s1 ) & 0xff] ^
+ Te1[(s2 >> 8) & 0xff] ^
+ Te2[(s3 >> 16) & 0xff] ^
+ Te3[(s0 >> 24) ] ^
+ rk[1];
+ t[2] = Te0[(s2 ) & 0xff] ^
+ Te1[(s3 >> 8) & 0xff] ^
+ Te2[(s0 >> 16) & 0xff] ^
+ Te3[(s1 >> 24) ] ^
+ rk[2];
+ t[3] = Te0[(s3 ) & 0xff] ^
+ Te1[(s0 >> 8) & 0xff] ^
+ Te2[(s1 >> 16) & 0xff] ^
+ Te3[(s2 >> 24) ] ^
+ rk[3];
+#endif
+ s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
+ }
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
+ prefetch256(Te4);
+
+ *(u32*)(out+0) =
+ Te4[(s0 ) & 0xff] ^
+ Te4[(s1 >> 8) & 0xff] << 8 ^
+ Te4[(s2 >> 16) & 0xff] << 16 ^
+ Te4[(s3 >> 24) ] << 24 ^
+ rk[0];
+ *(u32*)(out+4) =
+ Te4[(s1 ) & 0xff] ^
+ Te4[(s2 >> 8) & 0xff] << 8 ^
+ Te4[(s3 >> 16) & 0xff] << 16 ^
+ Te4[(s0 >> 24) ] << 24 ^
+ rk[1];
+ *(u32*)(out+8) =
+ Te4[(s2 ) & 0xff] ^
+ Te4[(s3 >> 8) & 0xff] << 8 ^
+ Te4[(s0 >> 16) & 0xff] << 16 ^
+ Te4[(s1 >> 24) ] << 24 ^
+ rk[2];
+ *(u32*)(out+12) =
+ Te4[(s3 ) & 0xff] ^
+ Te4[(s0 >> 8) & 0xff] << 8 ^
+ Te4[(s1 >> 16) & 0xff] << 16 ^
+ Te4[(s2 >> 24) ] << 24 ^
+ rk[3];
+#else
+ *(u32*)(out+0) =
+ (Te2[(s0 ) & 0xff] & 0x000000ffU) ^
+ (Te3[(s1 >> 8) & 0xff] & 0x0000ff00U) ^
+ (Te0[(s2 >> 16) & 0xff] & 0x00ff0000U) ^
+ (Te1[(s3 >> 24) ] & 0xff000000U) ^
+ rk[0];
+ *(u32*)(out+4) =
+ (Te2[(s1 ) & 0xff] & 0x000000ffU) ^
+ (Te3[(s2 >> 8) & 0xff] & 0x0000ff00U) ^
+ (Te0[(s3 >> 16) & 0xff] & 0x00ff0000U) ^
+ (Te1[(s0 >> 24) ] & 0xff000000U) ^
+ rk[1];
+ *(u32*)(out+8) =
+ (Te2[(s2 ) & 0xff] & 0x000000ffU) ^
+ (Te3[(s3 >> 8) & 0xff] & 0x0000ff00U) ^
+ (Te0[(s0 >> 16) & 0xff] & 0x00ff0000U) ^
+ (Te1[(s1 >> 24) ] & 0xff000000U) ^
+ rk[2];
+ *(u32*)(out+12) =
+ (Te2[(s3 ) & 0xff] & 0x000000ffU) ^
+ (Te3[(s0 >> 8) & 0xff] & 0x0000ff00U) ^
+ (Te0[(s1 >> 16) & 0xff] & 0x00ff0000U) ^
+ (Te1[(s2 >> 24) ] & 0xff000000U) ^
+ rk[3];
+#endif
+}
+
+/*
+ * Decrypt a single block
+ * in and out can overlap
+ */
+void AES_decrypt(const unsigned char *in, unsigned char *out,
+ const AES_KEY *key) {
+
+ const u32 *rk;
+ u32 s0, s1, s2, s3, t[4];
+ int r;
+
+ assert(in && out && key);
+ rk = key->rd_key;
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(in ) ^ rk[0];
+ s1 = GETU32(in + 4) ^ rk[1];
+ s2 = GETU32(in + 8) ^ rk[2];
+ s3 = GETU32(in + 12) ^ rk[3];
+
+#if defined(AES_COMPACT_IN_OUTER_ROUNDS)
+ prefetch256(Td4);
+
+ t[0] = Td4[(s0 ) & 0xff] ^
+ Td4[(s3 >> 8) & 0xff] << 8 ^
+ Td4[(s2 >> 16) & 0xff] << 16 ^
+ Td4[(s1 >> 24) ] << 24;
+ t[1] = Td4[(s1 ) & 0xff] ^
+ Td4[(s0 >> 8) & 0xff] << 8 ^
+ Td4[(s3 >> 16) & 0xff] << 16 ^
+ Td4[(s2 >> 24) ] << 24;
+ t[2] = Td4[(s2 ) & 0xff] ^
+ Td4[(s1 >> 8) & 0xff] << 8 ^
+ Td4[(s0 >> 16) & 0xff] << 16 ^
+ Td4[(s3 >> 24) ] << 24;
+ t[3] = Td4[(s3 ) & 0xff] ^
+ Td4[(s2 >> 8) & 0xff] << 8 ^
+ Td4[(s1 >> 16) & 0xff] << 16 ^
+ Td4[(s0 >> 24) ] << 24;
+
+ /* now do the linear transform using words */
+ { int i;
+ u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
+
+ for (i = 0; i < 4; i++) {
+ tp1 = t[i];
+ m = tp1 & 0x80808080;
+ tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ m = tp2 & 0x80808080;
+ tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ m = tp4 & 0x80808080;
+ tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ tp9 = tp8 ^ tp1;
+ tpb = tp9 ^ tp2;
+ tpd = tp9 ^ tp4;
+ tpe = tp8 ^ tp4 ^ tp2;
+#if defined(ROTATE)
+ t[i] = tpe ^ ROTATE(tpd,16) ^
+ ROTATE(tp9,8) ^ ROTATE(tpb,24);
+#else
+ t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
+ (tp9 >> 24) ^ (tp9 << 8) ^
+ (tpb >> 8) ^ (tpb << 24);
+#endif
+ t[i] ^= rk[4+i];
+ }
+ }
+#else
+ t[0] = Td0[(s0 ) & 0xff] ^
+ Td1[(s3 >> 8) & 0xff] ^
+ Td2[(s2 >> 16) & 0xff] ^
+ Td3[(s1 >> 24) ] ^
+ rk[4];
+ t[1] = Td0[(s1 ) & 0xff] ^
+ Td1[(s0 >> 8) & 0xff] ^
+ Td2[(s3 >> 16) & 0xff] ^
+ Td3[(s2 >> 24) ] ^
+ rk[5];
+ t[2] = Td0[(s2 ) & 0xff] ^
+ Td1[(s1 >> 8) & 0xff] ^
+ Td2[(s0 >> 16) & 0xff] ^
+ Td3[(s3 >> 24) ] ^
+ rk[6];
+ t[3] = Td0[(s3 ) & 0xff] ^
+ Td1[(s2 >> 8) & 0xff] ^
+ Td2[(s1 >> 16) & 0xff] ^
+ Td3[(s0 >> 24) ] ^
+ rk[7];
+#endif
+ s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
+
+ /*
+ * Nr - 2 full rounds:
+ */
+ for (rk+=8,r=key->rounds-2; r>0; rk+=4,r--) {
+#if defined(AES_COMPACT_IN_INNER_ROUNDS)
+ t[0] = Td4[(s0 ) & 0xff] ^
+ Td4[(s3 >> 8) & 0xff] << 8 ^
+ Td4[(s2 >> 16) & 0xff] << 16 ^
+ Td4[(s1 >> 24) ] << 24;
+ t[1] = Td4[(s1 ) & 0xff] ^
+ Td4[(s0 >> 8) & 0xff] << 8 ^
+ Td4[(s3 >> 16) & 0xff] << 16 ^
+ Td4[(s2 >> 24) ] << 24;
+ t[2] = Td4[(s2 ) & 0xff] ^
+ Td4[(s1 >> 8) & 0xff] << 8 ^
+ Td4[(s0 >> 16) & 0xff] << 16 ^
+ Td4[(s3 >> 24) ] << 24;
+ t[3] = Td4[(s3 ) & 0xff] ^
+ Td4[(s2 >> 8) & 0xff] << 8 ^
+ Td4[(s1 >> 16) & 0xff] << 16 ^
+ Td4[(s0 >> 24) ] << 24;
+
+ /* now do the linear transform using words */
+ { int i;
+ u32 tp1, tp2, tp4, tp8, tp9, tpb, tpd, tpe, m;
+
+ for (i = 0; i < 4; i++) {
+ tp1 = t[i];
+ m = tp1 & 0x80808080;
+ tp2 = ((tp1 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ m = tp2 & 0x80808080;
+ tp4 = ((tp2 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ m = tp4 & 0x80808080;
+ tp8 = ((tp4 & 0x7f7f7f7f) << 1) ^
+ ((m - (m >> 7)) & 0x1b1b1b1b);
+ tp9 = tp8 ^ tp1;
+ tpb = tp9 ^ tp2;
+ tpd = tp9 ^ tp4;
+ tpe = tp8 ^ tp4 ^ tp2;
+#if defined(ROTATE)
+ t[i] = tpe ^ ROTATE(tpd,16) ^
+ ROTATE(tp9,8) ^ ROTATE(tpb,24);
+#else
+ t[i] = tpe ^ (tpd >> 16) ^ (tpd << 16) ^
+ (tp9 >> 24) ^ (tp9 << 8) ^
+ (tpb >> 8) ^ (tpb << 24);
+#endif
+ t[i] ^= rk[i];
+ }
+ }
+#else
+ t[0] = Td0[(s0 ) & 0xff] ^
+ Td1[(s3 >> 8) & 0xff] ^
+ Td2[(s2 >> 16) & 0xff] ^
+ Td3[(s1 >> 24) ] ^
+ rk[0];
+ t[1] = Td0[(s1 ) & 0xff] ^
+ Td1[(s0 >> 8) & 0xff] ^
+ Td2[(s3 >> 16) & 0xff] ^
+ Td3[(s2 >> 24) ] ^
+ rk[1];
+ t[2] = Td0[(s2 ) & 0xff] ^
+ Td1[(s1 >> 8) & 0xff] ^
+ Td2[(s0 >> 16) & 0xff] ^
+ Td3[(s3 >> 24) ] ^
+ rk[2];
+ t[3] = Td0[(s3 ) & 0xff] ^
+ Td1[(s2 >> 8) & 0xff] ^
+ Td2[(s1 >> 16) & 0xff] ^
+ Td3[(s0 >> 24) ] ^
+ rk[3];
+#endif
+ s0 = t[0]; s1 = t[1]; s2 = t[2]; s3 = t[3];
+ }
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ prefetch256(Td4);
+
+ *(u32*)(out+0) =
+ (Td4[(s0 ) & 0xff]) ^
+ (Td4[(s3 >> 8) & 0xff] << 8) ^
+ (Td4[(s2 >> 16) & 0xff] << 16) ^
+ (Td4[(s1 >> 24) ] << 24) ^
+ rk[0];
+ *(u32*)(out+4) =
+ (Td4[(s1 ) & 0xff]) ^
+ (Td4[(s0 >> 8) & 0xff] << 8) ^
+ (Td4[(s3 >> 16) & 0xff] << 16) ^
+ (Td4[(s2 >> 24) ] << 24) ^
+ rk[1];
+ *(u32*)(out+8) =
+ (Td4[(s2 ) & 0xff]) ^
+ (Td4[(s1 >> 8) & 0xff] << 8) ^
+ (Td4[(s0 >> 16) & 0xff] << 16) ^
+ (Td4[(s3 >> 24) ] << 24) ^
+ rk[2];
+ *(u32*)(out+12) =
+ (Td4[(s3 ) & 0xff]) ^
+ (Td4[(s2 >> 8) & 0xff] << 8) ^
+ (Td4[(s1 >> 16) & 0xff] << 16) ^
+ (Td4[(s0 >> 24) ] << 24) ^
+ rk[3];
+}
diff --git a/crypto/aes/asm/aes-586.pl b/crypto/aes/asm/aes-586.pl
index 89fa2617944b..3bc46a968e37 100755
--- a/crypto/aes/asm/aes-586.pl
+++ b/crypto/aes/asm/aes-586.pl
@@ -955,8 +955,9 @@ my $mark=&DWP(60+240,"esp"); #copy of aes_key->rounds
&align (4);
&set_label("enc_tail");
- &push ($key eq "edi" ? $key : ""); # push ivp
+ &mov ($s0,$key eq "edi" ? $key : "");
&mov ($key,$_out); # load out
+ &push ($s0); # push ivp
&mov ($s1,16);
&sub ($s1,$s2);
&cmp ($key,$acc); # compare with inp
diff --git a/crypto/aes/asm/aes-armv4.pl b/crypto/aes/asm/aes-armv4.pl
new file mode 100755
index 000000000000..15742c1ec54b
--- /dev/null
+++ b/crypto/aes/asm/aes-armv4.pl
@@ -0,0 +1,1030 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# AES for ARMv4
+
+# January 2007.
+#
+# Code uses single 1K S-box and is >2 times faster than code generated
+# by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which
+# allows to merge logical or arithmetic operation with shift or rotate
+# in one instruction and emit combined result every cycle. The module
+# is endian-neutral. The performance is ~42 cycles/byte for 128-bit
+# key.
+
+# May 2007.
+#
+# AES_set_[en|de]crypt_key is added.
+
+$s0="r0";
+$s1="r1";
+$s2="r2";
+$s3="r3";
+$t1="r4";
+$t2="r5";
+$t3="r6";
+$i1="r7";
+$i2="r8";
+$i3="r9";
+
+$tbl="r10";
+$key="r11";
+$rounds="r12";
+
+$code=<<___;
+.text
+.code 32
+
+.type AES_Te,%object
+.align 5
+AES_Te:
+.word 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d
+.word 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554
+.word 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d
+.word 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a
+.word 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87
+.word 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b
+.word 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea
+.word 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b
+.word 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a
+.word 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f
+.word 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108
+.word 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f
+.word 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e
+.word 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5
+.word 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d
+.word 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f
+.word 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e
+.word 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb
+.word 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce
+.word 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497
+.word 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c
+.word 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed
+.word 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b
+.word 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a
+.word 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16
+.word 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594
+.word 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81
+.word 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3
+.word 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a
+.word 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504
+.word 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163
+.word 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d
+.word 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f
+.word 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739
+.word 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47
+.word 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395
+.word 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f
+.word 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883
+.word 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c
+.word 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76
+.word 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e
+.word 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4
+.word 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6
+.word 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b
+.word 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7
+.word 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0
+.word 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25
+.word 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818
+.word 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72
+.word 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651
+.word 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21
+.word 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85
+.word 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa
+.word 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12
+.word 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0
+.word 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9
+.word 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133
+.word 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7
+.word 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920
+.word 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a
+.word 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17
+.word 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8
+.word 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11
+.word 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a
+@ Te4[256]
+.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+@ rcon[]
+.word 0x01000000, 0x02000000, 0x04000000, 0x08000000
+.word 0x10000000, 0x20000000, 0x40000000, 0x80000000
+.word 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
+.size AES_Te,.-AES_Te
+
+@ void AES_encrypt(const unsigned char *in, unsigned char *out,
+@ const AES_KEY *key) {
+.global AES_encrypt
+.type AES_encrypt,%function
+.align 5
+AES_encrypt:
+ sub r3,pc,#8 @ AES_encrypt
+ stmdb sp!,{r1,r4-r12,lr}
+ mov $rounds,r0 @ inp
+ mov $key,r2
+ sub $tbl,r3,#AES_encrypt-AES_Te @ Te
+
+ ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
+ ldrb $t1,[$rounds,#2] @ manner...
+ ldrb $t2,[$rounds,#1]
+ ldrb $t3,[$rounds,#0]
+ orr $s0,$s0,$t1,lsl#8
+ orr $s0,$s0,$t2,lsl#16
+ orr $s0,$s0,$t3,lsl#24
+ ldrb $s1,[$rounds,#7]
+ ldrb $t1,[$rounds,#6]
+ ldrb $t2,[$rounds,#5]
+ ldrb $t3,[$rounds,#4]
+ orr $s1,$s1,$t1,lsl#8
+ orr $s1,$s1,$t2,lsl#16
+ orr $s1,$s1,$t3,lsl#24
+ ldrb $s2,[$rounds,#11]
+ ldrb $t1,[$rounds,#10]
+ ldrb $t2,[$rounds,#9]
+ ldrb $t3,[$rounds,#8]
+ orr $s2,$s2,$t1,lsl#8
+ orr $s2,$s2,$t2,lsl#16
+ orr $s2,$s2,$t3,lsl#24
+ ldrb $s3,[$rounds,#15]
+ ldrb $t1,[$rounds,#14]
+ ldrb $t2,[$rounds,#13]
+ ldrb $t3,[$rounds,#12]
+ orr $s3,$s3,$t1,lsl#8
+ orr $s3,$s3,$t2,lsl#16
+ orr $s3,$s3,$t3,lsl#24
+
+ bl _armv4_AES_encrypt
+
+ ldr $rounds,[sp],#4 @ pop out
+ mov $t1,$s0,lsr#24 @ write output in endian-neutral
+ mov $t2,$s0,lsr#16 @ manner...
+ mov $t3,$s0,lsr#8
+ strb $t1,[$rounds,#0]
+ strb $t2,[$rounds,#1]
+ strb $t3,[$rounds,#2]
+ strb $s0,[$rounds,#3]
+ mov $t1,$s1,lsr#24
+ mov $t2,$s1,lsr#16
+ mov $t3,$s1,lsr#8
+ strb $t1,[$rounds,#4]
+ strb $t2,[$rounds,#5]
+ strb $t3,[$rounds,#6]
+ strb $s1,[$rounds,#7]
+ mov $t1,$s2,lsr#24
+ mov $t2,$s2,lsr#16
+ mov $t3,$s2,lsr#8
+ strb $t1,[$rounds,#8]
+ strb $t2,[$rounds,#9]
+ strb $t3,[$rounds,#10]
+ strb $s2,[$rounds,#11]
+ mov $t1,$s3,lsr#24
+ mov $t2,$s3,lsr#16
+ mov $t3,$s3,lsr#8
+ strb $t1,[$rounds,#12]
+ strb $t2,[$rounds,#13]
+ strb $t3,[$rounds,#14]
+ strb $s3,[$rounds,#15]
+
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+.size AES_encrypt,.-AES_encrypt
+
+.type _armv4_AES_encrypt,%function
+.align 2
+_armv4_AES_encrypt:
+ str lr,[sp,#-4]! @ push lr
+ ldr $t1,[$key],#16
+ ldr $t2,[$key,#-12]
+ ldr $t3,[$key,#-8]
+ ldr $i1,[$key,#-4]
+ ldr $rounds,[$key,#240-16]
+ eor $s0,$s0,$t1
+ eor $s1,$s1,$t2
+ eor $s2,$s2,$t3
+ eor $s3,$s3,$i1
+ sub $rounds,$rounds,#1
+ mov lr,#255
+
+.Lenc_loop:
+ and $i2,lr,$s0,lsr#8
+ and $i3,lr,$s0,lsr#16
+ and $i1,lr,$s0
+ mov $s0,$s0,lsr#24
+ ldr $t1,[$tbl,$i1,lsl#2] @ Te3[s0>>0]
+ ldr $s0,[$tbl,$s0,lsl#2] @ Te0[s0>>24]
+ ldr $t2,[$tbl,$i2,lsl#2] @ Te2[s0>>8]
+ ldr $t3,[$tbl,$i3,lsl#2] @ Te1[s0>>16]
+
+ and $i1,lr,$s1,lsr#16 @ i0
+ and $i2,lr,$s1
+ and $i3,lr,$s1,lsr#8
+ mov $s1,$s1,lsr#24
+ ldr $i1,[$tbl,$i1,lsl#2] @ Te1[s1>>16]
+ ldr $s1,[$tbl,$s1,lsl#2] @ Te0[s1>>24]
+ ldr $i2,[$tbl,$i2,lsl#2] @ Te3[s1>>0]
+ ldr $i3,[$tbl,$i3,lsl#2] @ Te2[s1>>8]
+ eor $s0,$s0,$i1,ror#8
+ eor $s1,$s1,$t1,ror#24
+ eor $t2,$t2,$i2,ror#8
+ eor $t3,$t3,$i3,ror#8
+
+ and $i1,lr,$s2,lsr#8 @ i0
+ and $i2,lr,$s2,lsr#16 @ i1
+ and $i3,lr,$s2
+ mov $s2,$s2,lsr#24
+ ldr $i1,[$tbl,$i1,lsl#2] @ Te2[s2>>8]
+ ldr $i2,[$tbl,$i2,lsl#2] @ Te1[s2>>16]
+ ldr $s2,[$tbl,$s2,lsl#2] @ Te0[s2>>24]
+ ldr $i3,[$tbl,$i3,lsl#2] @ Te3[s2>>0]
+ eor $s0,$s0,$i1,ror#16
+ eor $s1,$s1,$i2,ror#8
+ eor $s2,$s2,$t2,ror#16
+ eor $t3,$t3,$i3,ror#16
+
+ and $i1,lr,$s3 @ i0
+ and $i2,lr,$s3,lsr#8 @ i1
+ and $i3,lr,$s3,lsr#16 @ i2
+ mov $s3,$s3,lsr#24
+ ldr $i1,[$tbl,$i1,lsl#2] @ Te3[s3>>0]
+ ldr $i2,[$tbl,$i2,lsl#2] @ Te2[s3>>8]
+ ldr $i3,[$tbl,$i3,lsl#2] @ Te1[s3>>16]
+ ldr $s3,[$tbl,$s3,lsl#2] @ Te0[s3>>24]
+ eor $s0,$s0,$i1,ror#24
+ eor $s1,$s1,$i2,ror#16
+ eor $s2,$s2,$i3,ror#8
+ eor $s3,$s3,$t3,ror#8
+
+ ldr $t1,[$key],#16
+ ldr $t2,[$key,#-12]
+ ldr $t3,[$key,#-8]
+ ldr $i1,[$key,#-4]
+ eor $s0,$s0,$t1
+ eor $s1,$s1,$t2
+ eor $s2,$s2,$t3
+ eor $s3,$s3,$i1
+
+ subs $rounds,$rounds,#1
+ bne .Lenc_loop
+
+ add $tbl,$tbl,#2
+
+ and $i1,lr,$s0
+ and $i2,lr,$s0,lsr#8
+ and $i3,lr,$s0,lsr#16
+ mov $s0,$s0,lsr#24
+ ldrb $t1,[$tbl,$i1,lsl#2] @ Te4[s0>>0]
+ ldrb $s0,[$tbl,$s0,lsl#2] @ Te4[s0>>24]
+ ldrb $t2,[$tbl,$i2,lsl#2] @ Te4[s0>>8]
+ ldrb $t3,[$tbl,$i3,lsl#2] @ Te4[s0>>16]
+
+ and $i1,lr,$s1,lsr#16 @ i0
+ and $i2,lr,$s1
+ and $i3,lr,$s1,lsr#8
+ mov $s1,$s1,lsr#24
+ ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s1>>16]
+ ldrb $s1,[$tbl,$s1,lsl#2] @ Te4[s1>>24]
+ ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s1>>0]
+ ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s1>>8]
+ eor $s0,$i1,$s0,lsl#8
+ eor $s1,$t1,$s1,lsl#24
+ eor $t2,$i2,$t2,lsl#8
+ eor $t3,$i3,$t3,lsl#8
+
+ and $i1,lr,$s2,lsr#8 @ i0
+ and $i2,lr,$s2,lsr#16 @ i1
+ and $i3,lr,$s2
+ mov $s2,$s2,lsr#24
+ ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s2>>8]
+ ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s2>>16]
+ ldrb $s2,[$tbl,$s2,lsl#2] @ Te4[s2>>24]
+ ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s2>>0]
+ eor $s0,$i1,$s0,lsl#8
+ eor $s1,$s1,$i2,lsl#16
+ eor $s2,$t2,$s2,lsl#24
+ eor $t3,$i3,$t3,lsl#8
+
+ and $i1,lr,$s3 @ i0
+ and $i2,lr,$s3,lsr#8 @ i1
+ and $i3,lr,$s3,lsr#16 @ i2
+ mov $s3,$s3,lsr#24
+ ldrb $i1,[$tbl,$i1,lsl#2] @ Te4[s3>>0]
+ ldrb $i2,[$tbl,$i2,lsl#2] @ Te4[s3>>8]
+ ldrb $i3,[$tbl,$i3,lsl#2] @ Te4[s3>>16]
+ ldrb $s3,[$tbl,$s3,lsl#2] @ Te4[s3>>24]
+ eor $s0,$i1,$s0,lsl#8
+ eor $s1,$s1,$i2,lsl#8
+ eor $s2,$s2,$i3,lsl#16
+ eor $s3,$t3,$s3,lsl#24
+
+ ldr lr,[sp],#4 @ pop lr
+ ldr $t1,[$key,#0]
+ ldr $t2,[$key,#4]
+ ldr $t3,[$key,#8]
+ ldr $i1,[$key,#12]
+ eor $s0,$s0,$t1
+ eor $s1,$s1,$t2
+ eor $s2,$s2,$t3
+ eor $s3,$s3,$i1
+
+ sub $tbl,$tbl,#2
+ mov pc,lr @ return
+.size _armv4_AES_encrypt,.-_armv4_AES_encrypt
+
+.global AES_set_encrypt_key
+.type AES_set_encrypt_key,%function
+.align 5
+AES_set_encrypt_key:
+ sub r3,pc,#8 @ AES_set_encrypt_key
+ teq r0,#0
+ moveq r0,#-1
+ beq .Labrt
+ teq r2,#0
+ moveq r0,#-1
+ beq .Labrt
+
+ teq r1,#128
+ beq .Lok
+ teq r1,#192
+ beq .Lok
+ teq r1,#256
+ movne r0,#-1
+ bne .Labrt
+
+.Lok: stmdb sp!,{r4-r12,lr}
+ sub $tbl,r3,#AES_set_encrypt_key-AES_Te-1024 @ Te4
+
+ mov $rounds,r0 @ inp
+ mov lr,r1 @ bits
+ mov $key,r2 @ key
+
+ ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
+ ldrb $t1,[$rounds,#2] @ manner...
+ ldrb $t2,[$rounds,#1]
+ ldrb $t3,[$rounds,#0]
+ orr $s0,$s0,$t1,lsl#8
+ orr $s0,$s0,$t2,lsl#16
+ orr $s0,$s0,$t3,lsl#24
+ ldrb $s1,[$rounds,#7]
+ ldrb $t1,[$rounds,#6]
+ ldrb $t2,[$rounds,#5]
+ ldrb $t3,[$rounds,#4]
+ orr $s1,$s1,$t1,lsl#8
+ orr $s1,$s1,$t2,lsl#16
+ orr $s1,$s1,$t3,lsl#24
+ ldrb $s2,[$rounds,#11]
+ ldrb $t1,[$rounds,#10]
+ ldrb $t2,[$rounds,#9]
+ ldrb $t3,[$rounds,#8]
+ orr $s2,$s2,$t1,lsl#8
+ orr $s2,$s2,$t2,lsl#16
+ orr $s2,$s2,$t3,lsl#24
+ ldrb $s3,[$rounds,#15]
+ ldrb $t1,[$rounds,#14]
+ ldrb $t2,[$rounds,#13]
+ ldrb $t3,[$rounds,#12]
+ orr $s3,$s3,$t1,lsl#8
+ orr $s3,$s3,$t2,lsl#16
+ orr $s3,$s3,$t3,lsl#24
+ str $s0,[$key],#16
+ str $s1,[$key,#-12]
+ str $s2,[$key,#-8]
+ str $s3,[$key,#-4]
+
+ teq lr,#128
+ bne .Lnot128
+ mov $rounds,#10
+ str $rounds,[$key,#240-16]
+ add $t3,$tbl,#256 @ rcon
+ mov lr,#255
+
+.L128_loop:
+ and $t2,lr,$s3,lsr#24
+ and $i1,lr,$s3,lsr#16
+ and $i2,lr,$s3,lsr#8
+ and $i3,lr,$s3
+ ldrb $t2,[$tbl,$t2]
+ ldrb $i1,[$tbl,$i1]
+ ldrb $i2,[$tbl,$i2]
+ ldrb $i3,[$tbl,$i3]
+ ldr $t1,[$t3],#4 @ rcon[i++]
+ orr $t2,$t2,$i1,lsl#24
+ orr $t2,$t2,$i2,lsl#16
+ orr $t2,$t2,$i3,lsl#8
+ eor $t2,$t2,$t1
+ eor $s0,$s0,$t2 @ rk[4]=rk[0]^...
+ eor $s1,$s1,$s0 @ rk[5]=rk[1]^rk[4]
+ eor $s2,$s2,$s1 @ rk[6]=rk[2]^rk[5]
+ eor $s3,$s3,$s2 @ rk[7]=rk[3]^rk[6]
+ str $s0,[$key],#16
+ str $s1,[$key,#-12]
+ str $s2,[$key,#-8]
+ str $s3,[$key,#-4]
+
+ subs $rounds,$rounds,#1
+ bne .L128_loop
+ sub r2,$key,#176
+ b .Ldone
+
+.Lnot128:
+ ldrb $i2,[$rounds,#19]
+ ldrb $t1,[$rounds,#18]
+ ldrb $t2,[$rounds,#17]
+ ldrb $t3,[$rounds,#16]
+ orr $i2,$i2,$t1,lsl#8
+ orr $i2,$i2,$t2,lsl#16
+ orr $i2,$i2,$t3,lsl#24
+ ldrb $i3,[$rounds,#23]
+ ldrb $t1,[$rounds,#22]
+ ldrb $t2,[$rounds,#21]
+ ldrb $t3,[$rounds,#20]
+ orr $i3,$i3,$t1,lsl#8
+ orr $i3,$i3,$t2,lsl#16
+ orr $i3,$i3,$t3,lsl#24
+ str $i2,[$key],#8
+ str $i3,[$key,#-4]
+
+ teq lr,#192
+ bne .Lnot192
+ mov $rounds,#12
+ str $rounds,[$key,#240-24]
+ add $t3,$tbl,#256 @ rcon
+ mov lr,#255
+ mov $rounds,#8
+
+.L192_loop:
+ and $t2,lr,$i3,lsr#24
+ and $i1,lr,$i3,lsr#16
+ and $i2,lr,$i3,lsr#8
+ and $i3,lr,$i3
+ ldrb $t2,[$tbl,$t2]
+ ldrb $i1,[$tbl,$i1]
+ ldrb $i2,[$tbl,$i2]
+ ldrb $i3,[$tbl,$i3]
+ ldr $t1,[$t3],#4 @ rcon[i++]
+ orr $t2,$t2,$i1,lsl#24
+ orr $t2,$t2,$i2,lsl#16
+ orr $t2,$t2,$i3,lsl#8
+ eor $i3,$t2,$t1
+ eor $s0,$s0,$i3 @ rk[6]=rk[0]^...
+ eor $s1,$s1,$s0 @ rk[7]=rk[1]^rk[6]
+ eor $s2,$s2,$s1 @ rk[8]=rk[2]^rk[7]
+ eor $s3,$s3,$s2 @ rk[9]=rk[3]^rk[8]
+ str $s0,[$key],#24
+ str $s1,[$key,#-20]
+ str $s2,[$key,#-16]
+ str $s3,[$key,#-12]
+
+ subs $rounds,$rounds,#1
+ subeq r2,$key,#216
+ beq .Ldone
+
+ ldr $i1,[$key,#-32]
+ ldr $i2,[$key,#-28]
+ eor $i1,$i1,$s3 @ rk[10]=rk[4]^rk[9]
+ eor $i3,$i2,$i1 @ rk[11]=rk[5]^rk[10]
+ str $i1,[$key,#-8]
+ str $i3,[$key,#-4]
+ b .L192_loop
+
+.Lnot192:
+ ldrb $i2,[$rounds,#27]
+ ldrb $t1,[$rounds,#26]
+ ldrb $t2,[$rounds,#25]
+ ldrb $t3,[$rounds,#24]
+ orr $i2,$i2,$t1,lsl#8
+ orr $i2,$i2,$t2,lsl#16
+ orr $i2,$i2,$t3,lsl#24
+ ldrb $i3,[$rounds,#31]
+ ldrb $t1,[$rounds,#30]
+ ldrb $t2,[$rounds,#29]
+ ldrb $t3,[$rounds,#28]
+ orr $i3,$i3,$t1,lsl#8
+ orr $i3,$i3,$t2,lsl#16
+ orr $i3,$i3,$t3,lsl#24
+ str $i2,[$key],#8
+ str $i3,[$key,#-4]
+
+ mov $rounds,#14
+ str $rounds,[$key,#240-32]
+ add $t3,$tbl,#256 @ rcon
+ mov lr,#255
+ mov $rounds,#7
+
+.L256_loop:
+ and $t2,lr,$i3,lsr#24
+ and $i1,lr,$i3,lsr#16
+ and $i2,lr,$i3,lsr#8
+ and $i3,lr,$i3
+ ldrb $t2,[$tbl,$t2]
+ ldrb $i1,[$tbl,$i1]
+ ldrb $i2,[$tbl,$i2]
+ ldrb $i3,[$tbl,$i3]
+ ldr $t1,[$t3],#4 @ rcon[i++]
+ orr $t2,$t2,$i1,lsl#24
+ orr $t2,$t2,$i2,lsl#16
+ orr $t2,$t2,$i3,lsl#8
+ eor $i3,$t2,$t1
+ eor $s0,$s0,$i3 @ rk[8]=rk[0]^...
+ eor $s1,$s1,$s0 @ rk[9]=rk[1]^rk[8]
+ eor $s2,$s2,$s1 @ rk[10]=rk[2]^rk[9]
+ eor $s3,$s3,$s2 @ rk[11]=rk[3]^rk[10]
+ str $s0,[$key],#32
+ str $s1,[$key,#-28]
+ str $s2,[$key,#-24]
+ str $s3,[$key,#-20]
+
+ subs $rounds,$rounds,#1
+ subeq r2,$key,#256
+ beq .Ldone
+
+ and $t2,lr,$s3
+ and $i1,lr,$s3,lsr#8
+ and $i2,lr,$s3,lsr#16
+ and $i3,lr,$s3,lsr#24
+ ldrb $t2,[$tbl,$t2]
+ ldrb $i1,[$tbl,$i1]
+ ldrb $i2,[$tbl,$i2]
+ ldrb $i3,[$tbl,$i3]
+ orr $t2,$t2,$i1,lsl#8
+ orr $t2,$t2,$i2,lsl#16
+ orr $t2,$t2,$i3,lsl#24
+
+ ldr $t1,[$key,#-48]
+ ldr $i1,[$key,#-44]
+ ldr $i2,[$key,#-40]
+ ldr $i3,[$key,#-36]
+ eor $t1,$t1,$t2 @ rk[12]=rk[4]^...
+ eor $i1,$i1,$t1 @ rk[13]=rk[5]^rk[12]
+ eor $i2,$i2,$i1 @ rk[14]=rk[6]^rk[13]
+ eor $i3,$i3,$i2 @ rk[15]=rk[7]^rk[14]
+ str $t1,[$key,#-16]
+ str $i1,[$key,#-12]
+ str $i2,[$key,#-8]
+ str $i3,[$key,#-4]
+ b .L256_loop
+
+.Ldone: mov r0,#0
+ ldmia sp!,{r4-r12,lr}
+.Labrt: tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+.size AES_set_encrypt_key,.-AES_set_encrypt_key
+
+.global AES_set_decrypt_key
+.type AES_set_decrypt_key,%function
+.align 5
+AES_set_decrypt_key:
+ str lr,[sp,#-4]! @ push lr
+ bl AES_set_encrypt_key
+ teq r0,#0
+ ldrne lr,[sp],#4 @ pop lr
+ bne .Labrt
+
+ stmdb sp!,{r4-r12}
+
+ ldr $rounds,[r2,#240] @ AES_set_encrypt_key preserves r2,
+ mov $key,r2 @ which is AES_KEY *key
+ mov $i1,r2
+ add $i2,r2,$rounds,lsl#4
+
+.Linv: ldr $s0,[$i1]
+ ldr $s1,[$i1,#4]
+ ldr $s2,[$i1,#8]
+ ldr $s3,[$i1,#12]
+ ldr $t1,[$i2]
+ ldr $t2,[$i2,#4]
+ ldr $t3,[$i2,#8]
+ ldr $i3,[$i2,#12]
+ str $s0,[$i2],#-16
+ str $s1,[$i2,#16+4]
+ str $s2,[$i2,#16+8]
+ str $s3,[$i2,#16+12]
+ str $t1,[$i1],#16
+ str $t2,[$i1,#-12]
+ str $t3,[$i1,#-8]
+ str $i3,[$i1,#-4]
+ teq $i1,$i2
+ bne .Linv
+___
+$mask80=$i1;
+$mask1b=$i2;
+$mask7f=$i3;
+$code.=<<___;
+ ldr $s0,[$key,#16]! @ prefetch tp1
+ mov $mask80,#0x80
+ mov $mask1b,#0x1b
+ orr $mask80,$mask80,#0x8000
+ orr $mask1b,$mask1b,#0x1b00
+ orr $mask80,$mask80,$mask80,lsl#16
+ orr $mask1b,$mask1b,$mask1b,lsl#16
+ sub $rounds,$rounds,#1
+ mvn $mask7f,$mask80
+ mov $rounds,$rounds,lsl#2 @ (rounds-1)*4
+
+.Lmix: and $t1,$s0,$mask80
+ and $s1,$s0,$mask7f
+ sub $t1,$t1,$t1,lsr#7
+ and $t1,$t1,$mask1b
+ eor $s1,$t1,$s1,lsl#1 @ tp2
+
+ and $t1,$s1,$mask80
+ and $s2,$s1,$mask7f
+ sub $t1,$t1,$t1,lsr#7
+ and $t1,$t1,$mask1b
+ eor $s2,$t1,$s2,lsl#1 @ tp4
+
+ and $t1,$s2,$mask80
+ and $s3,$s2,$mask7f
+ sub $t1,$t1,$t1,lsr#7
+ and $t1,$t1,$mask1b
+ eor $s3,$t1,$s3,lsl#1 @ tp8
+
+ eor $t1,$s1,$s2
+ eor $t2,$s0,$s3 @ tp9
+ eor $t1,$t1,$s3 @ tpe
+ eor $t1,$t1,$s1,ror#24
+ eor $t1,$t1,$t2,ror#24 @ ^= ROTATE(tpb=tp9^tp2,8)
+ eor $t1,$t1,$s2,ror#16
+ eor $t1,$t1,$t2,ror#16 @ ^= ROTATE(tpd=tp9^tp4,16)
+ eor $t1,$t1,$t2,ror#8 @ ^= ROTATE(tp9,24)
+
+ ldr $s0,[$key,#4] @ prefetch tp1
+ str $t1,[$key],#4
+ subs $rounds,$rounds,#1
+ bne .Lmix
+
+ mov r0,#0
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+.size AES_set_decrypt_key,.-AES_set_decrypt_key
+
+.type AES_Td,%object
+.align 5
+AES_Td:
+.word 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96
+.word 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393
+.word 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25
+.word 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f
+.word 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1
+.word 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6
+.word 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da
+.word 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844
+.word 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd
+.word 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4
+.word 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45
+.word 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94
+.word 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7
+.word 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a
+.word 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5
+.word 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c
+.word 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1
+.word 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a
+.word 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75
+.word 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051
+.word 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46
+.word 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff
+.word 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77
+.word 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb
+.word 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000
+.word 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e
+.word 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927
+.word 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a
+.word 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e
+.word 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16
+.word 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d
+.word 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8
+.word 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd
+.word 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34
+.word 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163
+.word 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120
+.word 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d
+.word 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0
+.word 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422
+.word 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef
+.word 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36
+.word 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4
+.word 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662
+.word 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5
+.word 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3
+.word 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b
+.word 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8
+.word 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6
+.word 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6
+.word 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0
+.word 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815
+.word 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f
+.word 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df
+.word 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f
+.word 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e
+.word 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713
+.word 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89
+.word 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c
+.word 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf
+.word 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86
+.word 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f
+.word 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541
+.word 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190
+.word 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742
+@ Td4[256]
+.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+.size AES_Td,.-AES_Td
+
+@ void AES_decrypt(const unsigned char *in, unsigned char *out,
+@ const AES_KEY *key) {
+.global AES_decrypt
+.type AES_decrypt,%function
+.align 5
+AES_decrypt:
+ sub r3,pc,#8 @ AES_decrypt
+ stmdb sp!,{r1,r4-r12,lr}
+ mov $rounds,r0 @ inp
+ mov $key,r2
+ sub $tbl,r3,#AES_decrypt-AES_Td @ Td
+
+ ldrb $s0,[$rounds,#3] @ load input data in endian-neutral
+ ldrb $t1,[$rounds,#2] @ manner...
+ ldrb $t2,[$rounds,#1]
+ ldrb $t3,[$rounds,#0]
+ orr $s0,$s0,$t1,lsl#8
+ orr $s0,$s0,$t2,lsl#16
+ orr $s0,$s0,$t3,lsl#24
+ ldrb $s1,[$rounds,#7]
+ ldrb $t1,[$rounds,#6]
+ ldrb $t2,[$rounds,#5]
+ ldrb $t3,[$rounds,#4]
+ orr $s1,$s1,$t1,lsl#8
+ orr $s1,$s1,$t2,lsl#16
+ orr $s1,$s1,$t3,lsl#24
+ ldrb $s2,[$rounds,#11]
+ ldrb $t1,[$rounds,#10]
+ ldrb $t2,[$rounds,#9]
+ ldrb $t3,[$rounds,#8]
+ orr $s2,$s2,$t1,lsl#8
+ orr $s2,$s2,$t2,lsl#16
+ orr $s2,$s2,$t3,lsl#24
+ ldrb $s3,[$rounds,#15]
+ ldrb $t1,[$rounds,#14]
+ ldrb $t2,[$rounds,#13]
+ ldrb $t3,[$rounds,#12]
+ orr $s3,$s3,$t1,lsl#8
+ orr $s3,$s3,$t2,lsl#16
+ orr $s3,$s3,$t3,lsl#24
+
+ bl _armv4_AES_decrypt
+
+ ldr $rounds,[sp],#4 @ pop out
+ mov $t1,$s0,lsr#24 @ write output in endian-neutral
+ mov $t2,$s0,lsr#16 @ manner...
+ mov $t3,$s0,lsr#8
+ strb $t1,[$rounds,#0]
+ strb $t2,[$rounds,#1]
+ strb $t3,[$rounds,#2]
+ strb $s0,[$rounds,#3]
+ mov $t1,$s1,lsr#24
+ mov $t2,$s1,lsr#16
+ mov $t3,$s1,lsr#8
+ strb $t1,[$rounds,#4]
+ strb $t2,[$rounds,#5]
+ strb $t3,[$rounds,#6]
+ strb $s1,[$rounds,#7]
+ mov $t1,$s2,lsr#24
+ mov $t2,$s2,lsr#16
+ mov $t3,$s2,lsr#8
+ strb $t1,[$rounds,#8]
+ strb $t2,[$rounds,#9]
+ strb $t3,[$rounds,#10]
+ strb $s2,[$rounds,#11]
+ mov $t1,$s3,lsr#24
+ mov $t2,$s3,lsr#16
+ mov $t3,$s3,lsr#8
+ strb $t1,[$rounds,#12]
+ strb $t2,[$rounds,#13]
+ strb $t3,[$rounds,#14]
+ strb $s3,[$rounds,#15]
+
+ ldmia sp!,{r4-r12,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+.size AES_decrypt,.-AES_decrypt
+
+.type _armv4_AES_decrypt,%function
+.align 2
+_armv4_AES_decrypt:
+ str lr,[sp,#-4]! @ push lr
+ ldr $t1,[$key],#16
+ ldr $t2,[$key,#-12]
+ ldr $t3,[$key,#-8]
+ ldr $i1,[$key,#-4]
+ ldr $rounds,[$key,#240-16]
+ eor $s0,$s0,$t1
+ eor $s1,$s1,$t2
+ eor $s2,$s2,$t3
+ eor $s3,$s3,$i1
+ sub $rounds,$rounds,#1
+ mov lr,#255
+
+.Ldec_loop:
+ and $i1,lr,$s0,lsr#16
+ and $i2,lr,$s0,lsr#8
+ and $i3,lr,$s0
+ mov $s0,$s0,lsr#24
+ ldr $t1,[$tbl,$i1,lsl#2] @ Td1[s0>>16]
+ ldr $s0,[$tbl,$s0,lsl#2] @ Td0[s0>>24]
+ ldr $t2,[$tbl,$i2,lsl#2] @ Td2[s0>>8]
+ ldr $t3,[$tbl,$i3,lsl#2] @ Td3[s0>>0]
+
+ and $i1,lr,$s1 @ i0
+ and $i2,lr,$s1,lsr#16
+ and $i3,lr,$s1,lsr#8
+ mov $s1,$s1,lsr#24
+ ldr $i1,[$tbl,$i1,lsl#2] @ Td3[s1>>0]
+ ldr $s1,[$tbl,$s1,lsl#2] @ Td0[s1>>24]
+ ldr $i2,[$tbl,$i2,lsl#2] @ Td1[s1>>16]
+ ldr $i3,[$tbl,$i3,lsl#2] @ Td2[s1>>8]
+ eor $s0,$s0,$i1,ror#24
+ eor $s1,$s1,$t1,ror#8
+ eor $t2,$i2,$t2,ror#8
+ eor $t3,$i3,$t3,ror#8
+
+ and $i1,lr,$s2,lsr#8 @ i0
+ and $i2,lr,$s2 @ i1
+ and $i3,lr,$s2,lsr#16
+ mov $s2,$s2,lsr#24
+ ldr $i1,[$tbl,$i1,lsl#2] @ Td2[s2>>8]
+ ldr $i2,[$tbl,$i2,lsl#2] @ Td3[s2>>0]
+ ldr $s2,[$tbl,$s2,lsl#2] @ Td0[s2>>24]
+ ldr $i3,[$tbl,$i3,lsl#2] @ Td1[s2>>16]
+ eor $s0,$s0,$i1,ror#16
+ eor $s1,$s1,$i2,ror#24
+ eor $s2,$s2,$t2,ror#8
+ eor $t3,$i3,$t3,ror#8
+
+ and $i1,lr,$s3,lsr#16 @ i0
+ and $i2,lr,$s3,lsr#8 @ i1
+ and $i3,lr,$s3 @ i2
+ mov $s3,$s3,lsr#24
+ ldr $i1,[$tbl,$i1,lsl#2] @ Td1[s3>>16]
+ ldr $i2,[$tbl,$i2,lsl#2] @ Td2[s3>>8]
+ ldr $i3,[$tbl,$i3,lsl#2] @ Td3[s3>>0]
+ ldr $s3,[$tbl,$s3,lsl#2] @ Td0[s3>>24]
+ eor $s0,$s0,$i1,ror#8
+ eor $s1,$s1,$i2,ror#16
+ eor $s2,$s2,$i3,ror#24
+ eor $s3,$s3,$t3,ror#8
+
+ ldr $t1,[$key],#16
+ ldr $t2,[$key,#-12]
+ ldr $t3,[$key,#-8]
+ ldr $i1,[$key,#-4]
+ eor $s0,$s0,$t1
+ eor $s1,$s1,$t2
+ eor $s2,$s2,$t3
+ eor $s3,$s3,$i1
+
+ subs $rounds,$rounds,#1
+ bne .Ldec_loop
+
+ add $tbl,$tbl,#1024
+
+ ldr $t1,[$tbl,#0] @ prefetch Td4
+ ldr $t2,[$tbl,#32]
+ ldr $t3,[$tbl,#64]
+ ldr $i1,[$tbl,#96]
+ ldr $i2,[$tbl,#128]
+ ldr $i3,[$tbl,#160]
+ ldr $t1,[$tbl,#192]
+ ldr $t2,[$tbl,#224]
+
+ and $i1,lr,$s0,lsr#16
+ and $i2,lr,$s0,lsr#8
+ and $i3,lr,$s0
+ ldrb $s0,[$tbl,$s0,lsr#24] @ Td4[s0>>24]
+ ldrb $t1,[$tbl,$i1] @ Td4[s0>>16]
+ ldrb $t2,[$tbl,$i2] @ Td4[s0>>8]
+ ldrb $t3,[$tbl,$i3] @ Td4[s0>>0]
+
+ and $i1,lr,$s1 @ i0
+ and $i2,lr,$s1,lsr#16
+ and $i3,lr,$s1,lsr#8
+ ldrb $i1,[$tbl,$i1] @ Td4[s1>>0]
+ ldrb $s1,[$tbl,$s1,lsr#24] @ Td4[s1>>24]
+ ldrb $i2,[$tbl,$i2] @ Td4[s1>>16]
+ ldrb $i3,[$tbl,$i3] @ Td4[s1>>8]
+ eor $s0,$i1,$s0,lsl#24
+ eor $s1,$t1,$s1,lsl#8
+ eor $t2,$t2,$i2,lsl#8
+ eor $t3,$t3,$i3,lsl#8
+
+ and $i1,lr,$s2,lsr#8 @ i0
+ and $i2,lr,$s2 @ i1
+ and $i3,lr,$s2,lsr#16
+ ldrb $i1,[$tbl,$i1] @ Td4[s2>>8]
+ ldrb $i2,[$tbl,$i2] @ Td4[s2>>0]
+ ldrb $s2,[$tbl,$s2,lsr#24] @ Td4[s2>>24]
+ ldrb $i3,[$tbl,$i3] @ Td4[s2>>16]
+ eor $s0,$s0,$i1,lsl#8
+ eor $s1,$i2,$s1,lsl#16
+ eor $s2,$t2,$s2,lsl#16
+ eor $t3,$t3,$i3,lsl#16
+
+ and $i1,lr,$s3,lsr#16 @ i0
+ and $i2,lr,$s3,lsr#8 @ i1
+ and $i3,lr,$s3 @ i2
+ ldrb $i1,[$tbl,$i1] @ Td4[s3>>16]
+ ldrb $i2,[$tbl,$i2] @ Td4[s3>>8]
+ ldrb $i3,[$tbl,$i3] @ Td4[s3>>0]
+ ldrb $s3,[$tbl,$s3,lsr#24] @ Td4[s3>>24]
+ eor $s0,$s0,$i1,lsl#16
+ eor $s1,$s1,$i2,lsl#8
+ eor $s2,$i3,$s2,lsl#8
+ eor $s3,$t3,$s3,lsl#24
+
+ ldr lr,[sp],#4 @ pop lr
+ ldr $t1,[$key,#0]
+ ldr $t2,[$key,#4]
+ ldr $t3,[$key,#8]
+ ldr $i1,[$key,#12]
+ eor $s0,$s0,$t1
+ eor $s1,$s1,$t2
+ eor $s2,$s2,$t3
+ eor $s3,$s3,$i1
+
+ sub $tbl,$tbl,#1024
+ mov pc,lr @ return
+.size _armv4_AES_decrypt,.-_armv4_AES_decrypt
+.asciz "AES for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+print $code;
diff --git a/crypto/aes/asm/aes-ppc.pl b/crypto/aes/asm/aes-ppc.pl
new file mode 100755
index 000000000000..ce427655ef7b
--- /dev/null
+++ b/crypto/aes/asm/aes-ppc.pl
@@ -0,0 +1,1176 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# Needs more work: key setup, page boundaries, CBC routine...
+#
+# ppc_AES_[en|de]crypt perform at 18 cycles per byte processed with
+# 128-bit key, which is ~40% better than 64-bit code generated by gcc
+# 4.0. But these are not the ones currently used! Their "compact"
+# counterparts are, for security reason. ppc_AES_encrypt_compact runs
+# at 1/2 of ppc_AES_encrypt speed, while ppc_AES_decrypt_compact -
+# at 1/3 of ppc_AES_decrypt.
+
+$flavour = shift;
+
+if ($flavour =~ /64/) {
+ $SIZE_T =8;
+ $STU ="stdu";
+ $POP ="ld";
+ $PUSH ="std";
+} elsif ($flavour =~ /32/) {
+ $SIZE_T =4;
+ $STU ="stwu";
+ $POP ="lwz";
+ $PUSH ="stw";
+} else { die "nonsense $flavour"; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
+
+$FRAME=32*$SIZE_T;
+
+sub _data_word()
+{ my $i;
+ while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
+}
+
+$sp="r1";
+$toc="r2";
+$inp="r3";
+$out="r4";
+$key="r5";
+
+$Tbl0="r3";
+$Tbl1="r6";
+$Tbl2="r7";
+$Tbl3="r2";
+
+$s0="r8";
+$s1="r9";
+$s2="r10";
+$s3="r11";
+
+$t0="r12";
+$t1="r13";
+$t2="r14";
+$t3="r15";
+
+$acc00="r16";
+$acc01="r17";
+$acc02="r18";
+$acc03="r19";
+
+$acc04="r20";
+$acc05="r21";
+$acc06="r22";
+$acc07="r23";
+
+$acc08="r24";
+$acc09="r25";
+$acc10="r26";
+$acc11="r27";
+
+$acc12="r28";
+$acc13="r29";
+$acc14="r30";
+$acc15="r31";
+
+# stay away from TLS pointer
+if ($SIZE_T==8) { die if ($t1 ne "r13"); $t1="r0"; }
+else { die if ($Tbl3 ne "r2"); $Tbl3=$t0; $t0="r0"; }
+$mask80=$Tbl2;
+$mask1b=$Tbl3;
+
+$code.=<<___;
+.machine "any"
+.text
+
+.align 7
+LAES_Te:
+ mflr r0
+ bcl 20,31,\$+4
+ mflr $Tbl0 ; vvvvv "distance" between . and 1st data entry
+ addi $Tbl0,$Tbl0,`128-8`
+ mtlr r0
+ blr
+ .space `32-24`
+LAES_Td:
+ mflr r0
+ bcl 20,31,\$+4
+ mflr $Tbl0 ; vvvvvvvv "distance" between . and 1st data entry
+ addi $Tbl0,$Tbl0,`128-8-32+2048+256`
+ mtlr r0
+ blr
+ .space `128-32-24`
+___
+&_data_word(
+ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
+ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
+ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
+ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
+ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
+ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
+ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
+ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
+ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
+ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
+ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
+ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
+ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
+ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
+ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
+ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
+ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
+ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
+ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
+ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
+ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
+ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
+ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
+ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
+ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
+ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
+ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
+ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
+ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
+ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
+ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
+ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
+ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
+ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
+ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
+ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
+ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
+ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
+ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
+ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
+ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
+ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
+ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
+ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
+ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
+ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
+ 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
+ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
+ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
+ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
+ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
+ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
+ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
+ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
+ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
+ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
+ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
+ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
+ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
+ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
+ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
+ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
+ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
+ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
+$code.=<<___;
+.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+___
+&_data_word(
+ 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
+ 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
+ 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
+ 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
+ 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
+ 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
+ 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
+ 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
+ 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
+ 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
+ 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
+ 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
+ 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
+ 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
+ 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
+ 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
+ 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
+ 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
+ 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
+ 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
+ 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
+ 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
+ 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
+ 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
+ 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
+ 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
+ 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
+ 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
+ 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
+ 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
+ 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
+ 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
+ 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
+ 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
+ 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
+ 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
+ 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
+ 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
+ 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
+ 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
+ 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
+ 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
+ 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
+ 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
+ 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
+ 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
+ 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
+ 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
+ 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
+ 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
+ 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
+ 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
+ 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
+ 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
+ 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
+ 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
+ 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
+ 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
+ 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
+ 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
+ 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
+ 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
+ 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
+ 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
+$code.=<<___;
+.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+
+
+.globl .AES_encrypt
+.align 7
+.AES_encrypt:
+ mflr r0
+ $STU $sp,-$FRAME($sp)
+
+ $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
+ $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
+ $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
+ $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
+ $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
+ $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
+ $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
+ $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
+ $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
+ $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
+ $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
+ $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
+ $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
+ $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
+ $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
+ $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
+ $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
+ $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
+ $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
+ $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
+ $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
+
+ lwz $s0,0($inp)
+ lwz $s1,4($inp)
+ lwz $s2,8($inp)
+ lwz $s3,12($inp)
+ bl LAES_Te
+ bl Lppc_AES_encrypt_compact
+ stw $s0,0($out)
+ stw $s1,4($out)
+ stw $s2,8($out)
+ stw $s3,12($out)
+
+ $POP r0,`$FRAME-$SIZE_T*21`($sp)
+ $POP $toc,`$FRAME-$SIZE_T*20`($sp)
+ $POP r13,`$FRAME-$SIZE_T*19`($sp)
+ $POP r14,`$FRAME-$SIZE_T*18`($sp)
+ $POP r15,`$FRAME-$SIZE_T*17`($sp)
+ $POP r16,`$FRAME-$SIZE_T*16`($sp)
+ $POP r17,`$FRAME-$SIZE_T*15`($sp)
+ $POP r18,`$FRAME-$SIZE_T*14`($sp)
+ $POP r19,`$FRAME-$SIZE_T*13`($sp)
+ $POP r20,`$FRAME-$SIZE_T*12`($sp)
+ $POP r21,`$FRAME-$SIZE_T*11`($sp)
+ $POP r22,`$FRAME-$SIZE_T*10`($sp)
+ $POP r23,`$FRAME-$SIZE_T*9`($sp)
+ $POP r24,`$FRAME-$SIZE_T*8`($sp)
+ $POP r25,`$FRAME-$SIZE_T*7`($sp)
+ $POP r26,`$FRAME-$SIZE_T*6`($sp)
+ $POP r27,`$FRAME-$SIZE_T*5`($sp)
+ $POP r28,`$FRAME-$SIZE_T*4`($sp)
+ $POP r29,`$FRAME-$SIZE_T*3`($sp)
+ $POP r30,`$FRAME-$SIZE_T*2`($sp)
+ $POP r31,`$FRAME-$SIZE_T*1`($sp)
+ mtlr r0
+ addi $sp,$sp,$FRAME
+ blr
+
+.align 4
+Lppc_AES_encrypt:
+ lwz $acc00,240($key)
+ lwz $t0,0($key)
+ lwz $t1,4($key)
+ lwz $t2,8($key)
+ lwz $t3,12($key)
+ addi $Tbl1,$Tbl0,3
+ addi $Tbl2,$Tbl0,2
+ addi $Tbl3,$Tbl0,1
+ addi $acc00,$acc00,-1
+ addi $key,$key,16
+ xor $s0,$s0,$t0
+ xor $s1,$s1,$t1
+ xor $s2,$s2,$t2
+ xor $s3,$s3,$t3
+ mtctr $acc00
+.align 4
+Lenc_loop:
+ rlwinm $acc00,$s0,`32-24+3`,21,28
+ rlwinm $acc01,$s1,`32-24+3`,21,28
+ lwz $t0,0($key)
+ lwz $t1,4($key)
+ rlwinm $acc02,$s2,`32-24+3`,21,28
+ rlwinm $acc03,$s3,`32-24+3`,21,28
+ lwz $t2,8($key)
+ lwz $t3,12($key)
+ rlwinm $acc04,$s1,`32-16+3`,21,28
+ rlwinm $acc05,$s2,`32-16+3`,21,28
+ lwzx $acc00,$Tbl0,$acc00
+ lwzx $acc01,$Tbl0,$acc01
+ rlwinm $acc06,$s3,`32-16+3`,21,28
+ rlwinm $acc07,$s0,`32-16+3`,21,28
+ lwzx $acc02,$Tbl0,$acc02
+ lwzx $acc03,$Tbl0,$acc03
+ rlwinm $acc08,$s2,`32-8+3`,21,28
+ rlwinm $acc09,$s3,`32-8+3`,21,28
+ lwzx $acc04,$Tbl1,$acc04
+ lwzx $acc05,$Tbl1,$acc05
+ rlwinm $acc10,$s0,`32-8+3`,21,28
+ rlwinm $acc11,$s1,`32-8+3`,21,28
+ lwzx $acc06,$Tbl1,$acc06
+ lwzx $acc07,$Tbl1,$acc07
+ rlwinm $acc12,$s3,`0+3`,21,28
+ rlwinm $acc13,$s0,`0+3`,21,28
+ lwzx $acc08,$Tbl2,$acc08
+ lwzx $acc09,$Tbl2,$acc09
+ rlwinm $acc14,$s1,`0+3`,21,28
+ rlwinm $acc15,$s2,`0+3`,21,28
+ lwzx $acc10,$Tbl2,$acc10
+ lwzx $acc11,$Tbl2,$acc11
+ xor $t0,$t0,$acc00
+ xor $t1,$t1,$acc01
+ lwzx $acc12,$Tbl3,$acc12
+ lwzx $acc13,$Tbl3,$acc13
+ xor $t2,$t2,$acc02
+ xor $t3,$t3,$acc03
+ lwzx $acc14,$Tbl3,$acc14
+ lwzx $acc15,$Tbl3,$acc15
+ xor $t0,$t0,$acc04
+ xor $t1,$t1,$acc05
+ xor $t2,$t2,$acc06
+ xor $t3,$t3,$acc07
+ xor $t0,$t0,$acc08
+ xor $t1,$t1,$acc09
+ xor $t2,$t2,$acc10
+ xor $t3,$t3,$acc11
+ xor $s0,$t0,$acc12
+ xor $s1,$t1,$acc13
+ xor $s2,$t2,$acc14
+ xor $s3,$t3,$acc15
+ addi $key,$key,16
+ bdnz- Lenc_loop
+
+ addi $Tbl2,$Tbl0,2048
+ nop
+ lwz $acc08,`2048+0`($Tbl0) ! prefetch Te4
+ lwz $acc09,`2048+32`($Tbl0)
+ lwz $acc10,`2048+64`($Tbl0)
+ lwz $acc11,`2048+96`($Tbl0)
+ lwz $acc08,`2048+128`($Tbl0)
+ lwz $acc09,`2048+160`($Tbl0)
+ lwz $acc10,`2048+192`($Tbl0)
+ lwz $acc11,`2048+224`($Tbl0)
+ rlwinm $acc00,$s0,`32-24`,24,31
+ rlwinm $acc01,$s1,`32-24`,24,31
+ lwz $t0,0($key)
+ lwz $t1,4($key)
+ rlwinm $acc02,$s2,`32-24`,24,31
+ rlwinm $acc03,$s3,`32-24`,24,31
+ lwz $t2,8($key)
+ lwz $t3,12($key)
+ rlwinm $acc04,$s1,`32-16`,24,31
+ rlwinm $acc05,$s2,`32-16`,24,31
+ lbzx $acc00,$Tbl2,$acc00
+ lbzx $acc01,$Tbl2,$acc01
+ rlwinm $acc06,$s3,`32-16`,24,31
+ rlwinm $acc07,$s0,`32-16`,24,31
+ lbzx $acc02,$Tbl2,$acc02
+ lbzx $acc03,$Tbl2,$acc03
+ rlwinm $acc08,$s2,`32-8`,24,31
+ rlwinm $acc09,$s3,`32-8`,24,31
+ lbzx $acc04,$Tbl2,$acc04
+ lbzx $acc05,$Tbl2,$acc05
+ rlwinm $acc10,$s0,`32-8`,24,31
+ rlwinm $acc11,$s1,`32-8`,24,31
+ lbzx $acc06,$Tbl2,$acc06
+ lbzx $acc07,$Tbl2,$acc07
+ rlwinm $acc12,$s3,`0`,24,31
+ rlwinm $acc13,$s0,`0`,24,31
+ lbzx $acc08,$Tbl2,$acc08
+ lbzx $acc09,$Tbl2,$acc09
+ rlwinm $acc14,$s1,`0`,24,31
+ rlwinm $acc15,$s2,`0`,24,31
+ lbzx $acc10,$Tbl2,$acc10
+ lbzx $acc11,$Tbl2,$acc11
+ rlwinm $s0,$acc00,24,0,7
+ rlwinm $s1,$acc01,24,0,7
+ lbzx $acc12,$Tbl2,$acc12
+ lbzx $acc13,$Tbl2,$acc13
+ rlwinm $s2,$acc02,24,0,7
+ rlwinm $s3,$acc03,24,0,7
+ lbzx $acc14,$Tbl2,$acc14
+ lbzx $acc15,$Tbl2,$acc15
+ rlwimi $s0,$acc04,16,8,15
+ rlwimi $s1,$acc05,16,8,15
+ rlwimi $s2,$acc06,16,8,15
+ rlwimi $s3,$acc07,16,8,15
+ rlwimi $s0,$acc08,8,16,23
+ rlwimi $s1,$acc09,8,16,23
+ rlwimi $s2,$acc10,8,16,23
+ rlwimi $s3,$acc11,8,16,23
+ or $s0,$s0,$acc12
+ or $s1,$s1,$acc13
+ or $s2,$s2,$acc14
+ or $s3,$s3,$acc15
+ xor $s0,$s0,$t0
+ xor $s1,$s1,$t1
+ xor $s2,$s2,$t2
+ xor $s3,$s3,$t3
+ blr
+
+.align 4
+Lppc_AES_encrypt_compact:
+ lwz $acc00,240($key)
+ lwz $t0,0($key)
+ lwz $t1,4($key)
+ lwz $t2,8($key)
+ lwz $t3,12($key)
+ addi $Tbl1,$Tbl0,2048
+ lis $mask80,0x8080
+ lis $mask1b,0x1b1b
+ addi $key,$key,16
+ ori $mask80,$mask80,0x8080
+ ori $mask1b,$mask1b,0x1b1b
+ mtctr $acc00
+.align 4
+Lenc_compact_loop:
+ xor $s0,$s0,$t0
+ xor $s1,$s1,$t1
+ xor $s2,$s2,$t2
+ xor $s3,$s3,$t3
+ rlwinm $acc00,$s0,`32-24`,24,31
+ rlwinm $acc01,$s1,`32-24`,24,31
+ rlwinm $acc02,$s2,`32-24`,24,31
+ rlwinm $acc03,$s3,`32-24`,24,31
+ lbzx $acc00,$Tbl1,$acc00
+ lbzx $acc01,$Tbl1,$acc01
+ rlwinm $acc04,$s1,`32-16`,24,31
+ rlwinm $acc05,$s2,`32-16`,24,31
+ lbzx $acc02,$Tbl1,$acc02
+ lbzx $acc03,$Tbl1,$acc03
+ rlwinm $acc06,$s3,`32-16`,24,31
+ rlwinm $acc07,$s0,`32-16`,24,31
+ lbzx $acc04,$Tbl1,$acc04
+ lbzx $acc05,$Tbl1,$acc05
+ rlwinm $acc08,$s2,`32-8`,24,31
+ rlwinm $acc09,$s3,`32-8`,24,31
+ lbzx $acc06,$Tbl1,$acc06
+ lbzx $acc07,$Tbl1,$acc07
+ rlwinm $acc10,$s0,`32-8`,24,31
+ rlwinm $acc11,$s1,`32-8`,24,31
+ lbzx $acc08,$Tbl1,$acc08
+ lbzx $acc09,$Tbl1,$acc09
+ rlwinm $acc12,$s3,`0`,24,31
+ rlwinm $acc13,$s0,`0`,24,31
+ lbzx $acc10,$Tbl1,$acc10
+ lbzx $acc11,$Tbl1,$acc11
+ rlwinm $acc14,$s1,`0`,24,31
+ rlwinm $acc15,$s2,`0`,24,31
+ lbzx $acc12,$Tbl1,$acc12
+ lbzx $acc13,$Tbl1,$acc13
+ rlwinm $s0,$acc00,24,0,7
+ rlwinm $s1,$acc01,24,0,7
+ lbzx $acc14,$Tbl1,$acc14
+ lbzx $acc15,$Tbl1,$acc15
+ rlwinm $s2,$acc02,24,0,7
+ rlwinm $s3,$acc03,24,0,7
+ rlwimi $s0,$acc04,16,8,15
+ rlwimi $s1,$acc05,16,8,15
+ rlwimi $s2,$acc06,16,8,15
+ rlwimi $s3,$acc07,16,8,15
+ rlwimi $s0,$acc08,8,16,23
+ rlwimi $s1,$acc09,8,16,23
+ rlwimi $s2,$acc10,8,16,23
+ rlwimi $s3,$acc11,8,16,23
+ lwz $t0,0($key)
+ lwz $t1,4($key)
+ or $s0,$s0,$acc12
+ or $s1,$s1,$acc13
+ lwz $t2,8($key)
+ lwz $t3,12($key)
+ or $s2,$s2,$acc14
+ or $s3,$s3,$acc15
+
+ addi $key,$key,16
+ bdz Lenc_compact_done
+
+ and $acc00,$s0,$mask80 # r1=r0&0x80808080
+ and $acc01,$s1,$mask80
+ and $acc02,$s2,$mask80
+ and $acc03,$s3,$mask80
+ srwi $acc04,$acc00,7 # r1>>7
+ srwi $acc05,$acc01,7
+ srwi $acc06,$acc02,7
+ srwi $acc07,$acc03,7
+ andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
+ andc $acc09,$s1,$mask80
+ andc $acc10,$s2,$mask80
+ andc $acc11,$s3,$mask80
+ sub $acc00,$acc00,$acc04 # r1-(r1>>7)
+ sub $acc01,$acc01,$acc05
+ sub $acc02,$acc02,$acc06
+ sub $acc03,$acc03,$acc07
+ add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
+ add $acc09,$acc09,$acc09
+ add $acc10,$acc10,$acc10
+ add $acc11,$acc11,$acc11
+ and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
+ and $acc01,$acc01,$mask1b
+ and $acc02,$acc02,$mask1b
+ and $acc03,$acc03,$mask1b
+ xor $acc00,$acc00,$acc08 # r2
+ xor $acc01,$acc01,$acc09
+ xor $acc02,$acc02,$acc10
+ xor $acc03,$acc03,$acc11
+
+ rotlwi $acc12,$s0,16 # ROTATE(r0,16)
+ rotlwi $acc13,$s1,16
+ rotlwi $acc14,$s2,16
+ rotlwi $acc15,$s3,16
+ xor $s0,$s0,$acc00 # r0^r2
+ xor $s1,$s1,$acc01
+ xor $s2,$s2,$acc02
+ xor $s3,$s3,$acc03
+ rotrwi $s0,$s0,24 # ROTATE(r2^r0,24)
+ rotrwi $s1,$s1,24
+ rotrwi $s2,$s2,24
+ rotrwi $s3,$s3,24
+ xor $s0,$s0,$acc00 # ROTATE(r2^r0,24)^r2
+ xor $s1,$s1,$acc01
+ xor $s2,$s2,$acc02
+ xor $s3,$s3,$acc03
+ rotlwi $acc08,$acc12,8 # ROTATE(r0,24)
+ rotlwi $acc09,$acc13,8
+ rotlwi $acc10,$acc14,8
+ rotlwi $acc11,$acc15,8
+ xor $s0,$s0,$acc12 #
+ xor $s1,$s1,$acc13
+ xor $s2,$s2,$acc14
+ xor $s3,$s3,$acc15
+ xor $s0,$s0,$acc08 #
+ xor $s1,$s1,$acc09
+ xor $s2,$s2,$acc10
+ xor $s3,$s3,$acc11
+
+ b Lenc_compact_loop
+.align 4
+Lenc_compact_done:
+ xor $s0,$s0,$t0
+ xor $s1,$s1,$t1
+ xor $s2,$s2,$t2
+ xor $s3,$s3,$t3
+ blr
+
+.globl .AES_decrypt
+.align 7
+.AES_decrypt:
+ mflr r0
+ $STU $sp,-$FRAME($sp)
+
+ $PUSH r0,`$FRAME-$SIZE_T*21`($sp)
+ $PUSH $toc,`$FRAME-$SIZE_T*20`($sp)
+ $PUSH r13,`$FRAME-$SIZE_T*19`($sp)
+ $PUSH r14,`$FRAME-$SIZE_T*18`($sp)
+ $PUSH r15,`$FRAME-$SIZE_T*17`($sp)
+ $PUSH r16,`$FRAME-$SIZE_T*16`($sp)
+ $PUSH r17,`$FRAME-$SIZE_T*15`($sp)
+ $PUSH r18,`$FRAME-$SIZE_T*14`($sp)
+ $PUSH r19,`$FRAME-$SIZE_T*13`($sp)
+ $PUSH r20,`$FRAME-$SIZE_T*12`($sp)
+ $PUSH r21,`$FRAME-$SIZE_T*11`($sp)
+ $PUSH r22,`$FRAME-$SIZE_T*10`($sp)
+ $PUSH r23,`$FRAME-$SIZE_T*9`($sp)
+ $PUSH r24,`$FRAME-$SIZE_T*8`($sp)
+ $PUSH r25,`$FRAME-$SIZE_T*7`($sp)
+ $PUSH r26,`$FRAME-$SIZE_T*6`($sp)
+ $PUSH r27,`$FRAME-$SIZE_T*5`($sp)
+ $PUSH r28,`$FRAME-$SIZE_T*4`($sp)
+ $PUSH r29,`$FRAME-$SIZE_T*3`($sp)
+ $PUSH r30,`$FRAME-$SIZE_T*2`($sp)
+ $PUSH r31,`$FRAME-$SIZE_T*1`($sp)
+
+ lwz $s0,0($inp)
+ lwz $s1,4($inp)
+ lwz $s2,8($inp)
+ lwz $s3,12($inp)
+ bl LAES_Td
+ bl Lppc_AES_decrypt_compact
+ stw $s0,0($out)
+ stw $s1,4($out)
+ stw $s2,8($out)
+ stw $s3,12($out)
+
+ $POP r0,`$FRAME-$SIZE_T*21`($sp)
+ $POP $toc,`$FRAME-$SIZE_T*20`($sp)
+ $POP r13,`$FRAME-$SIZE_T*19`($sp)
+ $POP r14,`$FRAME-$SIZE_T*18`($sp)
+ $POP r15,`$FRAME-$SIZE_T*17`($sp)
+ $POP r16,`$FRAME-$SIZE_T*16`($sp)
+ $POP r17,`$FRAME-$SIZE_T*15`($sp)
+ $POP r18,`$FRAME-$SIZE_T*14`($sp)
+ $POP r19,`$FRAME-$SIZE_T*13`($sp)
+ $POP r20,`$FRAME-$SIZE_T*12`($sp)
+ $POP r21,`$FRAME-$SIZE_T*11`($sp)
+ $POP r22,`$FRAME-$SIZE_T*10`($sp)
+ $POP r23,`$FRAME-$SIZE_T*9`($sp)
+ $POP r24,`$FRAME-$SIZE_T*8`($sp)
+ $POP r25,`$FRAME-$SIZE_T*7`($sp)
+ $POP r26,`$FRAME-$SIZE_T*6`($sp)
+ $POP r27,`$FRAME-$SIZE_T*5`($sp)
+ $POP r28,`$FRAME-$SIZE_T*4`($sp)
+ $POP r29,`$FRAME-$SIZE_T*3`($sp)
+ $POP r30,`$FRAME-$SIZE_T*2`($sp)
+ $POP r31,`$FRAME-$SIZE_T*1`($sp)
+ mtlr r0
+ addi $sp,$sp,$FRAME
+ blr
+
+.align 4
+Lppc_AES_decrypt:
+ lwz $acc00,240($key)
+ lwz $t0,0($key)
+ lwz $t1,4($key)
+ lwz $t2,8($key)
+ lwz $t3,12($key)
+ addi $Tbl1,$Tbl0,3
+ addi $Tbl2,$Tbl0,2
+ addi $Tbl3,$Tbl0,1
+ addi $acc00,$acc00,-1
+ addi $key,$key,16
+ xor $s0,$s0,$t0
+ xor $s1,$s1,$t1
+ xor $s2,$s2,$t2
+ xor $s3,$s3,$t3
+ mtctr $acc00
+.align 4
+Ldec_loop:
+ rlwinm $acc00,$s0,`32-24+3`,21,28
+ rlwinm $acc01,$s1,`32-24+3`,21,28
+ lwz $t0,0($key)
+ lwz $t1,4($key)
+ rlwinm $acc02,$s2,`32-24+3`,21,28
+ rlwinm $acc03,$s3,`32-24+3`,21,28
+ lwz $t2,8($key)
+ lwz $t3,12($key)
+ rlwinm $acc04,$s3,`32-16+3`,21,28
+ rlwinm $acc05,$s0,`32-16+3`,21,28
+ lwzx $acc00,$Tbl0,$acc00
+ lwzx $acc01,$Tbl0,$acc01
+ rlwinm $acc06,$s1,`32-16+3`,21,28
+ rlwinm $acc07,$s2,`32-16+3`,21,28
+ lwzx $acc02,$Tbl0,$acc02
+ lwzx $acc03,$Tbl0,$acc03
+ rlwinm $acc08,$s2,`32-8+3`,21,28
+ rlwinm $acc09,$s3,`32-8+3`,21,28
+ lwzx $acc04,$Tbl1,$acc04
+ lwzx $acc05,$Tbl1,$acc05
+ rlwinm $acc10,$s0,`32-8+3`,21,28
+ rlwinm $acc11,$s1,`32-8+3`,21,28
+ lwzx $acc06,$Tbl1,$acc06
+ lwzx $acc07,$Tbl1,$acc07
+ rlwinm $acc12,$s1,`0+3`,21,28
+ rlwinm $acc13,$s2,`0+3`,21,28
+ lwzx $acc08,$Tbl2,$acc08
+ lwzx $acc09,$Tbl2,$acc09
+ rlwinm $acc14,$s3,`0+3`,21,28
+ rlwinm $acc15,$s0,`0+3`,21,28
+ lwzx $acc10,$Tbl2,$acc10
+ lwzx $acc11,$Tbl2,$acc11
+ xor $t0,$t0,$acc00
+ xor $t1,$t1,$acc01
+ lwzx $acc12,$Tbl3,$acc12
+ lwzx $acc13,$Tbl3,$acc13
+ xor $t2,$t2,$acc02
+ xor $t3,$t3,$acc03
+ lwzx $acc14,$Tbl3,$acc14
+ lwzx $acc15,$Tbl3,$acc15
+ xor $t0,$t0,$acc04
+ xor $t1,$t1,$acc05
+ xor $t2,$t2,$acc06
+ xor $t3,$t3,$acc07
+ xor $t0,$t0,$acc08
+ xor $t1,$t1,$acc09
+ xor $t2,$t2,$acc10
+ xor $t3,$t3,$acc11
+ xor $s0,$t0,$acc12
+ xor $s1,$t1,$acc13
+ xor $s2,$t2,$acc14
+ xor $s3,$t3,$acc15
+ addi $key,$key,16
+ bdnz- Ldec_loop
+
+ addi $Tbl2,$Tbl0,2048
+ nop
+ lwz $acc08,`2048+0`($Tbl0) ! prefetch Td4
+ lwz $acc09,`2048+32`($Tbl0)
+ lwz $acc10,`2048+64`($Tbl0)
+ lwz $acc11,`2048+96`($Tbl0)
+ lwz $acc08,`2048+128`($Tbl0)
+ lwz $acc09,`2048+160`($Tbl0)
+ lwz $acc10,`2048+192`($Tbl0)
+ lwz $acc11,`2048+224`($Tbl0)
+ rlwinm $acc00,$s0,`32-24`,24,31
+ rlwinm $acc01,$s1,`32-24`,24,31
+ lwz $t0,0($key)
+ lwz $t1,4($key)
+ rlwinm $acc02,$s2,`32-24`,24,31
+ rlwinm $acc03,$s3,`32-24`,24,31
+ lwz $t2,8($key)
+ lwz $t3,12($key)
+ rlwinm $acc04,$s3,`32-16`,24,31
+ rlwinm $acc05,$s0,`32-16`,24,31
+ lbzx $acc00,$Tbl2,$acc00
+ lbzx $acc01,$Tbl2,$acc01
+ rlwinm $acc06,$s1,`32-16`,24,31
+ rlwinm $acc07,$s2,`32-16`,24,31
+ lbzx $acc02,$Tbl2,$acc02
+ lbzx $acc03,$Tbl2,$acc03
+ rlwinm $acc08,$s2,`32-8`,24,31
+ rlwinm $acc09,$s3,`32-8`,24,31
+ lbzx $acc04,$Tbl2,$acc04
+ lbzx $acc05,$Tbl2,$acc05
+ rlwinm $acc10,$s0,`32-8`,24,31
+ rlwinm $acc11,$s1,`32-8`,24,31
+ lbzx $acc06,$Tbl2,$acc06
+ lbzx $acc07,$Tbl2,$acc07
+ rlwinm $acc12,$s1,`0`,24,31
+ rlwinm $acc13,$s2,`0`,24,31
+ lbzx $acc08,$Tbl2,$acc08
+ lbzx $acc09,$Tbl2,$acc09
+ rlwinm $acc14,$s3,`0`,24,31
+ rlwinm $acc15,$s0,`0`,24,31
+ lbzx $acc10,$Tbl2,$acc10
+ lbzx $acc11,$Tbl2,$acc11
+ rlwinm $s0,$acc00,24,0,7
+ rlwinm $s1,$acc01,24,0,7
+ lbzx $acc12,$Tbl2,$acc12
+ lbzx $acc13,$Tbl2,$acc13
+ rlwinm $s2,$acc02,24,0,7
+ rlwinm $s3,$acc03,24,0,7
+ lbzx $acc14,$Tbl2,$acc14
+ lbzx $acc15,$Tbl2,$acc15
+ rlwimi $s0,$acc04,16,8,15
+ rlwimi $s1,$acc05,16,8,15
+ rlwimi $s2,$acc06,16,8,15
+ rlwimi $s3,$acc07,16,8,15
+ rlwimi $s0,$acc08,8,16,23
+ rlwimi $s1,$acc09,8,16,23
+ rlwimi $s2,$acc10,8,16,23
+ rlwimi $s3,$acc11,8,16,23
+ or $s0,$s0,$acc12
+ or $s1,$s1,$acc13
+ or $s2,$s2,$acc14
+ or $s3,$s3,$acc15
+ xor $s0,$s0,$t0
+ xor $s1,$s1,$t1
+ xor $s2,$s2,$t2
+ xor $s3,$s3,$t3
+ blr
+
+.align 4
+Lppc_AES_decrypt_compact:
+ lwz $acc00,240($key)
+ lwz $t0,0($key)
+ lwz $t1,4($key)
+ lwz $t2,8($key)
+ lwz $t3,12($key)
+ addi $Tbl1,$Tbl0,2048
+ lis $mask80,0x8080
+ lis $mask1b,0x1b1b
+ addi $key,$key,16
+ ori $mask80,$mask80,0x8080
+ ori $mask1b,$mask1b,0x1b1b
+___
+$code.=<<___ if ($SIZE_T==8);
+ insrdi $mask80,$mask80,32,0
+ insrdi $mask1b,$mask1b,32,0
+___
+$code.=<<___;
+ mtctr $acc00
+.align 4
+Ldec_compact_loop:
+ xor $s0,$s0,$t0
+ xor $s1,$s1,$t1
+ xor $s2,$s2,$t2
+ xor $s3,$s3,$t3
+ rlwinm $acc00,$s0,`32-24`,24,31
+ rlwinm $acc01,$s1,`32-24`,24,31
+ rlwinm $acc02,$s2,`32-24`,24,31
+ rlwinm $acc03,$s3,`32-24`,24,31
+ lbzx $acc00,$Tbl1,$acc00
+ lbzx $acc01,$Tbl1,$acc01
+ rlwinm $acc04,$s3,`32-16`,24,31
+ rlwinm $acc05,$s0,`32-16`,24,31
+ lbzx $acc02,$Tbl1,$acc02
+ lbzx $acc03,$Tbl1,$acc03
+ rlwinm $acc06,$s1,`32-16`,24,31
+ rlwinm $acc07,$s2,`32-16`,24,31
+ lbzx $acc04,$Tbl1,$acc04
+ lbzx $acc05,$Tbl1,$acc05
+ rlwinm $acc08,$s2,`32-8`,24,31
+ rlwinm $acc09,$s3,`32-8`,24,31
+ lbzx $acc06,$Tbl1,$acc06
+ lbzx $acc07,$Tbl1,$acc07
+ rlwinm $acc10,$s0,`32-8`,24,31
+ rlwinm $acc11,$s1,`32-8`,24,31
+ lbzx $acc08,$Tbl1,$acc08
+ lbzx $acc09,$Tbl1,$acc09
+ rlwinm $acc12,$s1,`0`,24,31
+ rlwinm $acc13,$s2,`0`,24,31
+ lbzx $acc10,$Tbl1,$acc10
+ lbzx $acc11,$Tbl1,$acc11
+ rlwinm $acc14,$s3,`0`,24,31
+ rlwinm $acc15,$s0,`0`,24,31
+ lbzx $acc12,$Tbl1,$acc12
+ lbzx $acc13,$Tbl1,$acc13
+ rlwinm $s0,$acc00,24,0,7
+ rlwinm $s1,$acc01,24,0,7
+ lbzx $acc14,$Tbl1,$acc14
+ lbzx $acc15,$Tbl1,$acc15
+ rlwinm $s2,$acc02,24,0,7
+ rlwinm $s3,$acc03,24,0,7
+ rlwimi $s0,$acc04,16,8,15
+ rlwimi $s1,$acc05,16,8,15
+ rlwimi $s2,$acc06,16,8,15
+ rlwimi $s3,$acc07,16,8,15
+ rlwimi $s0,$acc08,8,16,23
+ rlwimi $s1,$acc09,8,16,23
+ rlwimi $s2,$acc10,8,16,23
+ rlwimi $s3,$acc11,8,16,23
+ lwz $t0,0($key)
+ lwz $t1,4($key)
+ or $s0,$s0,$acc12
+ or $s1,$s1,$acc13
+ lwz $t2,8($key)
+ lwz $t3,12($key)
+ or $s2,$s2,$acc14
+ or $s3,$s3,$acc15
+
+ addi $key,$key,16
+ bdz Ldec_compact_done
+___
+$code.=<<___ if ($SIZE_T==8);
+ # vectorized permutation improves decrypt performance by 10%
+ insrdi $s0,$s1,32,0
+ insrdi $s2,$s3,32,0
+
+ and $acc00,$s0,$mask80 # r1=r0&0x80808080
+ and $acc02,$s2,$mask80
+ srdi $acc04,$acc00,7 # r1>>7
+ srdi $acc06,$acc02,7
+ andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
+ andc $acc10,$s2,$mask80
+ sub $acc00,$acc00,$acc04 # r1-(r1>>7)
+ sub $acc02,$acc02,$acc06
+ add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
+ add $acc10,$acc10,$acc10
+ and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
+ and $acc02,$acc02,$mask1b
+ xor $acc00,$acc00,$acc08 # r2
+ xor $acc02,$acc02,$acc10
+
+ and $acc04,$acc00,$mask80 # r1=r2&0x80808080
+ and $acc06,$acc02,$mask80
+ srdi $acc08,$acc04,7 # r1>>7
+ srdi $acc10,$acc06,7
+ andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
+ andc $acc14,$acc02,$mask80
+ sub $acc04,$acc04,$acc08 # r1-(r1>>7)
+ sub $acc06,$acc06,$acc10
+ add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
+ add $acc14,$acc14,$acc14
+ and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
+ and $acc06,$acc06,$mask1b
+ xor $acc04,$acc04,$acc12 # r4
+ xor $acc06,$acc06,$acc14
+
+ and $acc08,$acc04,$mask80 # r1=r4&0x80808080
+ and $acc10,$acc06,$mask80
+ srdi $acc12,$acc08,7 # r1>>7
+ srdi $acc14,$acc10,7
+ sub $acc08,$acc08,$acc12 # r1-(r1>>7)
+ sub $acc10,$acc10,$acc14
+ andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
+ andc $acc14,$acc06,$mask80
+ add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
+ add $acc14,$acc14,$acc14
+ and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
+ and $acc10,$acc10,$mask1b
+ xor $acc08,$acc08,$acc12 # r8
+ xor $acc10,$acc10,$acc14
+
+ xor $acc00,$acc00,$s0 # r2^r0
+ xor $acc02,$acc02,$s2
+ xor $acc04,$acc04,$s0 # r4^r0
+ xor $acc06,$acc06,$s2
+
+ extrdi $acc01,$acc00,32,0
+ extrdi $acc03,$acc02,32,0
+ extrdi $acc05,$acc04,32,0
+ extrdi $acc07,$acc06,32,0
+ extrdi $acc09,$acc08,32,0
+ extrdi $acc11,$acc10,32,0
+___
+$code.=<<___ if ($SIZE_T==4);
+ and $acc00,$s0,$mask80 # r1=r0&0x80808080
+ and $acc01,$s1,$mask80
+ and $acc02,$s2,$mask80
+ and $acc03,$s3,$mask80
+ srwi $acc04,$acc00,7 # r1>>7
+ srwi $acc05,$acc01,7
+ srwi $acc06,$acc02,7
+ srwi $acc07,$acc03,7
+ andc $acc08,$s0,$mask80 # r0&0x7f7f7f7f
+ andc $acc09,$s1,$mask80
+ andc $acc10,$s2,$mask80
+ andc $acc11,$s3,$mask80
+ sub $acc00,$acc00,$acc04 # r1-(r1>>7)
+ sub $acc01,$acc01,$acc05
+ sub $acc02,$acc02,$acc06
+ sub $acc03,$acc03,$acc07
+ add $acc08,$acc08,$acc08 # (r0&0x7f7f7f7f)<<1
+ add $acc09,$acc09,$acc09
+ add $acc10,$acc10,$acc10
+ add $acc11,$acc11,$acc11
+ and $acc00,$acc00,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
+ and $acc01,$acc01,$mask1b
+ and $acc02,$acc02,$mask1b
+ and $acc03,$acc03,$mask1b
+ xor $acc00,$acc00,$acc08 # r2
+ xor $acc01,$acc01,$acc09
+ xor $acc02,$acc02,$acc10
+ xor $acc03,$acc03,$acc11
+
+ and $acc04,$acc00,$mask80 # r1=r2&0x80808080
+ and $acc05,$acc01,$mask80
+ and $acc06,$acc02,$mask80
+ and $acc07,$acc03,$mask80
+ srwi $acc08,$acc04,7 # r1>>7
+ srwi $acc09,$acc05,7
+ srwi $acc10,$acc06,7
+ srwi $acc11,$acc07,7
+ andc $acc12,$acc00,$mask80 # r2&0x7f7f7f7f
+ andc $acc13,$acc01,$mask80
+ andc $acc14,$acc02,$mask80
+ andc $acc15,$acc03,$mask80
+ sub $acc04,$acc04,$acc08 # r1-(r1>>7)
+ sub $acc05,$acc05,$acc09
+ sub $acc06,$acc06,$acc10
+ sub $acc07,$acc07,$acc11
+ add $acc12,$acc12,$acc12 # (r2&0x7f7f7f7f)<<1
+ add $acc13,$acc13,$acc13
+ add $acc14,$acc14,$acc14
+ add $acc15,$acc15,$acc15
+ and $acc04,$acc04,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
+ and $acc05,$acc05,$mask1b
+ and $acc06,$acc06,$mask1b
+ and $acc07,$acc07,$mask1b
+ xor $acc04,$acc04,$acc12 # r4
+ xor $acc05,$acc05,$acc13
+ xor $acc06,$acc06,$acc14
+ xor $acc07,$acc07,$acc15
+
+ and $acc08,$acc04,$mask80 # r1=r4&0x80808080
+ and $acc09,$acc05,$mask80
+ and $acc10,$acc06,$mask80
+ and $acc11,$acc07,$mask80
+ srwi $acc12,$acc08,7 # r1>>7
+ srwi $acc13,$acc09,7
+ srwi $acc14,$acc10,7
+ srwi $acc15,$acc11,7
+ sub $acc08,$acc08,$acc12 # r1-(r1>>7)
+ sub $acc09,$acc09,$acc13
+ sub $acc10,$acc10,$acc14
+ sub $acc11,$acc11,$acc15
+ andc $acc12,$acc04,$mask80 # r4&0x7f7f7f7f
+ andc $acc13,$acc05,$mask80
+ andc $acc14,$acc06,$mask80
+ andc $acc15,$acc07,$mask80
+ add $acc12,$acc12,$acc12 # (r4&0x7f7f7f7f)<<1
+ add $acc13,$acc13,$acc13
+ add $acc14,$acc14,$acc14
+ add $acc15,$acc15,$acc15
+ and $acc08,$acc08,$mask1b # (r1-(r1>>7))&0x1b1b1b1b
+ and $acc09,$acc09,$mask1b
+ and $acc10,$acc10,$mask1b
+ and $acc11,$acc11,$mask1b
+ xor $acc08,$acc08,$acc12 # r8
+ xor $acc09,$acc09,$acc13
+ xor $acc10,$acc10,$acc14
+ xor $acc11,$acc11,$acc15
+
+ xor $acc00,$acc00,$s0 # r2^r0
+ xor $acc01,$acc01,$s1
+ xor $acc02,$acc02,$s2
+ xor $acc03,$acc03,$s3
+ xor $acc04,$acc04,$s0 # r4^r0
+ xor $acc05,$acc05,$s1
+ xor $acc06,$acc06,$s2
+ xor $acc07,$acc07,$s3
+___
+$code.=<<___;
+ rotrwi $s0,$s0,8 # = ROTATE(r0,8)
+ rotrwi $s1,$s1,8
+ rotrwi $s2,$s2,8
+ rotrwi $s3,$s3,8
+ xor $s0,$s0,$acc00 # ^= r2^r0
+ xor $s1,$s1,$acc01
+ xor $s2,$s2,$acc02
+ xor $s3,$s3,$acc03
+ xor $acc00,$acc00,$acc08
+ xor $acc01,$acc01,$acc09
+ xor $acc02,$acc02,$acc10
+ xor $acc03,$acc03,$acc11
+ xor $s0,$s0,$acc04 # ^= r4^r0
+ xor $s1,$s1,$acc05
+ xor $s2,$s2,$acc06
+ xor $s3,$s3,$acc07
+ rotrwi $acc00,$acc00,24
+ rotrwi $acc01,$acc01,24
+ rotrwi $acc02,$acc02,24
+ rotrwi $acc03,$acc03,24
+ xor $acc04,$acc04,$acc08
+ xor $acc05,$acc05,$acc09
+ xor $acc06,$acc06,$acc10
+ xor $acc07,$acc07,$acc11
+ xor $s0,$s0,$acc08 # ^= r8 [^((r4^r0)^(r2^r0)=r4^r2)]
+ xor $s1,$s1,$acc09
+ xor $s2,$s2,$acc10
+ xor $s3,$s3,$acc11
+ rotrwi $acc04,$acc04,16
+ rotrwi $acc05,$acc05,16
+ rotrwi $acc06,$acc06,16
+ rotrwi $acc07,$acc07,16
+ xor $s0,$s0,$acc00 # ^= ROTATE(r8^r2^r0,24)
+ xor $s1,$s1,$acc01
+ xor $s2,$s2,$acc02
+ xor $s3,$s3,$acc03
+ rotrwi $acc08,$acc08,8
+ rotrwi $acc09,$acc09,8
+ rotrwi $acc10,$acc10,8
+ rotrwi $acc11,$acc11,8
+ xor $s0,$s0,$acc04 # ^= ROTATE(r8^r4^r0,16)
+ xor $s1,$s1,$acc05
+ xor $s2,$s2,$acc06
+ xor $s3,$s3,$acc07
+ xor $s0,$s0,$acc08 # ^= ROTATE(r8,8)
+ xor $s1,$s1,$acc09
+ xor $s2,$s2,$acc10
+ xor $s3,$s3,$acc11
+
+ b Ldec_compact_loop
+.align 4
+Ldec_compact_done:
+ xor $s0,$s0,$t0
+ xor $s1,$s1,$t1
+ xor $s2,$s2,$t2
+ xor $s3,$s3,$t3
+ blr
+.long 0
+.asciz "AES for PPC, CRYPTOGAMS by <appro\@openssl.org>"
+.align 7
+___
+
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+print $code;
+close STDOUT;
diff --git a/crypto/aes/asm/aes-s390x.pl b/crypto/aes/asm/aes-s390x.pl
new file mode 100755
index 000000000000..4b27afd92fc4
--- /dev/null
+++ b/crypto/aes/asm/aes-s390x.pl
@@ -0,0 +1,1333 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# AES for s390x.
+
+# April 2007.
+#
+# Software performance improvement over gcc-generated code is ~70% and
+# in absolute terms is ~73 cycles per byte processed with 128-bit key.
+# You're likely to exclaim "why so slow?" Keep in mind that z-CPUs are
+# *strictly* in-order execution and issued instruction [in this case
+# load value from memory is critical] has to complete before execution
+# flow proceeds. S-boxes are compressed to 2KB[+256B].
+#
+# As for hardware acceleration support. It's basically a "teaser," as
+# it can and should be improved in several ways. Most notably support
+# for CBC is not utilized, nor multiple blocks are ever processed.
+# Then software key schedule can be postponed till hardware support
+# detection... Performance improvement over assembler is reportedly
+# ~2.5x, but can reach >8x [naturally on larger chunks] if proper
+# support is implemented.
+
+# May 2007.
+#
+# Implement AES_set_[en|de]crypt_key. Key schedule setup is avoided
+# for 128-bit keys, if hardware support is detected.
+
+# Januray 2009.
+#
+# Add support for hardware AES192/256 and reschedule instructions to
+# minimize/avoid Address Generation Interlock hazard and to favour
+# dual-issue z10 pipeline. This gave ~25% improvement on z10 and
+# almost 50% on z9. The gain is smaller on z10, because being dual-
+# issue z10 makes it improssible to eliminate the interlock condition:
+# critial path is not long enough. Yet it spends ~24 cycles per byte
+# processed with 128-bit key.
+#
+# Unlike previous version hardware support detection takes place only
+# at the moment of key schedule setup, which is denoted in key->rounds.
+# This is done, because deferred key setup can't be made MT-safe, not
+# for key lengthes longer than 128 bits.
+#
+# Add AES_cbc_encrypt, which gives incredible performance improvement,
+# it was measured to be ~6.6x. It's less than previously mentioned 8x,
+# because software implementation was optimized.
+
+$softonly=0; # allow hardware support
+
+$t0="%r0"; $mask="%r0";
+$t1="%r1";
+$t2="%r2"; $inp="%r2";
+$t3="%r3"; $out="%r3"; $bits="%r3";
+$key="%r4";
+$i1="%r5";
+$i2="%r6";
+$i3="%r7";
+$s0="%r8";
+$s1="%r9";
+$s2="%r10";
+$s3="%r11";
+$tbl="%r12";
+$rounds="%r13";
+$ra="%r14";
+$sp="%r15";
+
+sub _data_word()
+{ my $i;
+ while(defined($i=shift)) { $code.=sprintf".long\t0x%08x,0x%08x\n",$i,$i; }
+}
+
+$code=<<___;
+.text
+
+.type AES_Te,\@object
+.align 256
+AES_Te:
+___
+&_data_word(
+ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
+ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
+ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
+ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
+ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
+ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
+ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
+ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
+ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
+ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
+ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
+ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
+ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
+ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
+ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
+ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
+ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
+ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
+ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
+ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
+ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
+ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
+ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
+ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
+ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
+ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
+ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
+ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
+ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
+ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
+ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
+ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
+ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
+ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
+ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
+ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
+ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
+ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
+ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
+ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
+ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
+ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
+ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
+ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
+ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
+ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
+ 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
+ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
+ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
+ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
+ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
+ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
+ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
+ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
+ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
+ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
+ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
+ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
+ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
+ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
+ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
+ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
+ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
+ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
+$code.=<<___;
+# Te4[256]
+.byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+.byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+.byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+.byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+.byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+.byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+.byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+.byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+.byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+.byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+.byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+.byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+.byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+.byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+.byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+.byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+.byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+.byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+.byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+.byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+.byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+.byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+.byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+.byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+.byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+.byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+.byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+.byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+.byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+.byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+.byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+.byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+# rcon[]
+.long 0x01000000, 0x02000000, 0x04000000, 0x08000000
+.long 0x10000000, 0x20000000, 0x40000000, 0x80000000
+.long 0x1B000000, 0x36000000, 0, 0, 0, 0, 0, 0
+.align 256
+.size AES_Te,.-AES_Te
+
+# void AES_encrypt(const unsigned char *inp, unsigned char *out,
+# const AES_KEY *key) {
+.globl AES_encrypt
+.type AES_encrypt,\@function
+AES_encrypt:
+___
+$code.=<<___ if (!$softonly);
+ l %r0,240($key)
+ lhi %r1,16
+ clr %r0,%r1
+ jl .Lesoft
+
+ la %r1,0($key)
+ #la %r2,0($inp)
+ la %r4,0($out)
+ lghi %r3,16 # single block length
+ .long 0xb92e0042 # km %r4,%r2
+ brc 1,.-4 # can this happen?
+ br %r14
+.align 64
+.Lesoft:
+___
+$code.=<<___;
+ stmg %r3,$ra,24($sp)
+
+ llgf $s0,0($inp)
+ llgf $s1,4($inp)
+ llgf $s2,8($inp)
+ llgf $s3,12($inp)
+
+ larl $tbl,AES_Te
+ bras $ra,_s390x_AES_encrypt
+
+ lg $out,24($sp)
+ st $s0,0($out)
+ st $s1,4($out)
+ st $s2,8($out)
+ st $s3,12($out)
+
+ lmg %r6,$ra,48($sp)
+ br $ra
+.size AES_encrypt,.-AES_encrypt
+
+.type _s390x_AES_encrypt,\@function
+.align 16
+_s390x_AES_encrypt:
+ stg $ra,152($sp)
+ x $s0,0($key)
+ x $s1,4($key)
+ x $s2,8($key)
+ x $s3,12($key)
+ l $rounds,240($key)
+ llill $mask,`0xff<<3`
+ aghi $rounds,-1
+ j .Lenc_loop
+.align 16
+.Lenc_loop:
+ sllg $t1,$s0,`0+3`
+ srlg $t2,$s0,`8-3`
+ srlg $t3,$s0,`16-3`
+ srl $s0,`24-3`
+ nr $s0,$mask
+ ngr $t1,$mask
+ nr $t2,$mask
+ nr $t3,$mask
+
+ srlg $i1,$s1,`16-3` # i0
+ sllg $i2,$s1,`0+3`
+ srlg $i3,$s1,`8-3`
+ srl $s1,`24-3`
+ nr $i1,$mask
+ nr $s1,$mask
+ ngr $i2,$mask
+ nr $i3,$mask
+
+ l $s0,0($s0,$tbl) # Te0[s0>>24]
+ l $t1,1($t1,$tbl) # Te3[s0>>0]
+ l $t2,2($t2,$tbl) # Te2[s0>>8]
+ l $t3,3($t3,$tbl) # Te1[s0>>16]
+
+ x $s0,3($i1,$tbl) # Te1[s1>>16]
+ l $s1,0($s1,$tbl) # Te0[s1>>24]
+ x $t2,1($i2,$tbl) # Te3[s1>>0]
+ x $t3,2($i3,$tbl) # Te2[s1>>8]
+
+ srlg $i1,$s2,`8-3` # i0
+ srlg $i2,$s2,`16-3` # i1
+ nr $i1,$mask
+ nr $i2,$mask
+ sllg $i3,$s2,`0+3`
+ srl $s2,`24-3`
+ nr $s2,$mask
+ ngr $i3,$mask
+
+ xr $s1,$t1
+ srlg $ra,$s3,`8-3` # i1
+ sllg $t1,$s3,`0+3` # i0
+ nr $ra,$mask
+ la $key,16($key)
+ ngr $t1,$mask
+
+ x $s0,2($i1,$tbl) # Te2[s2>>8]
+ x $s1,3($i2,$tbl) # Te1[s2>>16]
+ l $s2,0($s2,$tbl) # Te0[s2>>24]
+ x $t3,1($i3,$tbl) # Te3[s2>>0]
+
+ srlg $i3,$s3,`16-3` # i2
+ xr $s2,$t2
+ srl $s3,`24-3`
+ nr $i3,$mask
+ nr $s3,$mask
+
+ x $s0,0($key)
+ x $s1,4($key)
+ x $s2,8($key)
+ x $t3,12($key)
+
+ x $s0,1($t1,$tbl) # Te3[s3>>0]
+ x $s1,2($ra,$tbl) # Te2[s3>>8]
+ x $s2,3($i3,$tbl) # Te1[s3>>16]
+ l $s3,0($s3,$tbl) # Te0[s3>>24]
+ xr $s3,$t3
+
+ brct $rounds,.Lenc_loop
+ .align 16
+
+ sllg $t1,$s0,`0+3`
+ srlg $t2,$s0,`8-3`
+ ngr $t1,$mask
+ srlg $t3,$s0,`16-3`
+ srl $s0,`24-3`
+ nr $s0,$mask
+ nr $t2,$mask
+ nr $t3,$mask
+
+ srlg $i1,$s1,`16-3` # i0
+ sllg $i2,$s1,`0+3`
+ ngr $i2,$mask
+ srlg $i3,$s1,`8-3`
+ srl $s1,`24-3`
+ nr $i1,$mask
+ nr $s1,$mask
+ nr $i3,$mask
+
+ llgc $s0,2($s0,$tbl) # Te4[s0>>24]
+ llgc $t1,2($t1,$tbl) # Te4[s0>>0]
+ sll $s0,24
+ llgc $t2,2($t2,$tbl) # Te4[s0>>8]
+ llgc $t3,2($t3,$tbl) # Te4[s0>>16]
+ sll $t2,8
+ sll $t3,16
+
+ llgc $i1,2($i1,$tbl) # Te4[s1>>16]
+ llgc $s1,2($s1,$tbl) # Te4[s1>>24]
+ llgc $i2,2($i2,$tbl) # Te4[s1>>0]
+ llgc $i3,2($i3,$tbl) # Te4[s1>>8]
+ sll $i1,16
+ sll $s1,24
+ sll $i3,8
+ or $s0,$i1
+ or $s1,$t1
+ or $t2,$i2
+ or $t3,$i3
+
+ srlg $i1,$s2,`8-3` # i0
+ srlg $i2,$s2,`16-3` # i1
+ nr $i1,$mask
+ nr $i2,$mask
+ sllg $i3,$s2,`0+3`
+ srl $s2,`24-3`
+ ngr $i3,$mask
+ nr $s2,$mask
+
+ sllg $t1,$s3,`0+3` # i0
+ srlg $ra,$s3,`8-3` # i1
+ ngr $t1,$mask
+
+ llgc $i1,2($i1,$tbl) # Te4[s2>>8]
+ llgc $i2,2($i2,$tbl) # Te4[s2>>16]
+ sll $i1,8
+ llgc $s2,2($s2,$tbl) # Te4[s2>>24]
+ llgc $i3,2($i3,$tbl) # Te4[s2>>0]
+ sll $i2,16
+ nr $ra,$mask
+ sll $s2,24
+ or $s0,$i1
+ or $s1,$i2
+ or $s2,$t2
+ or $t3,$i3
+
+ srlg $i3,$s3,`16-3` # i2
+ srl $s3,`24-3`
+ nr $i3,$mask
+ nr $s3,$mask
+
+ l $t0,16($key)
+ l $t2,20($key)
+
+ llgc $i1,2($t1,$tbl) # Te4[s3>>0]
+ llgc $i2,2($ra,$tbl) # Te4[s3>>8]
+ llgc $i3,2($i3,$tbl) # Te4[s3>>16]
+ llgc $s3,2($s3,$tbl) # Te4[s3>>24]
+ sll $i2,8
+ sll $i3,16
+ sll $s3,24
+ or $s0,$i1
+ or $s1,$i2
+ or $s2,$i3
+ or $s3,$t3
+
+ lg $ra,152($sp)
+ xr $s0,$t0
+ xr $s1,$t2
+ x $s2,24($key)
+ x $s3,28($key)
+
+ br $ra
+.size _s390x_AES_encrypt,.-_s390x_AES_encrypt
+___
+
+$code.=<<___;
+.type AES_Td,\@object
+.align 256
+AES_Td:
+___
+&_data_word(
+ 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
+ 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
+ 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
+ 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
+ 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
+ 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
+ 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
+ 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
+ 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
+ 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
+ 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
+ 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
+ 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
+ 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
+ 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
+ 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
+ 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
+ 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
+ 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
+ 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
+ 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
+ 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
+ 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
+ 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
+ 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
+ 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
+ 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
+ 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
+ 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
+ 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
+ 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
+ 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
+ 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
+ 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
+ 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
+ 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
+ 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
+ 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
+ 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
+ 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
+ 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
+ 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
+ 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
+ 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
+ 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
+ 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
+ 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
+ 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
+ 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
+ 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
+ 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
+ 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
+ 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
+ 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
+ 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
+ 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
+ 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
+ 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
+ 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
+ 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
+ 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
+ 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
+ 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
+ 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
+$code.=<<___;
+# Td4[256]
+.byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+.byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+.byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+.byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+.byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+.byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+.byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+.byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+.byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+.byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+.byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+.byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+.byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+.byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+.byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+.byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+.byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+.byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+.byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+.byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+.byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+.byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+.byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+.byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+.byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+.byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+.byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+.byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+.byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+.byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+.byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+.byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+.size AES_Td,.-AES_Td
+
+# void AES_decrypt(const unsigned char *inp, unsigned char *out,
+# const AES_KEY *key) {
+.globl AES_decrypt
+.type AES_decrypt,\@function
+AES_decrypt:
+___
+$code.=<<___ if (!$softonly);
+ l %r0,240($key)
+ lhi %r1,16
+ clr %r0,%r1
+ jl .Ldsoft
+
+ la %r1,0($key)
+ #la %r2,0($inp)
+ la %r4,0($out)
+ lghi %r3,16 # single block length
+ .long 0xb92e0042 # km %r4,%r2
+ brc 1,.-4 # can this happen?
+ br %r14
+.align 64
+.Ldsoft:
+___
+$code.=<<___;
+ stmg %r3,$ra,24($sp)
+
+ llgf $s0,0($inp)
+ llgf $s1,4($inp)
+ llgf $s2,8($inp)
+ llgf $s3,12($inp)
+
+ larl $tbl,AES_Td
+ bras $ra,_s390x_AES_decrypt
+
+ lg $out,24($sp)
+ st $s0,0($out)
+ st $s1,4($out)
+ st $s2,8($out)
+ st $s3,12($out)
+
+ lmg %r6,$ra,48($sp)
+ br $ra
+.size AES_decrypt,.-AES_decrypt
+
+.type _s390x_AES_decrypt,\@function
+.align 16
+_s390x_AES_decrypt:
+ stg $ra,152($sp)
+ x $s0,0($key)
+ x $s1,4($key)
+ x $s2,8($key)
+ x $s3,12($key)
+ l $rounds,240($key)
+ llill $mask,`0xff<<3`
+ aghi $rounds,-1
+ j .Ldec_loop
+.align 16
+.Ldec_loop:
+ srlg $t1,$s0,`16-3`
+ srlg $t2,$s0,`8-3`
+ sllg $t3,$s0,`0+3`
+ srl $s0,`24-3`
+ nr $s0,$mask
+ nr $t1,$mask
+ nr $t2,$mask
+ ngr $t3,$mask
+
+ sllg $i1,$s1,`0+3` # i0
+ srlg $i2,$s1,`16-3`
+ srlg $i3,$s1,`8-3`
+ srl $s1,`24-3`
+ ngr $i1,$mask
+ nr $s1,$mask
+ nr $i2,$mask
+ nr $i3,$mask
+
+ l $s0,0($s0,$tbl) # Td0[s0>>24]
+ l $t1,3($t1,$tbl) # Td1[s0>>16]
+ l $t2,2($t2,$tbl) # Td2[s0>>8]
+ l $t3,1($t3,$tbl) # Td3[s0>>0]
+
+ x $s0,1($i1,$tbl) # Td3[s1>>0]
+ l $s1,0($s1,$tbl) # Td0[s1>>24]
+ x $t2,3($i2,$tbl) # Td1[s1>>16]
+ x $t3,2($i3,$tbl) # Td2[s1>>8]
+
+ srlg $i1,$s2,`8-3` # i0
+ sllg $i2,$s2,`0+3` # i1
+ srlg $i3,$s2,`16-3`
+ srl $s2,`24-3`
+ nr $i1,$mask
+ ngr $i2,$mask
+ nr $s2,$mask
+ nr $i3,$mask
+
+ xr $s1,$t1
+ srlg $ra,$s3,`8-3` # i1
+ srlg $t1,$s3,`16-3` # i0
+ nr $ra,$mask
+ la $key,16($key)
+ nr $t1,$mask
+
+ x $s0,2($i1,$tbl) # Td2[s2>>8]
+ x $s1,1($i2,$tbl) # Td3[s2>>0]
+ l $s2,0($s2,$tbl) # Td0[s2>>24]
+ x $t3,3($i3,$tbl) # Td1[s2>>16]
+
+ sllg $i3,$s3,`0+3` # i2
+ srl $s3,`24-3`
+ ngr $i3,$mask
+ nr $s3,$mask
+
+ xr $s2,$t2
+ x $s0,0($key)
+ x $s1,4($key)
+ x $s2,8($key)
+ x $t3,12($key)
+
+ x $s0,3($t1,$tbl) # Td1[s3>>16]
+ x $s1,2($ra,$tbl) # Td2[s3>>8]
+ x $s2,1($i3,$tbl) # Td3[s3>>0]
+ l $s3,0($s3,$tbl) # Td0[s3>>24]
+ xr $s3,$t3
+
+ brct $rounds,.Ldec_loop
+ .align 16
+
+ l $t1,`2048+0`($tbl) # prefetch Td4
+ l $t2,`2048+64`($tbl)
+ l $t3,`2048+128`($tbl)
+ l $i1,`2048+192`($tbl)
+ llill $mask,0xff
+
+ srlg $i3,$s0,24 # i0
+ srlg $t1,$s0,16
+ srlg $t2,$s0,8
+ nr $s0,$mask # i3
+ nr $t1,$mask
+
+ srlg $i1,$s1,24
+ nr $t2,$mask
+ srlg $i2,$s1,16
+ srlg $ra,$s1,8
+ nr $s1,$mask # i0
+ nr $i2,$mask
+ nr $ra,$mask
+
+ llgc $i3,2048($i3,$tbl) # Td4[s0>>24]
+ llgc $t1,2048($t1,$tbl) # Td4[s0>>16]
+ llgc $t2,2048($t2,$tbl) # Td4[s0>>8]
+ sll $t1,16
+ llgc $t3,2048($s0,$tbl) # Td4[s0>>0]
+ sllg $s0,$i3,24
+ sll $t2,8
+
+ llgc $s1,2048($s1,$tbl) # Td4[s1>>0]
+ llgc $i1,2048($i1,$tbl) # Td4[s1>>24]
+ llgc $i2,2048($i2,$tbl) # Td4[s1>>16]
+ sll $i1,24
+ llgc $i3,2048($ra,$tbl) # Td4[s1>>8]
+ sll $i2,16
+ sll $i3,8
+ or $s0,$s1
+ or $t1,$i1
+ or $t2,$i2
+ or $t3,$i3
+
+ srlg $i1,$s2,8 # i0
+ srlg $i2,$s2,24
+ srlg $i3,$s2,16
+ nr $s2,$mask # i1
+ nr $i1,$mask
+ nr $i3,$mask
+ llgc $i1,2048($i1,$tbl) # Td4[s2>>8]
+ llgc $s1,2048($s2,$tbl) # Td4[s2>>0]
+ llgc $i2,2048($i2,$tbl) # Td4[s2>>24]
+ llgc $i3,2048($i3,$tbl) # Td4[s2>>16]
+ sll $i1,8
+ sll $i2,24
+ or $s0,$i1
+ sll $i3,16
+ or $t2,$i2
+ or $t3,$i3
+
+ srlg $i1,$s3,16 # i0
+ srlg $i2,$s3,8 # i1
+ srlg $i3,$s3,24
+ nr $s3,$mask # i2
+ nr $i1,$mask
+ nr $i2,$mask
+
+ lg $ra,152($sp)
+ or $s1,$t1
+ l $t0,16($key)
+ l $t1,20($key)
+
+ llgc $i1,2048($i1,$tbl) # Td4[s3>>16]
+ llgc $i2,2048($i2,$tbl) # Td4[s3>>8]
+ sll $i1,16
+ llgc $s2,2048($s3,$tbl) # Td4[s3>>0]
+ llgc $s3,2048($i3,$tbl) # Td4[s3>>24]
+ sll $i2,8
+ sll $s3,24
+ or $s0,$i1
+ or $s1,$i2
+ or $s2,$t2
+ or $s3,$t3
+
+ xr $s0,$t0
+ xr $s1,$t1
+ x $s2,24($key)
+ x $s3,28($key)
+
+ br $ra
+.size _s390x_AES_decrypt,.-_s390x_AES_decrypt
+___
+
+$code.=<<___;
+# void AES_set_encrypt_key(const unsigned char *in, int bits,
+# AES_KEY *key) {
+.globl AES_set_encrypt_key
+.type AES_set_encrypt_key,\@function
+.align 16
+AES_set_encrypt_key:
+ lghi $t0,0
+ clgr $inp,$t0
+ je .Lminus1
+ clgr $key,$t0
+ je .Lminus1
+
+ lghi $t0,128
+ clr $bits,$t0
+ je .Lproceed
+ lghi $t0,192
+ clr $bits,$t0
+ je .Lproceed
+ lghi $t0,256
+ clr $bits,$t0
+ je .Lproceed
+ lghi %r2,-2
+ br %r14
+
+.align 16
+.Lproceed:
+___
+$code.=<<___ if (!$softonly);
+ # convert bits to km code, [128,192,256]->[18,19,20]
+ lhi %r5,-128
+ lhi %r0,18
+ ar %r5,$bits
+ srl %r5,6
+ ar %r5,%r0
+
+ lghi %r0,0 # query capability vector
+ la %r1,16($sp)
+ .long 0xb92f0042 # kmc %r4,%r2
+
+ llihh %r1,0x8000
+ srlg %r1,%r1,0(%r5)
+ ng %r1,16($sp)
+ jz .Lekey_internal
+
+ lmg %r0,%r1,0($inp) # just copy 128 bits...
+ stmg %r0,%r1,0($key)
+ lhi %r0,192
+ cr $bits,%r0
+ jl 1f
+ lg %r1,16($inp)
+ stg %r1,16($key)
+ je 1f
+ lg %r1,24($inp)
+ stg %r1,24($key)
+1: st $bits,236($key) # save bits
+ st %r5,240($key) # save km code
+ lghi %r2,0
+ br %r14
+___
+$code.=<<___;
+.align 16
+.Lekey_internal:
+ stmg %r6,%r13,48($sp) # all non-volatile regs
+
+ larl $tbl,AES_Te+2048
+
+ llgf $s0,0($inp)
+ llgf $s1,4($inp)
+ llgf $s2,8($inp)
+ llgf $s3,12($inp)
+ st $s0,0($key)
+ st $s1,4($key)
+ st $s2,8($key)
+ st $s3,12($key)
+ lghi $t0,128
+ cr $bits,$t0
+ jne .Lnot128
+
+ llill $mask,0xff
+ lghi $t3,0 # i=0
+ lghi $rounds,10
+ st $rounds,240($key)
+
+ llgfr $t2,$s3 # temp=rk[3]
+ srlg $i1,$s3,8
+ srlg $i2,$s3,16
+ srlg $i3,$s3,24
+ nr $t2,$mask
+ nr $i1,$mask
+ nr $i2,$mask
+
+.align 16
+.L128_loop:
+ la $t2,0($t2,$tbl)
+ la $i1,0($i1,$tbl)
+ la $i2,0($i2,$tbl)
+ la $i3,0($i3,$tbl)
+ icm $t2,2,0($t2) # Te4[rk[3]>>0]<<8
+ icm $t2,4,0($i1) # Te4[rk[3]>>8]<<16
+ icm $t2,8,0($i2) # Te4[rk[3]>>16]<<24
+ icm $t2,1,0($i3) # Te4[rk[3]>>24]
+ x $t2,256($t3,$tbl) # rcon[i]
+ xr $s0,$t2 # rk[4]=rk[0]^...
+ xr $s1,$s0 # rk[5]=rk[1]^rk[4]
+ xr $s2,$s1 # rk[6]=rk[2]^rk[5]
+ xr $s3,$s2 # rk[7]=rk[3]^rk[6]
+
+ llgfr $t2,$s3 # temp=rk[3]
+ srlg $i1,$s3,8
+ srlg $i2,$s3,16
+ nr $t2,$mask
+ nr $i1,$mask
+ srlg $i3,$s3,24
+ nr $i2,$mask
+
+ st $s0,16($key)
+ st $s1,20($key)
+ st $s2,24($key)
+ st $s3,28($key)
+ la $key,16($key) # key+=4
+ la $t3,4($t3) # i++
+ brct $rounds,.L128_loop
+ lghi %r2,0
+ lmg %r6,%r13,48($sp)
+ br $ra
+
+.align 16
+.Lnot128:
+ llgf $t0,16($inp)
+ llgf $t1,20($inp)
+ st $t0,16($key)
+ st $t1,20($key)
+ lghi $t0,192
+ cr $bits,$t0
+ jne .Lnot192
+
+ llill $mask,0xff
+ lghi $t3,0 # i=0
+ lghi $rounds,12
+ st $rounds,240($key)
+ lghi $rounds,8
+
+ srlg $i1,$t1,8
+ srlg $i2,$t1,16
+ srlg $i3,$t1,24
+ nr $t1,$mask
+ nr $i1,$mask
+ nr $i2,$mask
+
+.align 16
+.L192_loop:
+ la $t1,0($t1,$tbl)
+ la $i1,0($i1,$tbl)
+ la $i2,0($i2,$tbl)
+ la $i3,0($i3,$tbl)
+ icm $t1,2,0($t1) # Te4[rk[5]>>0]<<8
+ icm $t1,4,0($i1) # Te4[rk[5]>>8]<<16
+ icm $t1,8,0($i2) # Te4[rk[5]>>16]<<24
+ icm $t1,1,0($i3) # Te4[rk[5]>>24]
+ x $t1,256($t3,$tbl) # rcon[i]
+ xr $s0,$t1 # rk[6]=rk[0]^...
+ xr $s1,$s0 # rk[7]=rk[1]^rk[6]
+ xr $s2,$s1 # rk[8]=rk[2]^rk[7]
+ xr $s3,$s2 # rk[9]=rk[3]^rk[8]
+
+ st $s0,24($key)
+ st $s1,28($key)
+ st $s2,32($key)
+ st $s3,36($key)
+ brct $rounds,.L192_continue
+ lghi %r2,0
+ lmg %r6,%r13,48($sp)
+ br $ra
+
+.align 16
+.L192_continue:
+ lgr $t1,$s3
+ x $t1,16($key) # rk[10]=rk[4]^rk[9]
+ st $t1,40($key)
+ x $t1,20($key) # rk[11]=rk[5]^rk[10]
+ st $t1,44($key)
+
+ srlg $i1,$t1,8
+ srlg $i2,$t1,16
+ srlg $i3,$t1,24
+ nr $t1,$mask
+ nr $i1,$mask
+ nr $i2,$mask
+
+ la $key,24($key) # key+=6
+ la $t3,4($t3) # i++
+ j .L192_loop
+
+.align 16
+.Lnot192:
+ llgf $t0,24($inp)
+ llgf $t1,28($inp)
+ st $t0,24($key)
+ st $t1,28($key)
+ llill $mask,0xff
+ lghi $t3,0 # i=0
+ lghi $rounds,14
+ st $rounds,240($key)
+ lghi $rounds,7
+
+ srlg $i1,$t1,8
+ srlg $i2,$t1,16
+ srlg $i3,$t1,24
+ nr $t1,$mask
+ nr $i1,$mask
+ nr $i2,$mask
+
+.align 16
+.L256_loop:
+ la $t1,0($t1,$tbl)
+ la $i1,0($i1,$tbl)
+ la $i2,0($i2,$tbl)
+ la $i3,0($i3,$tbl)
+ icm $t1,2,0($t1) # Te4[rk[7]>>0]<<8
+ icm $t1,4,0($i1) # Te4[rk[7]>>8]<<16
+ icm $t1,8,0($i2) # Te4[rk[7]>>16]<<24
+ icm $t1,1,0($i3) # Te4[rk[7]>>24]
+ x $t1,256($t3,$tbl) # rcon[i]
+ xr $s0,$t1 # rk[8]=rk[0]^...
+ xr $s1,$s0 # rk[9]=rk[1]^rk[8]
+ xr $s2,$s1 # rk[10]=rk[2]^rk[9]
+ xr $s3,$s2 # rk[11]=rk[3]^rk[10]
+ st $s0,32($key)
+ st $s1,36($key)
+ st $s2,40($key)
+ st $s3,44($key)
+ brct $rounds,.L256_continue
+ lghi %r2,0
+ lmg %r6,%r13,48($sp)
+ br $ra
+
+.align 16
+.L256_continue:
+ lgr $t1,$s3 # temp=rk[11]
+ srlg $i1,$s3,8
+ srlg $i2,$s3,16
+ srlg $i3,$s3,24
+ nr $t1,$mask
+ nr $i1,$mask
+ nr $i2,$mask
+ la $t1,0($t1,$tbl)
+ la $i1,0($i1,$tbl)
+ la $i2,0($i2,$tbl)
+ la $i3,0($i3,$tbl)
+ llgc $t1,0($t1) # Te4[rk[11]>>0]
+ icm $t1,2,0($i1) # Te4[rk[11]>>8]<<8
+ icm $t1,4,0($i2) # Te4[rk[11]>>16]<<16
+ icm $t1,8,0($i3) # Te4[rk[11]>>24]<<24
+ x $t1,16($key) # rk[12]=rk[4]^...
+ st $t1,48($key)
+ x $t1,20($key) # rk[13]=rk[5]^rk[12]
+ st $t1,52($key)
+ x $t1,24($key) # rk[14]=rk[6]^rk[13]
+ st $t1,56($key)
+ x $t1,28($key) # rk[15]=rk[7]^rk[14]
+ st $t1,60($key)
+
+ srlg $i1,$t1,8
+ srlg $i2,$t1,16
+ srlg $i3,$t1,24
+ nr $t1,$mask
+ nr $i1,$mask
+ nr $i2,$mask
+
+ la $key,32($key) # key+=8
+ la $t3,4($t3) # i++
+ j .L256_loop
+
+.Lminus1:
+ lghi %r2,-1
+ br $ra
+.size AES_set_encrypt_key,.-AES_set_encrypt_key
+
+# void AES_set_decrypt_key(const unsigned char *in, int bits,
+# AES_KEY *key) {
+.globl AES_set_decrypt_key
+.type AES_set_decrypt_key,\@function
+.align 16
+AES_set_decrypt_key:
+ stg $key,32($sp) # I rely on AES_set_encrypt_key to
+ stg $ra,112($sp) # save non-volatile registers!
+ bras $ra,AES_set_encrypt_key
+ lg $key,32($sp)
+ lg $ra,112($sp)
+ ltgr %r2,%r2
+ bnzr $ra
+___
+$code.=<<___ if (!$softonly);
+ l $t0,240($key)
+ lhi $t1,16
+ cr $t0,$t1
+ jl .Lgo
+ oill $t0,0x80 # set "decrypt" bit
+ st $t0,240($key)
+ br $ra
+
+.align 16
+.Ldkey_internal:
+ stg $key,32($sp)
+ stg $ra,40($sp)
+ bras $ra,.Lekey_internal
+ lg $key,32($sp)
+ lg $ra,40($sp)
+___
+$code.=<<___;
+
+.Lgo: llgf $rounds,240($key)
+ la $i1,0($key)
+ sllg $i2,$rounds,4
+ la $i2,0($i2,$key)
+ srl $rounds,1
+ lghi $t1,-16
+
+.align 16
+.Linv: lmg $s0,$s1,0($i1)
+ lmg $s2,$s3,0($i2)
+ stmg $s0,$s1,0($i2)
+ stmg $s2,$s3,0($i1)
+ la $i1,16($i1)
+ la $i2,0($t1,$i2)
+ brct $rounds,.Linv
+___
+$mask80=$i1;
+$mask1b=$i2;
+$maskfe=$i3;
+$code.=<<___;
+ llgf $rounds,240($key)
+ aghi $rounds,-1
+ sll $rounds,2 # (rounds-1)*4
+ llilh $mask80,0x8080
+ llilh $mask1b,0x1b1b
+ llilh $maskfe,0xfefe
+ oill $mask80,0x8080
+ oill $mask1b,0x1b1b
+ oill $maskfe,0xfefe
+
+.align 16
+.Lmix: l $s0,16($key) # tp1
+ lr $s1,$s0
+ ngr $s1,$mask80
+ srlg $t1,$s1,7
+ slr $s1,$t1
+ nr $s1,$mask1b
+ sllg $t1,$s0,1
+ nr $t1,$maskfe
+ xr $s1,$t1 # tp2
+
+ lr $s2,$s1
+ ngr $s2,$mask80
+ srlg $t1,$s2,7
+ slr $s2,$t1
+ nr $s2,$mask1b
+ sllg $t1,$s1,1
+ nr $t1,$maskfe
+ xr $s2,$t1 # tp4
+
+ lr $s3,$s2
+ ngr $s3,$mask80
+ srlg $t1,$s3,7
+ slr $s3,$t1
+ nr $s3,$mask1b
+ sllg $t1,$s2,1
+ nr $t1,$maskfe
+ xr $s3,$t1 # tp8
+
+ xr $s1,$s0 # tp2^tp1
+ xr $s2,$s0 # tp4^tp1
+ rll $s0,$s0,24 # = ROTATE(tp1,8)
+ xr $s2,$s3 # ^=tp8
+ xr $s0,$s1 # ^=tp2^tp1
+ xr $s1,$s3 # tp2^tp1^tp8
+ xr $s0,$s2 # ^=tp4^tp1^tp8
+ rll $s1,$s1,8
+ rll $s2,$s2,16
+ xr $s0,$s1 # ^= ROTATE(tp8^tp2^tp1,24)
+ rll $s3,$s3,24
+ xr $s0,$s2 # ^= ROTATE(tp8^tp4^tp1,16)
+ xr $s0,$s3 # ^= ROTATE(tp8,8)
+
+ st $s0,16($key)
+ la $key,4($key)
+ brct $rounds,.Lmix
+
+ lmg %r6,%r13,48($sp)# as was saved by AES_set_encrypt_key!
+ lghi %r2,0
+ br $ra
+.size AES_set_decrypt_key,.-AES_set_decrypt_key
+___
+
+#void AES_cbc_encrypt(const unsigned char *in, unsigned char *out,
+# size_t length, const AES_KEY *key,
+# unsigned char *ivec, const int enc)
+{
+my $inp="%r2";
+my $out="%r4"; # length and out are swapped
+my $len="%r3";
+my $key="%r5";
+my $ivp="%r6";
+
+$code.=<<___;
+.globl AES_cbc_encrypt
+.type AES_cbc_encrypt,\@function
+.align 16
+AES_cbc_encrypt:
+ xgr %r3,%r4 # flip %r3 and %r4, out and len
+ xgr %r4,%r3
+ xgr %r3,%r4
+___
+$code.=<<___ if (!$softonly);
+ lhi %r0,16
+ cl %r0,240($key)
+ jh .Lcbc_software
+
+ lg %r0,0($ivp) # copy ivec
+ lg %r1,8($ivp)
+ stmg %r0,%r1,16($sp)
+ lmg %r0,%r1,0($key) # copy key, cover 256 bit
+ stmg %r0,%r1,32($sp)
+ lmg %r0,%r1,16($key)
+ stmg %r0,%r1,48($sp)
+ l %r0,240($key) # load kmc code
+ lghi $key,15 # res=len%16, len-=res;
+ ngr $key,$len
+ slgr $len,$key
+ la %r1,16($sp) # parameter block - ivec || key
+ jz .Lkmc_truncated
+ .long 0xb92f0042 # kmc %r4,%r2
+ brc 1,.-4 # pay attention to "partial completion"
+ ltr $key,$key
+ jnz .Lkmc_truncated
+.Lkmc_done:
+ lmg %r0,%r1,16($sp) # copy ivec to caller
+ stg %r0,0($ivp)
+ stg %r1,8($ivp)
+ br $ra
+.align 16
+.Lkmc_truncated:
+ ahi $key,-1 # it's the way it's encoded in mvc
+ tmll %r0,0x80
+ jnz .Lkmc_truncated_dec
+ lghi %r1,0
+ stg %r1,128($sp)
+ stg %r1,136($sp)
+ bras %r1,1f
+ mvc 128(1,$sp),0($inp)
+1: ex $key,0(%r1)
+ la %r1,16($sp) # restore parameter block
+ la $inp,128($sp)
+ lghi $len,16
+ .long 0xb92f0042 # kmc %r4,%r2
+ j .Lkmc_done
+.align 16
+.Lkmc_truncated_dec:
+ stg $out,64($sp)
+ la $out,128($sp)
+ lghi $len,16
+ .long 0xb92f0042 # kmc %r4,%r2
+ lg $out,64($sp)
+ bras %r1,2f
+ mvc 0(1,$out),128($sp)
+2: ex $key,0(%r1)
+ j .Lkmc_done
+.align 16
+.Lcbc_software:
+___
+$code.=<<___;
+ stmg $key,$ra,40($sp)
+ lhi %r0,0
+ cl %r0,164($sp)
+ je .Lcbc_decrypt
+
+ larl $tbl,AES_Te
+
+ llgf $s0,0($ivp)
+ llgf $s1,4($ivp)
+ llgf $s2,8($ivp)
+ llgf $s3,12($ivp)
+
+ lghi $t0,16
+ slgr $len,$t0
+ brc 4,.Lcbc_enc_tail # if borrow
+.Lcbc_enc_loop:
+ stmg $inp,$out,16($sp)
+ x $s0,0($inp)
+ x $s1,4($inp)
+ x $s2,8($inp)
+ x $s3,12($inp)
+ lgr %r4,$key
+
+ bras $ra,_s390x_AES_encrypt
+
+ lmg $inp,$key,16($sp)
+ st $s0,0($out)
+ st $s1,4($out)
+ st $s2,8($out)
+ st $s3,12($out)
+
+ la $inp,16($inp)
+ la $out,16($out)
+ lghi $t0,16
+ ltgr $len,$len
+ jz .Lcbc_enc_done
+ slgr $len,$t0
+ brc 4,.Lcbc_enc_tail # if borrow
+ j .Lcbc_enc_loop
+.align 16
+.Lcbc_enc_done:
+ lg $ivp,48($sp)
+ st $s0,0($ivp)
+ st $s1,4($ivp)
+ st $s2,8($ivp)
+ st $s3,12($ivp)
+
+ lmg %r7,$ra,56($sp)
+ br $ra
+
+.align 16
+.Lcbc_enc_tail:
+ aghi $len,15
+ lghi $t0,0
+ stg $t0,128($sp)
+ stg $t0,136($sp)
+ bras $t1,3f
+ mvc 128(1,$sp),0($inp)
+3: ex $len,0($t1)
+ lghi $len,0
+ la $inp,128($sp)
+ j .Lcbc_enc_loop
+
+.align 16
+.Lcbc_decrypt:
+ larl $tbl,AES_Td
+
+ lg $t0,0($ivp)
+ lg $t1,8($ivp)
+ stmg $t0,$t1,128($sp)
+
+.Lcbc_dec_loop:
+ stmg $inp,$out,16($sp)
+ llgf $s0,0($inp)
+ llgf $s1,4($inp)
+ llgf $s2,8($inp)
+ llgf $s3,12($inp)
+ lgr %r4,$key
+
+ bras $ra,_s390x_AES_decrypt
+
+ lmg $inp,$key,16($sp)
+ sllg $s0,$s0,32
+ sllg $s2,$s2,32
+ lr $s0,$s1
+ lr $s2,$s3
+
+ lg $t0,0($inp)
+ lg $t1,8($inp)
+ xg $s0,128($sp)
+ xg $s2,136($sp)
+ lghi $s1,16
+ slgr $len,$s1
+ brc 4,.Lcbc_dec_tail # if borrow
+ brc 2,.Lcbc_dec_done # if zero
+ stg $s0,0($out)
+ stg $s2,8($out)
+ stmg $t0,$t1,128($sp)
+
+ la $inp,16($inp)
+ la $out,16($out)
+ j .Lcbc_dec_loop
+
+.Lcbc_dec_done:
+ stg $s0,0($out)
+ stg $s2,8($out)
+.Lcbc_dec_exit:
+ lmg $ivp,$ra,48($sp)
+ stmg $t0,$t1,0($ivp)
+
+ br $ra
+
+.align 16
+.Lcbc_dec_tail:
+ aghi $len,15
+ stg $s0,128($sp)
+ stg $s2,136($sp)
+ bras $s1,4f
+ mvc 0(1,$out),128($sp)
+4: ex $len,0($s1)
+ j .Lcbc_dec_exit
+.size AES_cbc_encrypt,.-AES_cbc_encrypt
+___
+}
+$code.=<<___;
+.string "AES for s390x, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+print $code;
diff --git a/crypto/aes/asm/aes-sparcv9.pl b/crypto/aes/asm/aes-sparcv9.pl
new file mode 100755
index 000000000000..c57b3a2d6d34
--- /dev/null
+++ b/crypto/aes/asm/aes-sparcv9.pl
@@ -0,0 +1,1181 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. Rights for redistribution and usage in source and binary
+# forms are granted according to the OpenSSL license.
+# ====================================================================
+#
+# Version 1.1
+#
+# The major reason for undertaken effort was to mitigate the hazard of
+# cache-timing attack. This is [currently and initially!] addressed in
+# two ways. 1. S-boxes are compressed from 5KB to 2KB+256B size each.
+# 2. References to them are scheduled for L2 cache latency, meaning
+# that the tables don't have to reside in L1 cache. Once again, this
+# is an initial draft and one should expect more countermeasures to
+# be implemented...
+#
+# Version 1.1 prefetches T[ed]4 in order to mitigate attack on last
+# round.
+#
+# Even though performance was not the primary goal [on the contrary,
+# extra shifts "induced" by compressed S-box and longer loop epilogue
+# "induced" by scheduling for L2 have negative effect on performance],
+# the code turned out to run in ~23 cycles per processed byte en-/
+# decrypted with 128-bit key. This is pretty good result for code
+# with mentioned qualities and UltraSPARC core. Compared to Sun C
+# generated code my encrypt procedure runs just few percents faster,
+# while decrypt one - whole 50% faster [yes, Sun C failed to generate
+# optimal decrypt procedure]. Compared to GNU C generated code both
+# procedures are more than 60% faster:-)
+
+$bits=32;
+for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
+if ($bits==64) { $bias=2047; $frame=192; }
+else { $bias=0; $frame=112; }
+$locals=16;
+
+$acc0="%l0";
+$acc1="%o0";
+$acc2="%o1";
+$acc3="%o2";
+
+$acc4="%l1";
+$acc5="%o3";
+$acc6="%o4";
+$acc7="%o5";
+
+$acc8="%l2";
+$acc9="%o7";
+$acc10="%g1";
+$acc11="%g2";
+
+$acc12="%l3";
+$acc13="%g3";
+$acc14="%g4";
+$acc15="%g5";
+
+$t0="%l4";
+$t1="%l5";
+$t2="%l6";
+$t3="%l7";
+
+$s0="%i0";
+$s1="%i1";
+$s2="%i2";
+$s3="%i3";
+$tbl="%i4";
+$key="%i5";
+$rounds="%i7"; # aliases with return address, which is off-loaded to stack
+
+sub _data_word()
+{ my $i;
+ while(defined($i=shift)) { $code.=sprintf"\t.long\t0x%08x,0x%08x\n",$i,$i; }
+}
+
+$code.=<<___ if ($bits==64);
+.register %g2,#scratch
+.register %g3,#scratch
+___
+$code.=<<___;
+.section ".text",#alloc,#execinstr
+
+.align 256
+AES_Te:
+___
+&_data_word(
+ 0xc66363a5, 0xf87c7c84, 0xee777799, 0xf67b7b8d,
+ 0xfff2f20d, 0xd66b6bbd, 0xde6f6fb1, 0x91c5c554,
+ 0x60303050, 0x02010103, 0xce6767a9, 0x562b2b7d,
+ 0xe7fefe19, 0xb5d7d762, 0x4dababe6, 0xec76769a,
+ 0x8fcaca45, 0x1f82829d, 0x89c9c940, 0xfa7d7d87,
+ 0xeffafa15, 0xb25959eb, 0x8e4747c9, 0xfbf0f00b,
+ 0x41adadec, 0xb3d4d467, 0x5fa2a2fd, 0x45afafea,
+ 0x239c9cbf, 0x53a4a4f7, 0xe4727296, 0x9bc0c05b,
+ 0x75b7b7c2, 0xe1fdfd1c, 0x3d9393ae, 0x4c26266a,
+ 0x6c36365a, 0x7e3f3f41, 0xf5f7f702, 0x83cccc4f,
+ 0x6834345c, 0x51a5a5f4, 0xd1e5e534, 0xf9f1f108,
+ 0xe2717193, 0xabd8d873, 0x62313153, 0x2a15153f,
+ 0x0804040c, 0x95c7c752, 0x46232365, 0x9dc3c35e,
+ 0x30181828, 0x379696a1, 0x0a05050f, 0x2f9a9ab5,
+ 0x0e070709, 0x24121236, 0x1b80809b, 0xdfe2e23d,
+ 0xcdebeb26, 0x4e272769, 0x7fb2b2cd, 0xea75759f,
+ 0x1209091b, 0x1d83839e, 0x582c2c74, 0x341a1a2e,
+ 0x361b1b2d, 0xdc6e6eb2, 0xb45a5aee, 0x5ba0a0fb,
+ 0xa45252f6, 0x763b3b4d, 0xb7d6d661, 0x7db3b3ce,
+ 0x5229297b, 0xdde3e33e, 0x5e2f2f71, 0x13848497,
+ 0xa65353f5, 0xb9d1d168, 0x00000000, 0xc1eded2c,
+ 0x40202060, 0xe3fcfc1f, 0x79b1b1c8, 0xb65b5bed,
+ 0xd46a6abe, 0x8dcbcb46, 0x67bebed9, 0x7239394b,
+ 0x944a4ade, 0x984c4cd4, 0xb05858e8, 0x85cfcf4a,
+ 0xbbd0d06b, 0xc5efef2a, 0x4faaaae5, 0xedfbfb16,
+ 0x864343c5, 0x9a4d4dd7, 0x66333355, 0x11858594,
+ 0x8a4545cf, 0xe9f9f910, 0x04020206, 0xfe7f7f81,
+ 0xa05050f0, 0x783c3c44, 0x259f9fba, 0x4ba8a8e3,
+ 0xa25151f3, 0x5da3a3fe, 0x804040c0, 0x058f8f8a,
+ 0x3f9292ad, 0x219d9dbc, 0x70383848, 0xf1f5f504,
+ 0x63bcbcdf, 0x77b6b6c1, 0xafdada75, 0x42212163,
+ 0x20101030, 0xe5ffff1a, 0xfdf3f30e, 0xbfd2d26d,
+ 0x81cdcd4c, 0x180c0c14, 0x26131335, 0xc3ecec2f,
+ 0xbe5f5fe1, 0x359797a2, 0x884444cc, 0x2e171739,
+ 0x93c4c457, 0x55a7a7f2, 0xfc7e7e82, 0x7a3d3d47,
+ 0xc86464ac, 0xba5d5de7, 0x3219192b, 0xe6737395,
+ 0xc06060a0, 0x19818198, 0x9e4f4fd1, 0xa3dcdc7f,
+ 0x44222266, 0x542a2a7e, 0x3b9090ab, 0x0b888883,
+ 0x8c4646ca, 0xc7eeee29, 0x6bb8b8d3, 0x2814143c,
+ 0xa7dede79, 0xbc5e5ee2, 0x160b0b1d, 0xaddbdb76,
+ 0xdbe0e03b, 0x64323256, 0x743a3a4e, 0x140a0a1e,
+ 0x924949db, 0x0c06060a, 0x4824246c, 0xb85c5ce4,
+ 0x9fc2c25d, 0xbdd3d36e, 0x43acacef, 0xc46262a6,
+ 0x399191a8, 0x319595a4, 0xd3e4e437, 0xf279798b,
+ 0xd5e7e732, 0x8bc8c843, 0x6e373759, 0xda6d6db7,
+ 0x018d8d8c, 0xb1d5d564, 0x9c4e4ed2, 0x49a9a9e0,
+ 0xd86c6cb4, 0xac5656fa, 0xf3f4f407, 0xcfeaea25,
+ 0xca6565af, 0xf47a7a8e, 0x47aeaee9, 0x10080818,
+ 0x6fbabad5, 0xf0787888, 0x4a25256f, 0x5c2e2e72,
+ 0x381c1c24, 0x57a6a6f1, 0x73b4b4c7, 0x97c6c651,
+ 0xcbe8e823, 0xa1dddd7c, 0xe874749c, 0x3e1f1f21,
+ 0x964b4bdd, 0x61bdbddc, 0x0d8b8b86, 0x0f8a8a85,
+ 0xe0707090, 0x7c3e3e42, 0x71b5b5c4, 0xcc6666aa,
+ 0x904848d8, 0x06030305, 0xf7f6f601, 0x1c0e0e12,
+ 0xc26161a3, 0x6a35355f, 0xae5757f9, 0x69b9b9d0,
+ 0x17868691, 0x99c1c158, 0x3a1d1d27, 0x279e9eb9,
+ 0xd9e1e138, 0xebf8f813, 0x2b9898b3, 0x22111133,
+ 0xd26969bb, 0xa9d9d970, 0x078e8e89, 0x339494a7,
+ 0x2d9b9bb6, 0x3c1e1e22, 0x15878792, 0xc9e9e920,
+ 0x87cece49, 0xaa5555ff, 0x50282878, 0xa5dfdf7a,
+ 0x038c8c8f, 0x59a1a1f8, 0x09898980, 0x1a0d0d17,
+ 0x65bfbfda, 0xd7e6e631, 0x844242c6, 0xd06868b8,
+ 0x824141c3, 0x299999b0, 0x5a2d2d77, 0x1e0f0f11,
+ 0x7bb0b0cb, 0xa85454fc, 0x6dbbbbd6, 0x2c16163a);
+$code.=<<___;
+ .byte 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
+ .byte 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
+ .byte 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
+ .byte 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
+ .byte 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
+ .byte 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
+ .byte 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
+ .byte 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
+ .byte 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
+ .byte 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
+ .byte 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
+ .byte 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
+ .byte 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
+ .byte 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
+ .byte 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
+ .byte 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
+ .byte 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
+ .byte 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
+ .byte 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
+ .byte 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
+ .byte 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
+ .byte 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
+ .byte 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
+ .byte 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
+ .byte 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
+ .byte 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
+ .byte 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
+ .byte 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
+ .byte 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
+ .byte 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
+ .byte 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
+ .byte 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
+.type AES_Te,#object
+.size AES_Te,(.-AES_Te)
+
+.align 64
+.skip 16
+_sparcv9_AES_encrypt:
+ save %sp,-$frame-$locals,%sp
+ stx %i7,[%sp+$bias+$frame+0] ! off-load return address
+ ld [$key+240],$rounds
+ ld [$key+0],$t0
+ ld [$key+4],$t1 !
+ ld [$key+8],$t2
+ srl $rounds,1,$rounds
+ xor $t0,$s0,$s0
+ ld [$key+12],$t3
+ srl $s0,21,$acc0
+ xor $t1,$s1,$s1
+ ld [$key+16],$t0
+ srl $s1,13,$acc1 !
+ xor $t2,$s2,$s2
+ ld [$key+20],$t1
+ xor $t3,$s3,$s3
+ ld [$key+24],$t2
+ and $acc0,2040,$acc0
+ ld [$key+28],$t3
+ nop
+.Lenc_loop:
+ srl $s2,5,$acc2 !
+ and $acc1,2040,$acc1
+ ldx [$tbl+$acc0],$acc0
+ sll $s3,3,$acc3
+ and $acc2,2040,$acc2
+ ldx [$tbl+$acc1],$acc1
+ srl $s1,21,$acc4
+ and $acc3,2040,$acc3
+ ldx [$tbl+$acc2],$acc2 !
+ srl $s2,13,$acc5
+ and $acc4,2040,$acc4
+ ldx [$tbl+$acc3],$acc3
+ srl $s3,5,$acc6
+ and $acc5,2040,$acc5
+ ldx [$tbl+$acc4],$acc4
+ fmovs %f0,%f0
+ sll $s0,3,$acc7 !
+ and $acc6,2040,$acc6
+ ldx [$tbl+$acc5],$acc5
+ srl $s2,21,$acc8
+ and $acc7,2040,$acc7
+ ldx [$tbl+$acc6],$acc6
+ srl $s3,13,$acc9
+ and $acc8,2040,$acc8
+ ldx [$tbl+$acc7],$acc7 !
+ srl $s0,5,$acc10
+ and $acc9,2040,$acc9
+ ldx [$tbl+$acc8],$acc8
+ sll $s1,3,$acc11
+ and $acc10,2040,$acc10
+ ldx [$tbl+$acc9],$acc9
+ fmovs %f0,%f0
+ srl $s3,21,$acc12 !
+ and $acc11,2040,$acc11
+ ldx [$tbl+$acc10],$acc10
+ srl $s0,13,$acc13
+ and $acc12,2040,$acc12
+ ldx [$tbl+$acc11],$acc11
+ srl $s1,5,$acc14
+ and $acc13,2040,$acc13
+ ldx [$tbl+$acc12],$acc12 !
+ sll $s2,3,$acc15
+ and $acc14,2040,$acc14
+ ldx [$tbl+$acc13],$acc13
+ and $acc15,2040,$acc15
+ add $key,32,$key
+ ldx [$tbl+$acc14],$acc14
+ fmovs %f0,%f0
+ subcc $rounds,1,$rounds !
+ ldx [$tbl+$acc15],$acc15
+ bz,a,pn %icc,.Lenc_last
+ add $tbl,2048,$rounds
+
+ srlx $acc1,8,$acc1
+ xor $acc0,$t0,$t0
+ ld [$key+0],$s0
+ fmovs %f0,%f0
+ srlx $acc2,16,$acc2 !
+ xor $acc1,$t0,$t0
+ ld [$key+4],$s1
+ srlx $acc3,24,$acc3
+ xor $acc2,$t0,$t0
+ ld [$key+8],$s2
+ srlx $acc5,8,$acc5
+ xor $acc3,$t0,$t0
+ ld [$key+12],$s3 !
+ srlx $acc6,16,$acc6
+ xor $acc4,$t1,$t1
+ fmovs %f0,%f0
+ srlx $acc7,24,$acc7
+ xor $acc5,$t1,$t1
+ srlx $acc9,8,$acc9
+ xor $acc6,$t1,$t1
+ srlx $acc10,16,$acc10 !
+ xor $acc7,$t1,$t1
+ srlx $acc11,24,$acc11
+ xor $acc8,$t2,$t2
+ srlx $acc13,8,$acc13
+ xor $acc9,$t2,$t2
+ srlx $acc14,16,$acc14
+ xor $acc10,$t2,$t2
+ srlx $acc15,24,$acc15 !
+ xor $acc11,$t2,$t2
+ xor $acc12,$acc14,$acc14
+ xor $acc13,$t3,$t3
+ srl $t0,21,$acc0
+ xor $acc14,$t3,$t3
+ srl $t1,13,$acc1
+ xor $acc15,$t3,$t3
+
+ and $acc0,2040,$acc0 !
+ srl $t2,5,$acc2
+ and $acc1,2040,$acc1
+ ldx [$tbl+$acc0],$acc0
+ sll $t3,3,$acc3
+ and $acc2,2040,$acc2
+ ldx [$tbl+$acc1],$acc1
+ fmovs %f0,%f0
+ srl $t1,21,$acc4 !
+ and $acc3,2040,$acc3
+ ldx [$tbl+$acc2],$acc2
+ srl $t2,13,$acc5
+ and $acc4,2040,$acc4
+ ldx [$tbl+$acc3],$acc3
+ srl $t3,5,$acc6
+ and $acc5,2040,$acc5
+ ldx [$tbl+$acc4],$acc4 !
+ sll $t0,3,$acc7
+ and $acc6,2040,$acc6
+ ldx [$tbl+$acc5],$acc5
+ srl $t2,21,$acc8
+ and $acc7,2040,$acc7
+ ldx [$tbl+$acc6],$acc6
+ fmovs %f0,%f0
+ srl $t3,13,$acc9 !
+ and $acc8,2040,$acc8
+ ldx [$tbl+$acc7],$acc7
+ srl $t0,5,$acc10
+ and $acc9,2040,$acc9
+ ldx [$tbl+$acc8],$acc8
+ sll $t1,3,$acc11
+ and $acc10,2040,$acc10
+ ldx [$tbl+$acc9],$acc9 !
+ srl $t3,21,$acc12
+ and $acc11,2040,$acc11
+ ldx [$tbl+$acc10],$acc10
+ srl $t0,13,$acc13
+ and $acc12,2040,$acc12
+ ldx [$tbl+$acc11],$acc11
+ fmovs %f0,%f0
+ srl $t1,5,$acc14 !
+ and $acc13,2040,$acc13
+ ldx [$tbl+$acc12],$acc12
+ sll $t2,3,$acc15
+ and $acc14,2040,$acc14
+ ldx [$tbl+$acc13],$acc13
+ srlx $acc1,8,$acc1
+ and $acc15,2040,$acc15
+ ldx [$tbl+$acc14],$acc14 !
+
+ srlx $acc2,16,$acc2
+ xor $acc0,$s0,$s0
+ ldx [$tbl+$acc15],$acc15
+ srlx $acc3,24,$acc3
+ xor $acc1,$s0,$s0
+ ld [$key+16],$t0
+ fmovs %f0,%f0
+ srlx $acc5,8,$acc5 !
+ xor $acc2,$s0,$s0
+ ld [$key+20],$t1
+ srlx $acc6,16,$acc6
+ xor $acc3,$s0,$s0
+ ld [$key+24],$t2
+ srlx $acc7,24,$acc7
+ xor $acc4,$s1,$s1
+ ld [$key+28],$t3 !
+ srlx $acc9,8,$acc9
+ xor $acc5,$s1,$s1
+ ldx [$tbl+2048+0],%g0 ! prefetch te4
+ srlx $acc10,16,$acc10
+ xor $acc6,$s1,$s1
+ ldx [$tbl+2048+32],%g0 ! prefetch te4
+ srlx $acc11,24,$acc11
+ xor $acc7,$s1,$s1
+ ldx [$tbl+2048+64],%g0 ! prefetch te4
+ srlx $acc13,8,$acc13
+ xor $acc8,$s2,$s2
+ ldx [$tbl+2048+96],%g0 ! prefetch te4
+ srlx $acc14,16,$acc14 !
+ xor $acc9,$s2,$s2
+ ldx [$tbl+2048+128],%g0 ! prefetch te4
+ srlx $acc15,24,$acc15
+ xor $acc10,$s2,$s2
+ ldx [$tbl+2048+160],%g0 ! prefetch te4
+ srl $s0,21,$acc0
+ xor $acc11,$s2,$s2
+ ldx [$tbl+2048+192],%g0 ! prefetch te4
+ xor $acc12,$acc14,$acc14
+ xor $acc13,$s3,$s3
+ ldx [$tbl+2048+224],%g0 ! prefetch te4
+ srl $s1,13,$acc1 !
+ xor $acc14,$s3,$s3
+ xor $acc15,$s3,$s3
+ ba .Lenc_loop
+ and $acc0,2040,$acc0
+
+.align 32
+.Lenc_last:
+ srlx $acc1,8,$acc1 !
+ xor $acc0,$t0,$t0
+ ld [$key+0],$s0
+ srlx $acc2,16,$acc2
+ xor $acc1,$t0,$t0
+ ld [$key+4],$s1
+ srlx $acc3,24,$acc3
+ xor $acc2,$t0,$t0
+ ld [$key+8],$s2 !
+ srlx $acc5,8,$acc5
+ xor $acc3,$t0,$t0
+ ld [$key+12],$s3
+ srlx $acc6,16,$acc6
+ xor $acc4,$t1,$t1
+ srlx $acc7,24,$acc7
+ xor $acc5,$t1,$t1
+ srlx $acc9,8,$acc9 !
+ xor $acc6,$t1,$t1
+ srlx $acc10,16,$acc10
+ xor $acc7,$t1,$t1
+ srlx $acc11,24,$acc11
+ xor $acc8,$t2,$t2
+ srlx $acc13,8,$acc13
+ xor $acc9,$t2,$t2
+ srlx $acc14,16,$acc14 !
+ xor $acc10,$t2,$t2
+ srlx $acc15,24,$acc15
+ xor $acc11,$t2,$t2
+ xor $acc12,$acc14,$acc14
+ xor $acc13,$t3,$t3
+ srl $t0,24,$acc0
+ xor $acc14,$t3,$t3
+ srl $t1,16,$acc1 !
+ xor $acc15,$t3,$t3
+
+ srl $t2,8,$acc2
+ and $acc1,255,$acc1
+ ldub [$rounds+$acc0],$acc0
+ srl $t1,24,$acc4
+ and $acc2,255,$acc2
+ ldub [$rounds+$acc1],$acc1
+ srl $t2,16,$acc5 !
+ and $t3,255,$acc3
+ ldub [$rounds+$acc2],$acc2
+ ldub [$rounds+$acc3],$acc3
+ srl $t3,8,$acc6
+ and $acc5,255,$acc5
+ ldub [$rounds+$acc4],$acc4
+ fmovs %f0,%f0
+ srl $t2,24,$acc8 !
+ and $acc6,255,$acc6
+ ldub [$rounds+$acc5],$acc5
+ srl $t3,16,$acc9
+ and $t0,255,$acc7
+ ldub [$rounds+$acc6],$acc6
+ ldub [$rounds+$acc7],$acc7
+ fmovs %f0,%f0
+ srl $t0,8,$acc10 !
+ and $acc9,255,$acc9
+ ldub [$rounds+$acc8],$acc8
+ srl $t3,24,$acc12
+ and $acc10,255,$acc10
+ ldub [$rounds+$acc9],$acc9
+ srl $t0,16,$acc13
+ and $t1,255,$acc11
+ ldub [$rounds+$acc10],$acc10 !
+ srl $t1,8,$acc14
+ and $acc13,255,$acc13
+ ldub [$rounds+$acc11],$acc11
+ ldub [$rounds+$acc12],$acc12
+ and $acc14,255,$acc14
+ ldub [$rounds+$acc13],$acc13
+ and $t2,255,$acc15
+ ldub [$rounds+$acc14],$acc14 !
+
+ sll $acc0,24,$acc0
+ xor $acc3,$s0,$s0
+ ldub [$rounds+$acc15],$acc15
+ sll $acc1,16,$acc1
+ xor $acc0,$s0,$s0
+ ldx [%sp+$bias+$frame+0],%i7 ! restore return address
+ fmovs %f0,%f0
+ sll $acc2,8,$acc2 !
+ xor $acc1,$s0,$s0
+ sll $acc4,24,$acc4
+ xor $acc2,$s0,$s0
+ sll $acc5,16,$acc5
+ xor $acc7,$s1,$s1
+ sll $acc6,8,$acc6
+ xor $acc4,$s1,$s1
+ sll $acc8,24,$acc8 !
+ xor $acc5,$s1,$s1
+ sll $acc9,16,$acc9
+ xor $acc11,$s2,$s2
+ sll $acc10,8,$acc10
+ xor $acc6,$s1,$s1
+ sll $acc12,24,$acc12
+ xor $acc8,$s2,$s2
+ sll $acc13,16,$acc13 !
+ xor $acc9,$s2,$s2
+ sll $acc14,8,$acc14
+ xor $acc10,$s2,$s2
+ xor $acc12,$acc14,$acc14
+ xor $acc13,$s3,$s3
+ xor $acc14,$s3,$s3
+ xor $acc15,$s3,$s3
+
+ ret
+ restore
+.type _sparcv9_AES_encrypt,#function
+.size _sparcv9_AES_encrypt,(.-_sparcv9_AES_encrypt)
+
+.align 32
+.globl AES_encrypt
+AES_encrypt:
+ or %o0,%o1,%g1
+ andcc %g1,3,%g0
+ bnz,pn %xcc,.Lunaligned_enc
+ save %sp,-$frame,%sp
+
+ ld [%i0+0],%o0
+ ld [%i0+4],%o1
+ ld [%i0+8],%o2
+ ld [%i0+12],%o3
+
+1: call .+8
+ add %o7,AES_Te-1b,%o4
+ call _sparcv9_AES_encrypt
+ mov %i2,%o5
+
+ st %o0,[%i1+0]
+ st %o1,[%i1+4]
+ st %o2,[%i1+8]
+ st %o3,[%i1+12]
+
+ ret
+ restore
+
+.align 32
+.Lunaligned_enc:
+ ldub [%i0+0],%l0
+ ldub [%i0+1],%l1
+ ldub [%i0+2],%l2
+
+ sll %l0,24,%l0
+ ldub [%i0+3],%l3
+ sll %l1,16,%l1
+ ldub [%i0+4],%l4
+ sll %l2,8,%l2
+ or %l1,%l0,%l0
+ ldub [%i0+5],%l5
+ sll %l4,24,%l4
+ or %l3,%l2,%l2
+ ldub [%i0+6],%l6
+ sll %l5,16,%l5
+ or %l0,%l2,%o0
+ ldub [%i0+7],%l7
+
+ sll %l6,8,%l6
+ or %l5,%l4,%l4
+ ldub [%i0+8],%l0
+ or %l7,%l6,%l6
+ ldub [%i0+9],%l1
+ or %l4,%l6,%o1
+ ldub [%i0+10],%l2
+
+ sll %l0,24,%l0
+ ldub [%i0+11],%l3
+ sll %l1,16,%l1
+ ldub [%i0+12],%l4
+ sll %l2,8,%l2
+ or %l1,%l0,%l0
+ ldub [%i0+13],%l5
+ sll %l4,24,%l4
+ or %l3,%l2,%l2
+ ldub [%i0+14],%l6
+ sll %l5,16,%l5
+ or %l0,%l2,%o2
+ ldub [%i0+15],%l7
+
+ sll %l6,8,%l6
+ or %l5,%l4,%l4
+ or %l7,%l6,%l6
+ or %l4,%l6,%o3
+
+1: call .+8
+ add %o7,AES_Te-1b,%o4
+ call _sparcv9_AES_encrypt
+ mov %i2,%o5
+
+ srl %o0,24,%l0
+ srl %o0,16,%l1
+ stb %l0,[%i1+0]
+ srl %o0,8,%l2
+ stb %l1,[%i1+1]
+ stb %l2,[%i1+2]
+ srl %o1,24,%l4
+ stb %o0,[%i1+3]
+
+ srl %o1,16,%l5
+ stb %l4,[%i1+4]
+ srl %o1,8,%l6
+ stb %l5,[%i1+5]
+ stb %l6,[%i1+6]
+ srl %o2,24,%l0
+ stb %o1,[%i1+7]
+
+ srl %o2,16,%l1
+ stb %l0,[%i1+8]
+ srl %o2,8,%l2
+ stb %l1,[%i1+9]
+ stb %l2,[%i1+10]
+ srl %o3,24,%l4
+ stb %o2,[%i1+11]
+
+ srl %o3,16,%l5
+ stb %l4,[%i1+12]
+ srl %o3,8,%l6
+ stb %l5,[%i1+13]
+ stb %l6,[%i1+14]
+ stb %o3,[%i1+15]
+
+ ret
+ restore
+.type AES_encrypt,#function
+.size AES_encrypt,(.-AES_encrypt)
+
+___
+
+$code.=<<___;
+.align 256
+AES_Td:
+___
+&_data_word(
+ 0x51f4a750, 0x7e416553, 0x1a17a4c3, 0x3a275e96,
+ 0x3bab6bcb, 0x1f9d45f1, 0xacfa58ab, 0x4be30393,
+ 0x2030fa55, 0xad766df6, 0x88cc7691, 0xf5024c25,
+ 0x4fe5d7fc, 0xc52acbd7, 0x26354480, 0xb562a38f,
+ 0xdeb15a49, 0x25ba1b67, 0x45ea0e98, 0x5dfec0e1,
+ 0xc32f7502, 0x814cf012, 0x8d4697a3, 0x6bd3f9c6,
+ 0x038f5fe7, 0x15929c95, 0xbf6d7aeb, 0x955259da,
+ 0xd4be832d, 0x587421d3, 0x49e06929, 0x8ec9c844,
+ 0x75c2896a, 0xf48e7978, 0x99583e6b, 0x27b971dd,
+ 0xbee14fb6, 0xf088ad17, 0xc920ac66, 0x7dce3ab4,
+ 0x63df4a18, 0xe51a3182, 0x97513360, 0x62537f45,
+ 0xb16477e0, 0xbb6bae84, 0xfe81a01c, 0xf9082b94,
+ 0x70486858, 0x8f45fd19, 0x94de6c87, 0x527bf8b7,
+ 0xab73d323, 0x724b02e2, 0xe31f8f57, 0x6655ab2a,
+ 0xb2eb2807, 0x2fb5c203, 0x86c57b9a, 0xd33708a5,
+ 0x302887f2, 0x23bfa5b2, 0x02036aba, 0xed16825c,
+ 0x8acf1c2b, 0xa779b492, 0xf307f2f0, 0x4e69e2a1,
+ 0x65daf4cd, 0x0605bed5, 0xd134621f, 0xc4a6fe8a,
+ 0x342e539d, 0xa2f355a0, 0x058ae132, 0xa4f6eb75,
+ 0x0b83ec39, 0x4060efaa, 0x5e719f06, 0xbd6e1051,
+ 0x3e218af9, 0x96dd063d, 0xdd3e05ae, 0x4de6bd46,
+ 0x91548db5, 0x71c45d05, 0x0406d46f, 0x605015ff,
+ 0x1998fb24, 0xd6bde997, 0x894043cc, 0x67d99e77,
+ 0xb0e842bd, 0x07898b88, 0xe7195b38, 0x79c8eedb,
+ 0xa17c0a47, 0x7c420fe9, 0xf8841ec9, 0x00000000,
+ 0x09808683, 0x322bed48, 0x1e1170ac, 0x6c5a724e,
+ 0xfd0efffb, 0x0f853856, 0x3daed51e, 0x362d3927,
+ 0x0a0fd964, 0x685ca621, 0x9b5b54d1, 0x24362e3a,
+ 0x0c0a67b1, 0x9357e70f, 0xb4ee96d2, 0x1b9b919e,
+ 0x80c0c54f, 0x61dc20a2, 0x5a774b69, 0x1c121a16,
+ 0xe293ba0a, 0xc0a02ae5, 0x3c22e043, 0x121b171d,
+ 0x0e090d0b, 0xf28bc7ad, 0x2db6a8b9, 0x141ea9c8,
+ 0x57f11985, 0xaf75074c, 0xee99ddbb, 0xa37f60fd,
+ 0xf701269f, 0x5c72f5bc, 0x44663bc5, 0x5bfb7e34,
+ 0x8b432976, 0xcb23c6dc, 0xb6edfc68, 0xb8e4f163,
+ 0xd731dcca, 0x42638510, 0x13972240, 0x84c61120,
+ 0x854a247d, 0xd2bb3df8, 0xaef93211, 0xc729a16d,
+ 0x1d9e2f4b, 0xdcb230f3, 0x0d8652ec, 0x77c1e3d0,
+ 0x2bb3166c, 0xa970b999, 0x119448fa, 0x47e96422,
+ 0xa8fc8cc4, 0xa0f03f1a, 0x567d2cd8, 0x223390ef,
+ 0x87494ec7, 0xd938d1c1, 0x8ccaa2fe, 0x98d40b36,
+ 0xa6f581cf, 0xa57ade28, 0xdab78e26, 0x3fadbfa4,
+ 0x2c3a9de4, 0x5078920d, 0x6a5fcc9b, 0x547e4662,
+ 0xf68d13c2, 0x90d8b8e8, 0x2e39f75e, 0x82c3aff5,
+ 0x9f5d80be, 0x69d0937c, 0x6fd52da9, 0xcf2512b3,
+ 0xc8ac993b, 0x10187da7, 0xe89c636e, 0xdb3bbb7b,
+ 0xcd267809, 0x6e5918f4, 0xec9ab701, 0x834f9aa8,
+ 0xe6956e65, 0xaaffe67e, 0x21bccf08, 0xef15e8e6,
+ 0xbae79bd9, 0x4a6f36ce, 0xea9f09d4, 0x29b07cd6,
+ 0x31a4b2af, 0x2a3f2331, 0xc6a59430, 0x35a266c0,
+ 0x744ebc37, 0xfc82caa6, 0xe090d0b0, 0x33a7d815,
+ 0xf104984a, 0x41ecdaf7, 0x7fcd500e, 0x1791f62f,
+ 0x764dd68d, 0x43efb04d, 0xccaa4d54, 0xe49604df,
+ 0x9ed1b5e3, 0x4c6a881b, 0xc12c1fb8, 0x4665517f,
+ 0x9d5eea04, 0x018c355d, 0xfa877473, 0xfb0b412e,
+ 0xb3671d5a, 0x92dbd252, 0xe9105633, 0x6dd64713,
+ 0x9ad7618c, 0x37a10c7a, 0x59f8148e, 0xeb133c89,
+ 0xcea927ee, 0xb761c935, 0xe11ce5ed, 0x7a47b13c,
+ 0x9cd2df59, 0x55f2733f, 0x1814ce79, 0x73c737bf,
+ 0x53f7cdea, 0x5ffdaa5b, 0xdf3d6f14, 0x7844db86,
+ 0xcaaff381, 0xb968c43e, 0x3824342c, 0xc2a3405f,
+ 0x161dc372, 0xbce2250c, 0x283c498b, 0xff0d9541,
+ 0x39a80171, 0x080cb3de, 0xd8b4e49c, 0x6456c190,
+ 0x7bcb8461, 0xd532b670, 0x486c5c74, 0xd0b85742);
+$code.=<<___;
+ .byte 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
+ .byte 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
+ .byte 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
+ .byte 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
+ .byte 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
+ .byte 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
+ .byte 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
+ .byte 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
+ .byte 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
+ .byte 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
+ .byte 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
+ .byte 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
+ .byte 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
+ .byte 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
+ .byte 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
+ .byte 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
+ .byte 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
+ .byte 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
+ .byte 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
+ .byte 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
+ .byte 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
+ .byte 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
+ .byte 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
+ .byte 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
+ .byte 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
+ .byte 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
+ .byte 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
+ .byte 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
+ .byte 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
+ .byte 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
+ .byte 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
+ .byte 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
+.type AES_Td,#object
+.size AES_Td,(.-AES_Td)
+
+.align 64
+.skip 16
+_sparcv9_AES_decrypt:
+ save %sp,-$frame-$locals,%sp
+ stx %i7,[%sp+$bias+$frame+0] ! off-load return address
+ ld [$key+240],$rounds
+ ld [$key+0],$t0
+ ld [$key+4],$t1 !
+ ld [$key+8],$t2
+ ld [$key+12],$t3
+ srl $rounds,1,$rounds
+ xor $t0,$s0,$s0
+ ld [$key+16],$t0
+ xor $t1,$s1,$s1
+ ld [$key+20],$t1
+ srl $s0,21,$acc0 !
+ xor $t2,$s2,$s2
+ ld [$key+24],$t2
+ xor $t3,$s3,$s3
+ and $acc0,2040,$acc0
+ ld [$key+28],$t3
+ srl $s3,13,$acc1
+ nop
+.Ldec_loop:
+ srl $s2,5,$acc2 !
+ and $acc1,2040,$acc1
+ ldx [$tbl+$acc0],$acc0
+ sll $s1,3,$acc3
+ and $acc2,2040,$acc2
+ ldx [$tbl+$acc1],$acc1
+ srl $s1,21,$acc4
+ and $acc3,2040,$acc3
+ ldx [$tbl+$acc2],$acc2 !
+ srl $s0,13,$acc5
+ and $acc4,2040,$acc4
+ ldx [$tbl+$acc3],$acc3
+ srl $s3,5,$acc6
+ and $acc5,2040,$acc5
+ ldx [$tbl+$acc4],$acc4
+ fmovs %f0,%f0
+ sll $s2,3,$acc7 !
+ and $acc6,2040,$acc6
+ ldx [$tbl+$acc5],$acc5
+ srl $s2,21,$acc8
+ and $acc7,2040,$acc7
+ ldx [$tbl+$acc6],$acc6
+ srl $s1,13,$acc9
+ and $acc8,2040,$acc8
+ ldx [$tbl+$acc7],$acc7 !
+ srl $s0,5,$acc10
+ and $acc9,2040,$acc9
+ ldx [$tbl+$acc8],$acc8
+ sll $s3,3,$acc11
+ and $acc10,2040,$acc10
+ ldx [$tbl+$acc9],$acc9
+ fmovs %f0,%f0
+ srl $s3,21,$acc12 !
+ and $acc11,2040,$acc11
+ ldx [$tbl+$acc10],$acc10
+ srl $s2,13,$acc13
+ and $acc12,2040,$acc12
+ ldx [$tbl+$acc11],$acc11
+ srl $s1,5,$acc14
+ and $acc13,2040,$acc13
+ ldx [$tbl+$acc12],$acc12 !
+ sll $s0,3,$acc15
+ and $acc14,2040,$acc14
+ ldx [$tbl+$acc13],$acc13
+ and $acc15,2040,$acc15
+ add $key,32,$key
+ ldx [$tbl+$acc14],$acc14
+ fmovs %f0,%f0
+ subcc $rounds,1,$rounds !
+ ldx [$tbl+$acc15],$acc15
+ bz,a,pn %icc,.Ldec_last
+ add $tbl,2048,$rounds
+
+ srlx $acc1,8,$acc1
+ xor $acc0,$t0,$t0
+ ld [$key+0],$s0
+ fmovs %f0,%f0
+ srlx $acc2,16,$acc2 !
+ xor $acc1,$t0,$t0
+ ld [$key+4],$s1
+ srlx $acc3,24,$acc3
+ xor $acc2,$t0,$t0
+ ld [$key+8],$s2
+ srlx $acc5,8,$acc5
+ xor $acc3,$t0,$t0
+ ld [$key+12],$s3 !
+ srlx $acc6,16,$acc6
+ xor $acc4,$t1,$t1
+ fmovs %f0,%f0
+ srlx $acc7,24,$acc7
+ xor $acc5,$t1,$t1
+ srlx $acc9,8,$acc9
+ xor $acc6,$t1,$t1
+ srlx $acc10,16,$acc10 !
+ xor $acc7,$t1,$t1
+ srlx $acc11,24,$acc11
+ xor $acc8,$t2,$t2
+ srlx $acc13,8,$acc13
+ xor $acc9,$t2,$t2
+ srlx $acc14,16,$acc14
+ xor $acc10,$t2,$t2
+ srlx $acc15,24,$acc15 !
+ xor $acc11,$t2,$t2
+ xor $acc12,$acc14,$acc14
+ xor $acc13,$t3,$t3
+ srl $t0,21,$acc0
+ xor $acc14,$t3,$t3
+ xor $acc15,$t3,$t3
+ srl $t3,13,$acc1
+
+ and $acc0,2040,$acc0 !
+ srl $t2,5,$acc2
+ and $acc1,2040,$acc1
+ ldx [$tbl+$acc0],$acc0
+ sll $t1,3,$acc3
+ and $acc2,2040,$acc2
+ ldx [$tbl+$acc1],$acc1
+ fmovs %f0,%f0
+ srl $t1,21,$acc4 !
+ and $acc3,2040,$acc3
+ ldx [$tbl+$acc2],$acc2
+ srl $t0,13,$acc5
+ and $acc4,2040,$acc4
+ ldx [$tbl+$acc3],$acc3
+ srl $t3,5,$acc6
+ and $acc5,2040,$acc5
+ ldx [$tbl+$acc4],$acc4 !
+ sll $t2,3,$acc7
+ and $acc6,2040,$acc6
+ ldx [$tbl+$acc5],$acc5
+ srl $t2,21,$acc8
+ and $acc7,2040,$acc7
+ ldx [$tbl+$acc6],$acc6
+ fmovs %f0,%f0
+ srl $t1,13,$acc9 !
+ and $acc8,2040,$acc8
+ ldx [$tbl+$acc7],$acc7
+ srl $t0,5,$acc10
+ and $acc9,2040,$acc9
+ ldx [$tbl+$acc8],$acc8
+ sll $t3,3,$acc11
+ and $acc10,2040,$acc10
+ ldx [$tbl+$acc9],$acc9 !
+ srl $t3,21,$acc12
+ and $acc11,2040,$acc11
+ ldx [$tbl+$acc10],$acc10
+ srl $t2,13,$acc13
+ and $acc12,2040,$acc12
+ ldx [$tbl+$acc11],$acc11
+ fmovs %f0,%f0
+ srl $t1,5,$acc14 !
+ and $acc13,2040,$acc13
+ ldx [$tbl+$acc12],$acc12
+ sll $t0,3,$acc15
+ and $acc14,2040,$acc14
+ ldx [$tbl+$acc13],$acc13
+ srlx $acc1,8,$acc1
+ and $acc15,2040,$acc15
+ ldx [$tbl+$acc14],$acc14 !
+
+ srlx $acc2,16,$acc2
+ xor $acc0,$s0,$s0
+ ldx [$tbl+$acc15],$acc15
+ srlx $acc3,24,$acc3
+ xor $acc1,$s0,$s0
+ ld [$key+16],$t0
+ fmovs %f0,%f0
+ srlx $acc5,8,$acc5 !
+ xor $acc2,$s0,$s0
+ ld [$key+20],$t1
+ srlx $acc6,16,$acc6
+ xor $acc3,$s0,$s0
+ ld [$key+24],$t2
+ srlx $acc7,24,$acc7
+ xor $acc4,$s1,$s1
+ ld [$key+28],$t3 !
+ srlx $acc9,8,$acc9
+ xor $acc5,$s1,$s1
+ ldx [$tbl+2048+0],%g0 ! prefetch td4
+ srlx $acc10,16,$acc10
+ xor $acc6,$s1,$s1
+ ldx [$tbl+2048+32],%g0 ! prefetch td4
+ srlx $acc11,24,$acc11
+ xor $acc7,$s1,$s1
+ ldx [$tbl+2048+64],%g0 ! prefetch td4
+ srlx $acc13,8,$acc13
+ xor $acc8,$s2,$s2
+ ldx [$tbl+2048+96],%g0 ! prefetch td4
+ srlx $acc14,16,$acc14 !
+ xor $acc9,$s2,$s2
+ ldx [$tbl+2048+128],%g0 ! prefetch td4
+ srlx $acc15,24,$acc15
+ xor $acc10,$s2,$s2
+ ldx [$tbl+2048+160],%g0 ! prefetch td4
+ srl $s0,21,$acc0
+ xor $acc11,$s2,$s2
+ ldx [$tbl+2048+192],%g0 ! prefetch td4
+ xor $acc12,$acc14,$acc14
+ xor $acc13,$s3,$s3
+ ldx [$tbl+2048+224],%g0 ! prefetch td4
+ and $acc0,2040,$acc0 !
+ xor $acc14,$s3,$s3
+ xor $acc15,$s3,$s3
+ ba .Ldec_loop
+ srl $s3,13,$acc1
+
+.align 32
+.Ldec_last:
+ srlx $acc1,8,$acc1 !
+ xor $acc0,$t0,$t0
+ ld [$key+0],$s0
+ srlx $acc2,16,$acc2
+ xor $acc1,$t0,$t0
+ ld [$key+4],$s1
+ srlx $acc3,24,$acc3
+ xor $acc2,$t0,$t0
+ ld [$key+8],$s2 !
+ srlx $acc5,8,$acc5
+ xor $acc3,$t0,$t0
+ ld [$key+12],$s3
+ srlx $acc6,16,$acc6
+ xor $acc4,$t1,$t1
+ srlx $acc7,24,$acc7
+ xor $acc5,$t1,$t1
+ srlx $acc9,8,$acc9 !
+ xor $acc6,$t1,$t1
+ srlx $acc10,16,$acc10
+ xor $acc7,$t1,$t1
+ srlx $acc11,24,$acc11
+ xor $acc8,$t2,$t2
+ srlx $acc13,8,$acc13
+ xor $acc9,$t2,$t2
+ srlx $acc14,16,$acc14 !
+ xor $acc10,$t2,$t2
+ srlx $acc15,24,$acc15
+ xor $acc11,$t2,$t2
+ xor $acc12,$acc14,$acc14
+ xor $acc13,$t3,$t3
+ srl $t0,24,$acc0
+ xor $acc14,$t3,$t3
+ xor $acc15,$t3,$t3 !
+ srl $t3,16,$acc1
+
+ srl $t2,8,$acc2
+ and $acc1,255,$acc1
+ ldub [$rounds+$acc0],$acc0
+ srl $t1,24,$acc4
+ and $acc2,255,$acc2
+ ldub [$rounds+$acc1],$acc1
+ srl $t0,16,$acc5 !
+ and $t1,255,$acc3
+ ldub [$rounds+$acc2],$acc2
+ ldub [$rounds+$acc3],$acc3
+ srl $t3,8,$acc6
+ and $acc5,255,$acc5
+ ldub [$rounds+$acc4],$acc4
+ fmovs %f0,%f0
+ srl $t2,24,$acc8 !
+ and $acc6,255,$acc6
+ ldub [$rounds+$acc5],$acc5
+ srl $t1,16,$acc9
+ and $t2,255,$acc7
+ ldub [$rounds+$acc6],$acc6
+ ldub [$rounds+$acc7],$acc7
+ fmovs %f0,%f0
+ srl $t0,8,$acc10 !
+ and $acc9,255,$acc9
+ ldub [$rounds+$acc8],$acc8
+ srl $t3,24,$acc12
+ and $acc10,255,$acc10
+ ldub [$rounds+$acc9],$acc9
+ srl $t2,16,$acc13
+ and $t3,255,$acc11
+ ldub [$rounds+$acc10],$acc10 !
+ srl $t1,8,$acc14
+ and $acc13,255,$acc13
+ ldub [$rounds+$acc11],$acc11
+ ldub [$rounds+$acc12],$acc12
+ and $acc14,255,$acc14
+ ldub [$rounds+$acc13],$acc13
+ and $t0,255,$acc15
+ ldub [$rounds+$acc14],$acc14 !
+
+ sll $acc0,24,$acc0
+ xor $acc3,$s0,$s0
+ ldub [$rounds+$acc15],$acc15
+ sll $acc1,16,$acc1
+ xor $acc0,$s0,$s0
+ ldx [%sp+$bias+$frame+0],%i7 ! restore return address
+ fmovs %f0,%f0
+ sll $acc2,8,$acc2 !
+ xor $acc1,$s0,$s0
+ sll $acc4,24,$acc4
+ xor $acc2,$s0,$s0
+ sll $acc5,16,$acc5
+ xor $acc7,$s1,$s1
+ sll $acc6,8,$acc6
+ xor $acc4,$s1,$s1
+ sll $acc8,24,$acc8 !
+ xor $acc5,$s1,$s1
+ sll $acc9,16,$acc9
+ xor $acc11,$s2,$s2
+ sll $acc10,8,$acc10
+ xor $acc6,$s1,$s1
+ sll $acc12,24,$acc12
+ xor $acc8,$s2,$s2
+ sll $acc13,16,$acc13 !
+ xor $acc9,$s2,$s2
+ sll $acc14,8,$acc14
+ xor $acc10,$s2,$s2
+ xor $acc12,$acc14,$acc14
+ xor $acc13,$s3,$s3
+ xor $acc14,$s3,$s3
+ xor $acc15,$s3,$s3
+
+ ret
+ restore
+.type _sparcv9_AES_decrypt,#function
+.size _sparcv9_AES_decrypt,(.-_sparcv9_AES_decrypt)
+
+.align 32
+.globl AES_decrypt
+AES_decrypt:
+ or %o0,%o1,%g1
+ andcc %g1,3,%g0
+ bnz,pn %xcc,.Lunaligned_dec
+ save %sp,-$frame,%sp
+
+ ld [%i0+0],%o0
+ ld [%i0+4],%o1
+ ld [%i0+8],%o2
+ ld [%i0+12],%o3
+
+1: call .+8
+ add %o7,AES_Td-1b,%o4
+ call _sparcv9_AES_decrypt
+ mov %i2,%o5
+
+ st %o0,[%i1+0]
+ st %o1,[%i1+4]
+ st %o2,[%i1+8]
+ st %o3,[%i1+12]
+
+ ret
+ restore
+
+.align 32
+.Lunaligned_dec:
+ ldub [%i0+0],%l0
+ ldub [%i0+1],%l1
+ ldub [%i0+2],%l2
+
+ sll %l0,24,%l0
+ ldub [%i0+3],%l3
+ sll %l1,16,%l1
+ ldub [%i0+4],%l4
+ sll %l2,8,%l2
+ or %l1,%l0,%l0
+ ldub [%i0+5],%l5
+ sll %l4,24,%l4
+ or %l3,%l2,%l2
+ ldub [%i0+6],%l6
+ sll %l5,16,%l5
+ or %l0,%l2,%o0
+ ldub [%i0+7],%l7
+
+ sll %l6,8,%l6
+ or %l5,%l4,%l4
+ ldub [%i0+8],%l0
+ or %l7,%l6,%l6
+ ldub [%i0+9],%l1
+ or %l4,%l6,%o1
+ ldub [%i0+10],%l2
+
+ sll %l0,24,%l0
+ ldub [%i0+11],%l3
+ sll %l1,16,%l1
+ ldub [%i0+12],%l4
+ sll %l2,8,%l2
+ or %l1,%l0,%l0
+ ldub [%i0+13],%l5
+ sll %l4,24,%l4
+ or %l3,%l2,%l2
+ ldub [%i0+14],%l6
+ sll %l5,16,%l5
+ or %l0,%l2,%o2
+ ldub [%i0+15],%l7
+
+ sll %l6,8,%l6
+ or %l5,%l4,%l4
+ or %l7,%l6,%l6
+ or %l4,%l6,%o3
+
+1: call .+8
+ add %o7,AES_Td-1b,%o4
+ call _sparcv9_AES_decrypt
+ mov %i2,%o5
+
+ srl %o0,24,%l0
+ srl %o0,16,%l1
+ stb %l0,[%i1+0]
+ srl %o0,8,%l2
+ stb %l1,[%i1+1]
+ stb %l2,[%i1+2]
+ srl %o1,24,%l4
+ stb %o0,[%i1+3]
+
+ srl %o1,16,%l5
+ stb %l4,[%i1+4]
+ srl %o1,8,%l6
+ stb %l5,[%i1+5]
+ stb %l6,[%i1+6]
+ srl %o2,24,%l0
+ stb %o1,[%i1+7]
+
+ srl %o2,16,%l1
+ stb %l0,[%i1+8]
+ srl %o2,8,%l2
+ stb %l1,[%i1+9]
+ stb %l2,[%i1+10]
+ srl %o3,24,%l4
+ stb %o2,[%i1+11]
+
+ srl %o3,16,%l5
+ stb %l4,[%i1+12]
+ srl %o3,8,%l6
+ stb %l5,[%i1+13]
+ stb %l6,[%i1+14]
+ stb %o3,[%i1+15]
+
+ ret
+ restore
+.type AES_decrypt,#function
+.size AES_decrypt,(.-AES_decrypt)
+___
+
+# fmovs instructions substituting for FP nops were originally added
+# to meet specific instruction alignment requirements to maximize ILP.
+# As UltraSPARC T1, a.k.a. Niagara, has shared FPU, FP nops can have
+# undesired effect, so just omit them and sacrifice some portion of
+# percent in performance...
+$code =~ s/fmovs.*$//gem;
+
+print $code;
diff --git a/crypto/aes/asm/aes-x86_64.pl b/crypto/aes/asm/aes-x86_64.pl
index 44e0bf8cae3a..f616f1751f7e 100755
--- a/crypto/aes/asm/aes-x86_64.pl
+++ b/crypto/aes/asm/aes-x86_64.pl
@@ -1198,19 +1198,20 @@ AES_cbc_encrypt:
ret
.align 4
.Lcbc_enc_tail:
- cmp $inp,$out
- je .Lcbc_enc_in_place
+ mov %rax,%r11
+ mov %rcx,%r12
mov %r10,%rcx
mov $inp,%rsi
mov $out,%rdi
.long 0xF689A4F3 # rep movsb
-.Lcbc_enc_in_place:
mov \$16,%rcx # zero tail
sub %r10,%rcx
xor %rax,%rax
.long 0xF689AAF3 # rep stosb
mov $out,$inp # this is not a mistake!
movq \$16,$_len # len=16
+ mov %r11,%rax
+ mov %r12,%rcx
jmp .Lcbc_enc_loop # one more spin...
#----------------------------- DECRYPT -----------------------------#
.align 16
diff --git a/crypto/asn1/Makefile b/crypto/asn1/Makefile
index 63066899d0bd..94a6885804ff 100644
--- a/crypto/asn1/Makefile
+++ b/crypto/asn1/Makefile
@@ -63,7 +63,7 @@ pk: pk.c
all: lib
lib: $(LIBOBJ)
- $(AR) $(LIB) $(LIBOBJ)
+ $(ARX) $(LIB) $(LIBOBJ)
$(RANLIB) $(LIB) || echo Never mind.
@touch lib
@@ -142,9 +142,9 @@ a_digest.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
a_digest.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
a_digest.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
a_digest.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-a_digest.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-a_digest.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-a_digest.o: ../../include/openssl/opensslconf.h
+a_digest.o: ../../include/openssl/evp.h ../../include/openssl/fips.h
+a_digest.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+a_digest.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
a_digest.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
a_digest.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
a_digest.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
@@ -250,27 +250,27 @@ a_sign.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
a_sign.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
a_sign.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
a_sign.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-a_sign.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-a_sign.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-a_sign.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
-a_sign.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
-a_sign.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
-a_sign.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-a_sign.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
-a_sign.o: ../cryptlib.h a_sign.c
+a_sign.o: ../../include/openssl/evp.h ../../include/openssl/fips.h
+a_sign.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+a_sign.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+a_sign.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+a_sign.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
+a_sign.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
+a_sign.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
+a_sign.o: ../../include/openssl/x509_vfy.h ../cryptlib.h a_sign.c
a_strex.o: ../../e_os.h ../../include/openssl/asn1.h
a_strex.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
a_strex.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
a_strex.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
a_strex.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-a_strex.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-a_strex.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-a_strex.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
-a_strex.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
-a_strex.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
-a_strex.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-a_strex.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
-a_strex.o: ../cryptlib.h a_strex.c charmap.h
+a_strex.o: ../../include/openssl/evp.h ../../include/openssl/fips.h
+a_strex.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+a_strex.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+a_strex.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+a_strex.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
+a_strex.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
+a_strex.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
+a_strex.o: ../../include/openssl/x509_vfy.h ../cryptlib.h a_strex.c charmap.h
a_strnid.o: ../../e_os.h ../../include/openssl/asn1.h
a_strnid.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
a_strnid.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
@@ -318,8 +318,9 @@ a_verify.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
a_verify.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
a_verify.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
a_verify.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-a_verify.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-a_verify.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+a_verify.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+a_verify.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+a_verify.o: ../../include/openssl/opensslconf.h
a_verify.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
a_verify.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
a_verify.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
@@ -338,8 +339,9 @@ asn1_gen.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
asn1_gen.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
asn1_gen.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
asn1_gen.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-asn1_gen.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-asn1_gen.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+asn1_gen.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+asn1_gen.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+asn1_gen.o: ../../include/openssl/opensslconf.h
asn1_gen.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
asn1_gen.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
asn1_gen.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
@@ -369,8 +371,9 @@ asn_mime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
asn_mime.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
asn_mime.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
asn_mime.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-asn_mime.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-asn_mime.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+asn_mime.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+asn_mime.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+asn_mime.o: ../../include/openssl/opensslconf.h
asn_mime.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
asn_mime.o: ../../include/openssl/pkcs7.h ../../include/openssl/rand.h
asn_mime.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
@@ -383,9 +386,9 @@ asn_moid.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
asn_moid.o: ../../include/openssl/dso.h ../../include/openssl/e_os2.h
asn_moid.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
asn_moid.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-asn_moid.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-asn_moid.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-asn_moid.o: ../../include/openssl/opensslconf.h
+asn_moid.o: ../../include/openssl/evp.h ../../include/openssl/fips.h
+asn_moid.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+asn_moid.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
asn_moid.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
asn_moid.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
asn_moid.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
@@ -404,23 +407,23 @@ d2i_pr.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
d2i_pr.o: ../../include/openssl/crypto.h ../../include/openssl/dsa.h
d2i_pr.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
d2i_pr.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-d2i_pr.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-d2i_pr.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-d2i_pr.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-d2i_pr.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h
-d2i_pr.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-d2i_pr.o: ../cryptlib.h d2i_pr.c
+d2i_pr.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+d2i_pr.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+d2i_pr.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+d2i_pr.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rsa.h
+d2i_pr.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
+d2i_pr.o: ../../include/openssl/symhacks.h ../cryptlib.h d2i_pr.c
d2i_pu.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
d2i_pu.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
d2i_pu.o: ../../include/openssl/crypto.h ../../include/openssl/dsa.h
d2i_pu.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
d2i_pu.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-d2i_pu.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-d2i_pu.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-d2i_pu.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-d2i_pu.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h
-d2i_pu.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-d2i_pu.o: ../cryptlib.h d2i_pu.c
+d2i_pu.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+d2i_pu.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+d2i_pu.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+d2i_pu.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rsa.h
+d2i_pu.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
+d2i_pu.o: ../../include/openssl/symhacks.h ../cryptlib.h d2i_pu.c
evp_asn1.o: ../../e_os.h ../../include/openssl/asn1.h
evp_asn1.o: ../../include/openssl/asn1_mac.h ../../include/openssl/bio.h
evp_asn1.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
@@ -456,71 +459,73 @@ i2d_pr.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
i2d_pr.o: ../../include/openssl/crypto.h ../../include/openssl/dsa.h
i2d_pr.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
i2d_pr.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-i2d_pr.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-i2d_pr.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-i2d_pr.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-i2d_pr.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h
-i2d_pr.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-i2d_pr.o: ../cryptlib.h i2d_pr.c
+i2d_pr.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+i2d_pr.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+i2d_pr.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+i2d_pr.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rsa.h
+i2d_pr.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
+i2d_pr.o: ../../include/openssl/symhacks.h ../cryptlib.h i2d_pr.c
i2d_pu.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
i2d_pu.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
i2d_pu.o: ../../include/openssl/crypto.h ../../include/openssl/dsa.h
i2d_pu.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
i2d_pu.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-i2d_pu.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-i2d_pu.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-i2d_pu.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-i2d_pu.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h
-i2d_pu.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-i2d_pu.o: ../cryptlib.h i2d_pu.c
+i2d_pu.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+i2d_pu.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+i2d_pu.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+i2d_pu.o: ../../include/openssl/ossl_typ.h ../../include/openssl/rsa.h
+i2d_pu.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
+i2d_pu.o: ../../include/openssl/symhacks.h ../cryptlib.h i2d_pu.c
n_pkey.o: ../../e_os.h ../../include/openssl/asn1.h
n_pkey.o: ../../include/openssl/asn1_mac.h ../../include/openssl/asn1t.h
n_pkey.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
n_pkey.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
n_pkey.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
n_pkey.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-n_pkey.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-n_pkey.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-n_pkey.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
-n_pkey.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
-n_pkey.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h
-n_pkey.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-n_pkey.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-n_pkey.o: ../../include/openssl/x509_vfy.h ../cryptlib.h n_pkey.c
+n_pkey.o: ../../include/openssl/evp.h ../../include/openssl/fips.h
+n_pkey.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+n_pkey.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+n_pkey.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+n_pkey.o: ../../include/openssl/pkcs7.h ../../include/openssl/rsa.h
+n_pkey.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+n_pkey.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+n_pkey.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+n_pkey.o: ../cryptlib.h n_pkey.c
nsseq.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h
nsseq.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
nsseq.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
nsseq.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
nsseq.o: ../../include/openssl/ecdsa.h ../../include/openssl/evp.h
-nsseq.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-nsseq.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-nsseq.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-nsseq.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-nsseq.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-nsseq.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-nsseq.o: ../../include/openssl/x509_vfy.h nsseq.c
+nsseq.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+nsseq.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+nsseq.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+nsseq.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+nsseq.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+nsseq.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+nsseq.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h nsseq.c
p5_pbe.o: ../../e_os.h ../../include/openssl/asn1.h
p5_pbe.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
p5_pbe.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
p5_pbe.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
p5_pbe.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
p5_pbe.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-p5_pbe.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-p5_pbe.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-p5_pbe.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-p5_pbe.o: ../../include/openssl/pkcs7.h ../../include/openssl/rand.h
-p5_pbe.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
-p5_pbe.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-p5_pbe.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
-p5_pbe.o: ../cryptlib.h p5_pbe.c
+p5_pbe.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+p5_pbe.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+p5_pbe.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+p5_pbe.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+p5_pbe.o: ../../include/openssl/rand.h ../../include/openssl/safestack.h
+p5_pbe.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
+p5_pbe.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
+p5_pbe.o: ../../include/openssl/x509_vfy.h ../cryptlib.h p5_pbe.c
p5_pbev2.o: ../../e_os.h ../../include/openssl/asn1.h
p5_pbev2.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
p5_pbev2.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
p5_pbev2.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
p5_pbev2.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
p5_pbev2.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-p5_pbev2.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-p5_pbev2.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+p5_pbev2.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+p5_pbev2.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+p5_pbev2.o: ../../include/openssl/opensslconf.h
p5_pbev2.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
p5_pbev2.o: ../../include/openssl/pkcs7.h ../../include/openssl/rand.h
p5_pbev2.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
@@ -533,41 +538,42 @@ p8_pkey.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
p8_pkey.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
p8_pkey.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
p8_pkey.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-p8_pkey.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-p8_pkey.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-p8_pkey.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-p8_pkey.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-p8_pkey.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-p8_pkey.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-p8_pkey.o: ../../include/openssl/x509_vfy.h ../cryptlib.h p8_pkey.c
+p8_pkey.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+p8_pkey.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+p8_pkey.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+p8_pkey.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+p8_pkey.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+p8_pkey.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+p8_pkey.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+p8_pkey.o: ../cryptlib.h p8_pkey.c
t_bitst.o: ../../e_os.h ../../include/openssl/asn1.h
t_bitst.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
t_bitst.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
t_bitst.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
t_bitst.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
t_bitst.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-t_bitst.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-t_bitst.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-t_bitst.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-t_bitst.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-t_bitst.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-t_bitst.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-t_bitst.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
-t_bitst.o: ../cryptlib.h t_bitst.c
+t_bitst.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+t_bitst.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+t_bitst.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+t_bitst.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+t_bitst.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+t_bitst.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+t_bitst.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+t_bitst.o: ../../include/openssl/x509v3.h ../cryptlib.h t_bitst.c
t_crl.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
t_crl.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
t_crl.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
t_crl.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
t_crl.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
t_crl.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-t_crl.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-t_crl.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-t_crl.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-t_crl.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-t_crl.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-t_crl.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-t_crl.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
-t_crl.o: ../cryptlib.h t_crl.c
+t_crl.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+t_crl.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+t_crl.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+t_crl.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+t_crl.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+t_crl.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+t_crl.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+t_crl.o: ../../include/openssl/x509v3.h ../cryptlib.h t_crl.c
t_pkey.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
t_pkey.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
t_pkey.o: ../../include/openssl/crypto.h ../../include/openssl/dh.h
@@ -585,57 +591,57 @@ t_req.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
t_req.o: ../../include/openssl/dsa.h ../../include/openssl/e_os2.h
t_req.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
t_req.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-t_req.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-t_req.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-t_req.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
-t_req.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
-t_req.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h
-t_req.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-t_req.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-t_req.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
-t_req.o: ../cryptlib.h t_req.c
+t_req.o: ../../include/openssl/evp.h ../../include/openssl/fips.h
+t_req.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+t_req.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+t_req.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+t_req.o: ../../include/openssl/pkcs7.h ../../include/openssl/rsa.h
+t_req.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+t_req.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+t_req.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+t_req.o: ../../include/openssl/x509v3.h ../cryptlib.h t_req.c
t_spki.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
t_spki.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
t_spki.o: ../../include/openssl/crypto.h ../../include/openssl/dsa.h
t_spki.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
t_spki.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
t_spki.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-t_spki.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-t_spki.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-t_spki.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-t_spki.o: ../../include/openssl/pkcs7.h ../../include/openssl/rsa.h
-t_spki.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
-t_spki.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-t_spki.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
-t_spki.o: ../cryptlib.h t_spki.c
+t_spki.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+t_spki.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+t_spki.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+t_spki.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+t_spki.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h
+t_spki.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
+t_spki.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
+t_spki.o: ../../include/openssl/x509_vfy.h ../cryptlib.h t_spki.c
t_x509.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
t_x509.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
t_x509.o: ../../include/openssl/conf.h ../../include/openssl/crypto.h
t_x509.o: ../../include/openssl/dsa.h ../../include/openssl/e_os2.h
t_x509.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
t_x509.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-t_x509.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-t_x509.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-t_x509.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
-t_x509.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
-t_x509.o: ../../include/openssl/rsa.h ../../include/openssl/safestack.h
-t_x509.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-t_x509.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-t_x509.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
-t_x509.o: ../cryptlib.h t_x509.c
+t_x509.o: ../../include/openssl/evp.h ../../include/openssl/fips.h
+t_x509.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+t_x509.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+t_x509.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+t_x509.o: ../../include/openssl/pkcs7.h ../../include/openssl/rsa.h
+t_x509.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+t_x509.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+t_x509.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+t_x509.o: ../../include/openssl/x509v3.h ../cryptlib.h t_x509.c
t_x509a.o: ../../e_os.h ../../include/openssl/asn1.h
t_x509a.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
t_x509a.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
t_x509a.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
t_x509a.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-t_x509a.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-t_x509a.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-t_x509a.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
-t_x509a.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
-t_x509a.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
-t_x509a.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-t_x509a.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
-t_x509a.o: ../cryptlib.h t_x509a.c
+t_x509a.o: ../../include/openssl/evp.h ../../include/openssl/fips.h
+t_x509a.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+t_x509a.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+t_x509a.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+t_x509a.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
+t_x509a.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
+t_x509a.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
+t_x509a.o: ../../include/openssl/x509_vfy.h ../cryptlib.h t_x509a.c
tasn_dec.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h
tasn_dec.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
tasn_dec.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
@@ -688,21 +694,23 @@ x_algor.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
x_algor.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
x_algor.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
x_algor.o: ../../include/openssl/ecdsa.h ../../include/openssl/evp.h
-x_algor.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_algor.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-x_algor.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-x_algor.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-x_algor.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-x_algor.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_algor.o: ../../include/openssl/x509_vfy.h x_algor.c
+x_algor.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_algor.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_algor.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+x_algor.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+x_algor.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+x_algor.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+x_algor.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+x_algor.o: x_algor.c
x_attrib.o: ../../e_os.h ../../include/openssl/asn1.h
x_attrib.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
x_attrib.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
x_attrib.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
x_attrib.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
x_attrib.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-x_attrib.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_attrib.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+x_attrib.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_attrib.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_attrib.o: ../../include/openssl/opensslconf.h
x_attrib.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
x_attrib.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
x_attrib.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
@@ -723,37 +731,40 @@ x_crl.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
x_crl.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
x_crl.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
x_crl.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-x_crl.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_crl.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-x_crl.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-x_crl.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-x_crl.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-x_crl.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_crl.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x_crl.c
+x_crl.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_crl.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_crl.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+x_crl.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+x_crl.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+x_crl.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+x_crl.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+x_crl.o: ../cryptlib.h x_crl.c
x_exten.o: ../../include/openssl/asn1.h ../../include/openssl/asn1t.h
x_exten.o: ../../include/openssl/bio.h ../../include/openssl/buffer.h
x_exten.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
x_exten.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
x_exten.o: ../../include/openssl/ecdsa.h ../../include/openssl/evp.h
-x_exten.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_exten.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-x_exten.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-x_exten.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-x_exten.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-x_exten.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_exten.o: ../../include/openssl/x509_vfy.h x_exten.c
+x_exten.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_exten.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_exten.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+x_exten.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+x_exten.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+x_exten.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+x_exten.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+x_exten.o: x_exten.c
x_info.o: ../../e_os.h ../../include/openssl/asn1.h ../../include/openssl/bio.h
x_info.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
x_info.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
x_info.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
x_info.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-x_info.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_info.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-x_info.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-x_info.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-x_info.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-x_info.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_info.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x_info.c
+x_info.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_info.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_info.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+x_info.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+x_info.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+x_info.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+x_info.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+x_info.o: ../cryptlib.h x_info.c
x_long.o: ../../e_os.h ../../include/openssl/asn1.h
x_long.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
x_long.o: ../../include/openssl/bn.h ../../include/openssl/buffer.h
@@ -769,35 +780,37 @@ x_name.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
x_name.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
x_name.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
x_name.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-x_name.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_name.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-x_name.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-x_name.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-x_name.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-x_name.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_name.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x_name.c
+x_name.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_name.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_name.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+x_name.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+x_name.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+x_name.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+x_name.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+x_name.o: ../cryptlib.h x_name.c
x_pkey.o: ../../e_os.h ../../include/openssl/asn1.h
x_pkey.o: ../../include/openssl/asn1_mac.h ../../include/openssl/bio.h
x_pkey.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
x_pkey.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
x_pkey.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
x_pkey.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-x_pkey.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_pkey.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-x_pkey.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-x_pkey.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-x_pkey.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-x_pkey.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_pkey.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x_pkey.c
+x_pkey.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_pkey.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_pkey.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+x_pkey.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+x_pkey.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+x_pkey.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+x_pkey.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+x_pkey.o: ../cryptlib.h x_pkey.c
x_pubkey.o: ../../e_os.h ../../include/openssl/asn1.h
x_pubkey.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
x_pubkey.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
x_pubkey.o: ../../include/openssl/dsa.h ../../include/openssl/e_os2.h
x_pubkey.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
x_pubkey.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-x_pubkey.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-x_pubkey.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-x_pubkey.o: ../../include/openssl/opensslconf.h
+x_pubkey.o: ../../include/openssl/evp.h ../../include/openssl/fips.h
+x_pubkey.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+x_pubkey.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
x_pubkey.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
x_pubkey.o: ../../include/openssl/pkcs7.h ../../include/openssl/rsa.h
x_pubkey.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
@@ -810,76 +823,82 @@ x_req.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
x_req.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
x_req.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
x_req.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-x_req.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_req.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-x_req.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-x_req.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-x_req.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-x_req.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_req.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x_req.c
+x_req.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_req.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_req.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+x_req.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+x_req.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+x_req.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+x_req.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+x_req.o: ../cryptlib.h x_req.c
x_sig.o: ../../e_os.h ../../include/openssl/asn1.h
x_sig.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
x_sig.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
x_sig.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
x_sig.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
x_sig.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-x_sig.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_sig.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-x_sig.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-x_sig.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-x_sig.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-x_sig.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_sig.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x_sig.c
+x_sig.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_sig.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_sig.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+x_sig.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+x_sig.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+x_sig.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+x_sig.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+x_sig.o: ../cryptlib.h x_sig.c
x_spki.o: ../../e_os.h ../../include/openssl/asn1.h
x_spki.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
x_spki.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
x_spki.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
x_spki.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
x_spki.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-x_spki.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_spki.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-x_spki.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-x_spki.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-x_spki.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-x_spki.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_spki.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x_spki.c
+x_spki.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_spki.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_spki.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+x_spki.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+x_spki.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+x_spki.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+x_spki.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+x_spki.o: ../cryptlib.h x_spki.c
x_val.o: ../../e_os.h ../../include/openssl/asn1.h
x_val.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
x_val.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
x_val.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
x_val.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
x_val.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-x_val.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_val.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-x_val.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-x_val.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-x_val.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-x_val.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_val.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x_val.c
+x_val.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_val.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_val.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+x_val.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+x_val.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+x_val.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+x_val.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+x_val.o: ../cryptlib.h x_val.c
x_x509.o: ../../e_os.h ../../include/openssl/asn1.h
x_x509.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
x_x509.o: ../../include/openssl/buffer.h ../../include/openssl/conf.h
x_x509.o: ../../include/openssl/crypto.h ../../include/openssl/e_os2.h
x_x509.o: ../../include/openssl/ec.h ../../include/openssl/ecdh.h
x_x509.o: ../../include/openssl/ecdsa.h ../../include/openssl/err.h
-x_x509.o: ../../include/openssl/evp.h ../../include/openssl/lhash.h
-x_x509.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
-x_x509.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
-x_x509.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
-x_x509.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
-x_x509.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
-x_x509.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
-x_x509.o: ../../include/openssl/x509v3.h ../cryptlib.h x_x509.c
+x_x509.o: ../../include/openssl/evp.h ../../include/openssl/fips.h
+x_x509.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
+x_x509.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
+x_x509.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+x_x509.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
+x_x509.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
+x_x509.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
+x_x509.o: ../../include/openssl/x509_vfy.h ../../include/openssl/x509v3.h
+x_x509.o: ../cryptlib.h x_x509.c
x_x509a.o: ../../e_os.h ../../include/openssl/asn1.h
x_x509a.o: ../../include/openssl/asn1t.h ../../include/openssl/bio.h
x_x509a.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
x_x509a.o: ../../include/openssl/e_os2.h ../../include/openssl/ec.h
x_x509a.o: ../../include/openssl/ecdh.h ../../include/openssl/ecdsa.h
x_x509a.o: ../../include/openssl/err.h ../../include/openssl/evp.h
-x_x509a.o: ../../include/openssl/lhash.h ../../include/openssl/obj_mac.h
-x_x509a.o: ../../include/openssl/objects.h ../../include/openssl/opensslconf.h
-x_x509a.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
-x_x509a.o: ../../include/openssl/pkcs7.h ../../include/openssl/safestack.h
-x_x509a.o: ../../include/openssl/sha.h ../../include/openssl/stack.h
-x_x509a.o: ../../include/openssl/symhacks.h ../../include/openssl/x509.h
-x_x509a.o: ../../include/openssl/x509_vfy.h ../cryptlib.h x_x509a.c
+x_x509a.o: ../../include/openssl/fips.h ../../include/openssl/lhash.h
+x_x509a.o: ../../include/openssl/obj_mac.h ../../include/openssl/objects.h
+x_x509a.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+x_x509a.o: ../../include/openssl/ossl_typ.h ../../include/openssl/pkcs7.h
+x_x509a.o: ../../include/openssl/safestack.h ../../include/openssl/sha.h
+x_x509a.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+x_x509a.o: ../../include/openssl/x509.h ../../include/openssl/x509_vfy.h
+x_x509a.o: ../cryptlib.h x_x509a.c
diff --git a/crypto/asn1/a_bytes.c b/crypto/asn1/a_bytes.c
index 8d13f9c93113..92d630cdbaf8 100644
--- a/crypto/asn1/a_bytes.c
+++ b/crypto/asn1/a_bytes.c
@@ -79,7 +79,7 @@ ASN1_STRING *d2i_ASN1_type_bytes(ASN1_STRING **a, const unsigned char **pp,
if (tag >= 32)
{
- i=ASN1_R_TAG_VALUE_TOO_HIGH;;
+ i=ASN1_R_TAG_VALUE_TOO_HIGH;
goto err;
}
if (!(ASN1_tag2bit(tag) & type))
diff --git a/crypto/asn1/a_mbstr.c b/crypto/asn1/a_mbstr.c
index 2d4800a22a4c..1bcd0468938e 100644
--- a/crypto/asn1/a_mbstr.c
+++ b/crypto/asn1/a_mbstr.c
@@ -1,5 +1,5 @@
/* a_mbstr.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999.
*/
/* ====================================================================
diff --git a/crypto/asn1/a_sign.c b/crypto/asn1/a_sign.c
index 1081950518c7..4dee45fbb83a 100644
--- a/crypto/asn1/a_sign.c
+++ b/crypto/asn1/a_sign.c
@@ -267,7 +267,12 @@ int ASN1_item_sign(const ASN1_ITEM *it, X509_ALGOR *algor1, X509_ALGOR *algor2,
goto err;
}
- EVP_SignInit_ex(&ctx,type, NULL);
+ if (!EVP_SignInit_ex(&ctx,type, NULL))
+ {
+ outl=0;
+ ASN1err(ASN1_F_ASN1_ITEM_SIGN,ERR_R_EVP_LIB);
+ goto err;
+ }
EVP_SignUpdate(&ctx,(unsigned char *)buf_in,inl);
if (!EVP_SignFinal(&ctx,(unsigned char *)buf_out,
(unsigned int *)&outl,pkey))
diff --git a/crypto/asn1/a_strex.c b/crypto/asn1/a_strex.c
index c2dbb6f9a58d..7fc14d3296c1 100644
--- a/crypto/asn1/a_strex.c
+++ b/crypto/asn1/a_strex.c
@@ -1,5 +1,5 @@
/* a_strex.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/a_strnid.c b/crypto/asn1/a_strnid.c
index 613bbc4a7da9..fe515b52baed 100644
--- a/crypto/asn1/a_strnid.c
+++ b/crypto/asn1/a_strnid.c
@@ -1,5 +1,5 @@
/* a_strnid.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999.
*/
/* ====================================================================
diff --git a/crypto/asn1/a_verify.c b/crypto/asn1/a_verify.c
index fdce6e4380b6..da3efaaf8de5 100644
--- a/crypto/asn1/a_verify.c
+++ b/crypto/asn1/a_verify.c
@@ -100,7 +100,12 @@ int ASN1_verify(i2d_of_void *i2d, X509_ALGOR *a, ASN1_BIT_STRING *signature,
p=buf_in;
i2d(data,&p);
- EVP_VerifyInit_ex(&ctx,type, NULL);
+ if (!EVP_VerifyInit_ex(&ctx,type, NULL))
+ {
+ ASN1err(ASN1_F_ASN1_VERIFY,ERR_R_EVP_LIB);
+ ret=0;
+ goto err;
+ }
EVP_VerifyUpdate(&ctx,(unsigned char *)buf_in,inl);
OPENSSL_cleanse(buf_in,(unsigned int)inl);
diff --git a/crypto/asn1/ameth_lib.c b/crypto/asn1/ameth_lib.c
new file mode 100644
index 000000000000..18957c669e45
--- /dev/null
+++ b/crypto/asn1/ameth_lib.c
@@ -0,0 +1,446 @@
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project 2006.
+ */
+/* ====================================================================
+ * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com). This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include <openssl/asn1t.h>
+#include <openssl/x509.h>
+#ifndef OPENSSL_NO_ENGINE
+#include <openssl/engine.h>
+#endif
+#include "asn1_locl.h"
+
+extern const EVP_PKEY_ASN1_METHOD rsa_asn1_meths[];
+extern const EVP_PKEY_ASN1_METHOD dsa_asn1_meths[];
+extern const EVP_PKEY_ASN1_METHOD dh_asn1_meth;
+extern const EVP_PKEY_ASN1_METHOD eckey_asn1_meth;
+extern const EVP_PKEY_ASN1_METHOD hmac_asn1_meth;
+
+/* Keep this sorted in type order !! */
+static const EVP_PKEY_ASN1_METHOD *standard_methods[] =
+ {
+#ifndef OPENSSL_NO_RSA
+ &rsa_asn1_meths[0],
+ &rsa_asn1_meths[1],
+#endif
+#ifndef OPENSSL_NO_DH
+ &dh_asn1_meth,
+#endif
+#ifndef OPENSSL_NO_DSA
+ &dsa_asn1_meths[0],
+ &dsa_asn1_meths[1],
+ &dsa_asn1_meths[2],
+ &dsa_asn1_meths[3],
+ &dsa_asn1_meths[4],
+#endif
+#ifndef OPENSSL_NO_EC
+ &eckey_asn1_meth,
+#endif
+ &hmac_asn1_meth
+ };
+
+typedef int sk_cmp_fn_type(const char * const *a, const char * const *b);
+DECLARE_STACK_OF(EVP_PKEY_ASN1_METHOD)
+static STACK_OF(EVP_PKEY_ASN1_METHOD) *app_methods = NULL;
+
+
+
+#ifdef TEST
+void main()
+ {
+ int i;
+ for (i = 0;
+ i < sizeof(standard_methods)/sizeof(EVP_PKEY_ASN1_METHOD *);
+ i++)
+ fprintf(stderr, "Number %d id=%d (%s)\n", i,
+ standard_methods[i]->pkey_id,
+ OBJ_nid2sn(standard_methods[i]->pkey_id));
+ }
+#endif
+
+DECLARE_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_ASN1_METHOD *,
+ const EVP_PKEY_ASN1_METHOD *, ameth);
+
+static int ameth_cmp(const EVP_PKEY_ASN1_METHOD * const *a,
+ const EVP_PKEY_ASN1_METHOD * const *b)
+ {
+ return ((*a)->pkey_id - (*b)->pkey_id);
+ }
+
+IMPLEMENT_OBJ_BSEARCH_CMP_FN(const EVP_PKEY_ASN1_METHOD *,
+ const EVP_PKEY_ASN1_METHOD *, ameth);
+
+int EVP_PKEY_asn1_get_count(void)
+ {
+ int num = sizeof(standard_methods)/sizeof(EVP_PKEY_ASN1_METHOD *);
+ if (app_methods)
+ num += sk_EVP_PKEY_ASN1_METHOD_num(app_methods);
+ return num;
+ }
+
+const EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_get0(int idx)
+ {
+ int num = sizeof(standard_methods)/sizeof(EVP_PKEY_ASN1_METHOD *);
+ if (idx < 0)
+ return NULL;
+ if (idx < num)
+ return standard_methods[idx];
+ idx -= num;
+ return sk_EVP_PKEY_ASN1_METHOD_value(app_methods, idx);
+ }
+
+static const EVP_PKEY_ASN1_METHOD *pkey_asn1_find(int type)
+ {
+ EVP_PKEY_ASN1_METHOD tmp;
+ const EVP_PKEY_ASN1_METHOD *t = &tmp, **ret;
+ tmp.pkey_id = type;
+ if (app_methods)
+ {
+ int idx;
+ idx = sk_EVP_PKEY_ASN1_METHOD_find(app_methods, &tmp);
+ if (idx >= 0)
+ return sk_EVP_PKEY_ASN1_METHOD_value(app_methods, idx);
+ }
+ ret = OBJ_bsearch_ameth(&t, standard_methods,
+ sizeof(standard_methods)
+ /sizeof(EVP_PKEY_ASN1_METHOD *));
+ if (!ret || !*ret)
+ return NULL;
+ return *ret;
+ }
+
+/* Find an implementation of an ASN1 algorithm. If 'pe' is not NULL
+ * also search through engines and set *pe to a functional reference
+ * to the engine implementing 'type' or NULL if no engine implements
+ * it.
+ */
+
+const EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_find(ENGINE **pe, int type)
+ {
+ const EVP_PKEY_ASN1_METHOD *t;
+ ENGINE *e;
+
+ for (;;)
+ {
+ t = pkey_asn1_find(type);
+ if (!t || !(t->pkey_flags & ASN1_PKEY_ALIAS))
+ break;
+ type = t->pkey_base_id;
+ }
+ if (pe)
+ {
+#ifndef OPENSSL_NO_ENGINE
+ /* type will contain the final unaliased type */
+ e = ENGINE_get_pkey_asn1_meth_engine(type);
+ if (e)
+ {
+ *pe = e;
+ return ENGINE_get_pkey_asn1_meth(e, type);
+ }
+#endif
+ *pe = NULL;
+ }
+ return t;
+ }
+
+const EVP_PKEY_ASN1_METHOD *EVP_PKEY_asn1_find_str(ENGINE **pe,
+ const char *str, int len)
+ {
+ int i;
+ const EVP_PKEY_ASN1_METHOD *ameth;
+ if (len == -1)
+ len = strlen(str);
+ if (pe)
+ {
+#ifndef OPENSSL_NO_ENGINE
+ ENGINE *e;
+ ameth = ENGINE_pkey_asn1_find_str(&e, str, len);
+ if (ameth)
+ {
+ /* Convert structural into
+ * functional reference
+ */
+ if (!ENGINE_init(e))
+ ameth = NULL;
+ ENGINE_free(e);
+ *pe = e;
+ return ameth;
+ }
+#endif
+ *pe = NULL;
+ }
+ for (i = 0; i < EVP_PKEY_asn1_get_count(); i++)
+ {
+ ameth = EVP_PKEY_asn1_get0(i);
+ if (ameth->pkey_flags & ASN1_PKEY_ALIAS)
+ continue;
+ if (((int)strlen(ameth->pem_str) == len) &&
+ !strncasecmp(ameth->pem_str, str, len))
+ return ameth;
+ }
+ return NULL;
+ }
+
+int EVP_PKEY_asn1_add0(const EVP_PKEY_ASN1_METHOD *ameth)
+ {
+ if (app_methods == NULL)
+ {
+ app_methods = sk_EVP_PKEY_ASN1_METHOD_new(ameth_cmp);
+ if (!app_methods)
+ return 0;
+ }
+ if (!sk_EVP_PKEY_ASN1_METHOD_push(app_methods, ameth))
+ return 0;
+ sk_EVP_PKEY_ASN1_METHOD_sort(app_methods);
+ return 1;
+ }
+
+int EVP_PKEY_asn1_add_alias(int to, int from)
+ {
+ EVP_PKEY_ASN1_METHOD *ameth;
+ ameth = EVP_PKEY_asn1_new(from, ASN1_PKEY_ALIAS, NULL, NULL);
+ if (!ameth)
+ return 0;
+ ameth->pkey_base_id = to;
+ return EVP_PKEY_asn1_add0(ameth);
+ }
+
+int EVP_PKEY_asn1_get0_info(int *ppkey_id, int *ppkey_base_id, int *ppkey_flags,
+ const char **pinfo, const char **ppem_str,
+ const EVP_PKEY_ASN1_METHOD *ameth)
+ {
+ if (!ameth)
+ return 0;
+ if (ppkey_id)
+ *ppkey_id = ameth->pkey_id;
+ if (ppkey_base_id)
+ *ppkey_base_id = ameth->pkey_base_id;
+ if (ppkey_flags)
+ *ppkey_flags = ameth->pkey_flags;
+ if (pinfo)
+ *pinfo = ameth->info;
+ if (ppem_str)
+ *ppem_str = ameth->pem_str;
+ return 1;
+ }
+
+const EVP_PKEY_ASN1_METHOD* EVP_PKEY_get0_asn1(EVP_PKEY *pkey)
+ {
+ return pkey->ameth;
+ }
+
+EVP_PKEY_ASN1_METHOD* EVP_PKEY_asn1_new(int id, int flags,
+ const char *pem_str, const char *info)
+ {
+ EVP_PKEY_ASN1_METHOD *ameth;
+ ameth = OPENSSL_malloc(sizeof(EVP_PKEY_ASN1_METHOD));
+ if (!ameth)
+ return NULL;
+
+ ameth->pkey_id = id;
+ ameth->pkey_base_id = id;
+ ameth->pkey_flags = flags | ASN1_PKEY_DYNAMIC;
+
+ if (info)
+ {
+ ameth->info = BUF_strdup(info);
+ if (!ameth->info)
+ goto err;
+ }
+
+ if (pem_str)
+ {
+ ameth->pem_str = BUF_strdup(pem_str);
+ if (!ameth->pem_str)
+ goto err;
+ }
+
+ ameth->pub_decode = 0;
+ ameth->pub_encode = 0;
+ ameth->pub_cmp = 0;
+ ameth->pub_print = 0;
+
+ ameth->priv_decode = 0;
+ ameth->priv_encode = 0;
+ ameth->priv_print = 0;
+
+ ameth->old_priv_encode = 0;
+ ameth->old_priv_decode = 0;
+
+ ameth->pkey_size = 0;
+ ameth->pkey_bits = 0;
+
+ ameth->param_decode = 0;
+ ameth->param_encode = 0;
+ ameth->param_missing = 0;
+ ameth->param_copy = 0;
+ ameth->param_cmp = 0;
+ ameth->param_print = 0;
+
+ ameth->pkey_free = 0;
+ ameth->pkey_ctrl = 0;
+
+ return ameth;
+
+ err:
+
+ EVP_PKEY_asn1_free(ameth);
+ return NULL;
+
+ }
+
+void EVP_PKEY_asn1_copy(EVP_PKEY_ASN1_METHOD *dst,
+ const EVP_PKEY_ASN1_METHOD *src)
+ {
+
+ dst->pub_decode = src->pub_decode;
+ dst->pub_encode = src->pub_encode;
+ dst->pub_cmp = src->pub_cmp;
+ dst->pub_print = src->pub_print;
+
+ dst->priv_decode = src->priv_decode;
+ dst->priv_encode = src->priv_encode;
+ dst->priv_print = src->priv_print;
+
+ dst->old_priv_encode = src->old_priv_encode;
+ dst->old_priv_decode = src->old_priv_decode;
+
+ dst->pkey_size = src->pkey_size;
+ dst->pkey_bits = src->pkey_bits;
+
+ dst->param_decode = src->param_decode;
+ dst->param_encode = src->param_encode;
+ dst->param_missing = src->param_missing;
+ dst->param_copy = src->param_copy;
+ dst->param_cmp = src->param_cmp;
+ dst->param_print = src->param_print;
+
+ dst->pkey_free = src->pkey_free;
+ dst->pkey_ctrl = src->pkey_ctrl;
+
+ }
+
+void EVP_PKEY_asn1_free(EVP_PKEY_ASN1_METHOD *ameth)
+ {
+ if (ameth && (ameth->pkey_flags & ASN1_PKEY_DYNAMIC))
+ {
+ if (ameth->pem_str)
+ OPENSSL_free(ameth->pem_str);
+ if (ameth->info)
+ OPENSSL_free(ameth->info);
+ OPENSSL_free(ameth);
+ }
+ }
+
+void EVP_PKEY_asn1_set_public(EVP_PKEY_ASN1_METHOD *ameth,
+ int (*pub_decode)(EVP_PKEY *pk, X509_PUBKEY *pub),
+ int (*pub_encode)(X509_PUBKEY *pub, const EVP_PKEY *pk),
+ int (*pub_cmp)(const EVP_PKEY *a, const EVP_PKEY *b),
+ int (*pub_print)(BIO *out, const EVP_PKEY *pkey, int indent,
+ ASN1_PCTX *pctx),
+ int (*pkey_size)(const EVP_PKEY *pk),
+ int (*pkey_bits)(const EVP_PKEY *pk))
+ {
+ ameth->pub_decode = pub_decode;
+ ameth->pub_encode = pub_encode;
+ ameth->pub_cmp = pub_cmp;
+ ameth->pub_print = pub_print;
+ ameth->pkey_size = pkey_size;
+ ameth->pkey_bits = pkey_bits;
+ }
+
+void EVP_PKEY_asn1_set_private(EVP_PKEY_ASN1_METHOD *ameth,
+ int (*priv_decode)(EVP_PKEY *pk, PKCS8_PRIV_KEY_INFO *p8inf),
+ int (*priv_encode)(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pk),
+ int (*priv_print)(BIO *out, const EVP_PKEY *pkey, int indent,
+ ASN1_PCTX *pctx))
+ {
+ ameth->priv_decode = priv_decode;
+ ameth->priv_encode = priv_encode;
+ ameth->priv_print = priv_print;
+ }
+
+void EVP_PKEY_asn1_set_param(EVP_PKEY_ASN1_METHOD *ameth,
+ int (*param_decode)(EVP_PKEY *pkey,
+ const unsigned char **pder, int derlen),
+ int (*param_encode)(const EVP_PKEY *pkey, unsigned char **pder),
+ int (*param_missing)(const EVP_PKEY *pk),
+ int (*param_copy)(EVP_PKEY *to, const EVP_PKEY *from),
+ int (*param_cmp)(const EVP_PKEY *a, const EVP_PKEY *b),
+ int (*param_print)(BIO *out, const EVP_PKEY *pkey, int indent,
+ ASN1_PCTX *pctx))
+ {
+ ameth->param_decode = param_decode;
+ ameth->param_encode = param_encode;
+ ameth->param_missing = param_missing;
+ ameth->param_copy = param_copy;
+ ameth->param_cmp = param_cmp;
+ ameth->param_print = param_print;
+ }
+
+void EVP_PKEY_asn1_set_free(EVP_PKEY_ASN1_METHOD *ameth,
+ void (*pkey_free)(EVP_PKEY *pkey))
+ {
+ ameth->pkey_free = pkey_free;
+ }
+
+void EVP_PKEY_asn1_set_ctrl(EVP_PKEY_ASN1_METHOD *ameth,
+ int (*pkey_ctrl)(EVP_PKEY *pkey, int op,
+ long arg1, void *arg2))
+ {
+ ameth->pkey_ctrl = pkey_ctrl;
+ }
diff --git a/crypto/asn1/asn1.h b/crypto/asn1/asn1.h
index 424cd348bb5e..e3385226d4a5 100644
--- a/crypto/asn1/asn1.h
+++ b/crypto/asn1/asn1.h
@@ -612,6 +612,7 @@ typedef struct BIT_STRING_BITNAME_st {
B_ASN1_GENERALIZEDTIME
#define B_ASN1_PRINTABLE \
+ B_ASN1_NUMERICSTRING| \
B_ASN1_PRINTABLESTRING| \
B_ASN1_T61STRING| \
B_ASN1_IA5STRING| \
@@ -1217,6 +1218,7 @@ void ERR_load_ASN1_strings(void);
#define ASN1_R_BAD_OBJECT_HEADER 102
#define ASN1_R_BAD_PASSWORD_READ 103
#define ASN1_R_BAD_TAG 104
+#define ASN1_R_BMPSTRING_IS_WRONG_LENGTH 210
#define ASN1_R_BN_LIB 105
#define ASN1_R_BOOLEAN_IS_WRONG_LENGTH 106
#define ASN1_R_BUFFER_TOO_SMALL 107
@@ -1306,6 +1308,7 @@ void ERR_load_ASN1_strings(void);
#define ASN1_R_UNABLE_TO_DECODE_RSA_KEY 157
#define ASN1_R_UNABLE_TO_DECODE_RSA_PRIVATE_KEY 158
#define ASN1_R_UNEXPECTED_EOC 159
+#define ASN1_R_UNIVERSALSTRING_IS_WRONG_LENGTH 211
#define ASN1_R_UNKNOWN_FORMAT 160
#define ASN1_R_UNKNOWN_MESSAGE_DIGEST_ALGORITHM 161
#define ASN1_R_UNKNOWN_OBJECT_TYPE 162
diff --git a/crypto/asn1/asn1_err.c b/crypto/asn1/asn1_err.c
index f8a3e2e6cd01..5f5de98eed53 100644
--- a/crypto/asn1/asn1_err.c
+++ b/crypto/asn1/asn1_err.c
@@ -195,6 +195,7 @@ static ERR_STRING_DATA ASN1_str_reasons[]=
{ERR_REASON(ASN1_R_BAD_OBJECT_HEADER) ,"bad object header"},
{ERR_REASON(ASN1_R_BAD_PASSWORD_READ) ,"bad password read"},
{ERR_REASON(ASN1_R_BAD_TAG) ,"bad tag"},
+{ERR_REASON(ASN1_R_BMPSTRING_IS_WRONG_LENGTH),"bmpstring is wrong length"},
{ERR_REASON(ASN1_R_BN_LIB) ,"bn lib"},
{ERR_REASON(ASN1_R_BOOLEAN_IS_WRONG_LENGTH),"boolean is wrong length"},
{ERR_REASON(ASN1_R_BUFFER_TOO_SMALL) ,"buffer too small"},
@@ -284,6 +285,7 @@ static ERR_STRING_DATA ASN1_str_reasons[]=
{ERR_REASON(ASN1_R_UNABLE_TO_DECODE_RSA_KEY),"unable to decode rsa key"},
{ERR_REASON(ASN1_R_UNABLE_TO_DECODE_RSA_PRIVATE_KEY),"unable to decode rsa private key"},
{ERR_REASON(ASN1_R_UNEXPECTED_EOC) ,"unexpected eoc"},
+{ERR_REASON(ASN1_R_UNIVERSALSTRING_IS_WRONG_LENGTH),"universalstring is wrong length"},
{ERR_REASON(ASN1_R_UNKNOWN_FORMAT) ,"unknown format"},
{ERR_REASON(ASN1_R_UNKNOWN_MESSAGE_DIGEST_ALGORITHM),"unknown message digest algorithm"},
{ERR_REASON(ASN1_R_UNKNOWN_OBJECT_TYPE) ,"unknown object type"},
diff --git a/crypto/asn1/asn1_gen.c b/crypto/asn1/asn1_gen.c
index 26c832781e40..2da38292c8c5 100644
--- a/crypto/asn1/asn1_gen.c
+++ b/crypto/asn1/asn1_gen.c
@@ -1,5 +1,5 @@
/* asn1_gen.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2002.
*/
/* ====================================================================
diff --git a/crypto/asn1/asn1_locl.h b/crypto/asn1/asn1_locl.h
new file mode 100644
index 000000000000..5aa65e28f5f5
--- /dev/null
+++ b/crypto/asn1/asn1_locl.h
@@ -0,0 +1,134 @@
+/* asn1t.h */
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project 2006.
+ */
+/* ====================================================================
+ * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com). This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+/* Internal ASN1 structures and functions: not for application use */
+
+/* ASN1 print context structure */
+
+struct asn1_pctx_st
+ {
+ unsigned long flags;
+ unsigned long nm_flags;
+ unsigned long cert_flags;
+ unsigned long oid_flags;
+ unsigned long str_flags;
+ } /* ASN1_PCTX */;
+
+/* ASN1 public key method structure */
+
+struct evp_pkey_asn1_method_st
+ {
+ int pkey_id;
+ int pkey_base_id;
+ unsigned long pkey_flags;
+
+ char *pem_str;
+ char *info;
+
+ int (*pub_decode)(EVP_PKEY *pk, X509_PUBKEY *pub);
+ int (*pub_encode)(X509_PUBKEY *pub, const EVP_PKEY *pk);
+ int (*pub_cmp)(const EVP_PKEY *a, const EVP_PKEY *b);
+ int (*pub_print)(BIO *out, const EVP_PKEY *pkey, int indent,
+ ASN1_PCTX *pctx);
+
+ int (*priv_decode)(EVP_PKEY *pk, PKCS8_PRIV_KEY_INFO *p8inf);
+ int (*priv_encode)(PKCS8_PRIV_KEY_INFO *p8, const EVP_PKEY *pk);
+ int (*priv_print)(BIO *out, const EVP_PKEY *pkey, int indent,
+ ASN1_PCTX *pctx);
+
+ int (*pkey_size)(const EVP_PKEY *pk);
+ int (*pkey_bits)(const EVP_PKEY *pk);
+
+ int (*param_decode)(EVP_PKEY *pkey,
+ const unsigned char **pder, int derlen);
+ int (*param_encode)(const EVP_PKEY *pkey, unsigned char **pder);
+ int (*param_missing)(const EVP_PKEY *pk);
+ int (*param_copy)(EVP_PKEY *to, const EVP_PKEY *from);
+ int (*param_cmp)(const EVP_PKEY *a, const EVP_PKEY *b);
+ int (*param_print)(BIO *out, const EVP_PKEY *pkey, int indent,
+ ASN1_PCTX *pctx);
+
+ void (*pkey_free)(EVP_PKEY *pkey);
+ int (*pkey_ctrl)(EVP_PKEY *pkey, int op, long arg1, void *arg2);
+
+ /* Legacy functions for old PEM */
+
+ int (*old_priv_decode)(EVP_PKEY *pkey,
+ const unsigned char **pder, int derlen);
+ int (*old_priv_encode)(const EVP_PKEY *pkey, unsigned char **pder);
+
+ } /* EVP_PKEY_ASN1_METHOD */;
+
+/* Method to handle CRL access.
+ * In general a CRL could be very large (several Mb) and can consume large
+ * amounts of resources if stored in memory by multiple processes.
+ * This method allows general CRL operations to be redirected to more
+ * efficient callbacks: for example a CRL entry database.
+ */
+
+#define X509_CRL_METHOD_DYNAMIC 1
+
+struct x509_crl_method_st
+ {
+ int flags;
+ int (*crl_init)(X509_CRL *crl);
+ int (*crl_free)(X509_CRL *crl);
+ int (*crl_lookup)(X509_CRL *crl, X509_REVOKED **ret,
+ ASN1_INTEGER *ser, X509_NAME *issuer);
+ int (*crl_verify)(X509_CRL *crl, EVP_PKEY *pk);
+ };
diff --git a/crypto/asn1/asn1_par.c b/crypto/asn1/asn1_par.c
index 501b62a4b199..8657f73d66a2 100644
--- a/crypto/asn1/asn1_par.c
+++ b/crypto/asn1/asn1_par.c
@@ -213,6 +213,8 @@ static int asn1_parse2(BIO *bp, const unsigned char **pp, long length, int offse
(tag == V_ASN1_T61STRING) ||
(tag == V_ASN1_IA5STRING) ||
(tag == V_ASN1_VISIBLESTRING) ||
+ (tag == V_ASN1_NUMERICSTRING) ||
+ (tag == V_ASN1_UTF8STRING) ||
(tag == V_ASN1_UTCTIME) ||
(tag == V_ASN1_GENERALIZEDTIME))
{
diff --git a/crypto/asn1/asn1t.h b/crypto/asn1/asn1t.h
index bf315e65ed38..ac14f9415b84 100644
--- a/crypto/asn1/asn1t.h
+++ b/crypto/asn1/asn1t.h
@@ -1,5 +1,5 @@
/* asn1t.h */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/asn_mime.c b/crypto/asn1/asn_mime.c
index bc80b20d6323..d8d9e76cc06d 100644
--- a/crypto/asn1/asn_mime.c
+++ b/crypto/asn1/asn_mime.c
@@ -152,7 +152,6 @@ static ASN1_VALUE *b64_read_asn1(BIO *bio, const ASN1_ITEM *it)
static int asn1_write_micalg(BIO *out, STACK_OF(X509_ALGOR) *mdalgs)
{
- const EVP_MD *md;
int i, have_unknown = 0, write_comma, md_nid;
have_unknown = 0;
write_comma = 0;
@@ -162,7 +161,6 @@ static int asn1_write_micalg(BIO *out, STACK_OF(X509_ALGOR) *mdalgs)
BIO_write(out, ",", 1);
write_comma = 1;
md_nid = OBJ_obj2nid(sk_X509_ALGOR_value(mdalgs, i)->algorithm);
- md = EVP_get_digestbynid(md_nid);
switch(md_nid)
{
case NID_sha1:
diff --git a/crypto/asn1/asn_moid.c b/crypto/asn1/asn_moid.c
index 9132350f1078..1ea6a5924838 100644
--- a/crypto/asn1/asn_moid.c
+++ b/crypto/asn1/asn_moid.c
@@ -1,5 +1,5 @@
/* asn_moid.c */
-/* Written by Stephen Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Stephen Henson (steve@openssl.org) for the OpenSSL
* project 2001.
*/
/* ====================================================================
diff --git a/crypto/asn1/asn_pack.c b/crypto/asn1/asn_pack.c
index e8b671b7b51b..f1a5a0563227 100644
--- a/crypto/asn1/asn_pack.c
+++ b/crypto/asn1/asn_pack.c
@@ -1,5 +1,5 @@
/* asn_pack.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999.
*/
/* ====================================================================
diff --git a/crypto/asn1/bio_asn1.c b/crypto/asn1/bio_asn1.c
new file mode 100644
index 000000000000..dc7efd551c05
--- /dev/null
+++ b/crypto/asn1/bio_asn1.c
@@ -0,0 +1,495 @@
+/* bio_asn1.c */
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project.
+ */
+/* ====================================================================
+ * Copyright (c) 2006 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com). This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+/* Experimental ASN1 BIO. When written through the data is converted
+ * to an ASN1 string type: default is OCTET STRING. Additional functions
+ * can be provided to add prefix and suffix data.
+ */
+
+#include <string.h>
+#include <openssl/bio.h>
+#include <openssl/asn1.h>
+
+/* Must be large enough for biggest tag+length */
+#define DEFAULT_ASN1_BUF_SIZE 20
+
+typedef enum
+ {
+ ASN1_STATE_START,
+ ASN1_STATE_PRE_COPY,
+ ASN1_STATE_HEADER,
+ ASN1_STATE_HEADER_COPY,
+ ASN1_STATE_DATA_COPY,
+ ASN1_STATE_POST_COPY,
+ ASN1_STATE_DONE
+ } asn1_bio_state_t;
+
+typedef struct BIO_ASN1_EX_FUNCS_st
+ {
+ asn1_ps_func *ex_func;
+ asn1_ps_func *ex_free_func;
+ } BIO_ASN1_EX_FUNCS;
+
+typedef struct BIO_ASN1_BUF_CTX_t
+ {
+ /* Internal state */
+ asn1_bio_state_t state;
+ /* Internal buffer */
+ unsigned char *buf;
+ /* Size of buffer */
+ int bufsize;
+ /* Current position in buffer */
+ int bufpos;
+ /* Current buffer length */
+ int buflen;
+ /* Amount of data to copy */
+ int copylen;
+ /* Class and tag to use */
+ int asn1_class, asn1_tag;
+ asn1_ps_func *prefix, *prefix_free, *suffix, *suffix_free;
+ /* Extra buffer for prefix and suffix data */
+ unsigned char *ex_buf;
+ int ex_len;
+ int ex_pos;
+ void *ex_arg;
+ } BIO_ASN1_BUF_CTX;
+
+
+static int asn1_bio_write(BIO *h, const char *buf,int num);
+static int asn1_bio_read(BIO *h, char *buf, int size);
+static int asn1_bio_puts(BIO *h, const char *str);
+static int asn1_bio_gets(BIO *h, char *str, int size);
+static long asn1_bio_ctrl(BIO *h, int cmd, long arg1, void *arg2);
+static int asn1_bio_new(BIO *h);
+static int asn1_bio_free(BIO *data);
+static long asn1_bio_callback_ctrl(BIO *h, int cmd, bio_info_cb *fp);
+
+static int asn1_bio_init(BIO_ASN1_BUF_CTX *ctx, int size);
+static int asn1_bio_flush_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx,
+ asn1_ps_func *cleanup, asn1_bio_state_t next);
+static int asn1_bio_setup_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx,
+ asn1_ps_func *setup,
+ asn1_bio_state_t ex_state,
+ asn1_bio_state_t other_state);
+
+static BIO_METHOD methods_asn1=
+ {
+ BIO_TYPE_ASN1,
+ "asn1",
+ asn1_bio_write,
+ asn1_bio_read,
+ asn1_bio_puts,
+ asn1_bio_gets,
+ asn1_bio_ctrl,
+ asn1_bio_new,
+ asn1_bio_free,
+ asn1_bio_callback_ctrl,
+ };
+
+BIO_METHOD *BIO_f_asn1(void)
+ {
+ return(&methods_asn1);
+ }
+
+
+static int asn1_bio_new(BIO *b)
+ {
+ BIO_ASN1_BUF_CTX *ctx;
+ ctx = OPENSSL_malloc(sizeof(BIO_ASN1_BUF_CTX));
+ if (!ctx)
+ return 0;
+ if (!asn1_bio_init(ctx, DEFAULT_ASN1_BUF_SIZE))
+ return 0;
+ b->init = 1;
+ b->ptr = (char *)ctx;
+ b->flags = 0;
+ return 1;
+ }
+
+static int asn1_bio_init(BIO_ASN1_BUF_CTX *ctx, int size)
+ {
+ ctx->buf = OPENSSL_malloc(size);
+ if (!ctx->buf)
+ return 0;
+ ctx->bufsize = size;
+ ctx->bufpos = 0;
+ ctx->buflen = 0;
+ ctx->copylen = 0;
+ ctx->asn1_class = V_ASN1_UNIVERSAL;
+ ctx->asn1_tag = V_ASN1_OCTET_STRING;
+ ctx->ex_buf = 0;
+ ctx->ex_pos = 0;
+ ctx->ex_len = 0;
+ ctx->state = ASN1_STATE_START;
+ return 1;
+ }
+
+static int asn1_bio_free(BIO *b)
+ {
+ BIO_ASN1_BUF_CTX *ctx;
+ ctx = (BIO_ASN1_BUF_CTX *) b->ptr;
+ if (ctx == NULL)
+ return 0;
+ if (ctx->buf)
+ OPENSSL_free(ctx->buf);
+ OPENSSL_free(ctx);
+ b->init = 0;
+ b->ptr = NULL;
+ b->flags = 0;
+ return 1;
+ }
+
+static int asn1_bio_write(BIO *b, const char *in , int inl)
+ {
+ BIO_ASN1_BUF_CTX *ctx;
+ int wrmax, wrlen, ret;
+ unsigned char *p;
+ if (!in || (inl < 0) || (b->next_bio == NULL))
+ return 0;
+ ctx = (BIO_ASN1_BUF_CTX *) b->ptr;
+ if (ctx == NULL)
+ return 0;
+
+ wrlen = 0;
+ ret = -1;
+
+ for(;;)
+ {
+ switch (ctx->state)
+ {
+
+ /* Setup prefix data, call it */
+ case ASN1_STATE_START:
+ if (!asn1_bio_setup_ex(b, ctx, ctx->prefix,
+ ASN1_STATE_PRE_COPY, ASN1_STATE_HEADER))
+ return 0;
+ break;
+
+ /* Copy any pre data first */
+ case ASN1_STATE_PRE_COPY:
+
+ ret = asn1_bio_flush_ex(b, ctx, ctx->prefix_free,
+ ASN1_STATE_HEADER);
+
+ if (ret <= 0)
+ goto done;
+
+ break;
+
+ case ASN1_STATE_HEADER:
+ ctx->buflen =
+ ASN1_object_size(0, inl, ctx->asn1_tag) - inl;
+ OPENSSL_assert(ctx->buflen <= ctx->bufsize);
+ p = ctx->buf;
+ ASN1_put_object(&p, 0, inl,
+ ctx->asn1_tag, ctx->asn1_class);
+ ctx->copylen = inl;
+ ctx->state = ASN1_STATE_HEADER_COPY;
+
+ break;
+
+ case ASN1_STATE_HEADER_COPY:
+ ret = BIO_write(b->next_bio,
+ ctx->buf + ctx->bufpos, ctx->buflen);
+ if (ret <= 0)
+ goto done;
+
+ ctx->buflen -= ret;
+ if (ctx->buflen)
+ ctx->bufpos += ret;
+ else
+ {
+ ctx->bufpos = 0;
+ ctx->state = ASN1_STATE_DATA_COPY;
+ }
+
+ break;
+
+ case ASN1_STATE_DATA_COPY:
+
+ if (inl > ctx->copylen)
+ wrmax = ctx->copylen;
+ else
+ wrmax = inl;
+ ret = BIO_write(b->next_bio, in, wrmax);
+ if (ret <= 0)
+ break;
+ wrlen += ret;
+ ctx->copylen -= ret;
+ in += ret;
+ inl -= ret;
+
+ if (ctx->copylen == 0)
+ ctx->state = ASN1_STATE_HEADER;
+
+ if (inl == 0)
+ goto done;
+
+ break;
+
+ default:
+ BIO_clear_retry_flags(b);
+ return 0;
+
+ }
+
+ }
+
+ done:
+ BIO_clear_retry_flags(b);
+ BIO_copy_next_retry(b);
+
+ return (wrlen > 0) ? wrlen : ret;
+
+ }
+
+static int asn1_bio_flush_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx,
+ asn1_ps_func *cleanup, asn1_bio_state_t next)
+ {
+ int ret;
+ if (ctx->ex_len <= 0)
+ return 1;
+ for(;;)
+ {
+ ret = BIO_write(b->next_bio, ctx->ex_buf + ctx->ex_pos,
+ ctx->ex_len);
+ if (ret <= 0)
+ break;
+ ctx->ex_len -= ret;
+ if (ctx->ex_len > 0)
+ ctx->ex_pos += ret;
+ else
+ {
+ if(cleanup)
+ cleanup(b, &ctx->ex_buf, &ctx->ex_len,
+ &ctx->ex_arg);
+ ctx->state = next;
+ ctx->ex_pos = 0;
+ break;
+ }
+ }
+ return ret;
+ }
+
+static int asn1_bio_setup_ex(BIO *b, BIO_ASN1_BUF_CTX *ctx,
+ asn1_ps_func *setup,
+ asn1_bio_state_t ex_state,
+ asn1_bio_state_t other_state)
+ {
+ if (setup && !setup(b, &ctx->ex_buf, &ctx->ex_len, &ctx->ex_arg))
+ {
+ BIO_clear_retry_flags(b);
+ return 0;
+ }
+ if (ctx->ex_len > 0)
+ ctx->state = ex_state;
+ else
+ ctx->state = other_state;
+ return 1;
+ }
+
+static int asn1_bio_read(BIO *b, char *in , int inl)
+ {
+ if (!b->next_bio)
+ return 0;
+ return BIO_read(b->next_bio, in , inl);
+ }
+
+static int asn1_bio_puts(BIO *b, const char *str)
+ {
+ return asn1_bio_write(b, str, strlen(str));
+ }
+
+static int asn1_bio_gets(BIO *b, char *str, int size)
+ {
+ if (!b->next_bio)
+ return 0;
+ return BIO_gets(b->next_bio, str , size);
+ }
+
+static long asn1_bio_callback_ctrl(BIO *b, int cmd, bio_info_cb *fp)
+ {
+ if (b->next_bio == NULL) return(0);
+ return BIO_callback_ctrl(b->next_bio,cmd,fp);
+ }
+
+static long asn1_bio_ctrl(BIO *b, int cmd, long arg1, void *arg2)
+ {
+ BIO_ASN1_BUF_CTX *ctx;
+ BIO_ASN1_EX_FUNCS *ex_func;
+ long ret = 1;
+ ctx = (BIO_ASN1_BUF_CTX *) b->ptr;
+ if (ctx == NULL)
+ return 0;
+ switch(cmd)
+ {
+
+ case BIO_C_SET_PREFIX:
+ ex_func = arg2;
+ ctx->prefix = ex_func->ex_func;
+ ctx->prefix_free = ex_func->ex_free_func;
+ break;
+
+ case BIO_C_GET_PREFIX:
+ ex_func = arg2;
+ ex_func->ex_func = ctx->prefix;
+ ex_func->ex_free_func = ctx->prefix_free;
+ break;
+
+ case BIO_C_SET_SUFFIX:
+ ex_func = arg2;
+ ctx->suffix = ex_func->ex_func;
+ ctx->suffix_free = ex_func->ex_free_func;
+ break;
+
+ case BIO_C_GET_SUFFIX:
+ ex_func = arg2;
+ ex_func->ex_func = ctx->suffix;
+ ex_func->ex_free_func = ctx->suffix_free;
+ break;
+
+ case BIO_C_SET_EX_ARG:
+ ctx->ex_arg = arg2;
+ break;
+
+ case BIO_C_GET_EX_ARG:
+ *(void **)arg2 = ctx->ex_arg;
+ break;
+
+ case BIO_CTRL_FLUSH:
+ if (!b->next_bio)
+ return 0;
+
+ /* Call post function if possible */
+ if (ctx->state == ASN1_STATE_HEADER)
+ {
+ if (!asn1_bio_setup_ex(b, ctx, ctx->suffix,
+ ASN1_STATE_POST_COPY, ASN1_STATE_DONE))
+ return 0;
+ }
+
+ if (ctx->state == ASN1_STATE_POST_COPY)
+ {
+ ret = asn1_bio_flush_ex(b, ctx, ctx->suffix_free,
+ ASN1_STATE_DONE);
+ if (ret <= 0)
+ return ret;
+ }
+
+ if (ctx->state == ASN1_STATE_DONE)
+ return BIO_ctrl(b->next_bio, cmd, arg1, arg2);
+ else
+ {
+ BIO_clear_retry_flags(b);
+ return 0;
+ }
+ break;
+
+
+ default:
+ if (!b->next_bio)
+ return 0;
+ return BIO_ctrl(b->next_bio, cmd, arg1, arg2);
+
+ }
+
+ return ret;
+ }
+
+static int asn1_bio_set_ex(BIO *b, int cmd,
+ asn1_ps_func *ex_func, asn1_ps_func *ex_free_func)
+ {
+ BIO_ASN1_EX_FUNCS extmp;
+ extmp.ex_func = ex_func;
+ extmp.ex_free_func = ex_free_func;
+ return BIO_ctrl(b, cmd, 0, &extmp);
+ }
+
+static int asn1_bio_get_ex(BIO *b, int cmd,
+ asn1_ps_func **ex_func, asn1_ps_func **ex_free_func)
+ {
+ BIO_ASN1_EX_FUNCS extmp;
+ int ret;
+ ret = BIO_ctrl(b, cmd, 0, &extmp);
+ if (ret > 0)
+ {
+ *ex_func = extmp.ex_func;
+ *ex_free_func = extmp.ex_free_func;
+ }
+ return ret;
+ }
+
+int BIO_asn1_set_prefix(BIO *b, asn1_ps_func *prefix, asn1_ps_func *prefix_free)
+ {
+ return asn1_bio_set_ex(b, BIO_C_SET_PREFIX, prefix, prefix_free);
+ }
+
+int BIO_asn1_get_prefix(BIO *b, asn1_ps_func **pprefix, asn1_ps_func **pprefix_free)
+ {
+ return asn1_bio_get_ex(b, BIO_C_GET_PREFIX, pprefix, pprefix_free);
+ }
+
+int BIO_asn1_set_suffix(BIO *b, asn1_ps_func *suffix, asn1_ps_func *suffix_free)
+ {
+ return asn1_bio_set_ex(b, BIO_C_SET_SUFFIX, suffix, suffix_free);
+ }
+
+int BIO_asn1_get_suffix(BIO *b, asn1_ps_func **psuffix, asn1_ps_func **psuffix_free)
+ {
+ return asn1_bio_get_ex(b, BIO_C_GET_SUFFIX, psuffix, psuffix_free);
+ }
diff --git a/crypto/asn1/bio_ndef.c b/crypto/asn1/bio_ndef.c
new file mode 100644
index 000000000000..370389b1e6e7
--- /dev/null
+++ b/crypto/asn1/bio_ndef.c
@@ -0,0 +1,246 @@
+/* bio_ndef.c */
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project.
+ */
+/* ====================================================================
+ * Copyright (c) 2008 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ */
+
+#include <openssl/asn1.h>
+#include <openssl/asn1t.h>
+#include <openssl/bio.h>
+#include <openssl/err.h>
+
+#ifndef OPENSSL_SYSNAME_NETWARE
+#include <memory.h>
+#endif
+#include <stdio.h>
+
+/* Experimental NDEF ASN1 BIO support routines */
+
+/* The usage is quite simple, initialize an ASN1 structure,
+ * get a BIO from it then any data written through the BIO
+ * will end up translated to approptiate format on the fly.
+ * The data is streamed out and does *not* need to be
+ * all held in memory at once.
+ *
+ * When the BIO is flushed the output is finalized and any
+ * signatures etc written out.
+ *
+ * The BIO is a 'proper' BIO and can handle non blocking I/O
+ * correctly.
+ *
+ * The usage is simple. The implementation is *not*...
+ */
+
+/* BIO support data stored in the ASN1 BIO ex_arg */
+
+typedef struct ndef_aux_st
+ {
+ /* ASN1 structure this BIO refers to */
+ ASN1_VALUE *val;
+ const ASN1_ITEM *it;
+ /* Top of the BIO chain */
+ BIO *ndef_bio;
+ /* Output BIO */
+ BIO *out;
+ /* Boundary where content is inserted */
+ unsigned char **boundary;
+ /* DER buffer start */
+ unsigned char *derbuf;
+ } NDEF_SUPPORT;
+
+static int ndef_prefix(BIO *b, unsigned char **pbuf, int *plen, void *parg);
+static int ndef_prefix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg);
+static int ndef_suffix(BIO *b, unsigned char **pbuf, int *plen, void *parg);
+static int ndef_suffix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg);
+
+BIO *BIO_new_NDEF(BIO *out, ASN1_VALUE *val, const ASN1_ITEM *it)
+ {
+ NDEF_SUPPORT *ndef_aux = NULL;
+ BIO *asn_bio = NULL;
+ const ASN1_AUX *aux = it->funcs;
+ ASN1_STREAM_ARG sarg;
+
+ if (!aux || !aux->asn1_cb)
+ {
+ ASN1err(ASN1_F_BIO_NEW_NDEF, ASN1_R_STREAMING_NOT_SUPPORTED);
+ return NULL;
+ }
+ ndef_aux = OPENSSL_malloc(sizeof(NDEF_SUPPORT));
+ asn_bio = BIO_new(BIO_f_asn1());
+
+ /* ASN1 bio needs to be next to output BIO */
+
+ out = BIO_push(asn_bio, out);
+
+ if (!ndef_aux || !asn_bio || !out)
+ goto err;
+
+ BIO_asn1_set_prefix(asn_bio, ndef_prefix, ndef_prefix_free);
+ BIO_asn1_set_suffix(asn_bio, ndef_suffix, ndef_suffix_free);
+
+ /* Now let callback prepend any digest, cipher etc BIOs
+ * ASN1 structure needs.
+ */
+
+ sarg.out = out;
+ sarg.ndef_bio = NULL;
+ sarg.boundary = NULL;
+
+ if (aux->asn1_cb(ASN1_OP_STREAM_PRE, &val, it, &sarg) <= 0)
+ goto err;
+
+ ndef_aux->val = val;
+ ndef_aux->it = it;
+ ndef_aux->ndef_bio = sarg.ndef_bio;
+ ndef_aux->boundary = sarg.boundary;
+ ndef_aux->out = out;
+
+ BIO_ctrl(asn_bio, BIO_C_SET_EX_ARG, 0, ndef_aux);
+
+ return sarg.ndef_bio;
+
+ err:
+ if (asn_bio)
+ BIO_free(asn_bio);
+ if (ndef_aux)
+ OPENSSL_free(ndef_aux);
+ return NULL;
+ }
+
+static int ndef_prefix(BIO *b, unsigned char **pbuf, int *plen, void *parg)
+ {
+ NDEF_SUPPORT *ndef_aux;
+ unsigned char *p;
+ int derlen;
+
+ if (!parg)
+ return 0;
+
+ ndef_aux = *(NDEF_SUPPORT **)parg;
+
+ derlen = ASN1_item_ndef_i2d(ndef_aux->val, NULL, ndef_aux->it);
+ p = OPENSSL_malloc(derlen);
+ ndef_aux->derbuf = p;
+ *pbuf = p;
+ derlen = ASN1_item_ndef_i2d(ndef_aux->val, &p, ndef_aux->it);
+
+ if (!*ndef_aux->boundary)
+ return 0;
+
+ *plen = *ndef_aux->boundary - *pbuf;
+
+ return 1;
+ }
+
+static int ndef_prefix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg)
+ {
+ NDEF_SUPPORT *ndef_aux;
+
+ if (!parg)
+ return 0;
+
+ ndef_aux = *(NDEF_SUPPORT **)parg;
+
+ if (ndef_aux->derbuf)
+ OPENSSL_free(ndef_aux->derbuf);
+
+ ndef_aux->derbuf = NULL;
+ *pbuf = NULL;
+ *plen = 0;
+ return 1;
+ }
+
+static int ndef_suffix_free(BIO *b, unsigned char **pbuf, int *plen, void *parg)
+ {
+ NDEF_SUPPORT **pndef_aux = (NDEF_SUPPORT **)parg;
+ if (!ndef_prefix_free(b, pbuf, plen, parg))
+ return 0;
+ OPENSSL_free(*pndef_aux);
+ *pndef_aux = NULL;
+ return 1;
+ }
+
+static int ndef_suffix(BIO *b, unsigned char **pbuf, int *plen, void *parg)
+ {
+ NDEF_SUPPORT *ndef_aux;
+ unsigned char *p;
+ int derlen;
+ const ASN1_AUX *aux;
+ ASN1_STREAM_ARG sarg;
+
+ if (!parg)
+ return 0;
+
+ ndef_aux = *(NDEF_SUPPORT **)parg;
+
+ aux = ndef_aux->it->funcs;
+
+ /* Finalize structures */
+ sarg.ndef_bio = ndef_aux->ndef_bio;
+ sarg.out = ndef_aux->out;
+ sarg.boundary = ndef_aux->boundary;
+ if (aux->asn1_cb(ASN1_OP_STREAM_POST,
+ &ndef_aux->val, ndef_aux->it, &sarg) <= 0)
+ return 0;
+
+ derlen = ASN1_item_ndef_i2d(ndef_aux->val, NULL, ndef_aux->it);
+ p = OPENSSL_malloc(derlen);
+ ndef_aux->derbuf = p;
+ *pbuf = p;
+ derlen = ASN1_item_ndef_i2d(ndef_aux->val, &p, ndef_aux->it);
+
+ if (!*ndef_aux->boundary)
+ return 0;
+ *pbuf = *ndef_aux->boundary;
+ *plen = derlen - (*ndef_aux->boundary - ndef_aux->derbuf);
+
+ return 1;
+ }
diff --git a/crypto/asn1/nsseq.c b/crypto/asn1/nsseq.c
index 50e2d4d07a13..e551c57d59d0 100644
--- a/crypto/asn1/nsseq.c
+++ b/crypto/asn1/nsseq.c
@@ -1,5 +1,5 @@
/* nsseq.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999.
*/
/* ====================================================================
diff --git a/crypto/asn1/p5_pbe.c b/crypto/asn1/p5_pbe.c
index da91170094b5..c4582f8041e4 100644
--- a/crypto/asn1/p5_pbe.c
+++ b/crypto/asn1/p5_pbe.c
@@ -1,5 +1,5 @@
/* p5_pbe.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999.
*/
/* ====================================================================
diff --git a/crypto/asn1/p5_pbev2.c b/crypto/asn1/p5_pbev2.c
index c834a38ddf3c..2b0516afeed3 100644
--- a/crypto/asn1/p5_pbev2.c
+++ b/crypto/asn1/p5_pbev2.c
@@ -1,5 +1,5 @@
/* p5_pbev2.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999-2004.
*/
/* ====================================================================
diff --git a/crypto/asn1/p8_pkey.c b/crypto/asn1/p8_pkey.c
index 24b409132f53..0a1957556e54 100644
--- a/crypto/asn1/p8_pkey.c
+++ b/crypto/asn1/p8_pkey.c
@@ -1,5 +1,5 @@
/* p8_pkey.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999.
*/
/* ====================================================================
diff --git a/crypto/asn1/t_bitst.c b/crypto/asn1/t_bitst.c
index 397332d9b8e6..2e59a25fa1ed 100644
--- a/crypto/asn1/t_bitst.c
+++ b/crypto/asn1/t_bitst.c
@@ -1,5 +1,5 @@
/* t_bitst.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999.
*/
/* ====================================================================
diff --git a/crypto/asn1/t_crl.c b/crypto/asn1/t_crl.c
index 929b3e590438..bdb244c015bd 100644
--- a/crypto/asn1/t_crl.c
+++ b/crypto/asn1/t_crl.c
@@ -1,5 +1,5 @@
/* t_crl.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999.
*/
/* ====================================================================
diff --git a/crypto/asn1/t_spki.c b/crypto/asn1/t_spki.c
index c2a5797dd8bf..a73369b94920 100644
--- a/crypto/asn1/t_spki.c
+++ b/crypto/asn1/t_spki.c
@@ -1,5 +1,5 @@
/* t_spki.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999.
*/
/* ====================================================================
diff --git a/crypto/asn1/t_x509.c b/crypto/asn1/t_x509.c
index ae72b525d7d4..8f746f9c0517 100644
--- a/crypto/asn1/t_x509.c
+++ b/crypto/asn1/t_x509.c
@@ -332,7 +332,7 @@ int X509_signature_print(BIO *bp, X509_ALGOR *sigalg, ASN1_STRING *sig)
int ASN1_STRING_print(BIO *bp, ASN1_STRING *v)
{
int i,n;
- char buf[80],*p;;
+ char buf[80],*p;
if (v == NULL) return(0);
n=0;
@@ -393,7 +393,7 @@ int ASN1_GENERALIZEDTIME_print(BIO *bp, ASN1_GENERALIZEDTIME *tm)
d= (v[6]-'0')*10+(v[7]-'0');
h= (v[8]-'0')*10+(v[9]-'0');
m= (v[10]-'0')*10+(v[11]-'0');
- if (i >= 14 &&
+ if (tm->length >= 14 &&
(v[12] >= '0') && (v[12] <= '9') &&
(v[13] >= '0') && (v[13] <= '9'))
s= (v[12]-'0')*10+(v[13]-'0');
@@ -429,7 +429,7 @@ int ASN1_UTCTIME_print(BIO *bp, ASN1_UTCTIME *tm)
d= (v[4]-'0')*10+(v[5]-'0');
h= (v[6]-'0')*10+(v[7]-'0');
m= (v[8]-'0')*10+(v[9]-'0');
- if (i >=12 &&
+ if (tm->length >=12 &&
(v[10] >= '0') && (v[10] <= '9') &&
(v[11] >= '0') && (v[11] <= '9'))
s= (v[10]-'0')*10+(v[11]-'0');
diff --git a/crypto/asn1/t_x509a.c b/crypto/asn1/t_x509a.c
index ffbbfb51f43e..8b18801a173e 100644
--- a/crypto/asn1/t_x509a.c
+++ b/crypto/asn1/t_x509a.c
@@ -1,5 +1,5 @@
/* t_x509a.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999.
*/
/* ====================================================================
diff --git a/crypto/asn1/tasn_dec.c b/crypto/asn1/tasn_dec.c
index 0ee406231e7c..48bc1c0d4d09 100644
--- a/crypto/asn1/tasn_dec.c
+++ b/crypto/asn1/tasn_dec.c
@@ -1,5 +1,5 @@
/* tasn_dec.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
@@ -69,7 +69,7 @@ static int asn1_check_eoc(const unsigned char **in, long len);
static int asn1_find_end(const unsigned char **in, long len, char inf);
static int asn1_collect(BUF_MEM *buf, const unsigned char **in, long len,
- char inf, int tag, int aclass);
+ char inf, int tag, int aclass, int depth);
static int collect_data(BUF_MEM *buf, const unsigned char **p, long plen);
@@ -611,7 +611,6 @@ static int asn1_template_ex_d2i(ASN1_VALUE **val,
err:
ASN1_template_free(val, tt);
- *val = NULL;
return 0;
}
@@ -758,7 +757,6 @@ static int asn1_template_noexp_d2i(ASN1_VALUE **val,
err:
ASN1_template_free(val, tt);
- *val = NULL;
return 0;
}
@@ -878,7 +876,7 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval,
* internally irrespective of the type. So instead just check
* for UNIVERSAL class and ignore the tag.
*/
- if (!asn1_collect(&buf, &p, plen, inf, -1, V_ASN1_UNIVERSAL))
+ if (!asn1_collect(&buf, &p, plen, inf, -1, V_ASN1_UNIVERSAL, 0))
{
free_cont = 1;
goto err;
@@ -1012,6 +1010,18 @@ int asn1_ex_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
case V_ASN1_SET:
case V_ASN1_SEQUENCE:
default:
+ if (utype == V_ASN1_BMPSTRING && (len & 1))
+ {
+ ASN1err(ASN1_F_ASN1_EX_C2I,
+ ASN1_R_BMPSTRING_IS_WRONG_LENGTH);
+ goto err;
+ }
+ if (utype == V_ASN1_UNIVERSALSTRING && (len & 3))
+ {
+ ASN1err(ASN1_F_ASN1_EX_C2I,
+ ASN1_R_UNIVERSALSTRING_IS_WRONG_LENGTH);
+ goto err;
+ }
/* All based on ASN1_STRING and handled the same */
if (!*pval)
{
@@ -1128,8 +1138,18 @@ static int asn1_find_end(const unsigned char **in, long len, char inf)
* if it is indefinite length.
*/
+#ifndef ASN1_MAX_STRING_NEST
+/* This determines how many levels of recursion are permitted in ASN1
+ * string types. If it is not limited stack overflows can occur. If set
+ * to zero no recursion is allowed at all. Although zero should be adequate
+ * examples exist that require a value of 1. So 5 should be more than enough.
+ */
+#define ASN1_MAX_STRING_NEST 5
+#endif
+
+
static int asn1_collect(BUF_MEM *buf, const unsigned char **in, long len,
- char inf, int tag, int aclass)
+ char inf, int tag, int aclass, int depth)
{
const unsigned char *p, *q;
long plen;
@@ -1171,13 +1191,15 @@ static int asn1_collect(BUF_MEM *buf, const unsigned char **in, long len,
/* If indefinite length constructed update max length */
if (cst)
{
-#ifdef OPENSSL_ALLOW_NESTED_ASN1_STRINGS
- if (!asn1_collect(buf, &p, plen, ininf, tag, aclass))
+ if (depth >= ASN1_MAX_STRING_NEST)
+ {
+ ASN1err(ASN1_F_ASN1_COLLECT,
+ ASN1_R_NESTED_ASN1_STRING);
+ return 0;
+ }
+ if (!asn1_collect(buf, &p, plen, ininf, tag, aclass,
+ depth + 1))
return 0;
-#else
- ASN1err(ASN1_F_ASN1_COLLECT, ASN1_R_NESTED_ASN1_STRING);
- return 0;
-#endif
}
else if (plen && !collect_data(buf, &p, plen))
return 0;
diff --git a/crypto/asn1/tasn_enc.c b/crypto/asn1/tasn_enc.c
index be19b36acd5f..2721f904a633 100644
--- a/crypto/asn1/tasn_enc.c
+++ b/crypto/asn1/tasn_enc.c
@@ -1,5 +1,5 @@
/* tasn_enc.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/tasn_fre.c b/crypto/asn1/tasn_fre.c
index bb7c1e2af489..d7c017fa1d42 100644
--- a/crypto/asn1/tasn_fre.c
+++ b/crypto/asn1/tasn_fre.c
@@ -1,5 +1,5 @@
/* tasn_fre.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/tasn_new.c b/crypto/asn1/tasn_new.c
index 531dad365c0e..5c6a2ebd4d04 100644
--- a/crypto/asn1/tasn_new.c
+++ b/crypto/asn1/tasn_new.c
@@ -1,5 +1,5 @@
/* tasn_new.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/tasn_prn.c b/crypto/asn1/tasn_prn.c
index 719639b511fc..b9c96a6dbe1d 100644
--- a/crypto/asn1/tasn_prn.c
+++ b/crypto/asn1/tasn_prn.c
@@ -1,5 +1,5 @@
/* tasn_prn.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/tasn_typ.c b/crypto/asn1/tasn_typ.c
index 6f17f1bec716..6252213d15f0 100644
--- a/crypto/asn1/tasn_typ.c
+++ b/crypto/asn1/tasn_typ.c
@@ -1,5 +1,5 @@
/* tasn_typ.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/tasn_utl.c b/crypto/asn1/tasn_utl.c
index 34d520b180aa..ca9ec7a32f59 100644
--- a/crypto/asn1/tasn_utl.c
+++ b/crypto/asn1/tasn_utl.c
@@ -1,5 +1,5 @@
/* tasn_utl.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/x_algor.c b/crypto/asn1/x_algor.c
index 33533aba862b..99e53429b797 100644
--- a/crypto/asn1/x_algor.c
+++ b/crypto/asn1/x_algor.c
@@ -1,5 +1,5 @@
/* x_algor.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/x_bignum.c b/crypto/asn1/x_bignum.c
index 869c05d931de..9cf3204a1b59 100644
--- a/crypto/asn1/x_bignum.c
+++ b/crypto/asn1/x_bignum.c
@@ -1,5 +1,5 @@
/* x_bignum.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/x_exten.c b/crypto/asn1/x_exten.c
index 1732e667125c..3a21239926ab 100644
--- a/crypto/asn1/x_exten.c
+++ b/crypto/asn1/x_exten.c
@@ -1,5 +1,5 @@
/* x_exten.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/x_long.c b/crypto/asn1/x_long.c
index 0db233cb95f6..bf35457c1f74 100644
--- a/crypto/asn1/x_long.c
+++ b/crypto/asn1/x_long.c
@@ -1,5 +1,5 @@
/* x_long.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 2000.
*/
/* ====================================================================
diff --git a/crypto/asn1/x_nx509.c b/crypto/asn1/x_nx509.c
new file mode 100644
index 000000000000..fbd9a22db345
--- /dev/null
+++ b/crypto/asn1/x_nx509.c
@@ -0,0 +1,72 @@
+/* x_nx509.c */
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project 2005.
+ */
+/* ====================================================================
+ * Copyright (c) 2005 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com). This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+#include <stddef.h>
+#include <openssl/x509.h>
+#include <openssl/asn1.h>
+#include <openssl/asn1t.h>
+
+/* Old netscape certificate wrapper format */
+
+ASN1_SEQUENCE(NETSCAPE_X509) = {
+ ASN1_SIMPLE(NETSCAPE_X509, header, ASN1_OCTET_STRING),
+ ASN1_OPT(NETSCAPE_X509, cert, X509)
+} ASN1_SEQUENCE_END(NETSCAPE_X509)
+
+IMPLEMENT_ASN1_FUNCTIONS(NETSCAPE_X509)
+
diff --git a/crypto/asn1/x_x509a.c b/crypto/asn1/x_x509a.c
index 13db5fd03fda..b603f82de716 100644
--- a/crypto/asn1/x_x509a.c
+++ b/crypto/asn1/x_x509a.c
@@ -1,5 +1,5 @@
/* a_x509a.c */
-/* Written by Dr Stephen N Henson (shenson@bigfoot.com) for the OpenSSL
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
* project 1999.
*/
/* ====================================================================
diff --git a/crypto/bf/Makefile b/crypto/bf/Makefile
index 8441954a8d95..7f4f03eb821c 100644
--- a/crypto/bf/Makefile
+++ b/crypto/bf/Makefile
@@ -40,7 +40,7 @@ top:
all: lib
lib: $(LIBOBJ)
- $(AR) $(LIB) $(LIBOBJ)
+ $(ARX) $(LIB) $(LIBOBJ)
$(RANLIB) $(LIB) || echo Never mind.
@touch lib
@@ -103,5 +103,9 @@ bf_enc.o: ../../include/openssl/blowfish.h ../../include/openssl/e_os2.h
bf_enc.o: ../../include/openssl/opensslconf.h bf_enc.c bf_locl.h
bf_ofb64.o: ../../include/openssl/blowfish.h ../../include/openssl/e_os2.h
bf_ofb64.o: ../../include/openssl/opensslconf.h bf_locl.h bf_ofb64.c
-bf_skey.o: ../../include/openssl/blowfish.h ../../include/openssl/e_os2.h
-bf_skey.o: ../../include/openssl/opensslconf.h bf_locl.h bf_pi.h bf_skey.c
+bf_skey.o: ../../include/openssl/blowfish.h ../../include/openssl/crypto.h
+bf_skey.o: ../../include/openssl/e_os2.h ../../include/openssl/fips.h
+bf_skey.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
+bf_skey.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
+bf_skey.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
+bf_skey.o: bf_locl.h bf_pi.h bf_skey.c
diff --git a/crypto/bf/bf_skey.c b/crypto/bf/bf_skey.c
index 3673cdee6e26..6ac2aeb27959 100644
--- a/crypto/bf/bf_skey.c
+++ b/crypto/bf/bf_skey.c
@@ -59,10 +59,15 @@
#include <stdio.h>
#include <string.h>
#include <openssl/blowfish.h>
+#include <openssl/crypto.h>
+#ifdef OPENSSL_FIPS
+#include <openssl/fips.h>
+#endif
+
#include "bf_locl.h"
#include "bf_pi.h"
-void BF_set_key(BF_KEY *key, int len, const unsigned char *data)
+FIPS_NON_FIPS_VCIPHER_Init(BF)
{
int i;
BF_LONG *p,ri,in[2];
diff --git a/crypto/bf/blowfish.h b/crypto/bf/blowfish.h
index cd49e85ab29a..d24ffccb65f6 100644
--- a/crypto/bf/blowfish.h
+++ b/crypto/bf/blowfish.h
@@ -104,7 +104,9 @@ typedef struct bf_key_st
BF_LONG S[4*256];
} BF_KEY;
-
+#ifdef OPENSSL_FIPS
+void private_BF_set_key(BF_KEY *key, int len, const unsigned char *data);
+#endif
void BF_set_key(BF_KEY *key, int len, const unsigned char *data);
void BF_encrypt(BF_LONG *data,const BF_KEY *key);
diff --git a/crypto/bio/Makefile b/crypto/bio/Makefile
index 1ef6c2fb9fdb..1cd76ce7a2fb 100644
--- a/crypto/bio/Makefile
+++ b/crypto/bio/Makefile
@@ -45,7 +45,7 @@ top:
all: lib
lib: $(LIBOBJ)
- $(AR) $(LIB) $(LIBOBJ)
+ $(ARX) $(LIB) $(LIBOBJ)
$(RANLIB) $(LIB) || echo Never mind.
@touch lib
diff --git a/crypto/bio/bss_bio.c b/crypto/bio/bss_bio.c
index 0f9f0955b411..76bd48e7679b 100644
--- a/crypto/bio/bss_bio.c
+++ b/crypto/bio/bss_bio.c
@@ -919,6 +919,6 @@ int BIO_nwrite(BIO *bio, char **buf, int num)
ret = BIO_ctrl(bio, BIO_C_NWRITE, num, buf);
if (ret > 0)
- bio->num_read += ret;
+ bio->num_write += ret;
return ret;
}
diff --git a/crypto/bio/bss_file.c b/crypto/bio/bss_file.c
index 4df9927c437e..9ad46fa081db 100644
--- a/crypto/bio/bss_file.c
+++ b/crypto/bio/bss_file.c
@@ -279,7 +279,7 @@ static long MS_CALLBACK file_ctrl(BIO *b, int cmd, long num, void *ptr)
#endif
{
#if defined(OPENSSL_SYS_WINDOWS)
- int fd = fileno((FILE*)ptr);
+ int fd = _fileno((FILE*)ptr);
if (num & BIO_FP_TEXT)
_setmode(fd,_O_TEXT);
else
diff --git a/crypto/bio/bss_mem.c b/crypto/bio/bss_mem.c
index a4edb711aec1..e7ab9cb3a3f8 100644
--- a/crypto/bio/bss_mem.c
+++ b/crypto/bio/bss_mem.c
@@ -284,6 +284,7 @@ static int mem_gets(BIO *bp, char *buf, int size)
BIO_clear_retry_flags(bp);
j=bm->length;
+ if ((size-1) < j) j=size-1;
if (j <= 0)
{
*buf='\0';
@@ -292,17 +293,18 @@ static int mem_gets(BIO *bp, char *buf, int size)
p=bm->data;
for (i=0; i<j; i++)
{
- if (p[i] == '\n') break;
- }
- if (i == j)
- {
- BIO_set_retry_read(bp);
- /* return(-1); change the semantics 0.6.6a */
+ if (p[i] == '\n')
+ {
+ i++;
+ break;
+ }
}
- else
- i++;
- /* i is the max to copy */
- if ((size-1) < i) i=size-1;
+
+ /*
+ * i is now the max num of bytes to copy, either j or up to
+ * and including the first newline
+ */
+
i=mem_read(bp,buf,i);
if (i > 0) buf[i]='\0';
ret=i;
diff --git a/crypto/bio/bss_sock.c b/crypto/bio/bss_sock.c
index 472dd75821c5..30c3ceab468c 100644
--- a/crypto/bio/bss_sock.c
+++ b/crypto/bio/bss_sock.c
@@ -60,6 +60,9 @@
#include <errno.h>
#define USE_SOCKETS
#include "cryptlib.h"
+
+#ifndef OPENSSL_NO_SOCK
+
#include <openssl/bio.h>
#ifdef WATT32
@@ -300,3 +303,5 @@ int BIO_sock_non_fatal_error(int err)
}
return(0);
}
+
+#endif /* #ifndef OPENSSL_NO_SOCK */
diff --git a/crypto/bn/Makefile b/crypto/bn/Makefile
index 0491e3db4c47..f5e8f65a4698 100644
--- a/crypto/bn/Makefile
+++ b/crypto/bn/Makefile
@@ -28,13 +28,13 @@ LIBSRC= bn_add.c bn_div.c bn_exp.c bn_lib.c bn_ctx.c bn_mul.c bn_mod.c \
bn_print.c bn_rand.c bn_shift.c bn_word.c bn_blind.c \
bn_kron.c bn_sqrt.c bn_gcd.c bn_prime.c bn_err.c bn_sqr.c bn_asm.c \
bn_recp.c bn_mont.c bn_mpi.c bn_exp2.c bn_gf2m.c bn_nist.c \
- bn_depr.c bn_const.c
+ bn_depr.c bn_x931p.c bn_const.c bn_opt.c
LIBOBJ= bn_add.o bn_div.o bn_exp.o bn_lib.o bn_ctx.o bn_mul.o bn_mod.o \
bn_print.o bn_rand.o bn_shift.o bn_word.o bn_blind.o \
bn_kron.o bn_sqrt.o bn_gcd.o bn_prime.o bn_err.o bn_sqr.o $(BN_ASM) \
bn_recp.o bn_mont.o bn_mpi.o bn_exp2.o bn_gf2m.o bn_nist.o \
- bn_depr.o bn_const.o
+ bn_depr.o bn_x931p.o bn_const.o bn_opt.o
SRC= $(LIBSRC)
@@ -58,7 +58,7 @@ bnbug: bnbug.c ../../libcrypto.a top
cc -g -I../../include bnbug.c -o bnbug ../../libcrypto.a
lib: $(LIBOBJ)
- $(AR) $(LIB) $(LIBOBJ)
+ $(ARX) $(LIB) $(LIBOBJ)
$(RANLIB) $(LIB) || echo Never mind.
@touch lib
@@ -292,6 +292,13 @@ bn_nist.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
bn_nist.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
bn_nist.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
bn_nist.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_nist.c
+bn_opt.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
+bn_opt.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
+bn_opt.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
+bn_opt.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
+bn_opt.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+bn_opt.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
+bn_opt.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_opt.c
bn_prime.o: ../../e_os.h ../../include/openssl/bio.h ../../include/openssl/bn.h
bn_prime.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
bn_prime.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
@@ -350,3 +357,6 @@ bn_word.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
bn_word.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
bn_word.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
bn_word.o: ../../include/openssl/symhacks.h ../cryptlib.h bn_lcl.h bn_word.c
+bn_x931p.o: ../../include/openssl/bn.h ../../include/openssl/e_os2.h
+bn_x931p.o: ../../include/openssl/opensslconf.h
+bn_x931p.o: ../../include/openssl/ossl_typ.h bn_x931p.c
diff --git a/crypto/bn/asm/alpha-mont.pl b/crypto/bn/asm/alpha-mont.pl
new file mode 100755
index 000000000000..7a2cc3173b0e
--- /dev/null
+++ b/crypto/bn/asm/alpha-mont.pl
@@ -0,0 +1,317 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# On 21264 RSA sign performance improves by 70/35/20/15 percent for
+# 512/1024/2048/4096 bit key lengths. This is against vendor compiler
+# instructed to '-tune host' code with in-line assembler. Other
+# benchmarks improve by 15-20%. To anchor it to something else, the
+# code provides approximately the same performance per GHz as AMD64.
+# I.e. if you compare 1GHz 21264 and 2GHz Opteron, you'll observe ~2x
+# difference.
+
+# int bn_mul_mont(
+$rp="a0"; # BN_ULONG *rp,
+$ap="a1"; # const BN_ULONG *ap,
+$bp="a2"; # const BN_ULONG *bp,
+$np="a3"; # const BN_ULONG *np,
+$n0="a4"; # const BN_ULONG *n0,
+$num="a5"; # int num);
+
+$lo0="t0";
+$hi0="t1";
+$lo1="t2";
+$hi1="t3";
+$aj="t4";
+$bi="t5";
+$nj="t6";
+$tp="t7";
+$alo="t8";
+$ahi="t9";
+$nlo="t10";
+$nhi="t11";
+$tj="t12";
+$i="s3";
+$j="s4";
+$m1="s5";
+
+$code=<<___;
+#include <asm.h>
+#include <regdef.h>
+
+.text
+
+.set noat
+.set noreorder
+
+.globl bn_mul_mont
+.align 5
+.ent bn_mul_mont
+bn_mul_mont:
+ lda sp,-40(sp)
+ stq ra,0(sp)
+ stq s3,8(sp)
+ stq s4,16(sp)
+ stq s5,24(sp)
+ stq fp,32(sp)
+ mov sp,fp
+ .mask 0x0400f000,-40
+ .frame fp,40,ra
+ .prologue 0
+
+ .align 4
+ .set reorder
+ sextl $num,$num
+ mov 0,v0
+ cmplt $num,4,AT
+ bne AT,.Lexit
+
+ ldq $hi0,0($ap) # ap[0]
+ s8addq $num,16,AT
+ ldq $aj,8($ap)
+ subq sp,AT,sp
+ ldq $bi,0($bp) # bp[0]
+ mov -4096,AT
+ ldq $n0,0($n0)
+ and sp,AT,sp
+
+ mulq $hi0,$bi,$lo0
+ ldq $hi1,0($np) # np[0]
+ umulh $hi0,$bi,$hi0
+ ldq $nj,8($np)
+
+ mulq $lo0,$n0,$m1
+
+ mulq $hi1,$m1,$lo1
+ umulh $hi1,$m1,$hi1
+
+ addq $lo1,$lo0,$lo1
+ cmpult $lo1,$lo0,AT
+ addq $hi1,AT,$hi1
+
+ mulq $aj,$bi,$alo
+ mov 2,$j
+ umulh $aj,$bi,$ahi
+ mov sp,$tp
+
+ mulq $nj,$m1,$nlo
+ s8addq $j,$ap,$aj
+ umulh $nj,$m1,$nhi
+ s8addq $j,$np,$nj
+.align 4
+.L1st:
+ .set noreorder
+ ldq $aj,($aj)
+ addl $j,1,$j
+ ldq $nj,($nj)
+ lda $tp,8($tp)
+
+ addq $alo,$hi0,$lo0
+ mulq $aj,$bi,$alo
+ cmpult $lo0,$hi0,AT
+ addq $nlo,$hi1,$lo1
+
+ mulq $nj,$m1,$nlo
+ addq $ahi,AT,$hi0
+ cmpult $lo1,$hi1,v0
+ cmplt $j,$num,$tj
+
+ umulh $aj,$bi,$ahi
+ addq $nhi,v0,$hi1
+ addq $lo1,$lo0,$lo1
+ s8addq $j,$ap,$aj
+
+ umulh $nj,$m1,$nhi
+ cmpult $lo1,$lo0,v0
+ addq $hi1,v0,$hi1
+ s8addq $j,$np,$nj
+
+ stq $lo1,-8($tp)
+ nop
+ unop
+ bne $tj,.L1st
+ .set reorder
+
+ addq $alo,$hi0,$lo0
+ addq $nlo,$hi1,$lo1
+ cmpult $lo0,$hi0,AT
+ cmpult $lo1,$hi1,v0
+ addq $ahi,AT,$hi0
+ addq $nhi,v0,$hi1
+
+ addq $lo1,$lo0,$lo1
+ cmpult $lo1,$lo0,v0
+ addq $hi1,v0,$hi1
+
+ stq $lo1,0($tp)
+
+ addq $hi1,$hi0,$hi1
+ cmpult $hi1,$hi0,AT
+ stq $hi1,8($tp)
+ stq AT,16($tp)
+
+ mov 1,$i
+.align 4
+.Louter:
+ s8addq $i,$bp,$bi
+ ldq $hi0,($ap)
+ ldq $aj,8($ap)
+ ldq $bi,($bi)
+ ldq $hi1,($np)
+ ldq $nj,8($np)
+ ldq $tj,(sp)
+
+ mulq $hi0,$bi,$lo0
+ umulh $hi0,$bi,$hi0
+
+ addq $lo0,$tj,$lo0
+ cmpult $lo0,$tj,AT
+ addq $hi0,AT,$hi0
+
+ mulq $lo0,$n0,$m1
+
+ mulq $hi1,$m1,$lo1
+ umulh $hi1,$m1,$hi1
+
+ addq $lo1,$lo0,$lo1
+ cmpult $lo1,$lo0,AT
+ mov 2,$j
+ addq $hi1,AT,$hi1
+
+ mulq $aj,$bi,$alo
+ mov sp,$tp
+ umulh $aj,$bi,$ahi
+
+ mulq $nj,$m1,$nlo
+ s8addq $j,$ap,$aj
+ umulh $nj,$m1,$nhi
+.align 4
+.Linner:
+ .set noreorder
+ ldq $tj,8($tp) #L0
+ nop #U1
+ ldq $aj,($aj) #L1
+ s8addq $j,$np,$nj #U0
+
+ ldq $nj,($nj) #L0
+ nop #U1
+ addq $alo,$hi0,$lo0 #L1
+ lda $tp,8($tp)
+
+ mulq $aj,$bi,$alo #U1
+ cmpult $lo0,$hi0,AT #L0
+ addq $nlo,$hi1,$lo1 #L1
+ addl $j,1,$j
+
+ mulq $nj,$m1,$nlo #U1
+ addq $ahi,AT,$hi0 #L0
+ addq $lo0,$tj,$lo0 #L1
+ cmpult $lo1,$hi1,v0 #U0
+
+ umulh $aj,$bi,$ahi #U1
+ cmpult $lo0,$tj,AT #L0
+ addq $lo1,$lo0,$lo1 #L1
+ addq $nhi,v0,$hi1 #U0
+
+ umulh $nj,$m1,$nhi #U1
+ s8addq $j,$ap,$aj #L0
+ cmpult $lo1,$lo0,v0 #L1
+ cmplt $j,$num,$tj #U0 # borrow $tj
+
+ addq $hi0,AT,$hi0 #L0
+ addq $hi1,v0,$hi1 #U1
+ stq $lo1,-8($tp) #L1
+ bne $tj,.Linner #U0
+ .set reorder
+
+ ldq $tj,8($tp)
+ addq $alo,$hi0,$lo0
+ addq $nlo,$hi1,$lo1
+ cmpult $lo0,$hi0,AT
+ cmpult $lo1,$hi1,v0
+ addq $ahi,AT,$hi0
+ addq $nhi,v0,$hi1
+
+ addq $lo0,$tj,$lo0
+ cmpult $lo0,$tj,AT
+ addq $hi0,AT,$hi0
+
+ ldq $tj,16($tp)
+ addq $lo1,$lo0,$j
+ cmpult $j,$lo0,v0
+ addq $hi1,v0,$hi1
+
+ addq $hi1,$hi0,$lo1
+ stq $j,($tp)
+ cmpult $lo1,$hi0,$hi1
+ addq $lo1,$tj,$lo1
+ cmpult $lo1,$tj,AT
+ addl $i,1,$i
+ addq $hi1,AT,$hi1
+ stq $lo1,8($tp)
+ cmplt $i,$num,$tj # borrow $tj
+ stq $hi1,16($tp)
+ bne $tj,.Louter
+
+ s8addq $num,sp,$tj # &tp[num]
+ mov $rp,$bp # put rp aside
+ mov sp,$tp
+ mov sp,$ap
+ mov 0,$hi0 # clear borrow bit
+
+.align 4
+.Lsub: ldq $lo0,($tp)
+ ldq $lo1,($np)
+ lda $tp,8($tp)
+ lda $np,8($np)
+ subq $lo0,$lo1,$lo1 # tp[i]-np[i]
+ cmpult $lo0,$lo1,AT
+ subq $lo1,$hi0,$lo0
+ cmpult $lo1,$lo0,$hi0
+ or $hi0,AT,$hi0
+ stq $lo0,($rp)
+ cmpult $tp,$tj,v0
+ lda $rp,8($rp)
+ bne v0,.Lsub
+
+ subq $hi1,$hi0,$hi0 # handle upmost overflow bit
+ mov sp,$tp
+ mov $bp,$rp # restore rp
+
+ and sp,$hi0,$ap
+ bic $bp,$hi0,$bp
+ bis $bp,$ap,$ap # ap=borrow?tp:rp
+
+.align 4
+.Lcopy: ldq $aj,($ap) # copy or in-place refresh
+ lda $tp,8($tp)
+ lda $rp,8($rp)
+ lda $ap,8($ap)
+ stq zero,-8($tp) # zap tp
+ cmpult $tp,$tj,AT
+ stq $aj,-8($rp)
+ bne AT,.Lcopy
+ mov 1,v0
+
+.Lexit:
+ .set noreorder
+ mov fp,sp
+ /*ldq ra,0(sp)*/
+ ldq s3,8(sp)
+ ldq s4,16(sp)
+ ldq s5,24(sp)
+ ldq fp,32(sp)
+ lda sp,40(sp)
+ ret (ra)
+.end bn_mul_mont
+.rdata
+.asciiz "Montgomery Multiplication for Alpha, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+print $code;
+close STDOUT;
diff --git a/crypto/bn/asm/armv4-mont.pl b/crypto/bn/asm/armv4-mont.pl
new file mode 100755
index 000000000000..05d5dc1a4822
--- /dev/null
+++ b/crypto/bn/asm/armv4-mont.pl
@@ -0,0 +1,200 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# January 2007.
+
+# Montgomery multiplication for ARMv4.
+#
+# Performance improvement naturally varies among CPU implementations
+# and compilers. The code was observed to provide +65-35% improvement
+# [depending on key length, less for longer keys] on ARM920T, and
+# +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code
+# base and compiler generated code with in-lined umull and even umlal
+# instructions. The latter means that this code didn't really have an
+# "advantage" of utilizing some "secret" instruction.
+#
+# The code is interoperable with Thumb ISA and is rather compact, less
+# than 1/2KB. Windows CE port would be trivial, as it's exclusively
+# about decorations, ABI and instruction syntax are identical.
+
+$num="r0"; # starts as num argument, but holds &tp[num-1]
+$ap="r1";
+$bp="r2"; $bi="r2"; $rp="r2";
+$np="r3";
+$tp="r4";
+$aj="r5";
+$nj="r6";
+$tj="r7";
+$n0="r8";
+########### # r9 is reserved by ELF as platform specific, e.g. TLS pointer
+$alo="r10"; # sl, gcc uses it to keep @GOT
+$ahi="r11"; # fp
+$nlo="r12"; # ip
+########### # r13 is stack pointer
+$nhi="r14"; # lr
+########### # r15 is program counter
+
+#### argument block layout relative to &tp[num-1], a.k.a. $num
+$_rp="$num,#12*4";
+# ap permanently resides in r1
+$_bp="$num,#13*4";
+# np permanently resides in r3
+$_n0="$num,#14*4";
+$_num="$num,#15*4"; $_bpend=$_num;
+
+$code=<<___;
+.text
+
+.global bn_mul_mont
+.type bn_mul_mont,%function
+
+.align 2
+bn_mul_mont:
+ stmdb sp!,{r0,r2} @ sp points at argument block
+ ldr $num,[sp,#3*4] @ load num
+ cmp $num,#2
+ movlt r0,#0
+ addlt sp,sp,#2*4
+ blt .Labrt
+
+ stmdb sp!,{r4-r12,lr} @ save 10 registers
+
+ mov $num,$num,lsl#2 @ rescale $num for byte count
+ sub sp,sp,$num @ alloca(4*num)
+ sub sp,sp,#4 @ +extra dword
+ sub $num,$num,#4 @ "num=num-1"
+ add $tp,$bp,$num @ &bp[num-1]
+
+ add $num,sp,$num @ $num to point at &tp[num-1]
+ ldr $n0,[$_n0] @ &n0
+ ldr $bi,[$bp] @ bp[0]
+ ldr $aj,[$ap],#4 @ ap[0],ap++
+ ldr $nj,[$np],#4 @ np[0],np++
+ ldr $n0,[$n0] @ *n0
+ str $tp,[$_bpend] @ save &bp[num]
+
+ umull $alo,$ahi,$aj,$bi @ ap[0]*bp[0]
+ str $n0,[$_n0] @ save n0 value
+ mul $n0,$alo,$n0 @ "tp[0]"*n0
+ mov $nlo,#0
+ umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"t[0]"
+ mov $tp,sp
+
+.L1st:
+ ldr $aj,[$ap],#4 @ ap[j],ap++
+ mov $alo,$ahi
+ mov $ahi,#0
+ umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[0]
+ ldr $nj,[$np],#4 @ np[j],np++
+ mov $nhi,#0
+ umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
+ adds $nlo,$nlo,$alo
+ str $nlo,[$tp],#4 @ tp[j-1]=,tp++
+ adc $nlo,$nhi,#0
+ cmp $tp,$num
+ bne .L1st
+
+ adds $nlo,$nlo,$ahi
+ mov $nhi,#0
+ adc $nhi,$nhi,#0
+ ldr $tp,[$_bp] @ restore bp
+ str $nlo,[$num] @ tp[num-1]=
+ ldr $n0,[$_n0] @ restore n0
+ str $nhi,[$num,#4] @ tp[num]=
+
+.Louter:
+ sub $tj,$num,sp @ "original" $num-1 value
+ sub $ap,$ap,$tj @ "rewind" ap to &ap[1]
+ sub $np,$np,$tj @ "rewind" np to &np[1]
+ ldr $bi,[$tp,#4]! @ *(++bp)
+ ldr $aj,[$ap,#-4] @ ap[0]
+ ldr $nj,[$np,#-4] @ np[0]
+ ldr $alo,[sp] @ tp[0]
+ ldr $tj,[sp,#4] @ tp[1]
+
+ mov $ahi,#0
+ umlal $alo,$ahi,$aj,$bi @ ap[0]*bp[i]+tp[0]
+ str $tp,[$_bp] @ save bp
+ mul $n0,$alo,$n0
+ mov $nlo,#0
+ umlal $alo,$nlo,$nj,$n0 @ np[0]*n0+"tp[0]"
+ mov $tp,sp
+
+.Linner:
+ ldr $aj,[$ap],#4 @ ap[j],ap++
+ adds $alo,$ahi,$tj @ +=tp[j]
+ mov $ahi,#0
+ umlal $alo,$ahi,$aj,$bi @ ap[j]*bp[i]
+ ldr $nj,[$np],#4 @ np[j],np++
+ mov $nhi,#0
+ umlal $nlo,$nhi,$nj,$n0 @ np[j]*n0
+ ldr $tj,[$tp,#8] @ tp[j+1]
+ adc $ahi,$ahi,#0
+ adds $nlo,$nlo,$alo
+ str $nlo,[$tp],#4 @ tp[j-1]=,tp++
+ adc $nlo,$nhi,#0
+ cmp $tp,$num
+ bne .Linner
+
+ adds $nlo,$nlo,$ahi
+ mov $nhi,#0
+ adc $nhi,$nhi,#0
+ adds $nlo,$nlo,$tj
+ adc $nhi,$nhi,#0
+ ldr $tp,[$_bp] @ restore bp
+ ldr $tj,[$_bpend] @ restore &bp[num]
+ str $nlo,[$num] @ tp[num-1]=
+ ldr $n0,[$_n0] @ restore n0
+ str $nhi,[$num,#4] @ tp[num]=
+
+ cmp $tp,$tj
+ bne .Louter
+
+ ldr $rp,[$_rp] @ pull rp
+ add $num,$num,#4 @ $num to point at &tp[num]
+ sub $aj,$num,sp @ "original" num value
+ mov $tp,sp @ "rewind" $tp
+ mov $ap,$tp @ "borrow" $ap
+ sub $np,$np,$aj @ "rewind" $np to &np[0]
+
+ subs $tj,$tj,$tj @ "clear" carry flag
+.Lsub: ldr $tj,[$tp],#4
+ ldr $nj,[$np],#4
+ sbcs $tj,$tj,$nj @ tp[j]-np[j]
+ str $tj,[$rp],#4 @ rp[j]=
+ teq $tp,$num @ preserve carry
+ bne .Lsub
+ sbcs $nhi,$nhi,#0 @ upmost carry
+ mov $tp,sp @ "rewind" $tp
+ sub $rp,$rp,$aj @ "rewind" $rp
+
+ and $ap,$tp,$nhi
+ bic $np,$rp,$nhi
+ orr $ap,$ap,$np @ ap=borrow?tp:rp
+
+.Lcopy: ldr $tj,[$ap],#4 @ copy or in-place refresh
+ str sp,[$tp],#4 @ zap tp
+ str $tj,[$rp],#4
+ cmp $tp,$num
+ bne .Lcopy
+
+ add sp,$num,#4 @ skip over tp[num+1]
+ ldmia sp!,{r4-r12,lr} @ restore registers
+ add sp,sp,#2*4 @ skip over {r0,r2}
+ mov r0,#1
+.Labrt: tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+.size bn_mul_mont,.-bn_mul_mont
+.asciz "Montgomery multiplication for ARMv4, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+$code =~ s/\bbx\s+lr\b/.word\t0xe12fff1e/gm; # make it possible to compile with -march=armv4
+print $code;
+close STDOUT;
diff --git a/crypto/bn/asm/mips3-mont.pl b/crypto/bn/asm/mips3-mont.pl
new file mode 100755
index 000000000000..8f9156e02af3
--- /dev/null
+++ b/crypto/bn/asm/mips3-mont.pl
@@ -0,0 +1,327 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# This module doesn't present direct interest for OpenSSL, because it
+# doesn't provide better performance for longer keys. While 512-bit
+# RSA private key operations are 40% faster, 1024-bit ones are hardly
+# faster at all, while longer key operations are slower by up to 20%.
+# It might be of interest to embedded system developers though, as
+# it's smaller than 1KB, yet offers ~3x improvement over compiler
+# generated code.
+#
+# The module targets N32 and N64 MIPS ABIs and currently is a bit
+# IRIX-centric, i.e. is likely to require adaptation for other OSes.
+
+# int bn_mul_mont(
+$rp="a0"; # BN_ULONG *rp,
+$ap="a1"; # const BN_ULONG *ap,
+$bp="a2"; # const BN_ULONG *bp,
+$np="a3"; # const BN_ULONG *np,
+$n0="a4"; # const BN_ULONG *n0,
+$num="a5"; # int num);
+
+$lo0="a6";
+$hi0="a7";
+$lo1="v0";
+$hi1="v1";
+$aj="t0";
+$bi="t1";
+$nj="t2";
+$tp="t3";
+$alo="s0";
+$ahi="s1";
+$nlo="s2";
+$nhi="s3";
+$tj="s4";
+$i="s5";
+$j="s6";
+$fp="t8";
+$m1="t9";
+
+$FRAME=8*(2+8);
+
+$code=<<___;
+#include <asm.h>
+#include <regdef.h>
+
+.text
+
+.set noat
+.set reorder
+
+.align 5
+.globl bn_mul_mont
+.ent bn_mul_mont
+bn_mul_mont:
+ .set noreorder
+ PTR_SUB sp,64
+ move $fp,sp
+ .frame $fp,64,ra
+ slt AT,$num,4
+ li v0,0
+ beqzl AT,.Lproceed
+ nop
+ jr ra
+ PTR_ADD sp,$fp,64
+ .set reorder
+.align 5
+.Lproceed:
+ ld $n0,0($n0)
+ ld $bi,0($bp) # bp[0]
+ ld $aj,0($ap) # ap[0]
+ ld $nj,0($np) # np[0]
+ PTR_SUB sp,16 # place for two extra words
+ sll $num,3
+ li AT,-4096
+ PTR_SUB sp,$num
+ and sp,AT
+
+ sd s0,0($fp)
+ sd s1,8($fp)
+ sd s2,16($fp)
+ sd s3,24($fp)
+ sd s4,32($fp)
+ sd s5,40($fp)
+ sd s6,48($fp)
+ sd s7,56($fp)
+
+ dmultu $aj,$bi
+ ld $alo,8($ap)
+ ld $nlo,8($np)
+ mflo $lo0
+ mfhi $hi0
+ dmultu $lo0,$n0
+ mflo $m1
+
+ dmultu $alo,$bi
+ mflo $alo
+ mfhi $ahi
+
+ dmultu $nj,$m1
+ mflo $lo1
+ mfhi $hi1
+ dmultu $nlo,$m1
+ daddu $lo1,$lo0
+ sltu AT,$lo1,$lo0
+ daddu $hi1,AT
+ mflo $nlo
+ mfhi $nhi
+
+ move $tp,sp
+ li $j,16
+.align 4
+.L1st:
+ .set noreorder
+ PTR_ADD $aj,$ap,$j
+ ld $aj,($aj)
+ PTR_ADD $nj,$np,$j
+ ld $nj,($nj)
+
+ dmultu $aj,$bi
+ daddu $lo0,$alo,$hi0
+ daddu $lo1,$nlo,$hi1
+ sltu AT,$lo0,$hi0
+ sltu s7,$lo1,$hi1
+ daddu $hi0,$ahi,AT
+ daddu $hi1,$nhi,s7
+ mflo $alo
+ mfhi $ahi
+
+ daddu $lo1,$lo0
+ sltu AT,$lo1,$lo0
+ dmultu $nj,$m1
+ daddu $hi1,AT
+ addu $j,8
+ sd $lo1,($tp)
+ sltu s7,$j,$num
+ mflo $nlo
+ mfhi $nhi
+
+ bnez s7,.L1st
+ PTR_ADD $tp,8
+ .set reorder
+
+ daddu $lo0,$alo,$hi0
+ sltu AT,$lo0,$hi0
+ daddu $hi0,$ahi,AT
+
+ daddu $lo1,$nlo,$hi1
+ sltu s7,$lo1,$hi1
+ daddu $hi1,$nhi,s7
+ daddu $lo1,$lo0
+ sltu AT,$lo1,$lo0
+ daddu $hi1,AT
+
+ sd $lo1,($tp)
+
+ daddu $hi1,$hi0
+ sltu AT,$hi1,$hi0
+ sd $hi1,8($tp)
+ sd AT,16($tp)
+
+ li $i,8
+.align 4
+.Louter:
+ PTR_ADD $bi,$bp,$i
+ ld $bi,($bi)
+ ld $aj,($ap)
+ ld $alo,8($ap)
+ ld $tj,(sp)
+
+ dmultu $aj,$bi
+ ld $nj,($np)
+ ld $nlo,8($np)
+ mflo $lo0
+ mfhi $hi0
+ daddu $lo0,$tj
+ dmultu $lo0,$n0
+ sltu AT,$lo0,$tj
+ daddu $hi0,AT
+ mflo $m1
+
+ dmultu $alo,$bi
+ mflo $alo
+ mfhi $ahi
+
+ dmultu $nj,$m1
+ mflo $lo1
+ mfhi $hi1
+
+ dmultu $nlo,$m1
+ daddu $lo1,$lo0
+ sltu AT,$lo1,$lo0
+ daddu $hi1,AT
+ mflo $nlo
+ mfhi $nhi
+
+ move $tp,sp
+ li $j,16
+ ld $tj,8($tp)
+.align 4
+.Linner:
+ .set noreorder
+ PTR_ADD $aj,$ap,$j
+ ld $aj,($aj)
+ PTR_ADD $nj,$np,$j
+ ld $nj,($nj)
+
+ dmultu $aj,$bi
+ daddu $lo0,$alo,$hi0
+ daddu $lo1,$nlo,$hi1
+ sltu AT,$lo0,$hi0
+ sltu s7,$lo1,$hi1
+ daddu $hi0,$ahi,AT
+ daddu $hi1,$nhi,s7
+ mflo $alo
+ mfhi $ahi
+
+ daddu $lo0,$tj
+ addu $j,8
+ dmultu $nj,$m1
+ sltu AT,$lo0,$tj
+ daddu $lo1,$lo0
+ daddu $hi0,AT
+ sltu s7,$lo1,$lo0
+ ld $tj,16($tp)
+ daddu $hi1,s7
+ sltu AT,$j,$num
+ mflo $nlo
+ mfhi $nhi
+ sd $lo1,($tp)
+ bnez AT,.Linner
+ PTR_ADD $tp,8
+ .set reorder
+
+ daddu $lo0,$alo,$hi0
+ sltu AT,$lo0,$hi0
+ daddu $hi0,$ahi,AT
+ daddu $lo0,$tj
+ sltu s7,$lo0,$tj
+ daddu $hi0,s7
+
+ ld $tj,16($tp)
+ daddu $lo1,$nlo,$hi1
+ sltu AT,$lo1,$hi1
+ daddu $hi1,$nhi,AT
+ daddu $lo1,$lo0
+ sltu s7,$lo1,$lo0
+ daddu $hi1,s7
+ sd $lo1,($tp)
+
+ daddu $lo1,$hi1,$hi0
+ sltu $hi1,$lo1,$hi0
+ daddu $lo1,$tj
+ sltu AT,$lo1,$tj
+ daddu $hi1,AT
+ sd $lo1,8($tp)
+ sd $hi1,16($tp)
+
+ addu $i,8
+ sltu s7,$i,$num
+ bnez s7,.Louter
+
+ .set noreorder
+ PTR_ADD $tj,sp,$num # &tp[num]
+ move $tp,sp
+ move $ap,sp
+ li $hi0,0 # clear borrow bit
+
+.align 4
+.Lsub: ld $lo0,($tp)
+ ld $lo1,($np)
+ PTR_ADD $tp,8
+ PTR_ADD $np,8
+ dsubu $lo1,$lo0,$lo1 # tp[i]-np[i]
+ sgtu AT,$lo1,$lo0
+ dsubu $lo0,$lo1,$hi0
+ sgtu $hi0,$lo0,$lo1
+ sd $lo0,($rp)
+ or $hi0,AT
+ sltu AT,$tp,$tj
+ bnez AT,.Lsub
+ PTR_ADD $rp,8
+
+ dsubu $hi0,$hi1,$hi0 # handle upmost overflow bit
+ move $tp,sp
+ PTR_SUB $rp,$num # restore rp
+ not $hi1,$hi0
+
+ and $ap,$hi0,sp
+ and $bp,$hi1,$rp
+ or $ap,$ap,$bp # ap=borrow?tp:rp
+
+.align 4
+.Lcopy: ld $aj,($ap)
+ PTR_ADD $ap,8
+ PTR_ADD $tp,8
+ sd zero,-8($tp)
+ sltu AT,$tp,$tj
+ sd $aj,($rp)
+ bnez AT,.Lcopy
+ PTR_ADD $rp,8
+
+ ld s0,0($fp)
+ ld s1,8($fp)
+ ld s2,16($fp)
+ ld s3,24($fp)
+ ld s4,32($fp)
+ ld s5,40($fp)
+ ld s6,48($fp)
+ ld s7,56($fp)
+ li v0,1
+ jr ra
+ PTR_ADD sp,$fp,64
+ .set reorder
+END(bn_mul_mont)
+.rdata
+.asciiz "Montgomery Multiplication for MIPS III/IV, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+print $code;
+close STDOUT;
diff --git a/crypto/bn/asm/ppc-mont.pl b/crypto/bn/asm/ppc-mont.pl
new file mode 100755
index 000000000000..7849eae95922
--- /dev/null
+++ b/crypto/bn/asm/ppc-mont.pl
@@ -0,0 +1,323 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# April 2006
+
+# "Teaser" Montgomery multiplication module for PowerPC. It's possible
+# to gain a bit more by modulo-scheduling outer loop, then dedicated
+# squaring procedure should give further 20% and code can be adapted
+# for 32-bit application running on 64-bit CPU. As for the latter.
+# It won't be able to achieve "native" 64-bit performance, because in
+# 32-bit application context every addc instruction will have to be
+# expanded as addc, twice right shift by 32 and finally adde, etc.
+# So far RSA *sign* performance improvement over pre-bn_mul_mont asm
+# for 64-bit application running on PPC970/G5 is:
+#
+# 512-bit +65%
+# 1024-bit +35%
+# 2048-bit +18%
+# 4096-bit +4%
+
+$flavour = shift;
+
+if ($flavour =~ /32/) {
+ $BITS= 32;
+ $BNSZ= $BITS/8;
+ $SIZE_T=4;
+ $RZONE= 224;
+ $FRAME= $SIZE_T*16;
+
+ $LD= "lwz"; # load
+ $LDU= "lwzu"; # load and update
+ $LDX= "lwzx"; # load indexed
+ $ST= "stw"; # store
+ $STU= "stwu"; # store and update
+ $STX= "stwx"; # store indexed
+ $STUX= "stwux"; # store indexed and update
+ $UMULL= "mullw"; # unsigned multiply low
+ $UMULH= "mulhwu"; # unsigned multiply high
+ $UCMP= "cmplw"; # unsigned compare
+ $SHRI= "srwi"; # unsigned shift right by immediate
+ $PUSH= $ST;
+ $POP= $LD;
+} elsif ($flavour =~ /64/) {
+ $BITS= 64;
+ $BNSZ= $BITS/8;
+ $SIZE_T=8;
+ $RZONE= 288;
+ $FRAME= $SIZE_T*16;
+
+ # same as above, but 64-bit mnemonics...
+ $LD= "ld"; # load
+ $LDU= "ldu"; # load and update
+ $LDX= "ldx"; # load indexed
+ $ST= "std"; # store
+ $STU= "stdu"; # store and update
+ $STX= "stdx"; # store indexed
+ $STUX= "stdux"; # store indexed and update
+ $UMULL= "mulld"; # unsigned multiply low
+ $UMULH= "mulhdu"; # unsigned multiply high
+ $UCMP= "cmpld"; # unsigned compare
+ $SHRI= "srdi"; # unsigned shift right by immediate
+ $PUSH= $ST;
+ $POP= $LD;
+} else { die "nonsense $flavour"; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
+
+$sp="r1";
+$toc="r2";
+$rp="r3"; $ovf="r3";
+$ap="r4";
+$bp="r5";
+$np="r6";
+$n0="r7";
+$num="r8";
+$rp="r9"; # $rp is reassigned
+$aj="r10";
+$nj="r11";
+$tj="r12";
+# non-volatile registers
+$i="r14";
+$j="r15";
+$tp="r16";
+$m0="r17";
+$m1="r18";
+$lo0="r19";
+$hi0="r20";
+$lo1="r21";
+$hi1="r22";
+$alo="r23";
+$ahi="r24";
+$nlo="r25";
+#
+$nhi="r0";
+
+$code=<<___;
+.machine "any"
+.text
+
+.globl .bn_mul_mont
+.align 4
+.bn_mul_mont:
+ cmpwi $num,4
+ mr $rp,r3 ; $rp is reassigned
+ li r3,0
+ bltlr
+
+ slwi $num,$num,`log($BNSZ)/log(2)`
+ li $tj,-4096
+ addi $ovf,$num,`$FRAME+$RZONE`
+ subf $ovf,$ovf,$sp ; $sp-$ovf
+ and $ovf,$ovf,$tj ; minimize TLB usage
+ subf $ovf,$sp,$ovf ; $ovf-$sp
+ srwi $num,$num,`log($BNSZ)/log(2)`
+ $STUX $sp,$sp,$ovf
+
+ $PUSH r14,`4*$SIZE_T`($sp)
+ $PUSH r15,`5*$SIZE_T`($sp)
+ $PUSH r16,`6*$SIZE_T`($sp)
+ $PUSH r17,`7*$SIZE_T`($sp)
+ $PUSH r18,`8*$SIZE_T`($sp)
+ $PUSH r19,`9*$SIZE_T`($sp)
+ $PUSH r20,`10*$SIZE_T`($sp)
+ $PUSH r21,`11*$SIZE_T`($sp)
+ $PUSH r22,`12*$SIZE_T`($sp)
+ $PUSH r23,`13*$SIZE_T`($sp)
+ $PUSH r24,`14*$SIZE_T`($sp)
+ $PUSH r25,`15*$SIZE_T`($sp)
+
+ $LD $n0,0($n0) ; pull n0[0] value
+ addi $num,$num,-2 ; adjust $num for counter register
+
+ $LD $m0,0($bp) ; m0=bp[0]
+ $LD $aj,0($ap) ; ap[0]
+ addi $tp,$sp,$FRAME
+ $UMULL $lo0,$aj,$m0 ; ap[0]*bp[0]
+ $UMULH $hi0,$aj,$m0
+
+ $LD $aj,$BNSZ($ap) ; ap[1]
+ $LD $nj,0($np) ; np[0]
+
+ $UMULL $m1,$lo0,$n0 ; "tp[0]"*n0
+
+ $UMULL $alo,$aj,$m0 ; ap[1]*bp[0]
+ $UMULH $ahi,$aj,$m0
+
+ $UMULL $lo1,$nj,$m1 ; np[0]*m1
+ $UMULH $hi1,$nj,$m1
+ $LD $nj,$BNSZ($np) ; np[1]
+ addc $lo1,$lo1,$lo0
+ addze $hi1,$hi1
+
+ $UMULL $nlo,$nj,$m1 ; np[1]*m1
+ $UMULH $nhi,$nj,$m1
+
+ mtctr $num
+ li $j,`2*$BNSZ`
+.align 4
+L1st:
+ $LDX $aj,$ap,$j ; ap[j]
+ addc $lo0,$alo,$hi0
+ $LDX $nj,$np,$j ; np[j]
+ addze $hi0,$ahi
+ $UMULL $alo,$aj,$m0 ; ap[j]*bp[0]
+ addc $lo1,$nlo,$hi1
+ $UMULH $ahi,$aj,$m0
+ addze $hi1,$nhi
+ $UMULL $nlo,$nj,$m1 ; np[j]*m1
+ addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
+ $UMULH $nhi,$nj,$m1
+ addze $hi1,$hi1
+ $ST $lo1,0($tp) ; tp[j-1]
+
+ addi $j,$j,$BNSZ ; j++
+ addi $tp,$tp,$BNSZ ; tp++
+ bdnz- L1st
+;L1st
+ addc $lo0,$alo,$hi0
+ addze $hi0,$ahi
+
+ addc $lo1,$nlo,$hi1
+ addze $hi1,$nhi
+ addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[0]
+ addze $hi1,$hi1
+ $ST $lo1,0($tp) ; tp[j-1]
+
+ li $ovf,0
+ addc $hi1,$hi1,$hi0
+ addze $ovf,$ovf ; upmost overflow bit
+ $ST $hi1,$BNSZ($tp)
+
+ li $i,$BNSZ
+.align 4
+Louter:
+ $LDX $m0,$bp,$i ; m0=bp[i]
+ $LD $aj,0($ap) ; ap[0]
+ addi $tp,$sp,$FRAME
+ $LD $tj,$FRAME($sp) ; tp[0]
+ $UMULL $lo0,$aj,$m0 ; ap[0]*bp[i]
+ $UMULH $hi0,$aj,$m0
+ $LD $aj,$BNSZ($ap) ; ap[1]
+ $LD $nj,0($np) ; np[0]
+ addc $lo0,$lo0,$tj ; ap[0]*bp[i]+tp[0]
+ $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
+ addze $hi0,$hi0
+ $UMULL $m1,$lo0,$n0 ; tp[0]*n0
+ $UMULH $ahi,$aj,$m0
+ $UMULL $lo1,$nj,$m1 ; np[0]*m1
+ $UMULH $hi1,$nj,$m1
+ $LD $nj,$BNSZ($np) ; np[1]
+ addc $lo1,$lo1,$lo0
+ $UMULL $nlo,$nj,$m1 ; np[1]*m1
+ addze $hi1,$hi1
+ $UMULH $nhi,$nj,$m1
+
+ mtctr $num
+ li $j,`2*$BNSZ`
+.align 4
+Linner:
+ $LDX $aj,$ap,$j ; ap[j]
+ addc $lo0,$alo,$hi0
+ $LD $tj,$BNSZ($tp) ; tp[j]
+ addze $hi0,$ahi
+ $LDX $nj,$np,$j ; np[j]
+ addc $lo1,$nlo,$hi1
+ $UMULL $alo,$aj,$m0 ; ap[j]*bp[i]
+ addze $hi1,$nhi
+ $UMULH $ahi,$aj,$m0
+ addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
+ $UMULL $nlo,$nj,$m1 ; np[j]*m1
+ addze $hi0,$hi0
+ $UMULH $nhi,$nj,$m1
+ addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
+ addi $j,$j,$BNSZ ; j++
+ addze $hi1,$hi1
+ $ST $lo1,0($tp) ; tp[j-1]
+ addi $tp,$tp,$BNSZ ; tp++
+ bdnz- Linner
+;Linner
+ $LD $tj,$BNSZ($tp) ; tp[j]
+ addc $lo0,$alo,$hi0
+ addze $hi0,$ahi
+ addc $lo0,$lo0,$tj ; ap[j]*bp[i]+tp[j]
+ addze $hi0,$hi0
+
+ addc $lo1,$nlo,$hi1
+ addze $hi1,$nhi
+ addc $lo1,$lo1,$lo0 ; np[j]*m1+ap[j]*bp[i]+tp[j]
+ addze $hi1,$hi1
+ $ST $lo1,0($tp) ; tp[j-1]
+
+ addic $ovf,$ovf,-1 ; move upmost overflow to XER[CA]
+ li $ovf,0
+ adde $hi1,$hi1,$hi0
+ addze $ovf,$ovf
+ $ST $hi1,$BNSZ($tp)
+;
+ slwi $tj,$num,`log($BNSZ)/log(2)`
+ $UCMP $i,$tj
+ addi $i,$i,$BNSZ
+ ble- Louter
+
+ addi $num,$num,2 ; restore $num
+ subfc $j,$j,$j ; j=0 and "clear" XER[CA]
+ addi $tp,$sp,$FRAME
+ mtctr $num
+
+.align 4
+Lsub: $LDX $tj,$tp,$j
+ $LDX $nj,$np,$j
+ subfe $aj,$nj,$tj ; tp[j]-np[j]
+ $STX $aj,$rp,$j
+ addi $j,$j,$BNSZ
+ bdnz- Lsub
+
+ li $j,0
+ mtctr $num
+ subfe $ovf,$j,$ovf ; handle upmost overflow bit
+ and $ap,$tp,$ovf
+ andc $np,$rp,$ovf
+ or $ap,$ap,$np ; ap=borrow?tp:rp
+
+.align 4
+Lcopy: ; copy or in-place refresh
+ $LDX $tj,$ap,$j
+ $STX $tj,$rp,$j
+ $STX $j,$tp,$j ; zap at once
+ addi $j,$j,$BNSZ
+ bdnz- Lcopy
+
+ $POP r14,`4*$SIZE_T`($sp)
+ $POP r15,`5*$SIZE_T`($sp)
+ $POP r16,`6*$SIZE_T`($sp)
+ $POP r17,`7*$SIZE_T`($sp)
+ $POP r18,`8*$SIZE_T`($sp)
+ $POP r19,`9*$SIZE_T`($sp)
+ $POP r20,`10*$SIZE_T`($sp)
+ $POP r21,`11*$SIZE_T`($sp)
+ $POP r22,`12*$SIZE_T`($sp)
+ $POP r23,`13*$SIZE_T`($sp)
+ $POP r24,`14*$SIZE_T`($sp)
+ $POP r25,`15*$SIZE_T`($sp)
+ $POP $sp,0($sp)
+ li r3,1
+ blr
+ .long 0
+.asciz "Montgomery Multiplication for PPC, CRYPTOGAMS by <appro\@fy.chalmers.se>"
+___
+
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+print $code;
+close STDOUT;
diff --git a/crypto/bn/asm/ppc64-mont.pl b/crypto/bn/asm/ppc64-mont.pl
new file mode 100755
index 000000000000..3449b35855da
--- /dev/null
+++ b/crypto/bn/asm/ppc64-mont.pl
@@ -0,0 +1,918 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# December 2007
+
+# The reason for undertaken effort is basically following. Even though
+# Power 6 CPU operates at incredible 4.7GHz clock frequency, its PKI
+# performance was observed to be less than impressive, essentially as
+# fast as 1.8GHz PPC970, or 2.6 times(!) slower than one would hope.
+# Well, it's not surprising that IBM had to make some sacrifices to
+# boost the clock frequency that much, but no overall improvement?
+# Having observed how much difference did switching to FPU make on
+# UltraSPARC, playing same stunt on Power 6 appeared appropriate...
+# Unfortunately the resulting performance improvement is not as
+# impressive, ~30%, and in absolute terms is still very far from what
+# one would expect from 4.7GHz CPU. There is a chance that I'm doing
+# something wrong, but in the lack of assembler level micro-profiling
+# data or at least decent platform guide I can't tell... Or better
+# results might be achieved with VMX... Anyway, this module provides
+# *worse* performance on other PowerPC implementations, ~40-15% slower
+# on PPC970 depending on key length and ~40% slower on Power 5 for all
+# key lengths. As it's obviously inappropriate as "best all-round"
+# alternative, it has to be complemented with run-time CPU family
+# detection. Oh! It should also be noted that unlike other PowerPC
+# implementation IALU ppc-mont.pl module performs *suboptimaly* on
+# >=1024-bit key lengths on Power 6. It should also be noted that
+# *everything* said so far applies to 64-bit builds! As far as 32-bit
+# application executed on 64-bit CPU goes, this module is likely to
+# become preferred choice, because it's easy to adapt it for such
+# case and *is* faster than 32-bit ppc-mont.pl on *all* processors.
+
+# February 2008
+
+# Micro-profiling assisted optimization results in ~15% improvement
+# over original ppc64-mont.pl version, or overall ~50% improvement
+# over ppc.pl module on Power 6. If compared to ppc-mont.pl on same
+# Power 6 CPU, this module is 5-150% faster depending on key length,
+# [hereafter] more for longer keys. But if compared to ppc-mont.pl
+# on 1.8GHz PPC970, it's only 5-55% faster. Still far from impressive
+# in absolute terms, but it's apparently the way Power 6 is...
+
+$flavour = shift;
+
+if ($flavour =~ /32/) {
+ $SIZE_T=4;
+ $RZONE= 224;
+ $FRAME= $SIZE_T*12+8*12;
+ $fname= "bn_mul_mont_ppc64";
+
+ $STUX= "stwux"; # store indexed and update
+ $PUSH= "stw";
+ $POP= "lwz";
+ die "not implemented yet";
+} elsif ($flavour =~ /64/) {
+ $SIZE_T=8;
+ $RZONE= 288;
+ $FRAME= $SIZE_T*12+8*12;
+ $fname= "bn_mul_mont";
+
+ # same as above, but 64-bit mnemonics...
+ $STUX= "stdux"; # store indexed and update
+ $PUSH= "std";
+ $POP= "ld";
+} else { die "nonsense $flavour"; }
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
+( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
+die "can't locate ppc-xlate.pl";
+
+open STDOUT,"| $^X $xlate $flavour ".shift || die "can't call $xlate: $!";
+
+$FRAME=($FRAME+63)&~63;
+$TRANSFER=16*8;
+
+$carry="r0";
+$sp="r1";
+$toc="r2";
+$rp="r3"; $ovf="r3";
+$ap="r4";
+$bp="r5";
+$np="r6";
+$n0="r7";
+$num="r8";
+$rp="r9"; # $rp is reassigned
+$tp="r10";
+$j="r11";
+$i="r12";
+# non-volatile registers
+$nap_d="r14"; # interleaved ap and np in double format
+$a0="r15"; # ap[0]
+$t0="r16"; # temporary registers
+$t1="r17";
+$t2="r18";
+$t3="r19";
+$t4="r20";
+$t5="r21";
+$t6="r22";
+$t7="r23";
+
+# PPC offers enough register bank capacity to unroll inner loops twice
+#
+# ..A3A2A1A0
+# dcba
+# -----------
+# A0a
+# A0b
+# A0c
+# A0d
+# A1a
+# A1b
+# A1c
+# A1d
+# A2a
+# A2b
+# A2c
+# A2d
+# A3a
+# A3b
+# A3c
+# A3d
+# ..a
+# ..b
+#
+$ba="f0"; $bb="f1"; $bc="f2"; $bd="f3";
+$na="f4"; $nb="f5"; $nc="f6"; $nd="f7";
+$dota="f8"; $dotb="f9";
+$A0="f10"; $A1="f11"; $A2="f12"; $A3="f13";
+$N0="f14"; $N1="f15"; $N2="f16"; $N3="f17";
+$T0a="f18"; $T0b="f19";
+$T1a="f20"; $T1b="f21";
+$T2a="f22"; $T2b="f23";
+$T3a="f24"; $T3b="f25";
+
+# sp----------->+-------------------------------+
+# | saved sp |
+# +-------------------------------+
+# | |
+# +-------------------------------+
+# | 10 saved gpr, r14-r23 |
+# . .
+# . .
+# +12*size_t +-------------------------------+
+# | 12 saved fpr, f14-f25 |
+# . .
+# . .
+# +12*8 +-------------------------------+
+# | padding to 64 byte boundary |
+# . .
+# +X +-------------------------------+
+# | 16 gpr<->fpr transfer zone |
+# . .
+# . .
+# +16*8 +-------------------------------+
+# | __int64 tmp[-1] |
+# +-------------------------------+
+# | __int64 tmp[num] |
+# . .
+# . .
+# . .
+# +(num+1)*8 +-------------------------------+
+# | padding to 64 byte boundary |
+# . .
+# +X +-------------------------------+
+# | double nap_d[4*num] |
+# . .
+# . .
+# . .
+# +-------------------------------+
+
+$code=<<___;
+.machine "any"
+.text
+
+.globl .$fname
+.align 5
+.$fname:
+ cmpwi $num,4
+ mr $rp,r3 ; $rp is reassigned
+ li r3,0 ; possible "not handled" return code
+ bltlr-
+ andi. r0,$num,1 ; $num has to be even
+ bnelr-
+
+ slwi $num,$num,3 ; num*=8
+ li $i,-4096
+ slwi $tp,$num,2 ; place for {an}p_{lh}[num], i.e. 4*num
+ add $tp,$tp,$num ; place for tp[num+1]
+ addi $tp,$tp,`$FRAME+$TRANSFER+8+64+$RZONE`
+ subf $tp,$tp,$sp ; $sp-$tp
+ and $tp,$tp,$i ; minimize TLB usage
+ subf $tp,$sp,$tp ; $tp-$sp
+ $STUX $sp,$sp,$tp ; alloca
+
+ $PUSH r14,`2*$SIZE_T`($sp)
+ $PUSH r15,`3*$SIZE_T`($sp)
+ $PUSH r16,`4*$SIZE_T`($sp)
+ $PUSH r17,`5*$SIZE_T`($sp)
+ $PUSH r18,`6*$SIZE_T`($sp)
+ $PUSH r19,`7*$SIZE_T`($sp)
+ $PUSH r20,`8*$SIZE_T`($sp)
+ $PUSH r21,`9*$SIZE_T`($sp)
+ $PUSH r22,`10*$SIZE_T`($sp)
+ $PUSH r23,`11*$SIZE_T`($sp)
+ stfd f14,`12*$SIZE_T+0`($sp)
+ stfd f15,`12*$SIZE_T+8`($sp)
+ stfd f16,`12*$SIZE_T+16`($sp)
+ stfd f17,`12*$SIZE_T+24`($sp)
+ stfd f18,`12*$SIZE_T+32`($sp)
+ stfd f19,`12*$SIZE_T+40`($sp)
+ stfd f20,`12*$SIZE_T+48`($sp)
+ stfd f21,`12*$SIZE_T+56`($sp)
+ stfd f22,`12*$SIZE_T+64`($sp)
+ stfd f23,`12*$SIZE_T+72`($sp)
+ stfd f24,`12*$SIZE_T+80`($sp)
+ stfd f25,`12*$SIZE_T+88`($sp)
+
+ ld $a0,0($ap) ; pull ap[0] value
+ ld $n0,0($n0) ; pull n0[0] value
+ ld $t3,0($bp) ; bp[0]
+
+ addi $tp,$sp,`$FRAME+$TRANSFER+8+64`
+ li $i,-64
+ add $nap_d,$tp,$num
+ and $nap_d,$nap_d,$i ; align to 64 bytes
+
+ mulld $t7,$a0,$t3 ; ap[0]*bp[0]
+ ; nap_d is off by 1, because it's used with stfdu/lfdu
+ addi $nap_d,$nap_d,-8
+ srwi $j,$num,`3+1` ; counter register, num/2
+ mulld $t7,$t7,$n0 ; tp[0]*n0
+ addi $j,$j,-1
+ addi $tp,$sp,`$FRAME+$TRANSFER-8`
+ li $carry,0
+ mtctr $j
+
+ ; transfer bp[0] to FPU as 4x16-bit values
+ extrdi $t0,$t3,16,48
+ extrdi $t1,$t3,16,32
+ extrdi $t2,$t3,16,16
+ extrdi $t3,$t3,16,0
+ std $t0,`$FRAME+0`($sp)
+ std $t1,`$FRAME+8`($sp)
+ std $t2,`$FRAME+16`($sp)
+ std $t3,`$FRAME+24`($sp)
+ ; transfer (ap[0]*bp[0])*n0 to FPU as 4x16-bit values
+ extrdi $t4,$t7,16,48
+ extrdi $t5,$t7,16,32
+ extrdi $t6,$t7,16,16
+ extrdi $t7,$t7,16,0
+ std $t4,`$FRAME+32`($sp)
+ std $t5,`$FRAME+40`($sp)
+ std $t6,`$FRAME+48`($sp)
+ std $t7,`$FRAME+56`($sp)
+ lwz $t0,4($ap) ; load a[j] as 32-bit word pair
+ lwz $t1,0($ap)
+ lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
+ lwz $t3,8($ap)
+ lwz $t4,4($np) ; load n[j] as 32-bit word pair
+ lwz $t5,0($np)
+ lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
+ lwz $t7,8($np)
+ lfd $ba,`$FRAME+0`($sp)
+ lfd $bb,`$FRAME+8`($sp)
+ lfd $bc,`$FRAME+16`($sp)
+ lfd $bd,`$FRAME+24`($sp)
+ lfd $na,`$FRAME+32`($sp)
+ lfd $nb,`$FRAME+40`($sp)
+ lfd $nc,`$FRAME+48`($sp)
+ lfd $nd,`$FRAME+56`($sp)
+ std $t0,`$FRAME+64`($sp)
+ std $t1,`$FRAME+72`($sp)
+ std $t2,`$FRAME+80`($sp)
+ std $t3,`$FRAME+88`($sp)
+ std $t4,`$FRAME+96`($sp)
+ std $t5,`$FRAME+104`($sp)
+ std $t6,`$FRAME+112`($sp)
+ std $t7,`$FRAME+120`($sp)
+ fcfid $ba,$ba
+ fcfid $bb,$bb
+ fcfid $bc,$bc
+ fcfid $bd,$bd
+ fcfid $na,$na
+ fcfid $nb,$nb
+ fcfid $nc,$nc
+ fcfid $nd,$nd
+
+ lfd $A0,`$FRAME+64`($sp)
+ lfd $A1,`$FRAME+72`($sp)
+ lfd $A2,`$FRAME+80`($sp)
+ lfd $A3,`$FRAME+88`($sp)
+ lfd $N0,`$FRAME+96`($sp)
+ lfd $N1,`$FRAME+104`($sp)
+ lfd $N2,`$FRAME+112`($sp)
+ lfd $N3,`$FRAME+120`($sp)
+ fcfid $A0,$A0
+ fcfid $A1,$A1
+ fcfid $A2,$A2
+ fcfid $A3,$A3
+ fcfid $N0,$N0
+ fcfid $N1,$N1
+ fcfid $N2,$N2
+ fcfid $N3,$N3
+ addi $ap,$ap,16
+ addi $np,$np,16
+
+ fmul $T1a,$A1,$ba
+ fmul $T1b,$A1,$bb
+ stfd $A0,8($nap_d) ; save a[j] in double format
+ stfd $A1,16($nap_d)
+ fmul $T2a,$A2,$ba
+ fmul $T2b,$A2,$bb
+ stfd $A2,24($nap_d) ; save a[j+1] in double format
+ stfd $A3,32($nap_d)
+ fmul $T3a,$A3,$ba
+ fmul $T3b,$A3,$bb
+ stfd $N0,40($nap_d) ; save n[j] in double format
+ stfd $N1,48($nap_d)
+ fmul $T0a,$A0,$ba
+ fmul $T0b,$A0,$bb
+ stfd $N2,56($nap_d) ; save n[j+1] in double format
+ stfdu $N3,64($nap_d)
+
+ fmadd $T1a,$A0,$bc,$T1a
+ fmadd $T1b,$A0,$bd,$T1b
+ fmadd $T2a,$A1,$bc,$T2a
+ fmadd $T2b,$A1,$bd,$T2b
+ fmadd $T3a,$A2,$bc,$T3a
+ fmadd $T3b,$A2,$bd,$T3b
+ fmul $dota,$A3,$bc
+ fmul $dotb,$A3,$bd
+
+ fmadd $T1a,$N1,$na,$T1a
+ fmadd $T1b,$N1,$nb,$T1b
+ fmadd $T2a,$N2,$na,$T2a
+ fmadd $T2b,$N2,$nb,$T2b
+ fmadd $T3a,$N3,$na,$T3a
+ fmadd $T3b,$N3,$nb,$T3b
+ fmadd $T0a,$N0,$na,$T0a
+ fmadd $T0b,$N0,$nb,$T0b
+
+ fmadd $T1a,$N0,$nc,$T1a
+ fmadd $T1b,$N0,$nd,$T1b
+ fmadd $T2a,$N1,$nc,$T2a
+ fmadd $T2b,$N1,$nd,$T2b
+ fmadd $T3a,$N2,$nc,$T3a
+ fmadd $T3b,$N2,$nd,$T3b
+ fmadd $dota,$N3,$nc,$dota
+ fmadd $dotb,$N3,$nd,$dotb
+
+ fctid $T0a,$T0a
+ fctid $T0b,$T0b
+ fctid $T1a,$T1a
+ fctid $T1b,$T1b
+ fctid $T2a,$T2a
+ fctid $T2b,$T2b
+ fctid $T3a,$T3a
+ fctid $T3b,$T3b
+
+ stfd $T0a,`$FRAME+0`($sp)
+ stfd $T0b,`$FRAME+8`($sp)
+ stfd $T1a,`$FRAME+16`($sp)
+ stfd $T1b,`$FRAME+24`($sp)
+ stfd $T2a,`$FRAME+32`($sp)
+ stfd $T2b,`$FRAME+40`($sp)
+ stfd $T3a,`$FRAME+48`($sp)
+ stfd $T3b,`$FRAME+56`($sp)
+
+.align 5
+L1st:
+ lwz $t0,4($ap) ; load a[j] as 32-bit word pair
+ lwz $t1,0($ap)
+ lwz $t2,12($ap) ; load a[j+1] as 32-bit word pair
+ lwz $t3,8($ap)
+ lwz $t4,4($np) ; load n[j] as 32-bit word pair
+ lwz $t5,0($np)
+ lwz $t6,12($np) ; load n[j+1] as 32-bit word pair
+ lwz $t7,8($np)
+ std $t0,`$FRAME+64`($sp)
+ std $t1,`$FRAME+72`($sp)
+ std $t2,`$FRAME+80`($sp)
+ std $t3,`$FRAME+88`($sp)
+ std $t4,`$FRAME+96`($sp)
+ std $t5,`$FRAME+104`($sp)
+ std $t6,`$FRAME+112`($sp)
+ std $t7,`$FRAME+120`($sp)
+ ld $t0,`$FRAME+0`($sp)
+ ld $t1,`$FRAME+8`($sp)
+ ld $t2,`$FRAME+16`($sp)
+ ld $t3,`$FRAME+24`($sp)
+ ld $t4,`$FRAME+32`($sp)
+ ld $t5,`$FRAME+40`($sp)
+ ld $t6,`$FRAME+48`($sp)
+ ld $t7,`$FRAME+56`($sp)
+ lfd $A0,`$FRAME+64`($sp)
+ lfd $A1,`$FRAME+72`($sp)
+ lfd $A2,`$FRAME+80`($sp)
+ lfd $A3,`$FRAME+88`($sp)
+ lfd $N0,`$FRAME+96`($sp)
+ lfd $N1,`$FRAME+104`($sp)
+ lfd $N2,`$FRAME+112`($sp)
+ lfd $N3,`$FRAME+120`($sp)
+ fcfid $A0,$A0
+ fcfid $A1,$A1
+ fcfid $A2,$A2
+ fcfid $A3,$A3
+ fcfid $N0,$N0
+ fcfid $N1,$N1
+ fcfid $N2,$N2
+ fcfid $N3,$N3
+ addi $ap,$ap,16
+ addi $np,$np,16
+
+ fmul $T1a,$A1,$ba
+ fmul $T1b,$A1,$bb
+ fmul $T2a,$A2,$ba
+ fmul $T2b,$A2,$bb
+ stfd $A0,8($nap_d) ; save a[j] in double format
+ stfd $A1,16($nap_d)
+ fmul $T3a,$A3,$ba
+ fmul $T3b,$A3,$bb
+ fmadd $T0a,$A0,$ba,$dota
+ fmadd $T0b,$A0,$bb,$dotb
+ stfd $A2,24($nap_d) ; save a[j+1] in double format
+ stfd $A3,32($nap_d)
+
+ fmadd $T1a,$A0,$bc,$T1a
+ fmadd $T1b,$A0,$bd,$T1b
+ fmadd $T2a,$A1,$bc,$T2a
+ fmadd $T2b,$A1,$bd,$T2b
+ stfd $N0,40($nap_d) ; save n[j] in double format
+ stfd $N1,48($nap_d)
+ fmadd $T3a,$A2,$bc,$T3a
+ fmadd $T3b,$A2,$bd,$T3b
+ add $t0,$t0,$carry ; can not overflow
+ fmul $dota,$A3,$bc
+ fmul $dotb,$A3,$bd
+ stfd $N2,56($nap_d) ; save n[j+1] in double format
+ stfdu $N3,64($nap_d)
+ srdi $carry,$t0,16
+ add $t1,$t1,$carry
+ srdi $carry,$t1,16
+
+ fmadd $T1a,$N1,$na,$T1a
+ fmadd $T1b,$N1,$nb,$T1b
+ insrdi $t0,$t1,16,32
+ fmadd $T2a,$N2,$na,$T2a
+ fmadd $T2b,$N2,$nb,$T2b
+ add $t2,$t2,$carry
+ fmadd $T3a,$N3,$na,$T3a
+ fmadd $T3b,$N3,$nb,$T3b
+ srdi $carry,$t2,16
+ fmadd $T0a,$N0,$na,$T0a
+ fmadd $T0b,$N0,$nb,$T0b
+ insrdi $t0,$t2,16,16
+ add $t3,$t3,$carry
+ srdi $carry,$t3,16
+
+ fmadd $T1a,$N0,$nc,$T1a
+ fmadd $T1b,$N0,$nd,$T1b
+ insrdi $t0,$t3,16,0 ; 0..63 bits
+ fmadd $T2a,$N1,$nc,$T2a
+ fmadd $T2b,$N1,$nd,$T2b
+ add $t4,$t4,$carry
+ fmadd $T3a,$N2,$nc,$T3a
+ fmadd $T3b,$N2,$nd,$T3b
+ srdi $carry,$t4,16
+ fmadd $dota,$N3,$nc,$dota
+ fmadd $dotb,$N3,$nd,$dotb
+ add $t5,$t5,$carry
+ srdi $carry,$t5,16
+ insrdi $t4,$t5,16,32
+
+ fctid $T0a,$T0a
+ fctid $T0b,$T0b
+ add $t6,$t6,$carry
+ fctid $T1a,$T1a
+ fctid $T1b,$T1b
+ srdi $carry,$t6,16
+ fctid $T2a,$T2a
+ fctid $T2b,$T2b
+ insrdi $t4,$t6,16,16
+ fctid $T3a,$T3a
+ fctid $T3b,$T3b
+ add $t7,$t7,$carry
+ insrdi $t4,$t7,16,0 ; 64..127 bits
+ srdi $carry,$t7,16 ; upper 33 bits
+
+ stfd $T0a,`$FRAME+0`($sp)
+ stfd $T0b,`$FRAME+8`($sp)
+ stfd $T1a,`$FRAME+16`($sp)
+ stfd $T1b,`$FRAME+24`($sp)
+ stfd $T2a,`$FRAME+32`($sp)
+ stfd $T2b,`$FRAME+40`($sp)
+ stfd $T3a,`$FRAME+48`($sp)
+ stfd $T3b,`$FRAME+56`($sp)
+ std $t0,8($tp) ; tp[j-1]
+ stdu $t4,16($tp) ; tp[j]
+ bdnz- L1st
+
+ fctid $dota,$dota
+ fctid $dotb,$dotb
+
+ ld $t0,`$FRAME+0`($sp)
+ ld $t1,`$FRAME+8`($sp)
+ ld $t2,`$FRAME+16`($sp)
+ ld $t3,`$FRAME+24`($sp)
+ ld $t4,`$FRAME+32`($sp)
+ ld $t5,`$FRAME+40`($sp)
+ ld $t6,`$FRAME+48`($sp)
+ ld $t7,`$FRAME+56`($sp)
+ stfd $dota,`$FRAME+64`($sp)
+ stfd $dotb,`$FRAME+72`($sp)
+
+ add $t0,$t0,$carry ; can not overflow
+ srdi $carry,$t0,16
+ add $t1,$t1,$carry
+ srdi $carry,$t1,16
+ insrdi $t0,$t1,16,32
+ add $t2,$t2,$carry
+ srdi $carry,$t2,16
+ insrdi $t0,$t2,16,16
+ add $t3,$t3,$carry
+ srdi $carry,$t3,16
+ insrdi $t0,$t3,16,0 ; 0..63 bits
+ add $t4,$t4,$carry
+ srdi $carry,$t4,16
+ add $t5,$t5,$carry
+ srdi $carry,$t5,16
+ insrdi $t4,$t5,16,32
+ add $t6,$t6,$carry
+ srdi $carry,$t6,16
+ insrdi $t4,$t6,16,16
+ add $t7,$t7,$carry
+ insrdi $t4,$t7,16,0 ; 64..127 bits
+ srdi $carry,$t7,16 ; upper 33 bits
+ ld $t6,`$FRAME+64`($sp)
+ ld $t7,`$FRAME+72`($sp)
+
+ std $t0,8($tp) ; tp[j-1]
+ stdu $t4,16($tp) ; tp[j]
+
+ add $t6,$t6,$carry ; can not overflow
+ srdi $carry,$t6,16
+ add $t7,$t7,$carry
+ insrdi $t6,$t7,48,0
+ srdi $ovf,$t7,48
+ std $t6,8($tp) ; tp[num-1]
+
+ slwi $t7,$num,2
+ subf $nap_d,$t7,$nap_d ; rewind pointer
+
+ li $i,8 ; i=1
+.align 5
+Louter:
+ ldx $t3,$bp,$i ; bp[i]
+ ld $t6,`$FRAME+$TRANSFER+8`($sp) ; tp[0]
+ mulld $t7,$a0,$t3 ; ap[0]*bp[i]
+
+ addi $tp,$sp,`$FRAME+$TRANSFER`
+ add $t7,$t7,$t6 ; ap[0]*bp[i]+tp[0]
+ li $carry,0
+ mulld $t7,$t7,$n0 ; tp[0]*n0
+ mtctr $j
+
+ ; transfer bp[i] to FPU as 4x16-bit values
+ extrdi $t0,$t3,16,48
+ extrdi $t1,$t3,16,32
+ extrdi $t2,$t3,16,16
+ extrdi $t3,$t3,16,0
+ std $t0,`$FRAME+0`($sp)
+ std $t1,`$FRAME+8`($sp)
+ std $t2,`$FRAME+16`($sp)
+ std $t3,`$FRAME+24`($sp)
+ ; transfer (ap[0]*bp[i]+tp[0])*n0 to FPU as 4x16-bit values
+ extrdi $t4,$t7,16,48
+ extrdi $t5,$t7,16,32
+ extrdi $t6,$t7,16,16
+ extrdi $t7,$t7,16,0
+ std $t4,`$FRAME+32`($sp)
+ std $t5,`$FRAME+40`($sp)
+ std $t6,`$FRAME+48`($sp)
+ std $t7,`$FRAME+56`($sp)
+
+ lfd $A0,8($nap_d) ; load a[j] in double format
+ lfd $A1,16($nap_d)
+ lfd $A2,24($nap_d) ; load a[j+1] in double format
+ lfd $A3,32($nap_d)
+ lfd $N0,40($nap_d) ; load n[j] in double format
+ lfd $N1,48($nap_d)
+ lfd $N2,56($nap_d) ; load n[j+1] in double format
+ lfdu $N3,64($nap_d)
+
+ lfd $ba,`$FRAME+0`($sp)
+ lfd $bb,`$FRAME+8`($sp)
+ lfd $bc,`$FRAME+16`($sp)
+ lfd $bd,`$FRAME+24`($sp)
+ lfd $na,`$FRAME+32`($sp)
+ lfd $nb,`$FRAME+40`($sp)
+ lfd $nc,`$FRAME+48`($sp)
+ lfd $nd,`$FRAME+56`($sp)
+
+ fcfid $ba,$ba
+ fcfid $bb,$bb
+ fcfid $bc,$bc
+ fcfid $bd,$bd
+ fcfid $na,$na
+ fcfid $nb,$nb
+ fcfid $nc,$nc
+ fcfid $nd,$nd
+
+ fmul $T1a,$A1,$ba
+ fmul $T1b,$A1,$bb
+ fmul $T2a,$A2,$ba
+ fmul $T2b,$A2,$bb
+ fmul $T3a,$A3,$ba
+ fmul $T3b,$A3,$bb
+ fmul $T0a,$A0,$ba
+ fmul $T0b,$A0,$bb
+
+ fmadd $T1a,$A0,$bc,$T1a
+ fmadd $T1b,$A0,$bd,$T1b
+ fmadd $T2a,$A1,$bc,$T2a
+ fmadd $T2b,$A1,$bd,$T2b
+ fmadd $T3a,$A2,$bc,$T3a
+ fmadd $T3b,$A2,$bd,$T3b
+ fmul $dota,$A3,$bc
+ fmul $dotb,$A3,$bd
+
+ fmadd $T1a,$N1,$na,$T1a
+ fmadd $T1b,$N1,$nb,$T1b
+ lfd $A0,8($nap_d) ; load a[j] in double format
+ lfd $A1,16($nap_d)
+ fmadd $T2a,$N2,$na,$T2a
+ fmadd $T2b,$N2,$nb,$T2b
+ lfd $A2,24($nap_d) ; load a[j+1] in double format
+ lfd $A3,32($nap_d)
+ fmadd $T3a,$N3,$na,$T3a
+ fmadd $T3b,$N3,$nb,$T3b
+ fmadd $T0a,$N0,$na,$T0a
+ fmadd $T0b,$N0,$nb,$T0b
+
+ fmadd $T1a,$N0,$nc,$T1a
+ fmadd $T1b,$N0,$nd,$T1b
+ fmadd $T2a,$N1,$nc,$T2a
+ fmadd $T2b,$N1,$nd,$T2b
+ fmadd $T3a,$N2,$nc,$T3a
+ fmadd $T3b,$N2,$nd,$T3b
+ fmadd $dota,$N3,$nc,$dota
+ fmadd $dotb,$N3,$nd,$dotb
+
+ fctid $T0a,$T0a
+ fctid $T0b,$T0b
+ fctid $T1a,$T1a
+ fctid $T1b,$T1b
+ fctid $T2a,$T2a
+ fctid $T2b,$T2b
+ fctid $T3a,$T3a
+ fctid $T3b,$T3b
+
+ stfd $T0a,`$FRAME+0`($sp)
+ stfd $T0b,`$FRAME+8`($sp)
+ stfd $T1a,`$FRAME+16`($sp)
+ stfd $T1b,`$FRAME+24`($sp)
+ stfd $T2a,`$FRAME+32`($sp)
+ stfd $T2b,`$FRAME+40`($sp)
+ stfd $T3a,`$FRAME+48`($sp)
+ stfd $T3b,`$FRAME+56`($sp)
+
+.align 5
+Linner:
+ fmul $T1a,$A1,$ba
+ fmul $T1b,$A1,$bb
+ fmul $T2a,$A2,$ba
+ fmul $T2b,$A2,$bb
+ lfd $N0,40($nap_d) ; load n[j] in double format
+ lfd $N1,48($nap_d)
+ fmul $T3a,$A3,$ba
+ fmul $T3b,$A3,$bb
+ fmadd $T0a,$A0,$ba,$dota
+ fmadd $T0b,$A0,$bb,$dotb
+ lfd $N2,56($nap_d) ; load n[j+1] in double format
+ lfdu $N3,64($nap_d)
+
+ fmadd $T1a,$A0,$bc,$T1a
+ fmadd $T1b,$A0,$bd,$T1b
+ fmadd $T2a,$A1,$bc,$T2a
+ fmadd $T2b,$A1,$bd,$T2b
+ lfd $A0,8($nap_d) ; load a[j] in double format
+ lfd $A1,16($nap_d)
+ fmadd $T3a,$A2,$bc,$T3a
+ fmadd $T3b,$A2,$bd,$T3b
+ fmul $dota,$A3,$bc
+ fmul $dotb,$A3,$bd
+ lfd $A2,24($nap_d) ; load a[j+1] in double format
+ lfd $A3,32($nap_d)
+
+ fmadd $T1a,$N1,$na,$T1a
+ fmadd $T1b,$N1,$nb,$T1b
+ ld $t0,`$FRAME+0`($sp)
+ ld $t1,`$FRAME+8`($sp)
+ fmadd $T2a,$N2,$na,$T2a
+ fmadd $T2b,$N2,$nb,$T2b
+ ld $t2,`$FRAME+16`($sp)
+ ld $t3,`$FRAME+24`($sp)
+ fmadd $T3a,$N3,$na,$T3a
+ fmadd $T3b,$N3,$nb,$T3b
+ add $t0,$t0,$carry ; can not overflow
+ ld $t4,`$FRAME+32`($sp)
+ ld $t5,`$FRAME+40`($sp)
+ fmadd $T0a,$N0,$na,$T0a
+ fmadd $T0b,$N0,$nb,$T0b
+ srdi $carry,$t0,16
+ add $t1,$t1,$carry
+ srdi $carry,$t1,16
+ ld $t6,`$FRAME+48`($sp)
+ ld $t7,`$FRAME+56`($sp)
+
+ fmadd $T1a,$N0,$nc,$T1a
+ fmadd $T1b,$N0,$nd,$T1b
+ insrdi $t0,$t1,16,32
+ ld $t1,8($tp) ; tp[j]
+ fmadd $T2a,$N1,$nc,$T2a
+ fmadd $T2b,$N1,$nd,$T2b
+ add $t2,$t2,$carry
+ fmadd $T3a,$N2,$nc,$T3a
+ fmadd $T3b,$N2,$nd,$T3b
+ srdi $carry,$t2,16
+ insrdi $t0,$t2,16,16
+ fmadd $dota,$N3,$nc,$dota
+ fmadd $dotb,$N3,$nd,$dotb
+ add $t3,$t3,$carry
+ ldu $t2,16($tp) ; tp[j+1]
+ srdi $carry,$t3,16
+ insrdi $t0,$t3,16,0 ; 0..63 bits
+ add $t4,$t4,$carry
+
+ fctid $T0a,$T0a
+ fctid $T0b,$T0b
+ srdi $carry,$t4,16
+ fctid $T1a,$T1a
+ fctid $T1b,$T1b
+ add $t5,$t5,$carry
+ fctid $T2a,$T2a
+ fctid $T2b,$T2b
+ srdi $carry,$t5,16
+ insrdi $t4,$t5,16,32
+ fctid $T3a,$T3a
+ fctid $T3b,$T3b
+ add $t6,$t6,$carry
+ srdi $carry,$t6,16
+ insrdi $t4,$t6,16,16
+
+ stfd $T0a,`$FRAME+0`($sp)
+ stfd $T0b,`$FRAME+8`($sp)
+ add $t7,$t7,$carry
+ addc $t3,$t0,$t1
+ stfd $T1a,`$FRAME+16`($sp)
+ stfd $T1b,`$FRAME+24`($sp)
+ insrdi $t4,$t7,16,0 ; 64..127 bits
+ srdi $carry,$t7,16 ; upper 33 bits
+ stfd $T2a,`$FRAME+32`($sp)
+ stfd $T2b,`$FRAME+40`($sp)
+ adde $t5,$t4,$t2
+ stfd $T3a,`$FRAME+48`($sp)
+ stfd $T3b,`$FRAME+56`($sp)
+ addze $carry,$carry
+ std $t3,-16($tp) ; tp[j-1]
+ std $t5,-8($tp) ; tp[j]
+ bdnz- Linner
+
+ fctid $dota,$dota
+ fctid $dotb,$dotb
+ ld $t0,`$FRAME+0`($sp)
+ ld $t1,`$FRAME+8`($sp)
+ ld $t2,`$FRAME+16`($sp)
+ ld $t3,`$FRAME+24`($sp)
+ ld $t4,`$FRAME+32`($sp)
+ ld $t5,`$FRAME+40`($sp)
+ ld $t6,`$FRAME+48`($sp)
+ ld $t7,`$FRAME+56`($sp)
+ stfd $dota,`$FRAME+64`($sp)
+ stfd $dotb,`$FRAME+72`($sp)
+
+ add $t0,$t0,$carry ; can not overflow
+ srdi $carry,$t0,16
+ add $t1,$t1,$carry
+ srdi $carry,$t1,16
+ insrdi $t0,$t1,16,32
+ add $t2,$t2,$carry
+ ld $t1,8($tp) ; tp[j]
+ srdi $carry,$t2,16
+ insrdi $t0,$t2,16,16
+ add $t3,$t3,$carry
+ ldu $t2,16($tp) ; tp[j+1]
+ srdi $carry,$t3,16
+ insrdi $t0,$t3,16,0 ; 0..63 bits
+ add $t4,$t4,$carry
+ srdi $carry,$t4,16
+ add $t5,$t5,$carry
+ srdi $carry,$t5,16
+ insrdi $t4,$t5,16,32
+ add $t6,$t6,$carry
+ srdi $carry,$t6,16
+ insrdi $t4,$t6,16,16
+ add $t7,$t7,$carry
+ insrdi $t4,$t7,16,0 ; 64..127 bits
+ srdi $carry,$t7,16 ; upper 33 bits
+ ld $t6,`$FRAME+64`($sp)
+ ld $t7,`$FRAME+72`($sp)
+
+ addc $t3,$t0,$t1
+ adde $t5,$t4,$t2
+ addze $carry,$carry
+
+ std $t3,-16($tp) ; tp[j-1]
+ std $t5,-8($tp) ; tp[j]
+
+ add $carry,$carry,$ovf ; comsume upmost overflow
+ add $t6,$t6,$carry ; can not overflow
+ srdi $carry,$t6,16
+ add $t7,$t7,$carry
+ insrdi $t6,$t7,48,0
+ srdi $ovf,$t7,48
+ std $t6,0($tp) ; tp[num-1]
+
+ slwi $t7,$num,2
+ addi $i,$i,8
+ subf $nap_d,$t7,$nap_d ; rewind pointer
+ cmpw $i,$num
+ blt- Louter
+
+ subf $np,$num,$np ; rewind np
+ addi $j,$j,1 ; restore counter
+ subfc $i,$i,$i ; j=0 and "clear" XER[CA]
+ addi $tp,$sp,`$FRAME+$TRANSFER+8`
+ addi $t4,$sp,`$FRAME+$TRANSFER+16`
+ addi $t5,$np,8
+ addi $t6,$rp,8
+ mtctr $j
+
+.align 4
+Lsub: ldx $t0,$tp,$i
+ ldx $t1,$np,$i
+ ldx $t2,$t4,$i
+ ldx $t3,$t5,$i
+ subfe $t0,$t1,$t0 ; tp[j]-np[j]
+ subfe $t2,$t3,$t2 ; tp[j+1]-np[j+1]
+ stdx $t0,$rp,$i
+ stdx $t2,$t6,$i
+ addi $i,$i,16
+ bdnz- Lsub
+
+ li $i,0
+ subfe $ovf,$i,$ovf ; handle upmost overflow bit
+ and $ap,$tp,$ovf
+ andc $np,$rp,$ovf
+ or $ap,$ap,$np ; ap=borrow?tp:rp
+ addi $t7,$ap,8
+ mtctr $j
+
+.align 4
+Lcopy: ; copy or in-place refresh
+ ldx $t0,$ap,$i
+ ldx $t1,$t7,$i
+ std $i,8($nap_d) ; zap nap_d
+ std $i,16($nap_d)
+ std $i,24($nap_d)
+ std $i,32($nap_d)
+ std $i,40($nap_d)
+ std $i,48($nap_d)
+ std $i,56($nap_d)
+ stdu $i,64($nap_d)
+ stdx $t0,$rp,$i
+ stdx $t1,$t6,$i
+ stdx $i,$tp,$i ; zap tp at once
+ stdx $i,$t4,$i
+ addi $i,$i,16
+ bdnz- Lcopy
+
+ $POP r14,`2*$SIZE_T`($sp)
+ $POP r15,`3*$SIZE_T`($sp)
+ $POP r16,`4*$SIZE_T`($sp)
+ $POP r17,`5*$SIZE_T`($sp)
+ $POP r18,`6*$SIZE_T`($sp)
+ $POP r19,`7*$SIZE_T`($sp)
+ $POP r20,`8*$SIZE_T`($sp)
+ $POP r21,`9*$SIZE_T`($sp)
+ $POP r22,`10*$SIZE_T`($sp)
+ $POP r23,`11*$SIZE_T`($sp)
+ lfd f14,`12*$SIZE_T+0`($sp)
+ lfd f15,`12*$SIZE_T+8`($sp)
+ lfd f16,`12*$SIZE_T+16`($sp)
+ lfd f17,`12*$SIZE_T+24`($sp)
+ lfd f18,`12*$SIZE_T+32`($sp)
+ lfd f19,`12*$SIZE_T+40`($sp)
+ lfd f20,`12*$SIZE_T+48`($sp)
+ lfd f21,`12*$SIZE_T+56`($sp)
+ lfd f22,`12*$SIZE_T+64`($sp)
+ lfd f23,`12*$SIZE_T+72`($sp)
+ lfd f24,`12*$SIZE_T+80`($sp)
+ lfd f25,`12*$SIZE_T+88`($sp)
+ $POP $sp,0($sp)
+ li r3,1 ; signal "handled"
+ blr
+ .long 0
+.asciz "Montgomery Multiplication for PPC64, CRYPTOGAMS by <appro\@fy.chalmers.se>"
+___
+
+$code =~ s/\`([^\`]*)\`/eval $1/gem;
+print $code;
+close STDOUT;
diff --git a/crypto/bn/asm/s390x-mont.pl b/crypto/bn/asm/s390x-mont.pl
new file mode 100755
index 000000000000..d23251033b00
--- /dev/null
+++ b/crypto/bn/asm/s390x-mont.pl
@@ -0,0 +1,225 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# April 2007.
+#
+# Performance improvement over vanilla C code varies from 85% to 45%
+# depending on key length and benchmark. Unfortunately in this context
+# these are not very impressive results [for code that utilizes "wide"
+# 64x64=128-bit multiplication, which is not commonly available to C
+# programmers], at least hand-coded bn_asm.c replacement is known to
+# provide 30-40% better results for longest keys. Well, on a second
+# thought it's not very surprising, because z-CPUs are single-issue
+# and _strictly_ in-order execution, while bn_mul_mont is more or less
+# dependent on CPU ability to pipe-line instructions and have several
+# of them "in-flight" at the same time. I mean while other methods,
+# for example Karatsuba, aim to minimize amount of multiplications at
+# the cost of other operations increase, bn_mul_mont aim to neatly
+# "overlap" multiplications and the other operations [and on most
+# platforms even minimize the amount of the other operations, in
+# particular references to memory]. But it's possible to improve this
+# module performance by implementing dedicated squaring code-path and
+# possibly by unrolling loops...
+
+# January 2009.
+#
+# Reschedule to minimize/avoid Address Generation Interlock hazard,
+# make inner loops counter-based.
+
+$mn0="%r0";
+$num="%r1";
+
+# int bn_mul_mont(
+$rp="%r2"; # BN_ULONG *rp,
+$ap="%r3"; # const BN_ULONG *ap,
+$bp="%r4"; # const BN_ULONG *bp,
+$np="%r5"; # const BN_ULONG *np,
+$n0="%r6"; # const BN_ULONG *n0,
+#$num="160(%r15)" # int num);
+
+$bi="%r2"; # zaps rp
+$j="%r7";
+
+$ahi="%r8";
+$alo="%r9";
+$nhi="%r10";
+$nlo="%r11";
+$AHI="%r12";
+$NHI="%r13";
+$count="%r14";
+$sp="%r15";
+
+$code.=<<___;
+.text
+.globl bn_mul_mont
+.type bn_mul_mont,\@function
+bn_mul_mont:
+ lgf $num,164($sp) # pull $num
+ sla $num,3 # $num to enumerate bytes
+ la $bp,0($num,$bp)
+
+ stg %r2,16($sp)
+
+ cghi $num,16 #
+ lghi %r2,0 #
+ blr %r14 # if($num<16) return 0;
+ cghi $num,128 #
+ bhr %r14 # if($num>128) return 0;
+
+ stmg %r3,%r15,24($sp)
+
+ lghi $rp,-160-8 # leave room for carry bit
+ lcgr $j,$num # -$num
+ lgr %r0,$sp
+ la $rp,0($rp,$sp)
+ la $sp,0($j,$rp) # alloca
+ stg %r0,0($sp) # back chain
+
+ sra $num,3 # restore $num
+ la $bp,0($j,$bp) # restore $bp
+ ahi $num,-1 # adjust $num for inner loop
+ lg $n0,0($n0) # pull n0
+
+ lg $bi,0($bp)
+ lg $alo,0($ap)
+ mlgr $ahi,$bi # ap[0]*bp[0]
+ lgr $AHI,$ahi
+
+ lgr $mn0,$alo # "tp[0]"*n0
+ msgr $mn0,$n0
+
+ lg $nlo,0($np) #
+ mlgr $nhi,$mn0 # np[0]*m1
+ algr $nlo,$alo # +="tp[0]"
+ lghi $NHI,0
+ alcgr $NHI,$nhi
+
+ la $j,8(%r0) # j=1
+ lr $count,$num
+
+.align 16
+.L1st:
+ lg $alo,0($j,$ap)
+ mlgr $ahi,$bi # ap[j]*bp[0]
+ algr $alo,$AHI
+ lghi $AHI,0
+ alcgr $AHI,$ahi
+
+ lg $nlo,0($j,$np)
+ mlgr $nhi,$mn0 # np[j]*m1
+ algr $nlo,$NHI
+ lghi $NHI,0
+ alcgr $nhi,$NHI # +="tp[j]"
+ algr $nlo,$alo
+ alcgr $NHI,$nhi
+
+ stg $nlo,160-8($j,$sp) # tp[j-1]=
+ la $j,8($j) # j++
+ brct $count,.L1st
+
+ algr $NHI,$AHI
+ lghi $AHI,0
+ alcgr $AHI,$AHI # upmost overflow bit
+ stg $NHI,160-8($j,$sp)
+ stg $AHI,160($j,$sp)
+ la $bp,8($bp) # bp++
+
+.Louter:
+ lg $bi,0($bp) # bp[i]
+ lg $alo,0($ap)
+ mlgr $ahi,$bi # ap[0]*bp[i]
+ alg $alo,160($sp) # +=tp[0]
+ lghi $AHI,0
+ alcgr $AHI,$ahi
+
+ lgr $mn0,$alo
+ msgr $mn0,$n0 # tp[0]*n0
+
+ lg $nlo,0($np) # np[0]
+ mlgr $nhi,$mn0 # np[0]*m1
+ algr $nlo,$alo # +="tp[0]"
+ lghi $NHI,0
+ alcgr $NHI,$nhi
+
+ la $j,8(%r0) # j=1
+ lr $count,$num
+
+.align 16
+.Linner:
+ lg $alo,0($j,$ap)
+ mlgr $ahi,$bi # ap[j]*bp[i]
+ algr $alo,$AHI
+ lghi $AHI,0
+ alcgr $ahi,$AHI
+ alg $alo,160($j,$sp)# +=tp[j]
+ alcgr $AHI,$ahi
+
+ lg $nlo,0($j,$np)
+ mlgr $nhi,$mn0 # np[j]*m1
+ algr $nlo,$NHI
+ lghi $NHI,0
+ alcgr $nhi,$NHI
+ algr $nlo,$alo # +="tp[j]"
+ alcgr $NHI,$nhi
+
+ stg $nlo,160-8($j,$sp) # tp[j-1]=
+ la $j,8($j) # j++
+ brct $count,.Linner
+
+ algr $NHI,$AHI
+ lghi $AHI,0
+ alcgr $AHI,$AHI
+ alg $NHI,160($j,$sp)# accumulate previous upmost overflow bit
+ lghi $ahi,0
+ alcgr $AHI,$ahi # new upmost overflow bit
+ stg $NHI,160-8($j,$sp)
+ stg $AHI,160($j,$sp)
+
+ la $bp,8($bp) # bp++
+ clg $bp,160+8+32($j,$sp) # compare to &bp[num]
+ jne .Louter
+
+ lg $rp,160+8+16($j,$sp) # reincarnate rp
+ la $ap,160($sp)
+ ahi $num,1 # restore $num, incidentally clears "borrow"
+
+ la $j,0(%r0)
+ lr $count,$num
+.Lsub: lg $alo,0($j,$ap)
+ slbg $alo,0($j,$np)
+ stg $alo,0($j,$rp)
+ la $j,8($j)
+ brct $count,.Lsub
+ lghi $ahi,0
+ slbgr $AHI,$ahi # handle upmost carry
+
+ ngr $ap,$AHI
+ lghi $np,-1
+ xgr $np,$AHI
+ ngr $np,$rp
+ ogr $ap,$np # ap=borrow?tp:rp
+
+ la $j,0(%r0)
+ lgr $count,$num
+.Lcopy: lg $alo,0($j,$ap) # copy or in-place refresh
+ stg $j,160($j,$sp) # zap tp
+ stg $alo,0($j,$rp)
+ la $j,8($j)
+ brct $count,.Lcopy
+
+ la %r1,160+8+48($j,$sp)
+ lmg %r6,%r15,0(%r1)
+ lghi %r2,1 # signal "processed"
+ br %r14
+.size bn_mul_mont,.-bn_mul_mont
+.string "Montgomery Multiplication for s390x, CRYPTOGAMS by <appro\@openssl.org>"
+___
+
+print $code;
+close STDOUT;
diff --git a/crypto/bn/asm/s390x.S b/crypto/bn/asm/s390x.S
new file mode 100755
index 000000000000..8f45f5d513ce
--- /dev/null
+++ b/crypto/bn/asm/s390x.S
@@ -0,0 +1,678 @@
+.ident "s390x.S, version 1.0"
+// ====================================================================
+// Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+// project.
+//
+// Rights for redistribution and usage in source and binary forms are
+// granted according to the OpenSSL license. Warranty of any kind is
+// disclaimed.
+// ====================================================================
+
+.text
+
+#define zero %r0
+
+// BN_ULONG bn_mul_add_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
+.globl bn_mul_add_words
+.type bn_mul_add_words,@function
+.align 4
+bn_mul_add_words:
+ lghi zero,0 // zero = 0
+ la %r1,0(%r2) // put rp aside
+ lghi %r2,0 // i=0;
+ ltgfr %r4,%r4
+ bler %r14 // if (len<=0) return 0;
+
+ stmg %r6,%r10,48(%r15)
+ lghi %r8,0 // carry = 0
+ srag %r10,%r4,2 // cnt=len/4
+ jz .Loop1_madd
+
+.Loop4_madd:
+ lg %r7,0(%r2,%r3) // ap[i]
+ mlgr %r6,%r5 // *=w
+ algr %r7,%r8 // +=carry
+ alcgr %r6,zero
+ alg %r7,0(%r2,%r1) // +=rp[i]
+ alcgr %r6,zero
+ stg %r7,0(%r2,%r1) // rp[i]=
+
+ lg %r9,8(%r2,%r3)
+ mlgr %r8,%r5
+ algr %r9,%r6
+ alcgr %r8,zero
+ alg %r9,8(%r2,%r1)
+ alcgr %r8,zero
+ stg %r9,8(%r2,%r1)
+
+ lg %r7,16(%r2,%r3)
+ mlgr %r6,%r5
+ algr %r7,%r8
+ alcgr %r6,zero
+ alg %r7,16(%r2,%r1)
+ alcgr %r6,zero
+ stg %r7,16(%r2,%r1)
+
+ lg %r9,24(%r2,%r3)
+ mlgr %r8,%r5
+ algr %r9,%r6
+ alcgr %r8,zero
+ alg %r9,24(%r2,%r1)
+ alcgr %r8,zero
+ stg %r9,24(%r2,%r1)
+
+ la %r2,32(%r2) // i+=4
+ brct %r10,.Loop4_madd
+
+ lghi %r10,3
+ nr %r4,%r10 // cnt=len%4
+ jz .Lend_madd
+
+.Loop1_madd:
+ lg %r7,0(%r2,%r3) // ap[i]
+ mlgr %r6,%r5 // *=w
+ algr %r7,%r8 // +=carry
+ alcgr %r6,zero
+ alg %r7,0(%r2,%r1) // +=rp[i]
+ alcgr %r6,zero
+ stg %r7,0(%r2,%r1) // rp[i]=
+
+ lgr %r8,%r6
+ la %r2,8(%r2) // i++
+ brct %r4,.Loop1_madd
+
+.Lend_madd:
+ lgr %r2,%r8
+ lmg %r6,%r10,48(%r15)
+ br %r14
+.size bn_mul_add_words,.-bn_mul_add_words
+
+// BN_ULONG bn_mul_words(BN_ULONG *r2,BN_ULONG *r3,int r4,BN_ULONG r5);
+.globl bn_mul_words
+.type bn_mul_words,@function
+.align 4
+bn_mul_words:
+ lghi zero,0 // zero = 0
+ la %r1,0(%r2) // put rp aside
+ lghi %r2,0 // i=0;
+ ltgfr %r4,%r4
+ bler %r14 // if (len<=0) return 0;
+
+ stmg %r6,%r10,48(%r15)
+ lghi %r8,0 // carry = 0
+ srag %r10,%r4,2 // cnt=len/4
+ jz .Loop1_mul
+
+.Loop4_mul:
+ lg %r7,0(%r2,%r3) // ap[i]
+ mlgr %r6,%r5 // *=w
+ algr %r7,%r8 // +=carry
+ alcgr %r6,zero
+ stg %r7,0(%r2,%r1) // rp[i]=
+
+ lg %r9,8(%r2,%r3)
+ mlgr %r8,%r5
+ algr %r9,%r6
+ alcgr %r8,zero
+ stg %r9,8(%r2,%r1)
+
+ lg %r7,16(%r2,%r3)
+ mlgr %r6,%r5
+ algr %r7,%r8
+ alcgr %r6,zero
+ stg %r7,16(%r2,%r1)
+
+ lg %r9,24(%r2,%r3)
+ mlgr %r8,%r5
+ algr %r9,%r6
+ alcgr %r8,zero
+ stg %r9,24(%r2,%r1)
+
+ la %r2,32(%r2) // i+=4
+ brct %r10,.Loop4_mul
+
+ lghi %r10,3
+ nr %r4,%r10 // cnt=len%4
+ jz .Lend_mul
+
+.Loop1_mul:
+ lg %r7,0(%r2,%r3) // ap[i]
+ mlgr %r6,%r5 // *=w
+ algr %r7,%r8 // +=carry
+ alcgr %r6,zero
+ stg %r7,0(%r2,%r1) // rp[i]=
+
+ lgr %r8,%r6
+ la %r2,8(%r2) // i++
+ brct %r4,.Loop1_mul
+
+.Lend_mul:
+ lgr %r2,%r8
+ lmg %r6,%r10,48(%r15)
+ br %r14
+.size bn_mul_words,.-bn_mul_words
+
+// void bn_sqr_words(BN_ULONG *r2,BN_ULONG *r2,int r4)
+.globl bn_sqr_words
+.type bn_sqr_words,@function
+.align 4
+bn_sqr_words:
+ ltgfr %r4,%r4
+ bler %r14
+
+ stmg %r6,%r7,48(%r15)
+ srag %r1,%r4,2 // cnt=len/4
+ jz .Loop1_sqr
+
+.Loop4_sqr:
+ lg %r7,0(%r3)
+ mlgr %r6,%r7
+ stg %r7,0(%r2)
+ stg %r6,8(%r2)
+
+ lg %r7,8(%r3)
+ mlgr %r6,%r7
+ stg %r7,16(%r2)
+ stg %r6,24(%r2)
+
+ lg %r7,16(%r3)
+ mlgr %r6,%r7
+ stg %r7,32(%r2)
+ stg %r6,40(%r2)
+
+ lg %r7,24(%r3)
+ mlgr %r6,%r7
+ stg %r7,48(%r2)
+ stg %r6,56(%r2)
+
+ la %r3,32(%r3)
+ la %r2,64(%r2)
+ brct %r1,.Loop4_sqr
+
+ lghi %r1,3
+ nr %r4,%r1 // cnt=len%4
+ jz .Lend_sqr
+
+.Loop1_sqr:
+ lg %r7,0(%r3)
+ mlgr %r6,%r7
+ stg %r7,0(%r2)
+ stg %r6,8(%r2)
+
+ la %r3,8(%r3)
+ la %r2,16(%r2)
+ brct %r4,.Loop1_sqr
+
+.Lend_sqr:
+ lmg %r6,%r7,48(%r15)
+ br %r14
+.size bn_sqr_words,.-bn_sqr_words
+
+// BN_ULONG bn_div_words(BN_ULONG h,BN_ULONG l,BN_ULONG d);
+.globl bn_div_words
+.type bn_div_words,@function
+.align 4
+bn_div_words:
+ dlgr %r2,%r4
+ lgr %r2,%r3
+ br %r14
+.size bn_div_words,.-bn_div_words
+
+// BN_ULONG bn_add_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
+.globl bn_add_words
+.type bn_add_words,@function
+.align 4
+bn_add_words:
+ la %r1,0(%r2) // put rp aside
+ lghi %r2,0 // i=0
+ ltgfr %r5,%r5
+ bler %r14 // if (len<=0) return 0;
+
+ stg %r6,48(%r15)
+ lghi %r6,3
+ nr %r6,%r5 // len%4
+ sra %r5,2 // len/4, use sra because it sets condition code
+ jz .Loop1_add // carry is incidentally cleared if branch taken
+ algr %r2,%r2 // clear carry
+
+.Loop4_add:
+ lg %r0,0(%r2,%r3)
+ alcg %r0,0(%r2,%r4)
+ stg %r0,0(%r2,%r1)
+ lg %r0,8(%r2,%r3)
+ alcg %r0,8(%r2,%r4)
+ stg %r0,8(%r2,%r1)
+ lg %r0,16(%r2,%r3)
+ alcg %r0,16(%r2,%r4)
+ stg %r0,16(%r2,%r1)
+ lg %r0,24(%r2,%r3)
+ alcg %r0,24(%r2,%r4)
+ stg %r0,24(%r2,%r1)
+
+ la %r2,32(%r2) // i+=4
+ brct %r5,.Loop4_add
+
+ la %r6,1(%r6) // see if len%4 is zero ...
+ brct %r6,.Loop1_add // without touching condition code:-)
+
+.Lexit_add:
+ lghi %r2,0
+ alcgr %r2,%r2
+ lg %r6,48(%r15)
+ br %r14
+
+.Loop1_add:
+ lg %r0,0(%r2,%r3)
+ alcg %r0,0(%r2,%r4)
+ stg %r0,0(%r2,%r1)
+
+ la %r2,8(%r2) // i++
+ brct %r6,.Loop1_add
+
+ j .Lexit_add
+.size bn_add_words,.-bn_add_words
+
+// BN_ULONG bn_sub_words(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4,int r5);
+.globl bn_sub_words
+.type bn_sub_words,@function
+.align 4
+bn_sub_words:
+ la %r1,0(%r2) // put rp aside
+ lghi %r2,0 // i=0
+ ltgfr %r5,%r5
+ bler %r14 // if (len<=0) return 0;
+
+ stg %r6,48(%r15)
+ lghi %r6,3
+ nr %r6,%r5 // len%4
+ sra %r5,2 // len/4, use sra because it sets condition code
+ jnz .Loop4_sub // borrow is incidentally cleared if branch taken
+ slgr %r2,%r2 // clear borrow
+
+.Loop1_sub:
+ lg %r0,0(%r2,%r3)
+ slbg %r0,0(%r2,%r4)
+ stg %r0,0(%r2,%r1)
+
+ la %r2,8(%r2) // i++
+ brct %r6,.Loop1_sub
+ j .Lexit_sub
+
+.Loop4_sub:
+ lg %r0,0(%r2,%r3)
+ slbg %r0,0(%r2,%r4)
+ stg %r0,0(%r2,%r1)
+ lg %r0,8(%r2,%r3)
+ slbg %r0,8(%r2,%r4)
+ stg %r0,8(%r2,%r1)
+ lg %r0,16(%r2,%r3)
+ slbg %r0,16(%r2,%r4)
+ stg %r0,16(%r2,%r1)
+ lg %r0,24(%r2,%r3)
+ slbg %r0,24(%r2,%r4)
+ stg %r0,24(%r2,%r1)
+
+ la %r2,32(%r2) // i+=4
+ brct %r5,.Loop4_sub
+
+ la %r6,1(%r6) // see if len%4 is zero ...
+ brct %r6,.Loop1_sub // without touching condition code:-)
+
+.Lexit_sub:
+ lghi %r2,0
+ slbgr %r2,%r2
+ lcgr %r2,%r2
+ lg %r6,48(%r15)
+ br %r14
+.size bn_sub_words,.-bn_sub_words
+
+#define c1 %r1
+#define c2 %r5
+#define c3 %r8
+
+#define mul_add_c(ai,bi,c1,c2,c3) \
+ lg %r7,ai*8(%r3); \
+ mlg %r6,bi*8(%r4); \
+ algr c1,%r7; \
+ alcgr c2,%r6; \
+ alcgr c3,zero
+
+// void bn_mul_comba8(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
+.globl bn_mul_comba8
+.type bn_mul_comba8,@function
+.align 4
+bn_mul_comba8:
+ stmg %r6,%r8,48(%r15)
+
+ lghi c1,0
+ lghi c2,0
+ lghi c3,0
+ lghi zero,0
+
+ mul_add_c(0,0,c1,c2,c3);
+ stg c1,0*8(%r2)
+ lghi c1,0
+
+ mul_add_c(0,1,c2,c3,c1);
+ mul_add_c(1,0,c2,c3,c1);
+ stg c2,1*8(%r2)
+ lghi c2,0
+
+ mul_add_c(2,0,c3,c1,c2);
+ mul_add_c(1,1,c3,c1,c2);
+ mul_add_c(0,2,c3,c1,c2);
+ stg c3,2*8(%r2)
+ lghi c3,0
+
+ mul_add_c(0,3,c1,c2,c3);
+ mul_add_c(1,2,c1,c2,c3);
+ mul_add_c(2,1,c1,c2,c3);
+ mul_add_c(3,0,c1,c2,c3);
+ stg c1,3*8(%r2)
+ lghi c1,0
+
+ mul_add_c(4,0,c2,c3,c1);
+ mul_add_c(3,1,c2,c3,c1);
+ mul_add_c(2,2,c2,c3,c1);
+ mul_add_c(1,3,c2,c3,c1);
+ mul_add_c(0,4,c2,c3,c1);
+ stg c2,4*8(%r2)
+ lghi c2,0
+
+ mul_add_c(0,5,c3,c1,c2);
+ mul_add_c(1,4,c3,c1,c2);
+ mul_add_c(2,3,c3,c1,c2);
+ mul_add_c(3,2,c3,c1,c2);
+ mul_add_c(4,1,c3,c1,c2);
+ mul_add_c(5,0,c3,c1,c2);
+ stg c3,5*8(%r2)
+ lghi c3,0
+
+ mul_add_c(6,0,c1,c2,c3);
+ mul_add_c(5,1,c1,c2,c3);
+ mul_add_c(4,2,c1,c2,c3);
+ mul_add_c(3,3,c1,c2,c3);
+ mul_add_c(2,4,c1,c2,c3);
+ mul_add_c(1,5,c1,c2,c3);
+ mul_add_c(0,6,c1,c2,c3);
+ stg c1,6*8(%r2)
+ lghi c1,0
+
+ mul_add_c(0,7,c2,c3,c1);
+ mul_add_c(1,6,c2,c3,c1);
+ mul_add_c(2,5,c2,c3,c1);
+ mul_add_c(3,4,c2,c3,c1);
+ mul_add_c(4,3,c2,c3,c1);
+ mul_add_c(5,2,c2,c3,c1);
+ mul_add_c(6,1,c2,c3,c1);
+ mul_add_c(7,0,c2,c3,c1);
+ stg c2,7*8(%r2)
+ lghi c2,0
+
+ mul_add_c(7,1,c3,c1,c2);
+ mul_add_c(6,2,c3,c1,c2);
+ mul_add_c(5,3,c3,c1,c2);
+ mul_add_c(4,4,c3,c1,c2);
+ mul_add_c(3,5,c3,c1,c2);
+ mul_add_c(2,6,c3,c1,c2);
+ mul_add_c(1,7,c3,c1,c2);
+ stg c3,8*8(%r2)
+ lghi c3,0
+
+ mul_add_c(2,7,c1,c2,c3);
+ mul_add_c(3,6,c1,c2,c3);
+ mul_add_c(4,5,c1,c2,c3);
+ mul_add_c(5,4,c1,c2,c3);
+ mul_add_c(6,3,c1,c2,c3);
+ mul_add_c(7,2,c1,c2,c3);
+ stg c1,9*8(%r2)
+ lghi c1,0
+
+ mul_add_c(7,3,c2,c3,c1);
+ mul_add_c(6,4,c2,c3,c1);
+ mul_add_c(5,5,c2,c3,c1);
+ mul_add_c(4,6,c2,c3,c1);
+ mul_add_c(3,7,c2,c3,c1);
+ stg c2,10*8(%r2)
+ lghi c2,0
+
+ mul_add_c(4,7,c3,c1,c2);
+ mul_add_c(5,6,c3,c1,c2);
+ mul_add_c(6,5,c3,c1,c2);
+ mul_add_c(7,4,c3,c1,c2);
+ stg c3,11*8(%r2)
+ lghi c3,0
+
+ mul_add_c(7,5,c1,c2,c3);
+ mul_add_c(6,6,c1,c2,c3);
+ mul_add_c(5,7,c1,c2,c3);
+ stg c1,12*8(%r2)
+ lghi c1,0
+
+
+ mul_add_c(6,7,c2,c3,c1);
+ mul_add_c(7,6,c2,c3,c1);
+ stg c2,13*8(%r2)
+ lghi c2,0
+
+ mul_add_c(7,7,c3,c1,c2);
+ stg c3,14*8(%r2)
+ stg c1,15*8(%r2)
+
+ lmg %r6,%r8,48(%r15)
+ br %r14
+.size bn_mul_comba8,.-bn_mul_comba8
+
+// void bn_mul_comba4(BN_ULONG *r2,BN_ULONG *r3,BN_ULONG *r4);
+.globl bn_mul_comba4
+.type bn_mul_comba4,@function
+.align 4
+bn_mul_comba4:
+ stmg %r6,%r8,48(%r15)
+
+ lghi c1,0
+ lghi c2,0
+ lghi c3,0
+ lghi zero,0
+
+ mul_add_c(0,0,c1,c2,c3);
+ stg c1,0*8(%r3)
+ lghi c1,0
+
+ mul_add_c(0,1,c2,c3,c1);
+ mul_add_c(1,0,c2,c3,c1);
+ stg c2,1*8(%r2)
+ lghi c2,0
+
+ mul_add_c(2,0,c3,c1,c2);
+ mul_add_c(1,1,c3,c1,c2);
+ mul_add_c(0,2,c3,c1,c2);
+ stg c3,2*8(%r2)
+ lghi c3,0
+
+ mul_add_c(0,3,c1,c2,c3);
+ mul_add_c(1,2,c1,c2,c3);
+ mul_add_c(2,1,c1,c2,c3);
+ mul_add_c(3,0,c1,c2,c3);
+ stg c1,3*8(%r2)
+ lghi c1,0
+
+ mul_add_c(3,1,c2,c3,c1);
+ mul_add_c(2,2,c2,c3,c1);
+ mul_add_c(1,3,c2,c3,c1);
+ stg c2,4*8(%r2)
+ lghi c2,0
+
+ mul_add_c(2,3,c3,c1,c2);
+ mul_add_c(3,2,c3,c1,c2);
+ stg c3,5*8(%r2)
+ lghi c3,0
+
+ mul_add_c(3,3,c1,c2,c3);
+ stg c1,6*8(%r2)
+ stg c2,7*8(%r2)
+
+ stmg %r6,%r8,48(%r15)
+ br %r14
+.size bn_mul_comba4,.-bn_mul_comba4
+
+#define sqr_add_c(ai,c1,c2,c3) \
+ lg %r7,ai*8(%r3); \
+ mlgr %r6,%r7; \
+ algr c1,%r7; \
+ alcgr c2,%r6; \
+ alcgr c3,zero
+
+#define sqr_add_c2(ai,aj,c1,c2,c3) \
+ lg %r7,ai*8(%r3); \
+ mlg %r6,aj*8(%r3); \
+ algr c1,%r7; \
+ alcgr c2,%r6; \
+ alcgr c3,zero; \
+ algr c1,%r7; \
+ alcgr c2,%r6; \
+ alcgr c3,zero
+
+// void bn_sqr_comba8(BN_ULONG *r2,BN_ULONG *r3);
+.globl bn_sqr_comba8
+.type bn_sqr_comba8,@function
+.align 4
+bn_sqr_comba8:
+ stmg %r6,%r8,48(%r15)
+
+ lghi c1,0
+ lghi c2,0
+ lghi c3,0
+ lghi zero,0
+
+ sqr_add_c(0,c1,c2,c3);
+ stg c1,0*8(%r2)
+ lghi c1,0
+
+ sqr_add_c2(1,0,c2,c3,c1);
+ stg c2,1*8(%r2)
+ lghi c2,0
+
+ sqr_add_c(1,c3,c1,c2);
+ sqr_add_c2(2,0,c3,c1,c2);
+ stg c3,2*8(%r2)
+ lghi c3,0
+
+ sqr_add_c2(3,0,c1,c2,c3);
+ sqr_add_c2(2,1,c1,c2,c3);
+ stg c1,3*8(%r2)
+ lghi c1,0
+
+ sqr_add_c(2,c2,c3,c1);
+ sqr_add_c2(3,1,c2,c3,c1);
+ sqr_add_c2(4,0,c2,c3,c1);
+ stg c2,4*8(%r2)
+ lghi c2,0
+
+ sqr_add_c2(5,0,c3,c1,c2);
+ sqr_add_c2(4,1,c3,c1,c2);
+ sqr_add_c2(3,2,c3,c1,c2);
+ stg c3,5*8(%r2)
+ lghi c3,0
+
+ sqr_add_c(3,c1,c2,c3);
+ sqr_add_c2(4,2,c1,c2,c3);
+ sqr_add_c2(5,1,c1,c2,c3);
+ sqr_add_c2(6,0,c1,c2,c3);
+ stg c1,6*8(%r2)
+ lghi c1,0
+
+ sqr_add_c2(7,0,c2,c3,c1);
+ sqr_add_c2(6,1,c2,c3,c1);
+ sqr_add_c2(5,2,c2,c3,c1);
+ sqr_add_c2(4,3,c2,c3,c1);
+ stg c2,7*8(%r2)
+ lghi c2,0
+
+ sqr_add_c(4,c3,c1,c2);
+ sqr_add_c2(5,3,c3,c1,c2);
+ sqr_add_c2(6,2,c3,c1,c2);
+ sqr_add_c2(7,1,c3,c1,c2);
+ stg c3,8*8(%r2)
+ lghi c3,0
+
+ sqr_add_c2(7,2,c1,c2,c3);
+ sqr_add_c2(6,3,c1,c2,c3);
+ sqr_add_c2(5,4,c1,c2,c3);
+ stg c1,9*8(%r2)
+ lghi c1,0
+
+ sqr_add_c(5,c2,c3,c1);
+ sqr_add_c2(6,4,c2,c3,c1);
+ sqr_add_c2(7,3,c2,c3,c1);
+ stg c2,10*8(%r2)
+ lghi c2,0
+
+ sqr_add_c2(7,4,c3,c1,c2);
+ sqr_add_c2(6,5,c3,c1,c2);
+ stg c3,11*8(%r2)
+ lghi c3,0
+
+ sqr_add_c(6,c1,c2,c3);
+ sqr_add_c2(7,5,c1,c2,c3);
+ stg c1,12*8(%r2)
+ lghi c1,0
+
+ sqr_add_c2(7,6,c2,c3,c1);
+ stg c2,13*8(%r2)
+ lghi c2,0
+
+ sqr_add_c(7,c3,c1,c2);
+ stg c3,14*8(%r2)
+ stg c1,15*8(%r2)
+
+ lmg %r6,%r8,48(%r15)
+ br %r14
+.size bn_sqr_comba8,.-bn_sqr_comba8
+
+// void bn_sqr_comba4(BN_ULONG *r2,BN_ULONG *r3);
+.globl bn_sqr_comba4
+.type bn_sqr_comba4,@function
+.align 4
+bn_sqr_comba4:
+ stmg %r6,%r8,48(%r15)
+
+ lghi c1,0
+ lghi c2,0
+ lghi c3,0
+ lghi zero,0
+
+ sqr_add_c(0,c1,c2,c3);
+ stg c1,0*8(%r2)
+ lghi c1,0
+
+ sqr_add_c2(1,0,c2,c3,c1);
+ stg c2,1*8(%r2)
+ lghi c2,0
+
+ sqr_add_c(1,c3,c1,c2);
+ sqr_add_c2(2,0,c3,c1,c2);
+ stg c3,2*8(%r2)
+ lghi c3,0
+
+ sqr_add_c2(3,0,c1,c2,c3);
+ sqr_add_c2(2,1,c1,c2,c3);
+ stg c1,3*8(%r2)
+ lghi c1,0
+
+ sqr_add_c(2,c2,c3,c1);
+ sqr_add_c2(3,1,c2,c3,c1);
+ stg c2,4*8(%r2)
+ lghi c2,0
+
+ sqr_add_c2(3,2,c3,c1,c2);
+ stg c3,5*8(%r2)
+ lghi c3,0
+
+ sqr_add_c(3,c1,c2,c3);
+ stg c1,6*8(%r2)
+ stg c2,7*8(%r2)
+
+ lmg %r6,%r8,48(%r15)
+ br %r14
+.size bn_sqr_comba4,.-bn_sqr_comba4
diff --git a/crypto/bn/asm/sparcv9-mont.pl b/crypto/bn/asm/sparcv9-mont.pl
new file mode 100755
index 000000000000..b8fb1e8a25dc
--- /dev/null
+++ b/crypto/bn/asm/sparcv9-mont.pl
@@ -0,0 +1,606 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# December 2005
+#
+# Pure SPARCv9/8+ and IALU-only bn_mul_mont implementation. The reasons
+# for undertaken effort are multiple. First of all, UltraSPARC is not
+# the whole SPARCv9 universe and other VIS-free implementations deserve
+# optimized code as much. Secondly, newly introduced UltraSPARC T1,
+# a.k.a. Niagara, has shared FPU and concurrent FPU-intensive pathes,
+# such as sparcv9a-mont, will simply sink it. Yes, T1 is equipped with
+# several integrated RSA/DSA accelerator circuits accessible through
+# kernel driver [only(*)], but having decent user-land software
+# implementation is important too. Finally, reasons like desire to
+# experiment with dedicated squaring procedure. Yes, this module
+# implements one, because it was easiest to draft it in SPARCv9
+# instructions...
+
+# (*) Engine accessing the driver in question is on my TODO list.
+# For reference, acceleator is estimated to give 6 to 10 times
+# improvement on single-threaded RSA sign. It should be noted
+# that 6-10x improvement coefficient does not actually mean
+# something extraordinary in terms of absolute [single-threaded]
+# performance, as SPARCv9 instruction set is by all means least
+# suitable for high performance crypto among other 64 bit
+# platforms. 6-10x factor simply places T1 in same performance
+# domain as say AMD64 and IA-64. Improvement of RSA verify don't
+# appear impressive at all, but it's the sign operation which is
+# far more critical/interesting.
+
+# You might notice that inner loops are modulo-scheduled:-) This has
+# essentially negligible impact on UltraSPARC performance, it's
+# Fujitsu SPARC64 V users who should notice and hopefully appreciate
+# the advantage... Currently this module surpasses sparcv9a-mont.pl
+# by ~20% on UltraSPARC-III and later cores, but recall that sparcv9a
+# module still have hidden potential [see TODO list there], which is
+# estimated to be larger than 20%...
+
+# int bn_mul_mont(
+$rp="%i0"; # BN_ULONG *rp,
+$ap="%i1"; # const BN_ULONG *ap,
+$bp="%i2"; # const BN_ULONG *bp,
+$np="%i3"; # const BN_ULONG *np,
+$n0="%i4"; # const BN_ULONG *n0,
+$num="%i5"; # int num);
+
+$bits=32;
+for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
+if ($bits==64) { $bias=2047; $frame=192; }
+else { $bias=0; $frame=128; }
+
+$car0="%o0";
+$car1="%o1";
+$car2="%o2"; # 1 bit
+$acc0="%o3";
+$acc1="%o4";
+$mask="%g1"; # 32 bits, what a waste...
+$tmp0="%g4";
+$tmp1="%g5";
+
+$i="%l0";
+$j="%l1";
+$mul0="%l2";
+$mul1="%l3";
+$tp="%l4";
+$apj="%l5";
+$npj="%l6";
+$tpj="%l7";
+
+$fname="bn_mul_mont_int";
+
+$code=<<___;
+.section ".text",#alloc,#execinstr
+
+.global $fname
+.align 32
+$fname:
+ cmp %o5,4 ! 128 bits minimum
+ bge,pt %icc,.Lenter
+ sethi %hi(0xffffffff),$mask
+ retl
+ clr %o0
+.align 32
+.Lenter:
+ save %sp,-$frame,%sp
+ sll $num,2,$num ! num*=4
+ or $mask,%lo(0xffffffff),$mask
+ ld [$n0],$n0
+ cmp $ap,$bp
+ and $num,$mask,$num
+ ld [$bp],$mul0 ! bp[0]
+ nop
+
+ add %sp,$bias,%o7 ! real top of stack
+ ld [$ap],$car0 ! ap[0] ! redundant in squaring context
+ sub %o7,$num,%o7
+ ld [$ap+4],$apj ! ap[1]
+ and %o7,-1024,%o7
+ ld [$np],$car1 ! np[0]
+ sub %o7,$bias,%sp ! alloca
+ ld [$np+4],$npj ! np[1]
+ be,pt `$bits==32?"%icc":"%xcc"`,.Lbn_sqr_mont
+ mov 12,$j
+
+ mulx $car0,$mul0,$car0 ! ap[0]*bp[0]
+ mulx $apj,$mul0,$tmp0 !prologue! ap[1]*bp[0]
+ and $car0,$mask,$acc0
+ add %sp,$bias+$frame,$tp
+ ld [$ap+8],$apj !prologue!
+
+ mulx $n0,$acc0,$mul1 ! "t[0]"*n0
+ and $mul1,$mask,$mul1
+
+ mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
+ mulx $npj,$mul1,$acc1 !prologue! np[1]*"t[0]"*n0
+ srlx $car0,32,$car0
+ add $acc0,$car1,$car1
+ ld [$np+8],$npj !prologue!
+ srlx $car1,32,$car1
+ mov $tmp0,$acc0 !prologue!
+
+.L1st:
+ mulx $apj,$mul0,$tmp0
+ mulx $npj,$mul1,$tmp1
+ add $acc0,$car0,$car0
+ ld [$ap+$j],$apj ! ap[j]
+ and $car0,$mask,$acc0
+ add $acc1,$car1,$car1
+ ld [$np+$j],$npj ! np[j]
+ srlx $car0,32,$car0
+ add $acc0,$car1,$car1
+ add $j,4,$j ! j++
+ mov $tmp0,$acc0
+ st $car1,[$tp]
+ cmp $j,$num
+ mov $tmp1,$acc1
+ srlx $car1,32,$car1
+ bl %icc,.L1st
+ add $tp,4,$tp ! tp++
+!.L1st
+
+ mulx $apj,$mul0,$tmp0 !epilogue!
+ mulx $npj,$mul1,$tmp1
+ add $acc0,$car0,$car0
+ and $car0,$mask,$acc0
+ add $acc1,$car1,$car1
+ srlx $car0,32,$car0
+ add $acc0,$car1,$car1
+ st $car1,[$tp]
+ srlx $car1,32,$car1
+
+ add $tmp0,$car0,$car0
+ and $car0,$mask,$acc0
+ add $tmp1,$car1,$car1
+ srlx $car0,32,$car0
+ add $acc0,$car1,$car1
+ st $car1,[$tp+4]
+ srlx $car1,32,$car1
+
+ add $car0,$car1,$car1
+ st $car1,[$tp+8]
+ srlx $car1,32,$car2
+
+ mov 4,$i ! i++
+ ld [$bp+4],$mul0 ! bp[1]
+.Louter:
+ add %sp,$bias+$frame,$tp
+ ld [$ap],$car0 ! ap[0]
+ ld [$ap+4],$apj ! ap[1]
+ ld [$np],$car1 ! np[0]
+ ld [$np+4],$npj ! np[1]
+ ld [$tp],$tmp1 ! tp[0]
+ ld [$tp+4],$tpj ! tp[1]
+ mov 12,$j
+
+ mulx $car0,$mul0,$car0
+ mulx $apj,$mul0,$tmp0 !prologue!
+ add $tmp1,$car0,$car0
+ ld [$ap+8],$apj !prologue!
+ and $car0,$mask,$acc0
+
+ mulx $n0,$acc0,$mul1
+ and $mul1,$mask,$mul1
+
+ mulx $car1,$mul1,$car1
+ mulx $npj,$mul1,$acc1 !prologue!
+ srlx $car0,32,$car0
+ add $acc0,$car1,$car1
+ ld [$np+8],$npj !prologue!
+ srlx $car1,32,$car1
+ mov $tmp0,$acc0 !prologue!
+
+.Linner:
+ mulx $apj,$mul0,$tmp0
+ mulx $npj,$mul1,$tmp1
+ add $tpj,$car0,$car0
+ ld [$ap+$j],$apj ! ap[j]
+ add $acc0,$car0,$car0
+ add $acc1,$car1,$car1
+ ld [$np+$j],$npj ! np[j]
+ and $car0,$mask,$acc0
+ ld [$tp+8],$tpj ! tp[j]
+ srlx $car0,32,$car0
+ add $acc0,$car1,$car1
+ add $j,4,$j ! j++
+ mov $tmp0,$acc0
+ st $car1,[$tp] ! tp[j-1]
+ srlx $car1,32,$car1
+ mov $tmp1,$acc1
+ cmp $j,$num
+ bl %icc,.Linner
+ add $tp,4,$tp ! tp++
+!.Linner
+
+ mulx $apj,$mul0,$tmp0 !epilogue!
+ mulx $npj,$mul1,$tmp1
+ add $tpj,$car0,$car0
+ add $acc0,$car0,$car0
+ ld [$tp+8],$tpj ! tp[j]
+ and $car0,$mask,$acc0
+ add $acc1,$car1,$car1
+ srlx $car0,32,$car0
+ add $acc0,$car1,$car1
+ st $car1,[$tp] ! tp[j-1]
+ srlx $car1,32,$car1
+
+ add $tpj,$car0,$car0
+ add $tmp0,$car0,$car0
+ and $car0,$mask,$acc0
+ add $tmp1,$car1,$car1
+ add $acc0,$car1,$car1
+ st $car1,[$tp+4] ! tp[j-1]
+ srlx $car0,32,$car0
+ add $i,4,$i ! i++
+ srlx $car1,32,$car1
+
+ add $car0,$car1,$car1
+ cmp $i,$num
+ add $car2,$car1,$car1
+ st $car1,[$tp+8]
+
+ srlx $car1,32,$car2
+ bl,a %icc,.Louter
+ ld [$bp+$i],$mul0 ! bp[i]
+!.Louter
+
+ add $tp,12,$tp
+
+.Ltail:
+ add $np,$num,$np
+ add $rp,$num,$rp
+ mov $tp,$ap
+ sub %g0,$num,%o7 ! k=-num
+ ba .Lsub
+ subcc %g0,%g0,%g0 ! clear %icc.c
+.align 16
+.Lsub:
+ ld [$tp+%o7],%o0
+ ld [$np+%o7],%o1
+ subccc %o0,%o1,%o1 ! tp[j]-np[j]
+ add $rp,%o7,$i
+ add %o7,4,%o7
+ brnz %o7,.Lsub
+ st %o1,[$i]
+ subc $car2,0,$car2 ! handle upmost overflow bit
+ and $tp,$car2,$ap
+ andn $rp,$car2,$np
+ or $ap,$np,$ap
+ sub %g0,$num,%o7
+
+.Lcopy:
+ ld [$ap+%o7],%o0 ! copy or in-place refresh
+ st %g0,[$tp+%o7] ! zap tp
+ st %o0,[$rp+%o7]
+ add %o7,4,%o7
+ brnz %o7,.Lcopy
+ nop
+ mov 1,%i0
+ ret
+ restore
+___
+
+########
+######## .Lbn_sqr_mont gives up to 20% *overall* improvement over
+######## code without following dedicated squaring procedure.
+########
+$sbit="%i2"; # re-use $bp!
+
+$code.=<<___;
+.align 32
+.Lbn_sqr_mont:
+ mulx $mul0,$mul0,$car0 ! ap[0]*ap[0]
+ mulx $apj,$mul0,$tmp0 !prologue!
+ and $car0,$mask,$acc0
+ add %sp,$bias+$frame,$tp
+ ld [$ap+8],$apj !prologue!
+
+ mulx $n0,$acc0,$mul1 ! "t[0]"*n0
+ srlx $car0,32,$car0
+ and $mul1,$mask,$mul1
+
+ mulx $car1,$mul1,$car1 ! np[0]*"t[0]"*n0
+ mulx $npj,$mul1,$acc1 !prologue!
+ and $car0,1,$sbit
+ ld [$np+8],$npj !prologue!
+ srlx $car0,1,$car0
+ add $acc0,$car1,$car1
+ srlx $car1,32,$car1
+ mov $tmp0,$acc0 !prologue!
+
+.Lsqr_1st:
+ mulx $apj,$mul0,$tmp0
+ mulx $npj,$mul1,$tmp1
+ add $acc0,$car0,$car0 ! ap[j]*a0+c0
+ add $acc1,$car1,$car1
+ ld [$ap+$j],$apj ! ap[j]
+ and $car0,$mask,$acc0
+ ld [$np+$j],$npj ! np[j]
+ srlx $car0,32,$car0
+ add $acc0,$acc0,$acc0
+ or $sbit,$acc0,$acc0
+ mov $tmp1,$acc1
+ srlx $acc0,32,$sbit
+ add $j,4,$j ! j++
+ and $acc0,$mask,$acc0
+ cmp $j,$num
+ add $acc0,$car1,$car1
+ st $car1,[$tp]
+ mov $tmp0,$acc0
+ srlx $car1,32,$car1
+ bl %icc,.Lsqr_1st
+ add $tp,4,$tp ! tp++
+!.Lsqr_1st
+
+ mulx $apj,$mul0,$tmp0 ! epilogue
+ mulx $npj,$mul1,$tmp1
+ add $acc0,$car0,$car0 ! ap[j]*a0+c0
+ add $acc1,$car1,$car1
+ and $car0,$mask,$acc0
+ srlx $car0,32,$car0
+ add $acc0,$acc0,$acc0
+ or $sbit,$acc0,$acc0
+ srlx $acc0,32,$sbit
+ and $acc0,$mask,$acc0
+ add $acc0,$car1,$car1
+ st $car1,[$tp]
+ srlx $car1,32,$car1
+
+ add $tmp0,$car0,$car0 ! ap[j]*a0+c0
+ add $tmp1,$car1,$car1
+ and $car0,$mask,$acc0
+ srlx $car0,32,$car0
+ add $acc0,$acc0,$acc0
+ or $sbit,$acc0,$acc0
+ srlx $acc0,32,$sbit
+ and $acc0,$mask,$acc0
+ add $acc0,$car1,$car1
+ st $car1,[$tp+4]
+ srlx $car1,32,$car1
+
+ add $car0,$car0,$car0
+ or $sbit,$car0,$car0
+ add $car0,$car1,$car1
+ st $car1,[$tp+8]
+ srlx $car1,32,$car2
+
+ ld [%sp+$bias+$frame],$tmp0 ! tp[0]
+ ld [%sp+$bias+$frame+4],$tmp1 ! tp[1]
+ ld [%sp+$bias+$frame+8],$tpj ! tp[2]
+ ld [$ap+4],$mul0 ! ap[1]
+ ld [$ap+8],$apj ! ap[2]
+ ld [$np],$car1 ! np[0]
+ ld [$np+4],$npj ! np[1]
+ mulx $n0,$tmp0,$mul1
+
+ mulx $mul0,$mul0,$car0
+ and $mul1,$mask,$mul1
+
+ mulx $car1,$mul1,$car1
+ mulx $npj,$mul1,$acc1
+ add $tmp0,$car1,$car1
+ and $car0,$mask,$acc0
+ ld [$np+8],$npj ! np[2]
+ srlx $car1,32,$car1
+ add $tmp1,$car1,$car1
+ srlx $car0,32,$car0
+ add $acc0,$car1,$car1
+ and $car0,1,$sbit
+ add $acc1,$car1,$car1
+ srlx $car0,1,$car0
+ mov 12,$j
+ st $car1,[%sp+$bias+$frame] ! tp[0]=
+ srlx $car1,32,$car1
+ add %sp,$bias+$frame+4,$tp
+
+.Lsqr_2nd:
+ mulx $apj,$mul0,$acc0
+ mulx $npj,$mul1,$acc1
+ add $acc0,$car0,$car0
+ add $tpj,$car1,$car1
+ ld [$ap+$j],$apj ! ap[j]
+ and $car0,$mask,$acc0
+ ld [$np+$j],$npj ! np[j]
+ srlx $car0,32,$car0
+ add $acc1,$car1,$car1
+ ld [$tp+8],$tpj ! tp[j]
+ add $acc0,$acc0,$acc0
+ add $j,4,$j ! j++
+ or $sbit,$acc0,$acc0
+ srlx $acc0,32,$sbit
+ and $acc0,$mask,$acc0
+ cmp $j,$num
+ add $acc0,$car1,$car1
+ st $car1,[$tp] ! tp[j-1]
+ srlx $car1,32,$car1
+ bl %icc,.Lsqr_2nd
+ add $tp,4,$tp ! tp++
+!.Lsqr_2nd
+
+ mulx $apj,$mul0,$acc0
+ mulx $npj,$mul1,$acc1
+ add $acc0,$car0,$car0
+ add $tpj,$car1,$car1
+ and $car0,$mask,$acc0
+ srlx $car0,32,$car0
+ add $acc1,$car1,$car1
+ add $acc0,$acc0,$acc0
+ or $sbit,$acc0,$acc0
+ srlx $acc0,32,$sbit
+ and $acc0,$mask,$acc0
+ add $acc0,$car1,$car1
+ st $car1,[$tp] ! tp[j-1]
+ srlx $car1,32,$car1
+
+ add $car0,$car0,$car0
+ or $sbit,$car0,$car0
+ add $car0,$car1,$car1
+ add $car2,$car1,$car1
+ st $car1,[$tp+4]
+ srlx $car1,32,$car2
+
+ ld [%sp+$bias+$frame],$tmp1 ! tp[0]
+ ld [%sp+$bias+$frame+4],$tpj ! tp[1]
+ ld [$ap+8],$mul0 ! ap[2]
+ ld [$np],$car1 ! np[0]
+ ld [$np+4],$npj ! np[1]
+ mulx $n0,$tmp1,$mul1
+ and $mul1,$mask,$mul1
+ mov 8,$i
+
+ mulx $mul0,$mul0,$car0
+ mulx $car1,$mul1,$car1
+ and $car0,$mask,$acc0
+ add $tmp1,$car1,$car1
+ srlx $car0,32,$car0
+ add %sp,$bias+$frame,$tp
+ srlx $car1,32,$car1
+ and $car0,1,$sbit
+ srlx $car0,1,$car0
+ mov 4,$j
+
+.Lsqr_outer:
+.Lsqr_inner1:
+ mulx $npj,$mul1,$acc1
+ add $tpj,$car1,$car1
+ add $j,4,$j
+ ld [$tp+8],$tpj
+ cmp $j,$i
+ add $acc1,$car1,$car1
+ ld [$np+$j],$npj
+ st $car1,[$tp]
+ srlx $car1,32,$car1
+ bl %icc,.Lsqr_inner1
+ add $tp,4,$tp
+!.Lsqr_inner1
+
+ add $j,4,$j
+ ld [$ap+$j],$apj ! ap[j]
+ mulx $npj,$mul1,$acc1
+ add $tpj,$car1,$car1
+ ld [$np+$j],$npj ! np[j]
+ add $acc0,$car1,$car1
+ ld [$tp+8],$tpj ! tp[j]
+ add $acc1,$car1,$car1
+ st $car1,[$tp]
+ srlx $car1,32,$car1
+
+ add $j,4,$j
+ cmp $j,$num
+ be,pn %icc,.Lsqr_no_inner2
+ add $tp,4,$tp
+
+.Lsqr_inner2:
+ mulx $apj,$mul0,$acc0
+ mulx $npj,$mul1,$acc1
+ add $tpj,$car1,$car1
+ add $acc0,$car0,$car0
+ ld [$ap+$j],$apj ! ap[j]
+ and $car0,$mask,$acc0
+ ld [$np+$j],$npj ! np[j]
+ srlx $car0,32,$car0
+ add $acc0,$acc0,$acc0
+ ld [$tp+8],$tpj ! tp[j]
+ or $sbit,$acc0,$acc0
+ add $j,4,$j ! j++
+ srlx $acc0,32,$sbit
+ and $acc0,$mask,$acc0
+ cmp $j,$num
+ add $acc0,$car1,$car1
+ add $acc1,$car1,$car1
+ st $car1,[$tp] ! tp[j-1]
+ srlx $car1,32,$car1
+ bl %icc,.Lsqr_inner2
+ add $tp,4,$tp ! tp++
+
+.Lsqr_no_inner2:
+ mulx $apj,$mul0,$acc0
+ mulx $npj,$mul1,$acc1
+ add $tpj,$car1,$car1
+ add $acc0,$car0,$car0
+ and $car0,$mask,$acc0
+ srlx $car0,32,$car0
+ add $acc0,$acc0,$acc0
+ or $sbit,$acc0,$acc0
+ srlx $acc0,32,$sbit
+ and $acc0,$mask,$acc0
+ add $acc0,$car1,$car1
+ add $acc1,$car1,$car1
+ st $car1,[$tp] ! tp[j-1]
+ srlx $car1,32,$car1
+
+ add $car0,$car0,$car0
+ or $sbit,$car0,$car0
+ add $car0,$car1,$car1
+ add $car2,$car1,$car1
+ st $car1,[$tp+4]
+ srlx $car1,32,$car2
+
+ add $i,4,$i ! i++
+ ld [%sp+$bias+$frame],$tmp1 ! tp[0]
+ ld [%sp+$bias+$frame+4],$tpj ! tp[1]
+ ld [$ap+$i],$mul0 ! ap[j]
+ ld [$np],$car1 ! np[0]
+ ld [$np+4],$npj ! np[1]
+ mulx $n0,$tmp1,$mul1
+ and $mul1,$mask,$mul1
+ add $i,4,$tmp0
+
+ mulx $mul0,$mul0,$car0
+ mulx $car1,$mul1,$car1
+ and $car0,$mask,$acc0
+ add $tmp1,$car1,$car1
+ srlx $car0,32,$car0
+ add %sp,$bias+$frame,$tp
+ srlx $car1,32,$car1
+ and $car0,1,$sbit
+ srlx $car0,1,$car0
+
+ cmp $tmp0,$num ! i<num-1
+ bl %icc,.Lsqr_outer
+ mov 4,$j
+
+.Lsqr_last:
+ mulx $npj,$mul1,$acc1
+ add $tpj,$car1,$car1
+ add $j,4,$j
+ ld [$tp+8],$tpj
+ cmp $j,$i
+ add $acc1,$car1,$car1
+ ld [$np+$j],$npj
+ st $car1,[$tp]
+ srlx $car1,32,$car1
+ bl %icc,.Lsqr_last
+ add $tp,4,$tp
+!.Lsqr_last
+
+ mulx $npj,$mul1,$acc1
+ add $tpj,$car1,$car1
+ add $acc0,$car1,$car1
+ add $acc1,$car1,$car1
+ st $car1,[$tp]
+ srlx $car1,32,$car1
+
+ add $car0,$car0,$car0 ! recover $car0
+ or $sbit,$car0,$car0
+ add $car0,$car1,$car1
+ add $car2,$car1,$car1
+ st $car1,[$tp+4]
+ srlx $car1,32,$car2
+
+ ba .Ltail
+ add $tp,8,$tp
+.type $fname,#function
+.size $fname,(.-$fname)
+.asciz "Montgomery Multipltication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
+.align 32
+___
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+print $code;
+close STDOUT;
diff --git a/crypto/bn/asm/sparcv9a-mont.pl b/crypto/bn/asm/sparcv9a-mont.pl
new file mode 100755
index 000000000000..a14205f2f006
--- /dev/null
+++ b/crypto/bn/asm/sparcv9a-mont.pl
@@ -0,0 +1,882 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# October 2005
+#
+# "Teaser" Montgomery multiplication module for UltraSPARC. Why FPU?
+# Because unlike integer multiplier, which simply stalls whole CPU,
+# FPU is fully pipelined and can effectively emit 48 bit partial
+# product every cycle. Why not blended SPARC v9? One can argue that
+# making this module dependent on UltraSPARC VIS extension limits its
+# binary compatibility. Well yes, it does exclude SPARC64 prior-V(!)
+# implementations from compatibility matrix. But the rest, whole Sun
+# UltraSPARC family and brand new Fujitsu's SPARC64 V, all support
+# VIS extension instructions used in this module. This is considered
+# good enough to not care about HAL SPARC64 users [if any] who have
+# integer-only pure SPARCv9 module to "fall down" to.
+
+# USI&II cores currently exhibit uniform 2x improvement [over pre-
+# bn_mul_mont codebase] for all key lengths and benchmarks. On USIII
+# performance improves few percents for shorter keys and worsens few
+# percents for longer keys. This is because USIII integer multiplier
+# is >3x faster than USI&II one, which is harder to match [but see
+# TODO list below]. It should also be noted that SPARC64 V features
+# out-of-order execution, which *might* mean that integer multiplier
+# is pipelined, which in turn *might* be impossible to match... On
+# additional note, SPARC64 V implements FP Multiply-Add instruction,
+# which is perfectly usable in this context... In other words, as far
+# as Fujitsu SPARC64 V goes, talk to the author:-)
+
+# The implementation implies following "non-natural" limitations on
+# input arguments:
+# - num may not be less than 4;
+# - num has to be even;
+# Failure to meet either condition has no fatal effects, simply
+# doesn't give any performance gain.
+
+# TODO:
+# - modulo-schedule inner loop for better performance (on in-order
+# execution core such as UltraSPARC this shall result in further
+# noticeable(!) improvement);
+# - dedicated squaring procedure[?];
+
+######################################################################
+# November 2006
+#
+# Modulo-scheduled inner loops allow to interleave floating point and
+# integer instructions and minimize Read-After-Write penalties. This
+# results in *further* 20-50% perfromance improvement [depending on
+# key length, more for longer keys] on USI&II cores and 30-80% - on
+# USIII&IV.
+
+$fname="bn_mul_mont_fpu";
+$bits=32;
+for (@ARGV) { $bits=64 if (/\-m64/ || /\-xarch\=v9/); }
+
+if ($bits==64) {
+ $bias=2047;
+ $frame=192;
+} else {
+ $bias=0;
+ $frame=128; # 96 rounded up to largest known cache-line
+}
+$locals=64;
+
+# In order to provide for 32-/64-bit ABI duality, I keep integers wider
+# than 32 bit in %g1-%g4 and %o0-%o5. %l0-%l7 and %i0-%i5 are used
+# exclusively for pointers, indexes and other small values...
+# int bn_mul_mont(
+$rp="%i0"; # BN_ULONG *rp,
+$ap="%i1"; # const BN_ULONG *ap,
+$bp="%i2"; # const BN_ULONG *bp,
+$np="%i3"; # const BN_ULONG *np,
+$n0="%i4"; # const BN_ULONG *n0,
+$num="%i5"; # int num);
+
+$tp="%l0"; # t[num]
+$ap_l="%l1"; # a[num],n[num] are smashed to 32-bit words and saved
+$ap_h="%l2"; # to these four vectors as double-precision FP values.
+$np_l="%l3"; # This way a bunch of fxtods are eliminated in second
+$np_h="%l4"; # loop and L1-cache aliasing is minimized...
+$i="%l5";
+$j="%l6";
+$mask="%l7"; # 16-bit mask, 0xffff
+
+$n0="%g4"; # reassigned(!) to "64-bit" register
+$carry="%i4"; # %i4 reused(!) for a carry bit
+
+# FP register naming chart
+#
+# ..HILO
+# dcba
+# --------
+# LOa
+# LOb
+# LOc
+# LOd
+# HIa
+# HIb
+# HIc
+# HId
+# ..a
+# ..b
+$ba="%f0"; $bb="%f2"; $bc="%f4"; $bd="%f6";
+$na="%f8"; $nb="%f10"; $nc="%f12"; $nd="%f14";
+$alo="%f16"; $alo_="%f17"; $ahi="%f18"; $ahi_="%f19";
+$nlo="%f20"; $nlo_="%f21"; $nhi="%f22"; $nhi_="%f23";
+
+$dota="%f24"; $dotb="%f26";
+
+$aloa="%f32"; $alob="%f34"; $aloc="%f36"; $alod="%f38";
+$ahia="%f40"; $ahib="%f42"; $ahic="%f44"; $ahid="%f46";
+$nloa="%f48"; $nlob="%f50"; $nloc="%f52"; $nlod="%f54";
+$nhia="%f56"; $nhib="%f58"; $nhic="%f60"; $nhid="%f62";
+
+$ASI_FL16_P=0xD2; # magic ASI value to engage 16-bit FP load
+
+$code=<<___;
+.section ".text",#alloc,#execinstr
+
+.global $fname
+.align 32
+$fname:
+ save %sp,-$frame-$locals,%sp
+
+ cmp $num,4
+ bl,a,pn %icc,.Lret
+ clr %i0
+ andcc $num,1,%g0 ! $num has to be even...
+ bnz,a,pn %icc,.Lret
+ clr %i0 ! signal "unsupported input value"
+
+ srl $num,1,$num
+ sethi %hi(0xffff),$mask
+ ld [%i4+0],$n0 ! $n0 reassigned, remember?
+ or $mask,%lo(0xffff),$mask
+ ld [%i4+4],%o0
+ sllx %o0,32,%o0
+ or %o0,$n0,$n0 ! $n0=n0[1].n0[0]
+
+ sll $num,3,$num ! num*=8
+
+ add %sp,$bias,%o0 ! real top of stack
+ sll $num,2,%o1
+ add %o1,$num,%o1 ! %o1=num*5
+ sub %o0,%o1,%o0
+ and %o0,-2048,%o0 ! optimize TLB utilization
+ sub %o0,$bias,%sp ! alloca(5*num*8)
+
+ rd %asi,%o7 ! save %asi
+ add %sp,$bias+$frame+$locals,$tp
+ add $tp,$num,$ap_l
+ add $ap_l,$num,$ap_l ! [an]p_[lh] point at the vectors' ends !
+ add $ap_l,$num,$ap_h
+ add $ap_h,$num,$np_l
+ add $np_l,$num,$np_h
+
+ wr %g0,$ASI_FL16_P,%asi ! setup %asi for 16-bit FP loads
+
+ add $rp,$num,$rp ! readjust input pointers to point
+ add $ap,$num,$ap ! at the ends too...
+ add $bp,$num,$bp
+ add $np,$num,$np
+
+ stx %o7,[%sp+$bias+$frame+48] ! save %asi
+
+ sub %g0,$num,$i ! i=-num
+ sub %g0,$num,$j ! j=-num
+
+ add $ap,$j,%o3
+ add $bp,$i,%o4
+
+ ld [%o3+4],%g1 ! bp[0]
+ ld [%o3+0],%o0
+ ld [%o4+4],%g5 ! ap[0]
+ sllx %g1,32,%g1
+ ld [%o4+0],%o1
+ sllx %g5,32,%g5
+ or %g1,%o0,%o0
+ or %g5,%o1,%o1
+
+ add $np,$j,%o5
+
+ mulx %o1,%o0,%o0 ! ap[0]*bp[0]
+ mulx $n0,%o0,%o0 ! ap[0]*bp[0]*n0
+ stx %o0,[%sp+$bias+$frame+0]
+
+ ld [%o3+0],$alo_ ! load a[j] as pair of 32-bit words
+ fzeros $alo
+ ld [%o3+4],$ahi_
+ fzeros $ahi
+ ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
+ fzeros $nlo
+ ld [%o5+4],$nhi_
+ fzeros $nhi
+
+ ! transfer b[i] to FPU as 4x16-bit values
+ ldda [%o4+2]%asi,$ba
+ fxtod $alo,$alo
+ ldda [%o4+0]%asi,$bb
+ fxtod $ahi,$ahi
+ ldda [%o4+6]%asi,$bc
+ fxtod $nlo,$nlo
+ ldda [%o4+4]%asi,$bd
+ fxtod $nhi,$nhi
+
+ ! transfer ap[0]*b[0]*n0 to FPU as 4x16-bit values
+ ldda [%sp+$bias+$frame+6]%asi,$na
+ fxtod $ba,$ba
+ ldda [%sp+$bias+$frame+4]%asi,$nb
+ fxtod $bb,$bb
+ ldda [%sp+$bias+$frame+2]%asi,$nc
+ fxtod $bc,$bc
+ ldda [%sp+$bias+$frame+0]%asi,$nd
+ fxtod $bd,$bd
+
+ std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
+ fxtod $na,$na
+ std $ahi,[$ap_h+$j]
+ fxtod $nb,$nb
+ std $nlo,[$np_l+$j] ! save smashed np[j] in double format
+ fxtod $nc,$nc
+ std $nhi,[$np_h+$j]
+ fxtod $nd,$nd
+
+ fmuld $alo,$ba,$aloa
+ fmuld $nlo,$na,$nloa
+ fmuld $alo,$bb,$alob
+ fmuld $nlo,$nb,$nlob
+ fmuld $alo,$bc,$aloc
+ faddd $aloa,$nloa,$nloa
+ fmuld $nlo,$nc,$nloc
+ fmuld $alo,$bd,$alod
+ faddd $alob,$nlob,$nlob
+ fmuld $nlo,$nd,$nlod
+ fmuld $ahi,$ba,$ahia
+ faddd $aloc,$nloc,$nloc
+ fmuld $nhi,$na,$nhia
+ fmuld $ahi,$bb,$ahib
+ faddd $alod,$nlod,$nlod
+ fmuld $nhi,$nb,$nhib
+ fmuld $ahi,$bc,$ahic
+ faddd $ahia,$nhia,$nhia
+ fmuld $nhi,$nc,$nhic
+ fmuld $ahi,$bd,$ahid
+ faddd $ahib,$nhib,$nhib
+ fmuld $nhi,$nd,$nhid
+
+ faddd $ahic,$nhic,$dota ! $nhic
+ faddd $ahid,$nhid,$dotb ! $nhid
+
+ faddd $nloc,$nhia,$nloc
+ faddd $nlod,$nhib,$nlod
+
+ fdtox $nloa,$nloa
+ fdtox $nlob,$nlob
+ fdtox $nloc,$nloc
+ fdtox $nlod,$nlod
+
+ std $nloa,[%sp+$bias+$frame+0]
+ add $j,8,$j
+ std $nlob,[%sp+$bias+$frame+8]
+ add $ap,$j,%o4
+ std $nloc,[%sp+$bias+$frame+16]
+ add $np,$j,%o5
+ std $nlod,[%sp+$bias+$frame+24]
+
+ ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words
+ fzeros $alo
+ ld [%o4+4],$ahi_
+ fzeros $ahi
+ ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
+ fzeros $nlo
+ ld [%o5+4],$nhi_
+ fzeros $nhi
+
+ fxtod $alo,$alo
+ fxtod $ahi,$ahi
+ fxtod $nlo,$nlo
+ fxtod $nhi,$nhi
+
+ ldx [%sp+$bias+$frame+0],%o0
+ fmuld $alo,$ba,$aloa
+ ldx [%sp+$bias+$frame+8],%o1
+ fmuld $nlo,$na,$nloa
+ ldx [%sp+$bias+$frame+16],%o2
+ fmuld $alo,$bb,$alob
+ ldx [%sp+$bias+$frame+24],%o3
+ fmuld $nlo,$nb,$nlob
+
+ srlx %o0,16,%o7
+ std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
+ fmuld $alo,$bc,$aloc
+ add %o7,%o1,%o1
+ std $ahi,[$ap_h+$j]
+ faddd $aloa,$nloa,$nloa
+ fmuld $nlo,$nc,$nloc
+ srlx %o1,16,%o7
+ std $nlo,[$np_l+$j] ! save smashed np[j] in double format
+ fmuld $alo,$bd,$alod
+ add %o7,%o2,%o2
+ std $nhi,[$np_h+$j]
+ faddd $alob,$nlob,$nlob
+ fmuld $nlo,$nd,$nlod
+ srlx %o2,16,%o7
+ fmuld $ahi,$ba,$ahia
+ add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+ faddd $aloc,$nloc,$nloc
+ fmuld $nhi,$na,$nhia
+ !and %o0,$mask,%o0
+ !and %o1,$mask,%o1
+ !and %o2,$mask,%o2
+ !sllx %o1,16,%o1
+ !sllx %o2,32,%o2
+ !sllx %o3,48,%o7
+ !or %o1,%o0,%o0
+ !or %o2,%o0,%o0
+ !or %o7,%o0,%o0 ! 64-bit result
+ srlx %o3,16,%g1 ! 34-bit carry
+ fmuld $ahi,$bb,$ahib
+
+ faddd $alod,$nlod,$nlod
+ fmuld $nhi,$nb,$nhib
+ fmuld $ahi,$bc,$ahic
+ faddd $ahia,$nhia,$nhia
+ fmuld $nhi,$nc,$nhic
+ fmuld $ahi,$bd,$ahid
+ faddd $ahib,$nhib,$nhib
+ fmuld $nhi,$nd,$nhid
+
+ faddd $dota,$nloa,$nloa
+ faddd $dotb,$nlob,$nlob
+ faddd $ahic,$nhic,$dota ! $nhic
+ faddd $ahid,$nhid,$dotb ! $nhid
+
+ faddd $nloc,$nhia,$nloc
+ faddd $nlod,$nhib,$nlod
+
+ fdtox $nloa,$nloa
+ fdtox $nlob,$nlob
+ fdtox $nloc,$nloc
+ fdtox $nlod,$nlod
+
+ std $nloa,[%sp+$bias+$frame+0]
+ std $nlob,[%sp+$bias+$frame+8]
+ addcc $j,8,$j
+ std $nloc,[%sp+$bias+$frame+16]
+ bz,pn %icc,.L1stskip
+ std $nlod,[%sp+$bias+$frame+24]
+
+.align 32 ! incidentally already aligned !
+.L1st:
+ add $ap,$j,%o4
+ add $np,$j,%o5
+ ld [%o4+0],$alo_ ! load a[j] as pair of 32-bit words
+ fzeros $alo
+ ld [%o4+4],$ahi_
+ fzeros $ahi
+ ld [%o5+0],$nlo_ ! load n[j] as pair of 32-bit words
+ fzeros $nlo
+ ld [%o5+4],$nhi_
+ fzeros $nhi
+
+ fxtod $alo,$alo
+ fxtod $ahi,$ahi
+ fxtod $nlo,$nlo
+ fxtod $nhi,$nhi
+
+ ldx [%sp+$bias+$frame+0],%o0
+ fmuld $alo,$ba,$aloa
+ ldx [%sp+$bias+$frame+8],%o1
+ fmuld $nlo,$na,$nloa
+ ldx [%sp+$bias+$frame+16],%o2
+ fmuld $alo,$bb,$alob
+ ldx [%sp+$bias+$frame+24],%o3
+ fmuld $nlo,$nb,$nlob
+
+ srlx %o0,16,%o7
+ std $alo,[$ap_l+$j] ! save smashed ap[j] in double format
+ fmuld $alo,$bc,$aloc
+ add %o7,%o1,%o1
+ std $ahi,[$ap_h+$j]
+ faddd $aloa,$nloa,$nloa
+ fmuld $nlo,$nc,$nloc
+ srlx %o1,16,%o7
+ std $nlo,[$np_l+$j] ! save smashed np[j] in double format
+ fmuld $alo,$bd,$alod
+ add %o7,%o2,%o2
+ std $nhi,[$np_h+$j]
+ faddd $alob,$nlob,$nlob
+ fmuld $nlo,$nd,$nlod
+ srlx %o2,16,%o7
+ fmuld $ahi,$ba,$ahia
+ add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+ and %o0,$mask,%o0
+ faddd $aloc,$nloc,$nloc
+ fmuld $nhi,$na,$nhia
+ and %o1,$mask,%o1
+ and %o2,$mask,%o2
+ fmuld $ahi,$bb,$ahib
+ sllx %o1,16,%o1
+ faddd $alod,$nlod,$nlod
+ fmuld $nhi,$nb,$nhib
+ sllx %o2,32,%o2
+ fmuld $ahi,$bc,$ahic
+ sllx %o3,48,%o7
+ or %o1,%o0,%o0
+ faddd $ahia,$nhia,$nhia
+ fmuld $nhi,$nc,$nhic
+ or %o2,%o0,%o0
+ fmuld $ahi,$bd,$ahid
+ or %o7,%o0,%o0 ! 64-bit result
+ faddd $ahib,$nhib,$nhib
+ fmuld $nhi,$nd,$nhid
+ addcc %g1,%o0,%o0
+ faddd $dota,$nloa,$nloa
+ srlx %o3,16,%g1 ! 34-bit carry
+ faddd $dotb,$nlob,$nlob
+ bcs,a %xcc,.+8
+ add %g1,1,%g1
+
+ stx %o0,[$tp] ! tp[j-1]=
+
+ faddd $ahic,$nhic,$dota ! $nhic
+ faddd $ahid,$nhid,$dotb ! $nhid
+
+ faddd $nloc,$nhia,$nloc
+ faddd $nlod,$nhib,$nlod
+
+ fdtox $nloa,$nloa
+ fdtox $nlob,$nlob
+ fdtox $nloc,$nloc
+ fdtox $nlod,$nlod
+
+ std $nloa,[%sp+$bias+$frame+0]
+ std $nlob,[%sp+$bias+$frame+8]
+ std $nloc,[%sp+$bias+$frame+16]
+ std $nlod,[%sp+$bias+$frame+24]
+
+ addcc $j,8,$j
+ bnz,pt %icc,.L1st
+ add $tp,8,$tp
+
+.L1stskip:
+ fdtox $dota,$dota
+ fdtox $dotb,$dotb
+
+ ldx [%sp+$bias+$frame+0],%o0
+ ldx [%sp+$bias+$frame+8],%o1
+ ldx [%sp+$bias+$frame+16],%o2
+ ldx [%sp+$bias+$frame+24],%o3
+
+ srlx %o0,16,%o7
+ std $dota,[%sp+$bias+$frame+32]
+ add %o7,%o1,%o1
+ std $dotb,[%sp+$bias+$frame+40]
+ srlx %o1,16,%o7
+ add %o7,%o2,%o2
+ srlx %o2,16,%o7
+ add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+ and %o0,$mask,%o0
+ and %o1,$mask,%o1
+ and %o2,$mask,%o2
+ sllx %o1,16,%o1
+ sllx %o2,32,%o2
+ sllx %o3,48,%o7
+ or %o1,%o0,%o0
+ or %o2,%o0,%o0
+ or %o7,%o0,%o0 ! 64-bit result
+ ldx [%sp+$bias+$frame+32],%o4
+ addcc %g1,%o0,%o0
+ ldx [%sp+$bias+$frame+40],%o5
+ srlx %o3,16,%g1 ! 34-bit carry
+ bcs,a %xcc,.+8
+ add %g1,1,%g1
+
+ stx %o0,[$tp] ! tp[j-1]=
+ add $tp,8,$tp
+
+ srlx %o4,16,%o7
+ add %o7,%o5,%o5
+ and %o4,$mask,%o4
+ sllx %o5,16,%o7
+ or %o7,%o4,%o4
+ addcc %g1,%o4,%o4
+ srlx %o5,48,%g1
+ bcs,a %xcc,.+8
+ add %g1,1,%g1
+
+ mov %g1,$carry
+ stx %o4,[$tp] ! tp[num-1]=
+
+ ba .Louter
+ add $i,8,$i
+.align 32
+.Louter:
+ sub %g0,$num,$j ! j=-num
+ add %sp,$bias+$frame+$locals,$tp
+
+ add $ap,$j,%o3
+ add $bp,$i,%o4
+
+ ld [%o3+4],%g1 ! bp[i]
+ ld [%o3+0],%o0
+ ld [%o4+4],%g5 ! ap[0]
+ sllx %g1,32,%g1
+ ld [%o4+0],%o1
+ sllx %g5,32,%g5
+ or %g1,%o0,%o0
+ or %g5,%o1,%o1
+
+ ldx [$tp],%o2 ! tp[0]
+ mulx %o1,%o0,%o0
+ addcc %o2,%o0,%o0
+ mulx $n0,%o0,%o0 ! (ap[0]*bp[i]+t[0])*n0
+ stx %o0,[%sp+$bias+$frame+0]
+
+ ! transfer b[i] to FPU as 4x16-bit values
+ ldda [%o4+2]%asi,$ba
+ ldda [%o4+0]%asi,$bb
+ ldda [%o4+6]%asi,$bc
+ ldda [%o4+4]%asi,$bd
+
+ ! transfer (ap[0]*b[i]+t[0])*n0 to FPU as 4x16-bit values
+ ldda [%sp+$bias+$frame+6]%asi,$na
+ fxtod $ba,$ba
+ ldda [%sp+$bias+$frame+4]%asi,$nb
+ fxtod $bb,$bb
+ ldda [%sp+$bias+$frame+2]%asi,$nc
+ fxtod $bc,$bc
+ ldda [%sp+$bias+$frame+0]%asi,$nd
+ fxtod $bd,$bd
+ ldd [$ap_l+$j],$alo ! load a[j] in double format
+ fxtod $na,$na
+ ldd [$ap_h+$j],$ahi
+ fxtod $nb,$nb
+ ldd [$np_l+$j],$nlo ! load n[j] in double format
+ fxtod $nc,$nc
+ ldd [$np_h+$j],$nhi
+ fxtod $nd,$nd
+
+ fmuld $alo,$ba,$aloa
+ fmuld $nlo,$na,$nloa
+ fmuld $alo,$bb,$alob
+ fmuld $nlo,$nb,$nlob
+ fmuld $alo,$bc,$aloc
+ faddd $aloa,$nloa,$nloa
+ fmuld $nlo,$nc,$nloc
+ fmuld $alo,$bd,$alod
+ faddd $alob,$nlob,$nlob
+ fmuld $nlo,$nd,$nlod
+ fmuld $ahi,$ba,$ahia
+ faddd $aloc,$nloc,$nloc
+ fmuld $nhi,$na,$nhia
+ fmuld $ahi,$bb,$ahib
+ faddd $alod,$nlod,$nlod
+ fmuld $nhi,$nb,$nhib
+ fmuld $ahi,$bc,$ahic
+ faddd $ahia,$nhia,$nhia
+ fmuld $nhi,$nc,$nhic
+ fmuld $ahi,$bd,$ahid
+ faddd $ahib,$nhib,$nhib
+ fmuld $nhi,$nd,$nhid
+
+ faddd $ahic,$nhic,$dota ! $nhic
+ faddd $ahid,$nhid,$dotb ! $nhid
+
+ faddd $nloc,$nhia,$nloc
+ faddd $nlod,$nhib,$nlod
+
+ fdtox $nloa,$nloa
+ fdtox $nlob,$nlob
+ fdtox $nloc,$nloc
+ fdtox $nlod,$nlod
+
+ std $nloa,[%sp+$bias+$frame+0]
+ std $nlob,[%sp+$bias+$frame+8]
+ std $nloc,[%sp+$bias+$frame+16]
+ add $j,8,$j
+ std $nlod,[%sp+$bias+$frame+24]
+
+ ldd [$ap_l+$j],$alo ! load a[j] in double format
+ ldd [$ap_h+$j],$ahi
+ ldd [$np_l+$j],$nlo ! load n[j] in double format
+ ldd [$np_h+$j],$nhi
+
+ fmuld $alo,$ba,$aloa
+ fmuld $nlo,$na,$nloa
+ fmuld $alo,$bb,$alob
+ fmuld $nlo,$nb,$nlob
+ fmuld $alo,$bc,$aloc
+ ldx [%sp+$bias+$frame+0],%o0
+ faddd $aloa,$nloa,$nloa
+ fmuld $nlo,$nc,$nloc
+ ldx [%sp+$bias+$frame+8],%o1
+ fmuld $alo,$bd,$alod
+ ldx [%sp+$bias+$frame+16],%o2
+ faddd $alob,$nlob,$nlob
+ fmuld $nlo,$nd,$nlod
+ ldx [%sp+$bias+$frame+24],%o3
+ fmuld $ahi,$ba,$ahia
+
+ srlx %o0,16,%o7
+ faddd $aloc,$nloc,$nloc
+ fmuld $nhi,$na,$nhia
+ add %o7,%o1,%o1
+ fmuld $ahi,$bb,$ahib
+ srlx %o1,16,%o7
+ faddd $alod,$nlod,$nlod
+ fmuld $nhi,$nb,$nhib
+ add %o7,%o2,%o2
+ fmuld $ahi,$bc,$ahic
+ srlx %o2,16,%o7
+ faddd $ahia,$nhia,$nhia
+ fmuld $nhi,$nc,$nhic
+ add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+ ! why?
+ and %o0,$mask,%o0
+ fmuld $ahi,$bd,$ahid
+ and %o1,$mask,%o1
+ and %o2,$mask,%o2
+ faddd $ahib,$nhib,$nhib
+ fmuld $nhi,$nd,$nhid
+ sllx %o1,16,%o1
+ faddd $dota,$nloa,$nloa
+ sllx %o2,32,%o2
+ faddd $dotb,$nlob,$nlob
+ sllx %o3,48,%o7
+ or %o1,%o0,%o0
+ faddd $ahic,$nhic,$dota ! $nhic
+ or %o2,%o0,%o0
+ faddd $ahid,$nhid,$dotb ! $nhid
+ or %o7,%o0,%o0 ! 64-bit result
+ ldx [$tp],%o7
+ faddd $nloc,$nhia,$nloc
+ addcc %o7,%o0,%o0
+ ! end-of-why?
+ faddd $nlod,$nhib,$nlod
+ srlx %o3,16,%g1 ! 34-bit carry
+ fdtox $nloa,$nloa
+ bcs,a %xcc,.+8
+ add %g1,1,%g1
+
+ fdtox $nlob,$nlob
+ fdtox $nloc,$nloc
+ fdtox $nlod,$nlod
+
+ std $nloa,[%sp+$bias+$frame+0]
+ std $nlob,[%sp+$bias+$frame+8]
+ addcc $j,8,$j
+ std $nloc,[%sp+$bias+$frame+16]
+ bz,pn %icc,.Linnerskip
+ std $nlod,[%sp+$bias+$frame+24]
+
+ ba .Linner
+ nop
+.align 32
+.Linner:
+ ldd [$ap_l+$j],$alo ! load a[j] in double format
+ ldd [$ap_h+$j],$ahi
+ ldd [$np_l+$j],$nlo ! load n[j] in double format
+ ldd [$np_h+$j],$nhi
+
+ fmuld $alo,$ba,$aloa
+ fmuld $nlo,$na,$nloa
+ fmuld $alo,$bb,$alob
+ fmuld $nlo,$nb,$nlob
+ fmuld $alo,$bc,$aloc
+ ldx [%sp+$bias+$frame+0],%o0
+ faddd $aloa,$nloa,$nloa
+ fmuld $nlo,$nc,$nloc
+ ldx [%sp+$bias+$frame+8],%o1
+ fmuld $alo,$bd,$alod
+ ldx [%sp+$bias+$frame+16],%o2
+ faddd $alob,$nlob,$nlob
+ fmuld $nlo,$nd,$nlod
+ ldx [%sp+$bias+$frame+24],%o3
+ fmuld $ahi,$ba,$ahia
+
+ srlx %o0,16,%o7
+ faddd $aloc,$nloc,$nloc
+ fmuld $nhi,$na,$nhia
+ add %o7,%o1,%o1
+ fmuld $ahi,$bb,$ahib
+ srlx %o1,16,%o7
+ faddd $alod,$nlod,$nlod
+ fmuld $nhi,$nb,$nhib
+ add %o7,%o2,%o2
+ fmuld $ahi,$bc,$ahic
+ srlx %o2,16,%o7
+ faddd $ahia,$nhia,$nhia
+ fmuld $nhi,$nc,$nhic
+ add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+ and %o0,$mask,%o0
+ fmuld $ahi,$bd,$ahid
+ and %o1,$mask,%o1
+ and %o2,$mask,%o2
+ faddd $ahib,$nhib,$nhib
+ fmuld $nhi,$nd,$nhid
+ sllx %o1,16,%o1
+ faddd $dota,$nloa,$nloa
+ sllx %o2,32,%o2
+ faddd $dotb,$nlob,$nlob
+ sllx %o3,48,%o7
+ or %o1,%o0,%o0
+ faddd $ahic,$nhic,$dota ! $nhic
+ or %o2,%o0,%o0
+ faddd $ahid,$nhid,$dotb ! $nhid
+ or %o7,%o0,%o0 ! 64-bit result
+ faddd $nloc,$nhia,$nloc
+ addcc %g1,%o0,%o0
+ ldx [$tp+8],%o7 ! tp[j]
+ faddd $nlod,$nhib,$nlod
+ srlx %o3,16,%g1 ! 34-bit carry
+ fdtox $nloa,$nloa
+ bcs,a %xcc,.+8
+ add %g1,1,%g1
+ fdtox $nlob,$nlob
+ addcc %o7,%o0,%o0
+ fdtox $nloc,$nloc
+ bcs,a %xcc,.+8
+ add %g1,1,%g1
+
+ stx %o0,[$tp] ! tp[j-1]
+ fdtox $nlod,$nlod
+
+ std $nloa,[%sp+$bias+$frame+0]
+ std $nlob,[%sp+$bias+$frame+8]
+ std $nloc,[%sp+$bias+$frame+16]
+ addcc $j,8,$j
+ std $nlod,[%sp+$bias+$frame+24]
+ bnz,pt %icc,.Linner
+ add $tp,8,$tp
+
+.Linnerskip:
+ fdtox $dota,$dota
+ fdtox $dotb,$dotb
+
+ ldx [%sp+$bias+$frame+0],%o0
+ ldx [%sp+$bias+$frame+8],%o1
+ ldx [%sp+$bias+$frame+16],%o2
+ ldx [%sp+$bias+$frame+24],%o3
+
+ srlx %o0,16,%o7
+ std $dota,[%sp+$bias+$frame+32]
+ add %o7,%o1,%o1
+ std $dotb,[%sp+$bias+$frame+40]
+ srlx %o1,16,%o7
+ add %o7,%o2,%o2
+ srlx %o2,16,%o7
+ add %o7,%o3,%o3 ! %o3.%o2[0..15].%o1[0..15].%o0[0..15]
+ and %o0,$mask,%o0
+ and %o1,$mask,%o1
+ and %o2,$mask,%o2
+ sllx %o1,16,%o1
+ sllx %o2,32,%o2
+ sllx %o3,48,%o7
+ or %o1,%o0,%o0
+ or %o2,%o0,%o0
+ ldx [%sp+$bias+$frame+32],%o4
+ or %o7,%o0,%o0 ! 64-bit result
+ ldx [%sp+$bias+$frame+40],%o5
+ addcc %g1,%o0,%o0
+ ldx [$tp+8],%o7 ! tp[j]
+ srlx %o3,16,%g1 ! 34-bit carry
+ bcs,a %xcc,.+8
+ add %g1,1,%g1
+
+ addcc %o7,%o0,%o0
+ bcs,a %xcc,.+8
+ add %g1,1,%g1
+
+ stx %o0,[$tp] ! tp[j-1]
+ add $tp,8,$tp
+
+ srlx %o4,16,%o7
+ add %o7,%o5,%o5
+ and %o4,$mask,%o4
+ sllx %o5,16,%o7
+ or %o7,%o4,%o4
+ addcc %g1,%o4,%o4
+ srlx %o5,48,%g1
+ bcs,a %xcc,.+8
+ add %g1,1,%g1
+
+ addcc $carry,%o4,%o4
+ stx %o4,[$tp] ! tp[num-1]
+ mov %g1,$carry
+ bcs,a %xcc,.+8
+ add $carry,1,$carry
+
+ addcc $i,8,$i
+ bnz %icc,.Louter
+ nop
+
+ add $tp,8,$tp ! adjust tp to point at the end
+ orn %g0,%g0,%g4
+ sub %g0,$num,%o7 ! n=-num
+ ba .Lsub
+ subcc %g0,%g0,%g0 ! clear %icc.c
+
+.align 32
+.Lsub:
+ ldx [$tp+%o7],%o0
+ add $np,%o7,%g1
+ ld [%g1+0],%o2
+ ld [%g1+4],%o3
+ srlx %o0,32,%o1
+ subccc %o0,%o2,%o2
+ add $rp,%o7,%g1
+ subccc %o1,%o3,%o3
+ st %o2,[%g1+0]
+ add %o7,8,%o7
+ brnz,pt %o7,.Lsub
+ st %o3,[%g1+4]
+ subc $carry,0,%g4
+ sub %g0,$num,%o7 ! n=-num
+ ba .Lcopy
+ nop
+
+.align 32
+.Lcopy:
+ ldx [$tp+%o7],%o0
+ add $rp,%o7,%g1
+ ld [%g1+0],%o2
+ ld [%g1+4],%o3
+ stx %g0,[$tp+%o7]
+ and %o0,%g4,%o0
+ srlx %o0,32,%o1
+ andn %o2,%g4,%o2
+ andn %o3,%g4,%o3
+ or %o2,%o0,%o0
+ or %o3,%o1,%o1
+ st %o0,[%g1+0]
+ add %o7,8,%o7
+ brnz,pt %o7,.Lcopy
+ st %o1,[%g1+4]
+ sub %g0,$num,%o7 ! n=-num
+
+.Lzap:
+ stx %g0,[$ap_l+%o7]
+ stx %g0,[$ap_h+%o7]
+ stx %g0,[$np_l+%o7]
+ stx %g0,[$np_h+%o7]
+ add %o7,8,%o7
+ brnz,pt %o7,.Lzap
+ nop
+
+ ldx [%sp+$bias+$frame+48],%o7
+ wr %g0,%o7,%asi ! restore %asi
+
+ mov 1,%i0
+.Lret:
+ ret
+ restore
+.type $fname,#function
+.size $fname,(.-$fname)
+.asciz "Montgomery Multipltication for UltraSPARC, CRYPTOGAMS by <appro\@openssl.org>"
+.align 32
+___
+
+$code =~ s/\`([^\`]*)\`/eval($1)/gem;
+
+# Below substitution makes it possible to compile without demanding
+# VIS extentions on command line, e.g. -xarch=v9 vs. -xarch=v9a. I
+# dare to do this, because VIS capability is detected at run-time now
+# and this routine is not called on CPU not capable to execute it. Do
+# note that fzeros is not the only VIS dependency! Another dependency
+# is implicit and is just _a_ numerical value loaded to %asi register,
+# which assembler can't recognize as VIS specific...
+$code =~ s/fzeros\s+%f([0-9]+)/
+ sprintf(".word\t0x%x\t! fzeros %%f%d",0x81b00c20|($1<<25),$1)
+ /gem;
+
+print $code;
+# flush
+close STDOUT;
diff --git a/crypto/bn/asm/via-mont.pl b/crypto/bn/asm/via-mont.pl
new file mode 100755
index 000000000000..c046a514c873
--- /dev/null
+++ b/crypto/bn/asm/via-mont.pl
@@ -0,0 +1,242 @@
+#!/usr/bin/env perl
+#
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+#
+# Wrapper around 'rep montmul', VIA-specific instruction accessing
+# PadLock Montgomery Multiplier. The wrapper is designed as drop-in
+# replacement for OpenSSL bn_mul_mont [first implemented in 0.9.9].
+#
+# Below are interleaved outputs from 'openssl speed rsa dsa' for 4
+# different software configurations on 1.5GHz VIA Esther processor.
+# Lines marked with "software integer" denote performance of hand-
+# coded integer-only assembler found in OpenSSL 0.9.7. "Software SSE2"
+# refers to hand-coded SSE2 Montgomery multiplication procedure found
+# OpenSSL 0.9.9. "Hardware VIA SDK" refers to padlock_pmm routine from
+# Padlock SDK 2.0.1 available for download from VIA, which naturally
+# utilizes the magic 'repz montmul' instruction. And finally "hardware
+# this" refers to *this* implementation which also uses 'repz montmul'
+#
+# sign verify sign/s verify/s
+# rsa 512 bits 0.001720s 0.000140s 581.4 7149.7 software integer
+# rsa 512 bits 0.000690s 0.000086s 1450.3 11606.0 software SSE2
+# rsa 512 bits 0.006136s 0.000201s 163.0 4974.5 hardware VIA SDK
+# rsa 512 bits 0.000712s 0.000050s 1404.9 19858.5 hardware this
+#
+# rsa 1024 bits 0.008518s 0.000413s 117.4 2420.8 software integer
+# rsa 1024 bits 0.004275s 0.000277s 233.9 3609.7 software SSE2
+# rsa 1024 bits 0.012136s 0.000260s 82.4 3844.5 hardware VIA SDK
+# rsa 1024 bits 0.002522s 0.000116s 396.5 8650.9 hardware this
+#
+# rsa 2048 bits 0.050101s 0.001371s 20.0 729.6 software integer
+# rsa 2048 bits 0.030273s 0.001008s 33.0 991.9 software SSE2
+# rsa 2048 bits 0.030833s 0.000976s 32.4 1025.1 hardware VIA SDK
+# rsa 2048 bits 0.011879s 0.000342s 84.2 2921.7 hardware this
+#
+# rsa 4096 bits 0.327097s 0.004859s 3.1 205.8 software integer
+# rsa 4096 bits 0.229318s 0.003859s 4.4 259.2 software SSE2
+# rsa 4096 bits 0.233953s 0.003274s 4.3 305.4 hardware VIA SDK
+# rsa 4096 bits 0.070493s 0.001166s 14.2 857.6 hardware this
+#
+# dsa 512 bits 0.001342s 0.001651s 745.2 605.7 software integer
+# dsa 512 bits 0.000844s 0.000987s 1185.3 1013.1 software SSE2
+# dsa 512 bits 0.001902s 0.002247s 525.6 444.9 hardware VIA SDK
+# dsa 512 bits 0.000458s 0.000524s 2182.2 1909.1 hardware this
+#
+# dsa 1024 bits 0.003964s 0.004926s 252.3 203.0 software integer
+# dsa 1024 bits 0.002686s 0.003166s 372.3 315.8 software SSE2
+# dsa 1024 bits 0.002397s 0.002823s 417.1 354.3 hardware VIA SDK
+# dsa 1024 bits 0.000978s 0.001170s 1022.2 855.0 hardware this
+#
+# dsa 2048 bits 0.013280s 0.016518s 75.3 60.5 software integer
+# dsa 2048 bits 0.009911s 0.011522s 100.9 86.8 software SSE2
+# dsa 2048 bits 0.009542s 0.011763s 104.8 85.0 hardware VIA SDK
+# dsa 2048 bits 0.002884s 0.003352s 346.8 298.3 hardware this
+#
+# To give you some other reference point here is output for 2.4GHz P4
+# running hand-coded SSE2 bn_mul_mont found in 0.9.9, i.e. "software
+# SSE2" in above terms.
+#
+# rsa 512 bits 0.000407s 0.000047s 2454.2 21137.0
+# rsa 1024 bits 0.002426s 0.000141s 412.1 7100.0
+# rsa 2048 bits 0.015046s 0.000491s 66.5 2034.9
+# rsa 4096 bits 0.109770s 0.002379s 9.1 420.3
+# dsa 512 bits 0.000438s 0.000525s 2281.1 1904.1
+# dsa 1024 bits 0.001346s 0.001595s 742.7 627.0
+# dsa 2048 bits 0.004745s 0.005582s 210.7 179.1
+#
+# Conclusions:
+# - VIA SDK leaves a *lot* of room for improvement (which this
+# implementation successfully fills:-);
+# - 'rep montmul' gives up to >3x performance improvement depending on
+# key length;
+# - in terms of absolute performance it delivers approximately as much
+# as modern out-of-order 32-bit cores [again, for longer keys].
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "x86asm.pl";
+
+&asm_init($ARGV[0],"via-mont.pl");
+
+# int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num);
+$func="bn_mul_mont_padlock";
+
+$pad=16*1; # amount of reserved bytes on top of every vector
+
+# stack layout
+$mZeroPrime=&DWP(0,"esp"); # these are specified by VIA
+$A=&DWP(4,"esp");
+$B=&DWP(8,"esp");
+$T=&DWP(12,"esp");
+$M=&DWP(16,"esp");
+$scratch=&DWP(20,"esp");
+$rp=&DWP(24,"esp"); # these are mine
+$sp=&DWP(28,"esp");
+# &DWP(32,"esp") # 32 byte scratch area
+# &DWP(64+(4*$num+$pad)*0,"esp") # padded tp[num]
+# &DWP(64+(4*$num+$pad)*1,"esp") # padded copy of ap[num]
+# &DWP(64+(4*$num+$pad)*2,"esp") # padded copy of bp[num]
+# &DWP(64+(4*$num+$pad)*3,"esp") # padded copy of np[num]
+# Note that SDK suggests to unconditionally allocate 2K per vector. This
+# has quite an impact on performance. It naturally depends on key length,
+# but to give an example 1024 bit private RSA key operations suffer >30%
+# penalty. I allocate only as much as actually required...
+
+&function_begin($func);
+ &xor ("eax","eax");
+ &mov ("ecx",&wparam(5)); # num
+ # meet VIA's limitations for num [note that the specification
+ # expresses them in bits, while we work with amount of 32-bit words]
+ &test ("ecx",3);
+ &jnz (&label("leave")); # num % 4 != 0
+ &cmp ("ecx",8);
+ &jb (&label("leave")); # num < 8
+ &cmp ("ecx",1024);
+ &ja (&label("leave")); # num > 1024
+
+ &pushf ();
+ &cld ();
+
+ &mov ("edi",&wparam(0)); # rp
+ &mov ("eax",&wparam(1)); # ap
+ &mov ("ebx",&wparam(2)); # bp
+ &mov ("edx",&wparam(3)); # np
+ &mov ("esi",&wparam(4)); # n0
+ &mov ("esi",&DWP(0,"esi")); # *n0
+
+ &lea ("ecx",&DWP($pad,"","ecx",4)); # ecx becomes vector size in bytes
+ &lea ("ebp",&DWP(64,"","ecx",4)); # allocate 4 vectors + 64 bytes
+ &neg ("ebp");
+ &add ("ebp","esp");
+ &and ("ebp",-64); # align to cache-line
+ &xchg ("ebp","esp"); # alloca
+
+ &mov ($rp,"edi"); # save rp
+ &mov ($sp,"ebp"); # save esp
+
+ &mov ($mZeroPrime,"esi");
+ &lea ("esi",&DWP(64,"esp")); # tp
+ &mov ($T,"esi");
+ &lea ("edi",&DWP(32,"esp")); # scratch area
+ &mov ($scratch,"edi");
+ &mov ("esi","eax");
+
+ &lea ("ebp",&DWP(-$pad,"ecx"));
+ &shr ("ebp",2); # restore original num value in ebp
+
+ &xor ("eax","eax");
+
+ &mov ("ecx","ebp");
+ &lea ("ecx",&DWP((32+$pad)/4,"ecx"));# padded tp + scratch
+ &data_byte(0xf3,0xab); # rep stosl, bzero
+
+ &mov ("ecx","ebp");
+ &lea ("edi",&DWP(64+$pad,"esp","ecx",4));# pointer to ap copy
+ &mov ($A,"edi");
+ &data_byte(0xf3,0xa5); # rep movsl, memcpy
+ &mov ("ecx",$pad/4);
+ &data_byte(0xf3,0xab); # rep stosl, bzero pad
+ # edi points at the end of padded ap copy...
+
+ &mov ("ecx","ebp");
+ &mov ("esi","ebx");
+ &mov ($B,"edi");
+ &data_byte(0xf3,0xa5); # rep movsl, memcpy
+ &mov ("ecx",$pad/4);
+ &data_byte(0xf3,0xab); # rep stosl, bzero pad
+ # edi points at the end of padded bp copy...
+
+ &mov ("ecx","ebp");
+ &mov ("esi","edx");
+ &mov ($M,"edi");
+ &data_byte(0xf3,0xa5); # rep movsl, memcpy
+ &mov ("ecx",$pad/4);
+ &data_byte(0xf3,0xab); # rep stosl, bzero pad
+ # edi points at the end of padded np copy...
+
+ # let magic happen...
+ &mov ("ecx","ebp");
+ &mov ("esi","esp");
+ &shl ("ecx",5); # convert word counter to bit counter
+ &align (4);
+ &data_byte(0xf3,0x0f,0xa6,0xc0);# rep montmul
+
+ &mov ("ecx","ebp");
+ &lea ("esi",&DWP(64,"esp")); # tp
+ # edi still points at the end of padded np copy...
+ &neg ("ebp");
+ &lea ("ebp",&DWP(-$pad,"edi","ebp",4)); # so just "rewind"
+ &mov ("edi",$rp); # restore rp
+ &xor ("edx","edx"); # i=0 and clear CF
+
+&set_label("sub",8);
+ &mov ("eax",&DWP(0,"esi","edx",4));
+ &sbb ("eax",&DWP(0,"ebp","edx",4));
+ &mov (&DWP(0,"edi","edx",4),"eax"); # rp[i]=tp[i]-np[i]
+ &lea ("edx",&DWP(1,"edx")); # i++
+ &loop (&label("sub")); # doesn't affect CF!
+
+ &mov ("eax",&DWP(0,"esi","edx",4)); # upmost overflow bit
+ &sbb ("eax",0);
+ &and ("esi","eax");
+ &not ("eax");
+ &mov ("ebp","edi");
+ &and ("ebp","eax");
+ &or ("esi","ebp"); # tp=carry?tp:rp
+
+ &mov ("ecx","edx"); # num
+ &xor ("edx","edx"); # i=0
+
+&set_label("copy",8);
+ &mov ("eax",&DWP(0,"esi","edx",4));
+ &mov (&DWP(64,"esp","edx",4),"ecx"); # zap tp
+ &mov (&DWP(0,"edi","edx",4),"eax");
+ &lea ("edx",&DWP(1,"edx")); # i++
+ &loop (&label("copy"));
+
+ &mov ("ebp",$sp);
+ &xor ("eax","eax");
+
+ &mov ("ecx",64/4);
+ &mov ("edi","esp"); # zap frame including scratch area
+ &data_byte(0xf3,0xab); # rep stosl, bzero
+
+ # zap copies of ap, bp and np
+ &lea ("edi",&DWP(64+$pad,"esp","edx",4));# pointer to ap
+ &lea ("ecx",&DWP(3*$pad/4,"edx","edx",2));
+ &data_byte(0xf3,0xab); # rep stosl, bzero
+
+ &mov ("esp","ebp");
+ &inc ("eax"); # signal "done"
+ &popf ();
+&set_label("leave");
+&function_end($func);
+
+&asciz("Padlock Montgomery Multiplication, CRYPTOGAMS by <appro\@openssl.org>");
+
+&asm_finish();
diff --git a/crypto/bn/asm/x86-mont.pl b/crypto/bn/asm/x86-mont.pl
new file mode 100755
index 000000000000..5cd3cd2ed50a
--- /dev/null
+++ b/crypto/bn/asm/x86-mont.pl
@@ -0,0 +1,591 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see http://www.openssl.org/~appro/cryptogams/.
+# ====================================================================
+
+# October 2005
+#
+# This is a "teaser" code, as it can be improved in several ways...
+# First of all non-SSE2 path should be implemented (yes, for now it
+# performs Montgomery multiplication/convolution only on SSE2-capable
+# CPUs such as P4, others fall down to original code). Then inner loop
+# can be unrolled and modulo-scheduled to improve ILP and possibly
+# moved to 128-bit XMM register bank (though it would require input
+# rearrangement and/or increase bus bandwidth utilization). Dedicated
+# squaring procedure should give further performance improvement...
+# Yet, for being draft, the code improves rsa512 *sign* benchmark by
+# 110%(!), rsa1024 one - by 70% and rsa4096 - by 20%:-)
+
+# December 2006
+#
+# Modulo-scheduling SSE2 loops results in further 15-20% improvement.
+# Integer-only code [being equipped with dedicated squaring procedure]
+# gives ~40% on rsa512 sign benchmark...
+
+$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
+push(@INC,"${dir}","${dir}../../perlasm");
+require "x86asm.pl";
+
+&asm_init($ARGV[0],$0);
+
+$sse2=0;
+for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
+
+&external_label("OPENSSL_ia32cap_P") if ($sse2);
+
+&function_begin("bn_mul_mont");
+
+$i="edx";
+$j="ecx";
+$ap="esi"; $tp="esi"; # overlapping variables!!!
+$rp="edi"; $bp="edi"; # overlapping variables!!!
+$np="ebp";
+$num="ebx";
+
+$_num=&DWP(4*0,"esp"); # stack top layout
+$_rp=&DWP(4*1,"esp");
+$_ap=&DWP(4*2,"esp");
+$_bp=&DWP(4*3,"esp");
+$_np=&DWP(4*4,"esp");
+$_n0=&DWP(4*5,"esp"); $_n0q=&QWP(4*5,"esp");
+$_sp=&DWP(4*6,"esp");
+$_bpend=&DWP(4*7,"esp");
+$frame=32; # size of above frame rounded up to 16n
+
+ &xor ("eax","eax");
+ &mov ("edi",&wparam(5)); # int num
+ &cmp ("edi",4);
+ &jl (&label("just_leave"));
+
+ &lea ("esi",&wparam(0)); # put aside pointer to argument block
+ &lea ("edx",&wparam(1)); # load ap
+ &mov ("ebp","esp"); # saved stack pointer!
+ &add ("edi",2); # extra two words on top of tp
+ &neg ("edi");
+ &lea ("esp",&DWP(-$frame,"esp","edi",4)); # alloca($frame+4*(num+2))
+ &neg ("edi");
+
+ # minimize cache contention by arraning 2K window between stack
+ # pointer and ap argument [np is also position sensitive vector,
+ # but it's assumed to be near ap, as it's allocated at ~same
+ # time].
+ &mov ("eax","esp");
+ &sub ("eax","edx");
+ &and ("eax",2047);
+ &sub ("esp","eax"); # this aligns sp and ap modulo 2048
+
+ &xor ("edx","esp");
+ &and ("edx",2048);
+ &xor ("edx",2048);
+ &sub ("esp","edx"); # this splits them apart modulo 4096
+
+ &and ("esp",-64); # align to cache line
+
+ ################################# load argument block...
+ &mov ("eax",&DWP(0*4,"esi"));# BN_ULONG *rp
+ &mov ("ebx",&DWP(1*4,"esi"));# const BN_ULONG *ap
+ &mov ("ecx",&DWP(2*4,"esi"));# const BN_ULONG *bp
+ &mov ("edx",&DWP(3*4,"esi"));# const BN_ULONG *np
+ &mov ("esi",&DWP(4*4,"esi"));# const BN_ULONG *n0
+ #&mov ("edi",&DWP(5*4,"esi"));# int num
+
+ &mov ("esi",&DWP(0,"esi")); # pull n0[0]
+ &mov ($_rp,"eax"); # ... save a copy of argument block
+ &mov ($_ap,"ebx");
+ &mov ($_bp,"ecx");
+ &mov ($_np,"edx");
+ &mov ($_n0,"esi");
+ &lea ($num,&DWP(-3,"edi")); # num=num-1 to assist modulo-scheduling
+ #&mov ($_num,$num); # redundant as $num is not reused
+ &mov ($_sp,"ebp"); # saved stack pointer!
+
+if($sse2) {
+$acc0="mm0"; # mmx register bank layout
+$acc1="mm1";
+$car0="mm2";
+$car1="mm3";
+$mul0="mm4";
+$mul1="mm5";
+$temp="mm6";
+$mask="mm7";
+
+ &picmeup("eax","OPENSSL_ia32cap_P");
+ &bt (&DWP(0,"eax"),26);
+ &jnc (&label("non_sse2"));
+
+ &mov ("eax",-1);
+ &movd ($mask,"eax"); # mask 32 lower bits
+
+ &mov ($ap,$_ap); # load input pointers
+ &mov ($bp,$_bp);
+ &mov ($np,$_np);
+
+ &xor ($i,$i); # i=0
+ &xor ($j,$j); # j=0
+
+ &movd ($mul0,&DWP(0,$bp)); # bp[0]
+ &movd ($mul1,&DWP(0,$ap)); # ap[0]
+ &movd ($car1,&DWP(0,$np)); # np[0]
+
+ &pmuludq($mul1,$mul0); # ap[0]*bp[0]
+ &movq ($car0,$mul1);
+ &movq ($acc0,$mul1); # I wish movd worked for
+ &pand ($acc0,$mask); # inter-register transfers
+
+ &pmuludq($mul1,$_n0q); # *=n0
+
+ &pmuludq($car1,$mul1); # "t[0]"*np[0]*n0
+ &paddq ($car1,$acc0);
+
+ &movd ($acc1,&DWP(4,$np)); # np[1]
+ &movd ($acc0,&DWP(4,$ap)); # ap[1]
+
+ &psrlq ($car0,32);
+ &psrlq ($car1,32);
+
+ &inc ($j); # j++
+&set_label("1st",16);
+ &pmuludq($acc0,$mul0); # ap[j]*bp[0]
+ &pmuludq($acc1,$mul1); # np[j]*m1
+ &paddq ($car0,$acc0); # +=c0
+ &paddq ($car1,$acc1); # +=c1
+
+ &movq ($acc0,$car0);
+ &pand ($acc0,$mask);
+ &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
+ &paddq ($car1,$acc0); # +=ap[j]*bp[0];
+ &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
+ &psrlq ($car0,32);
+ &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[j-1]=
+ &psrlq ($car1,32);
+
+ &lea ($j,&DWP(1,$j));
+ &cmp ($j,$num);
+ &jl (&label("1st"));
+
+ &pmuludq($acc0,$mul0); # ap[num-1]*bp[0]
+ &pmuludq($acc1,$mul1); # np[num-1]*m1
+ &paddq ($car0,$acc0); # +=c0
+ &paddq ($car1,$acc1); # +=c1
+
+ &movq ($acc0,$car0);
+ &pand ($acc0,$mask);
+ &paddq ($car1,$acc0); # +=ap[num-1]*bp[0];
+ &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
+
+ &psrlq ($car0,32);
+ &psrlq ($car1,32);
+
+ &paddq ($car1,$car0);
+ &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
+
+ &inc ($i); # i++
+&set_label("outer");
+ &xor ($j,$j); # j=0
+
+ &movd ($mul0,&DWP(0,$bp,$i,4)); # bp[i]
+ &movd ($mul1,&DWP(0,$ap)); # ap[0]
+ &movd ($temp,&DWP($frame,"esp")); # tp[0]
+ &movd ($car1,&DWP(0,$np)); # np[0]
+ &pmuludq($mul1,$mul0); # ap[0]*bp[i]
+
+ &paddq ($mul1,$temp); # +=tp[0]
+ &movq ($acc0,$mul1);
+ &movq ($car0,$mul1);
+ &pand ($acc0,$mask);
+
+ &pmuludq($mul1,$_n0q); # *=n0
+
+ &pmuludq($car1,$mul1);
+ &paddq ($car1,$acc0);
+
+ &movd ($temp,&DWP($frame+4,"esp")); # tp[1]
+ &movd ($acc1,&DWP(4,$np)); # np[1]
+ &movd ($acc0,&DWP(4,$ap)); # ap[1]
+
+ &psrlq ($car0,32);
+ &psrlq ($car1,32);
+ &paddq ($car0,$temp); # +=tp[1]
+
+ &inc ($j); # j++
+ &dec ($num);
+&set_label("inner");
+ &pmuludq($acc0,$mul0); # ap[j]*bp[i]
+ &pmuludq($acc1,$mul1); # np[j]*m1
+ &paddq ($car0,$acc0); # +=c0
+ &paddq ($car1,$acc1); # +=c1
+
+ &movq ($acc0,$car0);
+ &movd ($temp,&DWP($frame+4,"esp",$j,4));# tp[j+1]
+ &pand ($acc0,$mask);
+ &movd ($acc1,&DWP(4,$np,$j,4)); # np[j+1]
+ &paddq ($car1,$acc0); # +=ap[j]*bp[i]+tp[j]
+ &movd ($acc0,&DWP(4,$ap,$j,4)); # ap[j+1]
+ &psrlq ($car0,32);
+ &movd (&DWP($frame-4,"esp",$j,4),$car1);# tp[j-1]=
+ &psrlq ($car1,32);
+ &paddq ($car0,$temp); # +=tp[j+1]
+
+ &dec ($num);
+ &lea ($j,&DWP(1,$j)); # j++
+ &jnz (&label("inner"));
+
+ &mov ($num,$j);
+ &pmuludq($acc0,$mul0); # ap[num-1]*bp[i]
+ &pmuludq($acc1,$mul1); # np[num-1]*m1
+ &paddq ($car0,$acc0); # +=c0
+ &paddq ($car1,$acc1); # +=c1
+
+ &movq ($acc0,$car0);
+ &pand ($acc0,$mask);
+ &paddq ($car1,$acc0); # +=ap[num-1]*bp[i]+tp[num-1]
+ &movd (&DWP($frame-4,"esp",$j,4),$car1); # tp[num-2]=
+ &psrlq ($car0,32);
+ &psrlq ($car1,32);
+
+ &movd ($temp,&DWP($frame+4,"esp",$num,4)); # += tp[num]
+ &paddq ($car1,$car0);
+ &paddq ($car1,$temp);
+ &movq (&QWP($frame,"esp",$num,4),$car1); # tp[num].tp[num-1]
+
+ &lea ($i,&DWP(1,$i)); # i++
+ &cmp ($i,$num);
+ &jle (&label("outer"));
+
+ &emms (); # done with mmx bank
+ &jmp (&label("common_tail"));
+
+&set_label("non_sse2",16);
+}
+
+if (0) {
+ &mov ("esp",$_sp);
+ &xor ("eax","eax"); # signal "not fast enough [yet]"
+ &jmp (&label("just_leave"));
+ # While the below code provides competitive performance for
+ # all key lengthes on modern Intel cores, it's still more
+ # than 10% slower for 4096-bit key elsewhere:-( "Competitive"
+ # means compared to the original integer-only assembler.
+ # 512-bit RSA sign is better by ~40%, but that's about all
+ # one can say about all CPUs...
+} else {
+$inp="esi"; # integer path uses these registers differently
+$word="edi";
+$carry="ebp";
+
+ &mov ($inp,$_ap);
+ &lea ($carry,&DWP(1,$num));
+ &mov ($word,$_bp);
+ &xor ($j,$j); # j=0
+ &mov ("edx",$inp);
+ &and ($carry,1); # see if num is even
+ &sub ("edx",$word); # see if ap==bp
+ &lea ("eax",&DWP(4,$word,$num,4)); # &bp[num]
+ &or ($carry,"edx");
+ &mov ($word,&DWP(0,$word)); # bp[0]
+ &jz (&label("bn_sqr_mont"));
+ &mov ($_bpend,"eax");
+ &mov ("eax",&DWP(0,$inp));
+ &xor ("edx","edx");
+
+&set_label("mull",16);
+ &mov ($carry,"edx");
+ &mul ($word); # ap[j]*bp[0]
+ &add ($carry,"eax");
+ &lea ($j,&DWP(1,$j));
+ &adc ("edx",0);
+ &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
+ &cmp ($j,$num);
+ &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
+ &jl (&label("mull"));
+
+ &mov ($carry,"edx");
+ &mul ($word); # ap[num-1]*bp[0]
+ &mov ($word,$_n0);
+ &add ("eax",$carry);
+ &mov ($inp,$_np);
+ &adc ("edx",0);
+ &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
+
+ &mov (&DWP($frame,"esp",$num,4),"eax"); # tp[num-1]=
+ &xor ($j,$j);
+ &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
+ &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
+
+ &mov ("eax",&DWP(0,$inp)); # np[0]
+ &mul ($word); # np[0]*m
+ &add ("eax",&DWP($frame,"esp")); # +=tp[0]
+ &mov ("eax",&DWP(4,$inp)); # np[1]
+ &adc ("edx",0);
+ &inc ($j);
+
+ &jmp (&label("2ndmadd"));
+
+&set_label("1stmadd",16);
+ &mov ($carry,"edx");
+ &mul ($word); # ap[j]*bp[i]
+ &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
+ &lea ($j,&DWP(1,$j));
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j+1]
+ &adc ("edx",0);
+ &cmp ($j,$num);
+ &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
+ &jl (&label("1stmadd"));
+
+ &mov ($carry,"edx");
+ &mul ($word); # ap[num-1]*bp[i]
+ &add ("eax",&DWP($frame,"esp",$num,4)); # +=tp[num-1]
+ &mov ($word,$_n0);
+ &adc ("edx",0);
+ &mov ($inp,$_np);
+ &add ($carry,"eax");
+ &adc ("edx",0);
+ &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
+
+ &xor ($j,$j);
+ &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
+ &mov (&DWP($frame,"esp",$num,4),$carry); # tp[num-1]=
+ &adc ($j,0);
+ &mov ("eax",&DWP(0,$inp)); # np[0]
+ &mov (&DWP($frame+4,"esp",$num,4),"edx"); # tp[num]=
+ &mov (&DWP($frame+8,"esp",$num,4),$j); # tp[num+1]=
+
+ &mul ($word); # np[0]*m
+ &add ("eax",&DWP($frame,"esp")); # +=tp[0]
+ &mov ("eax",&DWP(4,$inp)); # np[1]
+ &adc ("edx",0);
+ &mov ($j,1);
+
+&set_label("2ndmadd",16);
+ &mov ($carry,"edx");
+ &mul ($word); # np[j]*m
+ &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
+ &lea ($j,&DWP(1,$j));
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+1]
+ &adc ("edx",0);
+ &cmp ($j,$num);
+ &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j-1]=
+ &jl (&label("2ndmadd"));
+
+ &mov ($carry,"edx");
+ &mul ($word); # np[j]*m
+ &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &adc ("edx",0);
+ &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
+
+ &xor ("eax","eax");
+ &mov ($j,$_bp); # &bp[i]
+ &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
+ &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
+ &lea ($j,&DWP(4,$j));
+ &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
+ &cmp ($j,$_bpend);
+ &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
+ &je (&label("common_tail"));
+
+ &mov ($word,&DWP(0,$j)); # bp[i+1]
+ &mov ($inp,$_ap);
+ &mov ($_bp,$j); # &bp[++i]
+ &xor ($j,$j);
+ &xor ("edx","edx");
+ &mov ("eax",&DWP(0,$inp));
+ &jmp (&label("1stmadd"));
+
+&set_label("bn_sqr_mont",16);
+$sbit=$num;
+ &mov ($_num,$num);
+ &mov ($_bp,$j); # i=0
+
+ &mov ("eax",$word); # ap[0]
+ &mul ($word); # ap[0]*ap[0]
+ &mov (&DWP($frame,"esp"),"eax"); # tp[0]=
+ &mov ($sbit,"edx");
+ &shr ("edx",1);
+ &and ($sbit,1);
+ &inc ($j);
+&set_label("sqr",16);
+ &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
+ &mov ($carry,"edx");
+ &mul ($word); # ap[j]*ap[0]
+ &add ("eax",$carry);
+ &lea ($j,&DWP(1,$j));
+ &adc ("edx",0);
+ &lea ($carry,&DWP(0,$sbit,"eax",2));
+ &shr ("eax",31);
+ &cmp ($j,$_num);
+ &mov ($sbit,"eax");
+ &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
+ &jl (&label("sqr"));
+
+ &mov ("eax",&DWP(0,$inp,$j,4)); # ap[num-1]
+ &mov ($carry,"edx");
+ &mul ($word); # ap[num-1]*ap[0]
+ &add ("eax",$carry);
+ &mov ($word,$_n0);
+ &adc ("edx",0);
+ &mov ($inp,$_np);
+ &lea ($carry,&DWP(0,$sbit,"eax",2));
+ &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
+ &shr ("eax",31);
+ &mov (&DWP($frame,"esp",$j,4),$carry); # tp[num-1]=
+
+ &lea ($carry,&DWP(0,"eax","edx",2));
+ &mov ("eax",&DWP(0,$inp)); # np[0]
+ &shr ("edx",31);
+ &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num]=
+ &mov (&DWP($frame+8,"esp",$j,4),"edx"); # tp[num+1]=
+
+ &mul ($word); # np[0]*m
+ &add ("eax",&DWP($frame,"esp")); # +=tp[0]
+ &mov ($num,$j);
+ &adc ("edx",0);
+ &mov ("eax",&DWP(4,$inp)); # np[1]
+ &mov ($j,1);
+
+&set_label("3rdmadd",16);
+ &mov ($carry,"edx");
+ &mul ($word); # np[j]*m
+ &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &mov ("eax",&DWP(4,$inp,$j,4)); # np[j+1]
+ &adc ("edx",0);
+ &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j-1]=
+
+ &mov ($carry,"edx");
+ &mul ($word); # np[j+1]*m
+ &add ($carry,&DWP($frame+4,"esp",$j,4)); # +=tp[j+1]
+ &lea ($j,&DWP(2,$j));
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &mov ("eax",&DWP(0,$inp,$j,4)); # np[j+2]
+ &adc ("edx",0);
+ &cmp ($j,$num);
+ &mov (&DWP($frame-8,"esp",$j,4),$carry); # tp[j]=
+ &jl (&label("3rdmadd"));
+
+ &mov ($carry,"edx");
+ &mul ($word); # np[j]*m
+ &add ($carry,&DWP($frame,"esp",$num,4)); # +=tp[num-1]
+ &adc ("edx",0);
+ &add ($carry,"eax");
+ &adc ("edx",0);
+ &mov (&DWP($frame-4,"esp",$num,4),$carry); # tp[num-2]=
+
+ &mov ($j,$_bp); # i
+ &xor ("eax","eax");
+ &mov ($inp,$_ap);
+ &add ("edx",&DWP($frame+4,"esp",$num,4)); # carry+=tp[num]
+ &adc ("eax",&DWP($frame+8,"esp",$num,4)); # +=tp[num+1]
+ &mov (&DWP($frame,"esp",$num,4),"edx"); # tp[num-1]=
+ &cmp ($j,$num);
+ &mov (&DWP($frame+4,"esp",$num,4),"eax"); # tp[num]=
+ &je (&label("common_tail"));
+
+ &mov ($word,&DWP(4,$inp,$j,4)); # ap[i]
+ &lea ($j,&DWP(1,$j));
+ &mov ("eax",$word);
+ &mov ($_bp,$j); # ++i
+ &mul ($word); # ap[i]*ap[i]
+ &add ("eax",&DWP($frame,"esp",$j,4)); # +=tp[i]
+ &adc ("edx",0);
+ &mov (&DWP($frame,"esp",$j,4),"eax"); # tp[i]=
+ &xor ($carry,$carry);
+ &cmp ($j,$num);
+ &lea ($j,&DWP(1,$j));
+ &je (&label("sqrlast"));
+
+ &mov ($sbit,"edx"); # zaps $num
+ &shr ("edx",1);
+ &and ($sbit,1);
+&set_label("sqradd",16);
+ &mov ("eax",&DWP(0,$inp,$j,4)); # ap[j]
+ &mov ($carry,"edx");
+ &mul ($word); # ap[j]*ap[i]
+ &add ("eax",$carry);
+ &lea ($carry,&DWP(0,"eax","eax"));
+ &adc ("edx",0);
+ &shr ("eax",31);
+ &add ($carry,&DWP($frame,"esp",$j,4)); # +=tp[j]
+ &lea ($j,&DWP(1,$j));
+ &adc ("eax",0);
+ &add ($carry,$sbit);
+ &adc ("eax",0);
+ &cmp ($j,$_num);
+ &mov (&DWP($frame-4,"esp",$j,4),$carry); # tp[j]=
+ &mov ($sbit,"eax");
+ &jle (&label("sqradd"));
+
+ &mov ($carry,"edx");
+ &lea ("edx",&DWP(0,$sbit,"edx",2));
+ &shr ($carry,31);
+&set_label("sqrlast");
+ &mov ($word,$_n0);
+ &mov ($inp,$_np);
+ &imul ($word,&DWP($frame,"esp")); # n0*tp[0]
+
+ &add ("edx",&DWP($frame,"esp",$j,4)); # +=tp[num]
+ &mov ("eax",&DWP(0,$inp)); # np[0]
+ &adc ($carry,0);
+ &mov (&DWP($frame,"esp",$j,4),"edx"); # tp[num]=
+ &mov (&DWP($frame+4,"esp",$j,4),$carry); # tp[num+1]=
+
+ &mul ($word); # np[0]*m
+ &add ("eax",&DWP($frame,"esp")); # +=tp[0]
+ &lea ($num,&DWP(-1,$j));
+ &adc ("edx",0);
+ &mov ($j,1);
+ &mov ("eax",&DWP(4,$inp)); # np[1]
+
+ &jmp (&label("3rdmadd"));
+}
+
+&set_label("common_tail",16);
+ &mov ($np,$_np); # load modulus pointer
+ &mov ($rp,$_rp); # load result pointer
+ &lea ($tp,&DWP($frame,"esp")); # [$ap and $bp are zapped]
+
+ &mov ("eax",&DWP(0,$tp)); # tp[0]
+ &mov ($j,$num); # j=num-1
+ &xor ($i,$i); # i=0 and clear CF!
+
+&set_label("sub",16);
+ &sbb ("eax",&DWP(0,$np,$i,4));
+ &mov (&DWP(0,$rp,$i,4),"eax"); # rp[i]=tp[i]-np[i]
+ &dec ($j); # doesn't affect CF!
+ &mov ("eax",&DWP(4,$tp,$i,4)); # tp[i+1]
+ &lea ($i,&DWP(1,$i)); # i++
+ &jge (&label("sub"));
+
+ &sbb ("eax",0); # handle upmost overflow bit
+ &and ($tp,"eax");
+ &not ("eax");
+ &mov ($np,$rp);
+ &and ($np,"eax");
+ &or ($tp,$np); # tp=carry?tp:rp
+
+&set_label("copy",16); # copy or in-place refresh
+ &mov ("eax",&DWP(0,$tp,$num,4));
+ &mov (&DWP(0,$rp,$num,4),"eax"); # rp[i]=tp[i]
+ &mov (&DWP($frame,"esp",$num,4),$j); # zap temporary vector
+ &dec ($num);
+ &jge (&label("copy"));
+
+ &mov ("esp",$_sp); # pull saved stack pointer
+ &mov ("eax",1);
+&set_label("just_leave");
+&function_end("bn_mul_mont");
+
+&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
+
+&asm_finish();
diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h
index 6d754d554776..f1719a5877f7 100644
--- a/crypto/bn/bn.h
+++ b/crypto/bn/bn.h
@@ -408,8 +408,8 @@ BIGNUM *BN_CTX_get(BN_CTX *ctx);
void BN_CTX_end(BN_CTX *ctx);
int BN_rand(BIGNUM *rnd, int bits, int top,int bottom);
int BN_pseudo_rand(BIGNUM *rnd, int bits, int top,int bottom);
-int BN_rand_range(BIGNUM *rnd, BIGNUM *range);
-int BN_pseudo_rand_range(BIGNUM *rnd, BIGNUM *range);
+int BN_rand_range(BIGNUM *rnd, const BIGNUM *range);
+int BN_pseudo_rand_range(BIGNUM *rnd, const BIGNUM *range);
int BN_num_bits(const BIGNUM *a);
int BN_num_bits_word(BN_ULONG);
BIGNUM *BN_new(void);
@@ -531,6 +531,17 @@ int BN_is_prime_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx, BN_GENCB *cb);
int BN_is_prime_fasttest_ex(const BIGNUM *p,int nchecks, BN_CTX *ctx,
int do_trial_division, BN_GENCB *cb);
+int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx);
+
+int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
+ const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2,
+ const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb);
+int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
+ BIGNUM *Xp1, BIGNUM *Xp2,
+ const BIGNUM *Xp,
+ const BIGNUM *e, BN_CTX *ctx,
+ BN_GENCB *cb);
+
BN_MONT_CTX *BN_MONT_CTX_new(void );
void BN_MONT_CTX_init(BN_MONT_CTX *ctx);
int BN_mod_mul_montgomery(BIGNUM *r,const BIGNUM *a,const BIGNUM *b,
diff --git a/crypto/bn/bn_lib.c b/crypto/bn/bn_lib.c
index 2649b8c53851..32a8fbaf51ee 100644
--- a/crypto/bn/bn_lib.c
+++ b/crypto/bn/bn_lib.c
@@ -139,25 +139,6 @@ const BIGNUM *BN_value_one(void)
return(&const_one);
}
-char *BN_options(void)
- {
- static int init=0;
- static char data[16];
-
- if (!init)
- {
- init++;
-#ifdef BN_LLONG
- BIO_snprintf(data,sizeof data,"bn(%d,%d)",
- (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8);
-#else
- BIO_snprintf(data,sizeof data,"bn(%d,%d)",
- (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8);
-#endif
- }
- return(data);
- }
-
int BN_num_bits_word(BN_ULONG l)
{
static const char bits[256]={
diff --git a/crypto/bn/bn_nist.c b/crypto/bn/bn_nist.c
index 1fc94f55c32c..2ca5b0139111 100644
--- a/crypto/bn/bn_nist.c
+++ b/crypto/bn/bn_nist.c
@@ -66,46 +66,157 @@
#define BN_NIST_384_TOP (384+BN_BITS2-1)/BN_BITS2
#define BN_NIST_521_TOP (521+BN_BITS2-1)/BN_BITS2
+/* pre-computed tables are "carry-less" values of modulus*(i+1) */
#if BN_BITS2 == 64
-static const BN_ULONG _nist_p_192[] =
- {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFEULL,
- 0xFFFFFFFFFFFFFFFFULL};
-static const BN_ULONG _nist_p_224[] =
+static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
+ {0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFFULL},
+ {0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFFULL},
+ {0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFCULL,0xFFFFFFFFFFFFFFFFULL}
+ };
+static const BN_ULONG _nist_p_192_sqr[] = {
+ 0x0000000000000001ULL,0x0000000000000002ULL,0x0000000000000001ULL,
+ 0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFDULL,0xFFFFFFFFFFFFFFFFULL
+ };
+static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
{0x0000000000000001ULL,0xFFFFFFFF00000000ULL,
- 0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL};
-static const BN_ULONG _nist_p_256[] =
+ 0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL},
+ {0x0000000000000002ULL,0xFFFFFFFE00000000ULL,
+ 0xFFFFFFFFFFFFFFFFULL,0x00000001FFFFFFFFULL} /* this one is "carry-full" */
+ };
+static const BN_ULONG _nist_p_224_sqr[] = {
+ 0x0000000000000001ULL,0xFFFFFFFE00000000ULL,
+ 0xFFFFFFFFFFFFFFFFULL,0x0000000200000000ULL,
+ 0x0000000000000000ULL,0xFFFFFFFFFFFFFFFEULL,
+ 0xFFFFFFFFFFFFFFFFULL
+ };
+static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
{0xFFFFFFFFFFFFFFFFULL,0x00000000FFFFFFFFULL,
- 0x0000000000000000ULL,0xFFFFFFFF00000001ULL};
-static const BN_ULONG _nist_p_384[] =
- {0x00000000FFFFFFFFULL,0xFFFFFFFF00000000ULL,
- 0xFFFFFFFFFFFFFFFEULL,0xFFFFFFFFFFFFFFFFULL,
- 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL};
+ 0x0000000000000000ULL,0xFFFFFFFF00000001ULL},
+ {0xFFFFFFFFFFFFFFFEULL,0x00000001FFFFFFFFULL,
+ 0x0000000000000000ULL,0xFFFFFFFE00000002ULL},
+ {0xFFFFFFFFFFFFFFFDULL,0x00000002FFFFFFFFULL,
+ 0x0000000000000000ULL,0xFFFFFFFD00000003ULL},
+ {0xFFFFFFFFFFFFFFFCULL,0x00000003FFFFFFFFULL,
+ 0x0000000000000000ULL,0xFFFFFFFC00000004ULL},
+ {0xFFFFFFFFFFFFFFFBULL,0x00000004FFFFFFFFULL,
+ 0x0000000000000000ULL,0xFFFFFFFB00000005ULL},
+ };
+static const BN_ULONG _nist_p_256_sqr[] = {
+ 0x0000000000000001ULL,0xFFFFFFFE00000000ULL,
+ 0xFFFFFFFFFFFFFFFFULL,0x00000001FFFFFFFEULL,
+ 0x00000001FFFFFFFEULL,0x00000001FFFFFFFEULL,
+ 0xFFFFFFFE00000001ULL,0xFFFFFFFE00000002ULL
+ };
+static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
+ {0x00000000FFFFFFFFULL,0xFFFFFFFF00000000ULL,0xFFFFFFFFFFFFFFFEULL,
+ 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
+ {0x00000001FFFFFFFEULL,0xFFFFFFFE00000000ULL,0xFFFFFFFFFFFFFFFDULL,
+ 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
+ {0x00000002FFFFFFFDULL,0xFFFFFFFD00000000ULL,0xFFFFFFFFFFFFFFFCULL,
+ 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
+ {0x00000003FFFFFFFCULL,0xFFFFFFFC00000000ULL,0xFFFFFFFFFFFFFFFBULL,
+ 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
+ {0x00000004FFFFFFFBULL,0xFFFFFFFB00000000ULL,0xFFFFFFFFFFFFFFFAULL,
+ 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL},
+ };
+static const BN_ULONG _nist_p_384_sqr[] = {
+ 0xFFFFFFFE00000001ULL,0x0000000200000000ULL,0xFFFFFFFE00000000ULL,
+ 0x0000000200000000ULL,0x0000000000000001ULL,0x0000000000000000ULL,
+ 0x00000001FFFFFFFEULL,0xFFFFFFFE00000000ULL,0xFFFFFFFFFFFFFFFDULL,
+ 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL
+ };
static const BN_ULONG _nist_p_521[] =
{0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
0x00000000000001FFULL};
+static const BN_ULONG _nist_p_521_sqr[] = {
+ 0x0000000000000001ULL,0x0000000000000000ULL,0x0000000000000000ULL,
+ 0x0000000000000000ULL,0x0000000000000000ULL,0x0000000000000000ULL,
+ 0x0000000000000000ULL,0x0000000000000000ULL,0xFFFFFFFFFFFFFC00ULL,
+ 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
+ 0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,0xFFFFFFFFFFFFFFFFULL,
+ 0xFFFFFFFFFFFFFFFFULL,0x000000000003FFFFULL
+ };
#elif BN_BITS2 == 32
-static const BN_ULONG _nist_p_192[] = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE,
- 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF};
-static const BN_ULONG _nist_p_224[] = {0x00000001,0x00000000,0x00000000,
- 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF};
-static const BN_ULONG _nist_p_256[] = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
- 0x00000000,0x00000000,0x00000000,0x00000001,0xFFFFFFFF};
-static const BN_ULONG _nist_p_384[] = {0xFFFFFFFF,0x00000000,0x00000000,
- 0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
- 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF};
+static const BN_ULONG _nist_p_192[][BN_NIST_192_TOP] = {
+ {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
+ {0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
+ {0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFC,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}
+ };
+static const BN_ULONG _nist_p_192_sqr[] = {
+ 0x00000001,0x00000000,0x00000002,0x00000000,0x00000001,0x00000000,
+ 0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF
+ };
+static const BN_ULONG _nist_p_224[][BN_NIST_224_TOP] = {
+ {0x00000001,0x00000000,0x00000000,0xFFFFFFFF,
+ 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
+ {0x00000002,0x00000000,0x00000000,0xFFFFFFFE,
+ 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF}
+ };
+static const BN_ULONG _nist_p_224_sqr[] = {
+ 0x00000001,0x00000000,0x00000000,0xFFFFFFFE,
+ 0xFFFFFFFF,0xFFFFFFFF,0x00000000,0x00000002,
+ 0x00000000,0x00000000,0xFFFFFFFE,0xFFFFFFFF,
+ 0xFFFFFFFF,0xFFFFFFFF
+ };
+static const BN_ULONG _nist_p_256[][BN_NIST_256_TOP] = {
+ {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0x00000000,
+ 0x00000000,0x00000000,0x00000001,0xFFFFFFFF},
+ {0xFFFFFFFE,0xFFFFFFFF,0xFFFFFFFF,0x00000001,
+ 0x00000000,0x00000000,0x00000002,0xFFFFFFFE},
+ {0xFFFFFFFD,0xFFFFFFFF,0xFFFFFFFF,0x00000002,
+ 0x00000000,0x00000000,0x00000003,0xFFFFFFFD},
+ {0xFFFFFFFC,0xFFFFFFFF,0xFFFFFFFF,0x00000003,
+ 0x00000000,0x00000000,0x00000004,0xFFFFFFFC},
+ {0xFFFFFFFB,0xFFFFFFFF,0xFFFFFFFF,0x00000004,
+ 0x00000000,0x00000000,0x00000005,0xFFFFFFFB},
+ };
+static const BN_ULONG _nist_p_256_sqr[] = {
+ 0x00000001,0x00000000,0x00000000,0xFFFFFFFE,
+ 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFE,0x00000001,
+ 0xFFFFFFFE,0x00000001,0xFFFFFFFE,0x00000001,
+ 0x00000001,0xFFFFFFFE,0x00000002,0xFFFFFFFE
+ };
+static const BN_ULONG _nist_p_384[][BN_NIST_384_TOP] = {
+ {0xFFFFFFFF,0x00000000,0x00000000,0xFFFFFFFF,0xFFFFFFFE,0xFFFFFFFF,
+ 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
+ {0xFFFFFFFE,0x00000001,0x00000000,0xFFFFFFFE,0xFFFFFFFD,0xFFFFFFFF,
+ 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
+ {0xFFFFFFFD,0x00000002,0x00000000,0xFFFFFFFD,0xFFFFFFFC,0xFFFFFFFF,
+ 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
+ {0xFFFFFFFC,0x00000003,0x00000000,0xFFFFFFFC,0xFFFFFFFB,0xFFFFFFFF,
+ 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
+ {0xFFFFFFFB,0x00000004,0x00000000,0xFFFFFFFB,0xFFFFFFFA,0xFFFFFFFF,
+ 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF},
+ };
+static const BN_ULONG _nist_p_384_sqr[] = {
+ 0x00000001,0xFFFFFFFE,0x00000000,0x00000002,0x00000000,0xFFFFFFFE,
+ 0x00000000,0x00000002,0x00000001,0x00000000,0x00000000,0x00000000,
+ 0xFFFFFFFE,0x00000001,0x00000000,0xFFFFFFFE,0xFFFFFFFD,0xFFFFFFFF,
+ 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF
+ };
static const BN_ULONG _nist_p_521[] = {0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
0xFFFFFFFF,0x000001FF};
+static const BN_ULONG _nist_p_521_sqr[] = {
+ 0x00000001,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+ 0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,0x00000000,
+ 0x00000000,0x00000000,0x00000000,0x00000000,0xFFFFFC00,0xFFFFFFFF,
+ 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
+ 0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,0xFFFFFFFF,
+ 0xFFFFFFFF,0xFFFFFFFF,0x0003FFFF
+ };
+#else
+#error "unsupported BN_BITS2"
#endif
static const BIGNUM _bignum_nist_p_192 =
{
- (BN_ULONG *)_nist_p_192,
+ (BN_ULONG *)_nist_p_192[0],
BN_NIST_192_TOP,
BN_NIST_192_TOP,
0,
@@ -114,7 +225,7 @@ static const BIGNUM _bignum_nist_p_192 =
static const BIGNUM _bignum_nist_p_224 =
{
- (BN_ULONG *)_nist_p_224,
+ (BN_ULONG *)_nist_p_224[0],
BN_NIST_224_TOP,
BN_NIST_224_TOP,
0,
@@ -123,7 +234,7 @@ static const BIGNUM _bignum_nist_p_224 =
static const BIGNUM _bignum_nist_p_256 =
{
- (BN_ULONG *)_nist_p_256,
+ (BN_ULONG *)_nist_p_256[0],
BN_NIST_256_TOP,
BN_NIST_256_TOP,
0,
@@ -132,7 +243,7 @@ static const BIGNUM _bignum_nist_p_256 =
static const BIGNUM _bignum_nist_p_384 =
{
- (BN_ULONG *)_nist_p_384,
+ (BN_ULONG *)_nist_p_384[0],
BN_NIST_384_TOP,
BN_NIST_384_TOP,
0,
@@ -180,7 +291,9 @@ static void nist_cp_bn_0(BN_ULONG *buf, BN_ULONG *a, int top, int max)
int i;
BN_ULONG *_tmp1 = (buf), *_tmp2 = (a);
+#ifdef BN_DEBUG
OPENSSL_assert(top <= max);
+#endif
for (i = (top); i != 0; i--)
*_tmp1++ = *_tmp2++;
for (i = (max) - (top); i != 0; i--)
@@ -198,9 +311,14 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
#if BN_BITS2 == 64
#define bn_cp_64(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
#define bn_64_set_0(to, n) (to)[n] = (BN_ULONG)0;
-/* TBD */
-#define bn_cp_32(to, n, from, m) (to)[n] = (m>=0)?((from)[m]):0;
-#define bn_32_set_0(to, n) (to)[n] = (BN_ULONG)0;
+/*
+ * two following macros are implemented under assumption that they
+ * are called in a sequence with *ascending* n, i.e. as they are...
+ */
+#define bn_cp_32_naked(to, n, from, m) (((n)&1)?(to[(n)/2]|=((m)&1)?(from[(m)/2]&BN_MASK2h):(from[(m)/2]<<32))\
+ :(to[(n)/2] =((m)&1)?(from[(m)/2]>>32):(from[(m)/2]&BN_MASK2l)))
+#define bn_32_set_0(to, n) (((n)&1)?(to[(n)/2]&=BN_MASK2l):(to[(n)/2]=0));
+#define bn_cp_32(to,n,from,m) ((m)>=0)?bn_cp_32_naked(to,n,from,m):bn_32_set_0(to,n)
#else
#define bn_cp_64(to, n, from, m) \
{ \
@@ -221,9 +339,9 @@ static void nist_cp_bn(BN_ULONG *buf, BN_ULONG *a, int top)
#define nist_set_192(to, from, a1, a2, a3) \
{ \
- if (a3 != 0) bn_cp_64(to, 0, from, (a3) - 3) else bn_64_set_0(to, 0)\
+ bn_cp_64(to, 0, from, (a3) - 3) \
bn_cp_64(to, 1, from, (a2) - 3) \
- if (a1 != 0) bn_cp_64(to, 2, from, (a1) - 3) else bn_64_set_0(to, 2)\
+ bn_cp_64(to, 2, from, (a1) - 3) \
}
int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
@@ -237,11 +355,16 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
c_d[BN_NIST_192_TOP],
*res;
size_t mask;
+ static const BIGNUM _bignum_nist_p_192_sqr = {
+ (BN_ULONG *)_nist_p_192_sqr,
+ sizeof(_nist_p_192_sqr)/sizeof(_nist_p_192_sqr[0]),
+ sizeof(_nist_p_192_sqr)/sizeof(_nist_p_192_sqr[0]),
+ 0,BN_FLG_STATIC_DATA };
field = &_bignum_nist_p_192; /* just to make sure */
- if (BN_is_negative(a) || a->top > 2*BN_NIST_192_TOP)
- return BN_nnmod(r, field, a, ctx);
+ if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_192_sqr)>=0)
+ return BN_nnmod(r, a, field, ctx);
i = BN_ucmp(field, a);
if (i == 0)
@@ -265,50 +388,49 @@ int BN_nist_mod_192(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
nist_cp_bn_0(buf, a_d + BN_NIST_192_TOP, top - BN_NIST_192_TOP, BN_NIST_192_TOP);
nist_set_192(t_d, buf, 0, 3, 3);
- carry = bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_192,BN_NIST_192_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
nist_set_192(t_d, buf, 4, 4, 0);
- carry = bn_add_words(r_d, res, t_d, BN_NIST_192_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_192,BN_NIST_192_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
nist_set_192(t_d, buf, 5, 5, 5)
- carry = bn_add_words(r_d, res, t_d, BN_NIST_192_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_192,BN_NIST_192_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+ carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_192_TOP);
+ if (carry > 0)
+ carry = (int)bn_sub_words(r_d,r_d,_nist_p_192[carry-1],BN_NIST_192_TOP);
+ else
+ carry = 1;
+
+ /*
+ * we need 'if (carry==0 || result>=modulus) result-=modulus;'
+ * as comparison implies subtraction, we can write
+ * 'tmp=result-modulus; if (!carry || !borrow) result=tmp;'
+ * this is what happens below, but without explicit if:-) a.
+ */
+ mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_192[0],BN_NIST_192_TOP);
+ mask &= 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&~mask) | ((size_t)r_d&mask));
nist_cp_bn(r_d, res, BN_NIST_192_TOP);
r->top = BN_NIST_192_TOP;
bn_correct_top(r);
- if (BN_ucmp(field, r) <= 0)
- {
- if (!BN_usub(r, r, field)) return 0;
- }
-
return 1;
}
+typedef BN_ULONG (*bn_addsub_f)(BN_ULONG *,const BN_ULONG *,const BN_ULONG *,int);
+
#define nist_set_224(to, from, a1, a2, a3, a4, a5, a6, a7) \
{ \
- if (a7 != 0) bn_cp_32(to, 0, from, (a7) - 7) else bn_32_set_0(to, 0)\
- if (a6 != 0) bn_cp_32(to, 1, from, (a6) - 7) else bn_32_set_0(to, 1)\
- if (a5 != 0) bn_cp_32(to, 2, from, (a5) - 7) else bn_32_set_0(to, 2)\
- if (a4 != 0) bn_cp_32(to, 3, from, (a4) - 7) else bn_32_set_0(to, 3)\
- if (a3 != 0) bn_cp_32(to, 4, from, (a3) - 7) else bn_32_set_0(to, 4)\
- if (a2 != 0) bn_cp_32(to, 5, from, (a2) - 7) else bn_32_set_0(to, 5)\
- if (a1 != 0) bn_cp_32(to, 6, from, (a1) - 7) else bn_32_set_0(to, 6)\
+ bn_cp_32(to, 0, from, (a7) - 7) \
+ bn_cp_32(to, 1, from, (a6) - 7) \
+ bn_cp_32(to, 2, from, (a5) - 7) \
+ bn_cp_32(to, 3, from, (a4) - 7) \
+ bn_cp_32(to, 4, from, (a3) - 7) \
+ bn_cp_32(to, 5, from, (a2) - 7) \
+ bn_cp_32(to, 6, from, (a1) - 7) \
}
int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
BN_CTX *ctx)
{
-#if BN_BITS2 == 32
int top = a->top, i;
int carry;
BN_ULONG *r_d, *a_d = a->d;
@@ -317,11 +439,18 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
c_d[BN_NIST_224_TOP],
*res;
size_t mask;
+ union { bn_addsub_f f; size_t p; } u;
+ static const BIGNUM _bignum_nist_p_224_sqr = {
+ (BN_ULONG *)_nist_p_224_sqr,
+ sizeof(_nist_p_224_sqr)/sizeof(_nist_p_224_sqr[0]),
+ sizeof(_nist_p_224_sqr)/sizeof(_nist_p_224_sqr[0]),
+ 0,BN_FLG_STATIC_DATA };
+
field = &_bignum_nist_p_224; /* just to make sure */
- if (BN_is_negative(a) || a->top > 2*BN_NIST_224_TOP)
- return BN_nnmod(r, field, a, ctx);
+ if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_224_sqr)>=0)
+ return BN_nnmod(r, a, field, ctx);
i = BN_ucmp(field, a);
if (i == 0)
@@ -342,72 +471,77 @@ int BN_nist_mod_224(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
else
r_d = a_d;
+#if BN_BITS2==64
+ /* copy upper 256 bits of 448 bit number ... */
+ nist_cp_bn_0(t_d, a_d + (BN_NIST_224_TOP-1), top - (BN_NIST_224_TOP-1), BN_NIST_224_TOP);
+ /* ... and right shift by 32 to obtain upper 224 bits */
+ nist_set_224(buf, t_d, 14, 13, 12, 11, 10, 9, 8);
+ /* truncate lower part to 224 bits too */
+ r_d[BN_NIST_224_TOP-1] &= BN_MASK2l;
+#else
nist_cp_bn_0(buf, a_d + BN_NIST_224_TOP, top - BN_NIST_224_TOP, BN_NIST_224_TOP);
-
+#endif
nist_set_224(t_d, buf, 10, 9, 8, 7, 0, 0, 0);
- carry = bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_224,BN_NIST_224_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ carry = (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
nist_set_224(t_d, buf, 0, 13, 12, 11, 0, 0, 0);
- carry = bn_add_words(r_d, res, t_d, BN_NIST_224_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_224,BN_NIST_224_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_224_TOP);
nist_set_224(t_d, buf, 13, 12, 11, 10, 9, 8, 7);
-#if BRANCH_FREE
- carry = bn_sub_words(r_d, res, t_d, BN_NIST_224_TOP);
- bn_add_words(c_d,r_d,_nist_p_224,BN_NIST_224_TOP);
- mask = 0-(size_t)carry;
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-#else
- if (bn_sub_words(r_d, res, t_d, BN_NIST_224_TOP))
- bn_add_words(r_d,r_d,_nist_p_224,BN_NIST_224_TOP);
-#endif
+ carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
nist_set_224(t_d, buf, 0, 0, 0, 0, 13, 12, 11);
-#if BRANCH_FREE
- carry = bn_sub_words(r_d, res, t_d, BN_NIST_224_TOP);
- bn_add_words(c_d,r_d,_nist_p_224,BN_NIST_224_TOP);
- mask = 0-(size_t)carry;
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+ carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP);
- nist_cp_bn(r_d, res, BN_NIST_224_TOP);
-#else
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_224_TOP))
- bn_add_words(r_d,r_d,_nist_p_224,BN_NIST_224_TOP);
+#if BN_BITS2==64
+ carry = (int)(r_d[BN_NIST_224_TOP-1]>>32);
#endif
- r->top = BN_NIST_224_TOP;
- bn_correct_top(r);
-
- if (BN_ucmp(field, r) <= 0)
+ u.f = bn_sub_words;
+ if (carry > 0)
{
- if (!BN_usub(r, r, field)) return 0;
+ carry = (int)bn_sub_words(r_d,r_d,_nist_p_224[carry-1],BN_NIST_224_TOP);
+#if BN_BITS2==64
+ carry=(int)(~(r_d[BN_NIST_224_TOP-1]>>32))&1;
+#endif
}
+ else if (carry < 0)
+ {
+ /* it's a bit more comlicated logic in this case.
+ * if bn_add_words yields no carry, then result
+ * has to be adjusted by unconditionally *adding*
+ * the modulus. but if it does, then result has
+ * to be compared to the modulus and conditionally
+ * adjusted by *subtracting* the latter. */
+ carry = (int)bn_add_words(r_d,r_d,_nist_p_224[-carry-1],BN_NIST_224_TOP);
+ mask = 0-(size_t)carry;
+ u.p = ((size_t)bn_sub_words&mask) | ((size_t)bn_add_words&~mask);
+ }
+ else
+ carry = 1;
+
+ /* otherwise it's effectively same as in BN_nist_mod_192... */
+ mask = 0-(size_t)(*u.f)(c_d,r_d,_nist_p_224[0],BN_NIST_224_TOP);
+ mask &= 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&~mask) | ((size_t)r_d&mask));
+ nist_cp_bn(r_d, res, BN_NIST_224_TOP);
+ r->top = BN_NIST_224_TOP;
+ bn_correct_top(r);
return 1;
-#else /* BN_BITS!=32 */
- return 0;
-#endif
}
#define nist_set_256(to, from, a1, a2, a3, a4, a5, a6, a7, a8) \
{ \
- if (a8 != 0) bn_cp_32(to, 0, from, (a8) - 8) else bn_32_set_0(to, 0)\
- if (a7 != 0) bn_cp_32(to, 1, from, (a7) - 8) else bn_32_set_0(to, 1)\
- if (a6 != 0) bn_cp_32(to, 2, from, (a6) - 8) else bn_32_set_0(to, 2)\
- if (a5 != 0) bn_cp_32(to, 3, from, (a5) - 8) else bn_32_set_0(to, 3)\
- if (a4 != 0) bn_cp_32(to, 4, from, (a4) - 8) else bn_32_set_0(to, 4)\
- if (a3 != 0) bn_cp_32(to, 5, from, (a3) - 8) else bn_32_set_0(to, 5)\
- if (a2 != 0) bn_cp_32(to, 6, from, (a2) - 8) else bn_32_set_0(to, 6)\
- if (a1 != 0) bn_cp_32(to, 7, from, (a1) - 8) else bn_32_set_0(to, 7)\
+ bn_cp_32(to, 0, from, (a8) - 8) \
+ bn_cp_32(to, 1, from, (a7) - 8) \
+ bn_cp_32(to, 2, from, (a6) - 8) \
+ bn_cp_32(to, 3, from, (a5) - 8) \
+ bn_cp_32(to, 4, from, (a4) - 8) \
+ bn_cp_32(to, 5, from, (a3) - 8) \
+ bn_cp_32(to, 6, from, (a2) - 8) \
+ bn_cp_32(to, 7, from, (a1) - 8) \
}
int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
BN_CTX *ctx)
{
-#if BN_BITS2 == 32
int i, top = a->top;
int carry = 0;
register BN_ULONG *a_d = a->d, *r_d;
@@ -416,11 +550,17 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
c_d[BN_NIST_256_TOP],
*res;
size_t mask;
+ union { bn_addsub_f f; size_t p; } u;
+ static const BIGNUM _bignum_nist_p_256_sqr = {
+ (BN_ULONG *)_nist_p_256_sqr,
+ sizeof(_nist_p_256_sqr)/sizeof(_nist_p_256_sqr[0]),
+ sizeof(_nist_p_256_sqr)/sizeof(_nist_p_256_sqr[0]),
+ 0,BN_FLG_STATIC_DATA };
field = &_bignum_nist_p_256; /* just to make sure */
- if (BN_is_negative(a) || a->top > 2*BN_NIST_256_TOP)
- return BN_nnmod(r, field, a, ctx);
+ if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_256_sqr)>=0)
+ return BN_nnmod(r, a, field, ctx);
i = BN_ucmp(field, a);
if (i == 0)
@@ -446,116 +586,84 @@ int BN_nist_mod_256(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
/*S1*/
nist_set_256(t_d, buf, 15, 14, 13, 12, 11, 0, 0, 0);
/*S2*/
- nist_set_256(c_d,buf, 0, 15, 14, 13, 12, 0, 0, 0);
- carry = bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,t_d,_nist_p_256,BN_NIST_256_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)t_d&~mask));
-
- carry = bn_add_words(t_d, res, res, BN_NIST_256_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,t_d,_nist_p_256,BN_NIST_256_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)t_d&~mask));
-
- carry = bn_add_words(r_d, r_d, res, BN_NIST_256_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ nist_set_256(c_d, buf, 0, 15, 14, 13, 12, 0, 0, 0);
+ carry = (int)bn_add_words(t_d, t_d, c_d, BN_NIST_256_TOP);
+ /* left shift */
+ {
+ register BN_ULONG *ap,t,c;
+ ap = t_d;
+ c=0;
+ for (i = BN_NIST_256_TOP; i != 0; --i)
+ {
+ t= *ap;
+ *(ap++)=((t<<1)|c)&BN_MASK2;
+ c=(t & BN_TBIT)?1:0;
+ }
+ carry <<= 1;
+ carry |= c;
+ }
+ carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
/*S3*/
nist_set_256(t_d, buf, 15, 14, 0, 0, 0, 10, 9, 8);
- carry = bn_add_words(r_d, res, t_d, BN_NIST_256_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
/*S4*/
nist_set_256(t_d, buf, 8, 13, 15, 14, 13, 11, 10, 9);
- carry = bn_add_words(r_d, res, t_d, BN_NIST_256_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_256_TOP);
/*D1*/
nist_set_256(t_d, buf, 10, 8, 0, 0, 0, 13, 12, 11);
-#if BRANCH_FREE
- carry = bn_sub_words(r_d, res, t_d, BN_NIST_256_TOP);
- bn_add_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
- mask = 0-(size_t)carry;
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-#else
- if (bn_sub_words(r_d, res, t_d, BN_NIST_256_TOP))
- bn_add_words(r_d,r_d,_nist_p_256,BN_NIST_256_TOP);
-#endif
+ carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
/*D2*/
nist_set_256(t_d, buf, 11, 9, 0, 0, 15, 14, 13, 12);
-#if BRANCH_FREE
- carry = bn_sub_words(r_d, res, t_d, BN_NIST_256_TOP);
- bn_add_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
- mask = 0-(size_t)carry;
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-#else
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP))
- bn_add_words(r_d,r_d,_nist_p_256,BN_NIST_256_TOP);
-#endif
+ carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
/*D3*/
nist_set_256(t_d, buf, 12, 0, 10, 9, 8, 15, 14, 13);
-#if BRANCH_FREE
- carry = bn_sub_words(r_d, res, t_d, BN_NIST_256_TOP);
- bn_add_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
- mask = 0-(size_t)carry;
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-#else
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP))
- bn_add_words(r_d,r_d,_nist_p_256,BN_NIST_256_TOP);
-#endif
+ carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
/*D4*/
nist_set_256(t_d, buf, 13, 0, 11, 10, 9, 0, 15, 14);
-#if BRANCH_FREE
- carry = bn_sub_words(r_d, res, t_d, BN_NIST_256_TOP);
- bn_add_words(c_d,r_d,_nist_p_256,BN_NIST_256_TOP);
- mask = 0-(size_t)carry;
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
- nist_cp_bn(r_d, res, BN_NIST_384_TOP);
-#else
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP))
- bn_add_words(r_d,r_d,_nist_p_256,BN_NIST_256_TOP);
-#endif
- r->top = BN_NIST_256_TOP;
- bn_correct_top(r);
+ carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_256_TOP);
- if (BN_ucmp(field, r) <= 0)
+ /* see BN_nist_mod_224 for explanation */
+ u.f = bn_sub_words;
+ if (carry > 0)
+ carry = (int)bn_sub_words(r_d,r_d,_nist_p_256[carry-1],BN_NIST_256_TOP);
+ else if (carry < 0)
{
- if (!BN_usub(r, r, field)) return 0;
+ carry = (int)bn_add_words(r_d,r_d,_nist_p_256[-carry-1],BN_NIST_256_TOP);
+ mask = 0-(size_t)carry;
+ u.p = ((size_t)bn_sub_words&mask) | ((size_t)bn_add_words&~mask);
}
+ else
+ carry = 1;
+
+ mask = 0-(size_t)(*u.f)(c_d,r_d,_nist_p_256[0],BN_NIST_256_TOP);
+ mask &= 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&~mask) | ((size_t)r_d&mask));
+ nist_cp_bn(r_d, res, BN_NIST_256_TOP);
+ r->top = BN_NIST_256_TOP;
+ bn_correct_top(r);
return 1;
-#else /* BN_BITS!=32 */
- return 0;
-#endif
}
#define nist_set_384(to,from,a1,a2,a3,a4,a5,a6,a7,a8,a9,a10,a11,a12) \
{ \
- if (a12 != 0) bn_cp_32(to, 0, from, (a12) - 12) else bn_32_set_0(to, 0)\
- if (a11 != 0) bn_cp_32(to, 1, from, (a11) - 12) else bn_32_set_0(to, 1)\
- if (a10 != 0) bn_cp_32(to, 2, from, (a10) - 12) else bn_32_set_0(to, 2)\
- if (a9 != 0) bn_cp_32(to, 3, from, (a9) - 12) else bn_32_set_0(to, 3)\
- if (a8 != 0) bn_cp_32(to, 4, from, (a8) - 12) else bn_32_set_0(to, 4)\
- if (a7 != 0) bn_cp_32(to, 5, from, (a7) - 12) else bn_32_set_0(to, 5)\
- if (a6 != 0) bn_cp_32(to, 6, from, (a6) - 12) else bn_32_set_0(to, 6)\
- if (a5 != 0) bn_cp_32(to, 7, from, (a5) - 12) else bn_32_set_0(to, 7)\
- if (a4 != 0) bn_cp_32(to, 8, from, (a4) - 12) else bn_32_set_0(to, 8)\
- if (a3 != 0) bn_cp_32(to, 9, from, (a3) - 12) else bn_32_set_0(to, 9)\
- if (a2 != 0) bn_cp_32(to, 10, from, (a2) - 12) else bn_32_set_0(to, 10)\
- if (a1 != 0) bn_cp_32(to, 11, from, (a1) - 12) else bn_32_set_0(to, 11)\
+ bn_cp_32(to, 0, from, (a12) - 12) \
+ bn_cp_32(to, 1, from, (a11) - 12) \
+ bn_cp_32(to, 2, from, (a10) - 12) \
+ bn_cp_32(to, 3, from, (a9) - 12) \
+ bn_cp_32(to, 4, from, (a8) - 12) \
+ bn_cp_32(to, 5, from, (a7) - 12) \
+ bn_cp_32(to, 6, from, (a6) - 12) \
+ bn_cp_32(to, 7, from, (a5) - 12) \
+ bn_cp_32(to, 8, from, (a4) - 12) \
+ bn_cp_32(to, 9, from, (a3) - 12) \
+ bn_cp_32(to, 10, from, (a2) - 12) \
+ bn_cp_32(to, 11, from, (a1) - 12) \
}
int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
BN_CTX *ctx)
{
-#if BN_BITS2 == 32
int i, top = a->top;
int carry = 0;
register BN_ULONG *r_d, *a_d = a->d;
@@ -564,11 +672,18 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
c_d[BN_NIST_384_TOP],
*res;
size_t mask;
+ union { bn_addsub_f f; size_t p; } u;
+ static const BIGNUM _bignum_nist_p_384_sqr = {
+ (BN_ULONG *)_nist_p_384_sqr,
+ sizeof(_nist_p_384_sqr)/sizeof(_nist_p_384_sqr[0]),
+ sizeof(_nist_p_384_sqr)/sizeof(_nist_p_384_sqr[0]),
+ 0,BN_FLG_STATIC_DATA };
+
field = &_bignum_nist_p_384; /* just to make sure */
- if (BN_is_negative(a) || a->top > 2*BN_NIST_384_TOP)
- return BN_nnmod(r, field, a, ctx);
+ if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_384_sqr)>=0)
+ return BN_nnmod(r, a, field, ctx);
i = BN_ucmp(field, a);
if (i == 0)
@@ -606,171 +721,116 @@ int BN_nist_mod_384(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
}
*ap=c;
}
- carry = bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
+ carry = (int)bn_add_words(r_d+(128/BN_BITS2), r_d+(128/BN_BITS2),
t_d, BN_NIST_256_TOP);
- /*
- * we need if (result>=modulus) subtract(result,modulus);
- * in n-bit space this can be expressed as
- * if (carry || result>=modulus) subtract(result,modulus);
- * the catch is that comparison implies subtraction and
- * therefore one can write tmp=subtract(result,modulus);
- * and then if(carry || !borrow) result=tmp; this's what
- * happens below, but without explicit if:-) a.
- */
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
/*S2 */
- carry = bn_add_words(r_d, res, buf, BN_NIST_384_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ carry += (int)bn_add_words(r_d, r_d, buf, BN_NIST_384_TOP);
/*S3*/
nist_set_384(t_d,buf,20,19,18,17,16,15,14,13,12,23,22,21);
- carry = bn_add_words(r_d, res, t_d, BN_NIST_384_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
/*S4*/
nist_set_384(t_d,buf,19,18,17,16,15,14,13,12,20,0,23,0);
- carry = bn_add_words(r_d, res, t_d, BN_NIST_384_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
/*S5*/
nist_set_384(t_d, buf,0,0,0,0,23,22,21,20,0,0,0,0);
- carry = bn_add_words(r_d, res, t_d, BN_NIST_384_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
/*S6*/
nist_set_384(t_d,buf,0,0,0,0,0,0,23,22,21,0,0,20);
- carry = bn_add_words(r_d, res, t_d, BN_NIST_384_TOP);
- mask = 0-(size_t)bn_sub_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
- mask = ~mask | (0-(size_t)carry);
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-
+ carry += (int)bn_add_words(r_d, r_d, t_d, BN_NIST_384_TOP);
/*D1*/
nist_set_384(t_d,buf,22,21,20,19,18,17,16,15,14,13,12,23);
-#if BRANCH_FREE
- carry = bn_sub_words(r_d, res, t_d, BN_NIST_384_TOP);
- bn_add_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
- mask = 0-(size_t)carry;
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-#else
- if (bn_sub_words(r_d, res, t_d, BN_NIST_384_TOP))
- bn_add_words(r_d,r_d,_nist_p_384,BN_NIST_384_TOP);
-#endif
+ carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
/*D2*/
nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,22,21,20,0);
-#if BRANCH_FREE
- carry = bn_sub_words(r_d, res, t_d, BN_NIST_384_TOP);
- bn_add_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
- mask = 0-(size_t)carry;
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
-#else
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP))
- bn_add_words(r_d,r_d,_nist_p_384,BN_NIST_384_TOP);
-#endif
+ carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
/*D3*/
nist_set_384(t_d,buf,0,0,0,0,0,0,0,23,23,0,0,0);
-#if BRANCH_FREE
- carry = bn_sub_words(r_d, res, t_d, BN_NIST_384_TOP);
- bn_add_words(c_d,r_d,_nist_p_384,BN_NIST_384_TOP);
- mask = 0-(size_t)carry;
- res = (BN_ULONG *)(((size_t)c_d&mask) | ((size_t)r_d&~mask));
+ carry -= (int)bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP);
+ /* see BN_nist_mod_224 for explanation */
+ u.f = bn_sub_words;
+ if (carry > 0)
+ carry = (int)bn_sub_words(r_d,r_d,_nist_p_384[carry-1],BN_NIST_384_TOP);
+ else if (carry < 0)
+ {
+ carry = (int)bn_add_words(r_d,r_d,_nist_p_384[-carry-1],BN_NIST_384_TOP);
+ mask = 0-(size_t)carry;
+ u.p = ((size_t)bn_sub_words&mask) | ((size_t)bn_add_words&~mask);
+ }
+ else
+ carry = 1;
+
+ mask = 0-(size_t)(*u.f)(c_d,r_d,_nist_p_384[0],BN_NIST_384_TOP);
+ mask &= 0-(size_t)carry;
+ res = (BN_ULONG *)(((size_t)c_d&~mask) | ((size_t)r_d&mask));
nist_cp_bn(r_d, res, BN_NIST_384_TOP);
-#else
- if (bn_sub_words(r_d, r_d, t_d, BN_NIST_384_TOP))
- bn_add_words(r_d,r_d,_nist_p_384,BN_NIST_384_TOP);
-#endif
r->top = BN_NIST_384_TOP;
bn_correct_top(r);
- if (BN_ucmp(field, r) <= 0)
- {
- if (!BN_usub(r, r, field)) return 0;
- }
-
return 1;
-#else /* BN_BITS!=32 */
- return 0;
-#endif
}
+#define BN_NIST_521_RSHIFT (521%BN_BITS2)
+#define BN_NIST_521_LSHIFT (BN_BITS2-BN_NIST_521_RSHIFT)
+#define BN_NIST_521_TOP_MASK ((BN_ULONG)BN_MASK2>>BN_NIST_521_LSHIFT)
+
int BN_nist_mod_521(BIGNUM *r, const BIGNUM *a, const BIGNUM *field,
BN_CTX *ctx)
{
-#if BN_BITS2 == 64
-#define BN_NIST_521_TOP_MASK (BN_ULONG)0x1FF
-#elif BN_BITS2 == 32
-#define BN_NIST_521_TOP_MASK (BN_ULONG)0x1FF
-#endif
- int top, ret = 0;
- BIGNUM *tmp;
+ int top = a->top, i;
+ BN_ULONG *r_d, *a_d = a->d,
+ t_d[BN_NIST_521_TOP],
+ val,tmp,*res;
+ size_t mask;
+ static const BIGNUM _bignum_nist_p_521_sqr = {
+ (BN_ULONG *)_nist_p_521_sqr,
+ sizeof(_nist_p_521_sqr)/sizeof(_nist_p_521_sqr[0]),
+ sizeof(_nist_p_521_sqr)/sizeof(_nist_p_521_sqr[0]),
+ 0,BN_FLG_STATIC_DATA };
field = &_bignum_nist_p_521; /* just to make sure */
- if (BN_is_negative(a))
- return BN_nnmod(r, field, a, ctx);
+ if (BN_is_negative(a) || BN_ucmp(a,&_bignum_nist_p_521_sqr)>=0)
+ return BN_nnmod(r, a, field, ctx);
- /* check whether a reduction is necessary */
- top = a->top;
- if (top < BN_NIST_521_TOP || ( top == BN_NIST_521_TOP &&
- (!(a->d[BN_NIST_521_TOP-1] & ~(BN_NIST_521_TOP_MASK)))))
+ i = BN_ucmp(field, a);
+ if (i == 0)
{
- int i = BN_ucmp(field, a);
- if (i == 0)
- {
- BN_zero(r);
- return 1;
- }
- else
- {
-#ifdef BN_DEBUG
- OPENSSL_assert(i > 0); /* because 'field' is 1111...1111 */
-#endif
- return (r == a)? 1 : (BN_copy(r ,a) != NULL);
- }
+ BN_zero(r);
+ return 1;
}
+ else if (i > 0)
+ return (r == a)? 1 : (BN_copy(r ,a) != NULL);
- if (BN_num_bits(a) > 2*521)
- return BN_nnmod(r, field, a, ctx);
-
- BN_CTX_start(ctx);
- tmp = BN_CTX_get(ctx);
- if (!tmp)
- goto err;
-
- if (!bn_wexpand(tmp, BN_NIST_521_TOP))
- goto err;
- nist_cp_bn(tmp->d, a->d, BN_NIST_521_TOP);
-
- tmp->top = BN_NIST_521_TOP;
- tmp->d[BN_NIST_521_TOP-1] &= BN_NIST_521_TOP_MASK;
- bn_correct_top(tmp);
-
- if (!BN_rshift(r, a, 521))
- goto err;
-
- if (!BN_uadd(r, tmp, r))
- goto err;
-
- if (BN_ucmp(field, r) <= 0)
+ if (r != a)
{
- if (!BN_usub(r, r, field)) goto err;
+ if (!bn_wexpand(r,BN_NIST_521_TOP))
+ return 0;
+ r_d = r->d;
+ nist_cp_bn(r_d,a_d, BN_NIST_521_TOP);
}
+ else
+ r_d = a_d;
- ret = 1;
-err:
- BN_CTX_end(ctx);
+ /* upper 521 bits, copy ... */
+ nist_cp_bn_0(t_d,a_d + (BN_NIST_521_TOP-1), top - (BN_NIST_521_TOP-1),BN_NIST_521_TOP);
+ /* ... and right shift */
+ for (val=t_d[0],i=0; i<BN_NIST_521_TOP-1; i++)
+ {
+ tmp = val>>BN_NIST_521_RSHIFT;
+ val = t_d[i+1];
+ t_d[i] = (tmp | val<<BN_NIST_521_LSHIFT) & BN_MASK2;
+ }
+ t_d[i] = val>>BN_NIST_521_RSHIFT;
+ /* lower 521 bits */
+ r_d[i] &= BN_NIST_521_TOP_MASK;
+
+ bn_add_words(r_d,r_d,t_d,BN_NIST_521_TOP);
+ mask = 0-(size_t)bn_sub_words(t_d,r_d,_nist_p_521,BN_NIST_521_TOP);
+ res = (BN_ULONG *)(((size_t)t_d&~mask) | ((size_t)r_d&mask));
+ nist_cp_bn(r_d,res,BN_NIST_521_TOP);
+ r->top = BN_NIST_521_TOP;
+ bn_correct_top(r);
- bn_check_top(r);
- return ret;
+ return 1;
}
diff --git a/crypto/bn/bn_opt.c b/crypto/bn/bn_opt.c
new file mode 100644
index 000000000000..21cbb38f622b
--- /dev/null
+++ b/crypto/bn/bn_opt.c
@@ -0,0 +1,87 @@
+/* crypto/bn/bn_opt.c */
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to. The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code. The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * "This product includes cryptographic software written by
+ * Eric Young (eay@cryptsoft.com)"
+ * The word 'cryptographic' can be left out if the rouines from the library
+ * being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ * the apps directory (application code) you must include an acknowledgement:
+ * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed. i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+#ifndef BN_DEBUG
+# undef NDEBUG /* avoid conflicting definitions */
+# define NDEBUG
+#endif
+
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+#include "cryptlib.h"
+#include "bn_lcl.h"
+
+char *BN_options(void)
+ {
+ static int init=0;
+ static char data[16];
+
+ if (!init)
+ {
+ init++;
+#ifdef BN_LLONG
+ BIO_snprintf(data,sizeof data,"bn(%d,%d)",
+ (int)sizeof(BN_ULLONG)*8,(int)sizeof(BN_ULONG)*8);
+#else
+ BIO_snprintf(data,sizeof data,"bn(%d,%d)",
+ (int)sizeof(BN_ULONG)*8,(int)sizeof(BN_ULONG)*8);
+#endif
+ }
+ return(data);
+ }
diff --git a/crypto/bn/bn_rand.c b/crypto/bn/bn_rand.c
index f51830b12ba8..b376c28ff3ff 100644
--- a/crypto/bn/bn_rand.c
+++ b/crypto/bn/bn_rand.c
@@ -227,7 +227,7 @@ int BN_bntest_rand(BIGNUM *rnd, int bits, int top, int bottom)
/* random number r: 0 <= r < range */
-static int bn_rand_range(int pseudo, BIGNUM *r, BIGNUM *range)
+static int bn_rand_range(int pseudo, BIGNUM *r, const BIGNUM *range)
{
int (*bn_rand)(BIGNUM *, int, int, int) = pseudo ? BN_pseudo_rand : BN_rand;
int n;
@@ -294,12 +294,12 @@ static int bn_rand_range(int pseudo, BIGNUM *r, BIGNUM *range)
}
-int BN_rand_range(BIGNUM *r, BIGNUM *range)
+int BN_rand_range(BIGNUM *r, const BIGNUM *range)
{
return bn_rand_range(0, r, range);
}
-int BN_pseudo_rand_range(BIGNUM *r, BIGNUM *range)
+int BN_pseudo_rand_range(BIGNUM *r, const BIGNUM *range)
{
return bn_rand_range(1, r, range);
}
diff --git a/crypto/bn/bn_shift.c b/crypto/bn/bn_shift.c
index de9312dce231..c4d301afc467 100644
--- a/crypto/bn/bn_shift.c
+++ b/crypto/bn/bn_shift.c
@@ -177,7 +177,7 @@ int BN_rshift(BIGNUM *r, const BIGNUM *a, int n)
nw=n/BN_BITS2;
rb=n%BN_BITS2;
lb=BN_BITS2-rb;
- if (nw > a->top || a->top == 0)
+ if (nw >= a->top || a->top == 0)
{
BN_zero(r);
return(1);
diff --git a/crypto/bn/bn_x931p.c b/crypto/bn/bn_x931p.c
new file mode 100644
index 000000000000..04c5c874ec9f
--- /dev/null
+++ b/crypto/bn/bn_x931p.c
@@ -0,0 +1,272 @@
+/* bn_x931p.c */
+/* Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL
+ * project 2005.
+ */
+/* ====================================================================
+ * Copyright (c) 2005 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com). This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+#include <stdio.h>
+#include <openssl/bn.h>
+
+/* X9.31 routines for prime derivation */
+
+/* X9.31 prime derivation. This is used to generate the primes pi
+ * (p1, p2, q1, q2) from a parameter Xpi by checking successive odd
+ * integers.
+ */
+
+static int bn_x931_derive_pi(BIGNUM *pi, const BIGNUM *Xpi, BN_CTX *ctx,
+ BN_GENCB *cb)
+ {
+ int i = 0;
+ if (!BN_copy(pi, Xpi))
+ return 0;
+ if (!BN_is_odd(pi) && !BN_add_word(pi, 1))
+ return 0;
+ for(;;)
+ {
+ i++;
+ BN_GENCB_call(cb, 0, i);
+ /* NB 27 MR is specificed in X9.31 */
+ if (BN_is_prime_fasttest_ex(pi, 27, ctx, 1, cb))
+ break;
+ if (!BN_add_word(pi, 2))
+ return 0;
+ }
+ BN_GENCB_call(cb, 2, i);
+ return 1;
+ }
+
+/* This is the main X9.31 prime derivation function. From parameters
+ * Xp1, Xp2 and Xp derive the prime p. If the parameters p1 or p2 are
+ * not NULL they will be returned too: this is needed for testing.
+ */
+
+int BN_X931_derive_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
+ const BIGNUM *Xp, const BIGNUM *Xp1, const BIGNUM *Xp2,
+ const BIGNUM *e, BN_CTX *ctx, BN_GENCB *cb)
+ {
+ int ret = 0;
+
+ BIGNUM *t, *p1p2, *pm1;
+
+ /* Only even e supported */
+ if (!BN_is_odd(e))
+ return 0;
+
+ BN_CTX_start(ctx);
+ if (!p1)
+ p1 = BN_CTX_get(ctx);
+
+ if (!p2)
+ p2 = BN_CTX_get(ctx);
+
+ t = BN_CTX_get(ctx);
+
+ p1p2 = BN_CTX_get(ctx);
+
+ pm1 = BN_CTX_get(ctx);
+
+ if (!bn_x931_derive_pi(p1, Xp1, ctx, cb))
+ goto err;
+
+ if (!bn_x931_derive_pi(p2, Xp2, ctx, cb))
+ goto err;
+
+ if (!BN_mul(p1p2, p1, p2, ctx))
+ goto err;
+
+ /* First set p to value of Rp */
+
+ if (!BN_mod_inverse(p, p2, p1, ctx))
+ goto err;
+
+ if (!BN_mul(p, p, p2, ctx))
+ goto err;
+
+ if (!BN_mod_inverse(t, p1, p2, ctx))
+ goto err;
+
+ if (!BN_mul(t, t, p1, ctx))
+ goto err;
+
+ if (!BN_sub(p, p, t))
+ goto err;
+
+ if (p->neg && !BN_add(p, p, p1p2))
+ goto err;
+
+ /* p now equals Rp */
+
+ if (!BN_mod_sub(p, p, Xp, p1p2, ctx))
+ goto err;
+
+ if (!BN_add(p, p, Xp))
+ goto err;
+
+ /* p now equals Yp0 */
+
+ for (;;)
+ {
+ int i = 1;
+ BN_GENCB_call(cb, 0, i++);
+ if (!BN_copy(pm1, p))
+ goto err;
+ if (!BN_sub_word(pm1, 1))
+ goto err;
+ if (!BN_gcd(t, pm1, e, ctx))
+ goto err;
+ if (BN_is_one(t)
+ /* X9.31 specifies 8 MR and 1 Lucas test or any prime test
+ * offering similar or better guarantees 50 MR is considerably
+ * better.
+ */
+ && BN_is_prime_fasttest_ex(p, 50, ctx, 1, cb))
+ break;
+ if (!BN_add(p, p, p1p2))
+ goto err;
+ }
+
+ BN_GENCB_call(cb, 3, 0);
+
+ ret = 1;
+
+ err:
+
+ BN_CTX_end(ctx);
+
+ return ret;
+ }
+
+/* Generate pair of paramters Xp, Xq for X9.31 prime generation.
+ * Note: nbits paramter is sum of number of bits in both.
+ */
+
+int BN_X931_generate_Xpq(BIGNUM *Xp, BIGNUM *Xq, int nbits, BN_CTX *ctx)
+ {
+ BIGNUM *t;
+ int i;
+ /* Number of bits for each prime is of the form
+ * 512+128s for s = 0, 1, ...
+ */
+ if ((nbits < 1024) || (nbits & 0xff))
+ return 0;
+ nbits >>= 1;
+ /* The random value Xp must be between sqrt(2) * 2^(nbits-1) and
+ * 2^nbits - 1. By setting the top two bits we ensure that the lower
+ * bound is exceeded.
+ */
+ if (!BN_rand(Xp, nbits, 1, 0))
+ return 0;
+
+ BN_CTX_start(ctx);
+ t = BN_CTX_get(ctx);
+
+ for (i = 0; i < 1000; i++)
+ {
+ if (!BN_rand(Xq, nbits, 1, 0))
+ return 0;
+ /* Check that |Xp - Xq| > 2^(nbits - 100) */
+ BN_sub(t, Xp, Xq);
+ if (BN_num_bits(t) > (nbits - 100))
+ break;
+ }
+
+ BN_CTX_end(ctx);
+
+ if (i < 1000)
+ return 1;
+
+ return 0;
+
+ }
+
+/* Generate primes using X9.31 algorithm. Of the values p, p1, p2, Xp1
+ * and Xp2 only 'p' needs to be non-NULL. If any of the others are not NULL
+ * the relevant parameter will be stored in it.
+ *
+ * Due to the fact that |Xp - Xq| > 2^(nbits - 100) must be satisfied Xp and Xq
+ * are generated using the previous function and supplied as input.
+ */
+
+int BN_X931_generate_prime_ex(BIGNUM *p, BIGNUM *p1, BIGNUM *p2,
+ BIGNUM *Xp1, BIGNUM *Xp2,
+ const BIGNUM *Xp,
+ const BIGNUM *e, BN_CTX *ctx,
+ BN_GENCB *cb)
+ {
+ int ret = 0;
+
+ BN_CTX_start(ctx);
+ if (!Xp1)
+ Xp1 = BN_CTX_get(ctx);
+ if (!Xp2)
+ Xp2 = BN_CTX_get(ctx);
+
+ if (!BN_rand(Xp1, 101, 0, 0))
+ goto error;
+ if (!BN_rand(Xp2, 101, 0, 0))
+ goto error;
+ if (!BN_X931_derive_prime_ex(p, p1, p2, Xp, Xp1, Xp2, e, ctx, cb))
+ goto error;
+
+ ret = 1;
+
+ error:
+ BN_CTX_end(ctx);
+
+ return ret;
+
+ }
+
diff --git a/crypto/bn/bntest.c b/crypto/bn/bntest.c
index 310763eca0c6..cf190380f558 100644
--- a/crypto/bn/bntest.c
+++ b/crypto/bn/bntest.c
@@ -926,7 +926,7 @@ int test_mod_exp(BIO *bp, BN_CTX *ctx)
BN_bntest_rand(b,2+i,0,0); /**/
if (!BN_mod_exp(d,a,b,c,ctx))
- return(00);
+ return(0);
if (bp != NULL)
{
@@ -1028,7 +1028,7 @@ int test_exp(BIO *bp, BN_CTX *ctx)
BN_bntest_rand(b,2+i,0,0); /**/
if (!BN_exp(d,a,b,ctx))
- return(00);
+ return(0);
if (bp != NULL)
{
diff --git a/crypto/buffer/Makefile b/crypto/buffer/Makefile
index 9f3a88d2d6a1..9e0f46e19ab0 100644
--- a/crypto/buffer/Makefile
+++ b/crypto/buffer/Makefile
@@ -17,8 +17,8 @@ TEST=
APPS=
LIB=$(TOP)/libcrypto.a
-LIBSRC= buffer.c buf_err.c
-LIBOBJ= buffer.o buf_err.o
+LIBSRC= buffer.c buf_str.c buf_err.c
+LIBOBJ= buffer.o buf_str.o buf_err.o
SRC= $(LIBSRC)
@@ -33,7 +33,7 @@ top:
all: lib
lib: $(LIBOBJ)
- $(AR) $(LIB) $(LIBOBJ)
+ $(ARX) $(LIB) $(LIBOBJ)
$(RANLIB) $(LIB) || echo Never mind.
@touch lib
@@ -81,6 +81,13 @@ buf_err.o: ../../include/openssl/opensslconf.h ../../include/openssl/opensslv.h
buf_err.o: ../../include/openssl/ossl_typ.h ../../include/openssl/safestack.h
buf_err.o: ../../include/openssl/stack.h ../../include/openssl/symhacks.h
buf_err.o: buf_err.c
+buf_str.o: ../../e_os.h ../../include/openssl/bio.h
+buf_str.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
+buf_str.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
+buf_str.o: ../../include/openssl/lhash.h ../../include/openssl/opensslconf.h
+buf_str.o: ../../include/openssl/opensslv.h ../../include/openssl/ossl_typ.h
+buf_str.o: ../../include/openssl/safestack.h ../../include/openssl/stack.h
+buf_str.o: ../../include/openssl/symhacks.h ../cryptlib.h buf_str.c
buffer.o: ../../e_os.h ../../include/openssl/bio.h
buffer.o: ../../include/openssl/buffer.h ../../include/openssl/crypto.h
buffer.o: ../../include/openssl/e_os2.h ../../include/openssl/err.h
diff --git a/crypto/buffer/buf_str.c b/crypto/buffer/buf_str.c
new file mode 100644
index 000000000000..28dd1e401e2a
--- /dev/null
+++ b/crypto/buffer/buf_str.c
@@ -0,0 +1,116 @@
+/* crypto/buffer/buf_str.c */
+/* ====================================================================
+ * Copyright (c) 2007 The OpenSSL Project. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ * software must display the following acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ * endorse or promote products derived from this software without
+ * prior written permission. For written permission, please contact
+ * licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ * nor may "OpenSSL" appear in their names without prior written
+ * permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ * acknowledgment:
+ * "This product includes software developed by the OpenSSL Project
+ * for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com). This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+#include <stdio.h>
+#include "cryptlib.h"
+#include <openssl/buffer.h>
+
+char *BUF_strdup(const char *str)
+ {
+ if (str == NULL) return(NULL);
+ return BUF_strndup(str, strlen(str));
+ }
+
+char *BUF_strndup(const char *str, size_t siz)
+ {
+ char *ret;
+
+ if (str == NULL) return(NULL);
+
+ ret=OPENSSL_malloc(siz+1);
+ if (ret == NULL)
+ {
+ BUFerr(BUF_F_BUF_STRNDUP,ERR_R_MALLOC_FAILURE);
+ return(NULL);
+ }
+ BUF_strlcpy(ret,str,siz+1);
+ return(ret);
+ }
+
+void *BUF_memdup(const void *data, size_t siz)
+ {
+ void *ret;
+
+ if (data == NULL) return(NULL);
+
+ ret=OPENSSL_malloc(siz);
+ if (ret == NULL)
+ {
+ BUFerr(BUF_F_BUF_MEMDUP,ERR_R_MALLOC_FAILURE);
+ return(NULL);
+ }
+ return memcpy(ret, data, siz);
+ }
+
+size_t BUF_strlcpy(char *dst, const char *src, size_t size)
+ {
+ size_t l = 0;
+ for(; size > 1 && *src; size--)
+ {
+ *dst++ = *src++;
+ l++;
+ }
+ if (size)
+ *dst = '\0';
+ return l + strlen(src);
+ }
+
+size_t BUF_strlcat(char *dst, const char *src, size_t size)
+ {
+ size_t l = 0;
+ for(; size > 0 && *dst; size--, dst++)
+ l++;
+ return l + BUF_strlcpy(dst, src, size);
+ }
diff --git a/crypto/buffer/buffer.c b/crypto/buffer/buffer.c
index 3bf03c7eff07..b3e947771d58 100644
--- a/crypto/buffer/buffer.c
+++ b/crypto/buffer/buffer.c
@@ -161,61 +161,3 @@ int BUF_MEM_grow_clean(BUF_MEM *str, int len)
}
return(len);
}
-
-char *BUF_strdup(const char *str)
- {
- if (str == NULL) return(NULL);
- return BUF_strndup(str, strlen(str));
- }
-
-char *BUF_strndup(const char *str, size_t siz)
- {
- char *ret;
-
- if (str == NULL) return(NULL);
-
- ret=OPENSSL_malloc(siz+1);
- if (ret == NULL)
- {
- BUFerr(BUF_F_BUF_STRNDUP,ERR_R_MALLOC_FAILURE);
- return(NULL);
- }
- BUF_strlcpy(ret,str,siz+1);
- return(ret);
- }
-
-void *BUF_memdup(const void *data, size_t siz)
- {
- void *ret;
-
- if (data == NULL) return(NULL);
-
- ret=OPENSSL_malloc(siz);
- if (ret == NULL)
- {
- BUFerr(BUF_F_BUF_MEMDUP,ERR_R_MALLOC_FAILURE);
- return(NULL);
- }
- return memcpy(ret, data, siz);
- }
-
-size_t BUF_strlcpy(char *dst, const char *src, size_t size)
- {
- size_t l = 0;
- for(; size > 1 && *src; size--)
- {
- *dst++ = *src++;
- l++;
- }
- if (size)
- *dst = '\0';
- return l + strlen(src);
- }
-
-size_t BUF_strlcat(char *dst, const char *src, size_t size)
- {
- size_t l = 0;
- for(; size > 0 && *dst; size--, dst++)
- l++;
- return l + BUF_strlcpy(dst, src, size);
- }
diff --git a/crypto/camellia/Makefile b/crypto/camellia/Makefile
index 1579de5ce51f..dfb12951fd4a 100644
--- a/crypto/camellia/Makefile
+++ b/crypto/camellia/Makefile
@@ -41,7 +41,7 @@ top:
all: lib
lib: $(LIBOBJ)
- $(AR) $(LIB) $(LIBOBJ)
+ $(ARX) $(LIB) $(LIBOBJ)
$(RANLIB) $(LIB) || echo Never mind.
@touch lib
diff --git a/crypto/camellia/asm/cmll-x86.pl b/crypto/camellia/asm/cmll-x86.pl
new file mode 100755
index 000000000000..0812815bfb8f
--- /dev/null
+++ b/crypto/camellia/asm/cmll-x86.pl
@@ -0,0 +1,1138 @@
+#!/usr/bin/env perl
+
+# ====================================================================
+# Copyright (c) 2008 Andy Polyakov <appro@openssl.org>
+#
+# This module may be used under the terms of either the GNU General
+# Public License version 2 or later, the GNU Lesser General Public
+# License version 2.1 or later, the Mozilla Public License version
+# 1.1 or the BSD License. The exact terms of either license are
+# distributed along with this module. For further details see
+# http://www.openssl.org/~appro/camellia/.
+# ====================================================================
+
+# Performance in cycles per processed byte (less is better) in
+# 'openssl speed ...' benchmark:
+#
+# AMD K8 Core2 PIII P4
+# -evp camellia-128-ecb 21.5 22.8 27.0 28.9
+# + over gcc 3.4.6 +90/11% +70/10% +53/4% +160/64%
+# + over icc 8.0 +48/19% +21/15% +21/17% +55/37%
+#
+# camellia-128-cbc 17.3 21.1 23.9 25.9
+#
+# 128-bit key setup 196 280 256 240 cycles/key
+# + over gcc 3.4.6 +30/0% +17/11% +11/0% +63/40%
+# + over icc 8.0 +18/3% +10/0% +10/3% +21/10%
+#
+# Pairs of numbers in "+" rows represent performance improvement over
+# compiler generated position-independent code, PIC, and non-PIC
+# respectively. PIC results are of greater relevance, as this module
+# is position-independent, i.e. suitable for a shared library or PIE.
+# Position independence "costs" one register, which is why compilers
+# are so close with non-PIC results, they have an extra register to